Skip to content
Snippets Groups Projects
wasmobj.go 34.7 KiB
Newer Older
  • Learn to ignore specific revisions
  • // Copyright 2018 The Go Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style
    // license that can be found in the LICENSE file.
    
    package wasm
    
    import (
    	"bytes"
    	"cmd/internal/obj"
    	"cmd/internal/objabi"
    	"cmd/internal/sys"
    	"encoding/binary"
    	"fmt"
    
    	"io"
    	"math"
    )
    
    // Register maps each register name to its REG_* register number.
    // init inverts this table into registerNames, which rconv uses to turn a
    // register number back into its name.
    var Register = map[string]int16{

    	// Registers that assemble's regVars table marks as global.
    	"SP":    REG_SP,
    	"CTXT":  REG_CTXT,
    	"g":     REG_g,
    	"RET0":  REG_RET0,
    	"RET1":  REG_RET1,
    	"RET2":  REG_RET2,
    	"RET3":  REG_RET3,
    	"PAUSE": REG_PAUSE,


    	// Numbered R registers, R0 through R15.
    	"R0":  REG_R0,
    	"R1":  REG_R1,
    	"R2":  REG_R2,
    	"R3":  REG_R3,
    	"R4":  REG_R4,
    	"R5":  REG_R5,
    	"R6":  REG_R6,
    	"R7":  REG_R7,
    	"R8":  REG_R8,
    	"R9":  REG_R9,
    	"R10": REG_R10,
    	"R11": REG_R11,
    	"R12": REG_R12,
    	"R13": REG_R13,
    	"R14": REG_R14,
    	"R15": REG_R15,

    	// Numbered F registers, first group: F0 through F15.
    	"F0":  REG_F0,
    	"F1":  REG_F1,
    	"F2":  REG_F2,
    	"F3":  REG_F3,
    	"F4":  REG_F4,
    	"F5":  REG_F5,
    	"F6":  REG_F6,
    	"F7":  REG_F7,
    	"F8":  REG_F8,
    	"F9":  REG_F9,
    	"F10": REG_F10,
    	"F11": REG_F11,
    	"F12": REG_F12,
    	"F13": REG_F13,
    	"F14": REG_F14,
    	"F15": REG_F15,

    	// Numbered F registers, second group: F16 through F31.
    	"F16": REG_F16,
    	"F17": REG_F17,
    	"F18": REG_F18,
    	"F19": REG_F19,
    	"F20": REG_F20,
    	"F21": REG_F21,
    	"F22": REG_F22,
    	"F23": REG_F23,
    	"F24": REG_F24,
    	"F25": REG_F25,
    	"F26": REG_F26,
    	"F27": REG_F27,
    	"F28": REG_F28,
    	"F29": REG_F29,
    	"F30": REG_F30,
    	"F31": REG_F31,


    }
    
    // registerNames is the inverse of Register: it is indexed by
    // (register number - MINREG) and holds that register's name. Slots for
    // register numbers that have no entry in Register stay "".
    var registerNames []string

    // init registers this package's register range and opcode names with the
    // obj package and builds the registerNames lookup table consumed by rconv.
    func init() {
    	obj.RegisterRegister(MINREG, MAXREG, rconv)
    	obj.RegisterOpcode(obj.ABaseWasm, Anames)

    	// Fill a local table first, then publish it in one assignment.
    	names := make([]string, MAXREG-MINREG)
    	for regName, regNum := range Register {
    		names[regNum-MINREG] = regName
    	}
    	registerNames = names
    }
    
    func rconv(r int) string {
    	return registerNames[r-MINREG]
    }
    
    // unaryDst is the set of instructions whose single operand belongs in
    // Prog.To rather than Prog.From. The appendp helper in preprocess consults
    // it when it is given exactly one argument, and the table is also published
    // as Linkwasm.UnaryDst.
    var unaryDst = map[obj.As]bool{
    	ASet:          true,
    	ATee:          true,
    	ACall:         true,
    	ACallIndirect: true,
    	ABr:           true,
    	ABrIf:         true,
    	ABrTable:      true,
    	AI32Store:     true,
    	AI64Store:     true,
    	AF32Store:     true,
    	AF64Store:     true,
    	AI32Store8:    true,
    	AI32Store16:   true,
    	AI64Store8:    true,
    	AI64Store16:   true,
    	AI64Store32:   true,
    	ACALLNORESUME: true,
    }
    
    // Linkwasm is the obj.LinkArch for sys.ArchWasm. It wires this package's
    // instinit, preprocess and assemble functions, plus the unaryDst table,
    // into the corresponding LinkArch fields.
    var Linkwasm = obj.LinkArch{
    	Arch:       sys.ArchWasm,
    	Init:       instinit,
    	Preprocess: preprocess,
    	Assemble:   assemble,
    	UnaryDst:   unaryDst,
    }
    
    // Runtime symbols referenced by the generated code. All three are resolved
    // by instinit.
    var (
    	morestack       *obj.LSym // "runtime.morestack"
    	morestackNoCtxt *obj.LSym // "runtime.morestack_noctxt"
    	sigpanic        *obj.LSym // "runtime.sigpanic", looked up with obj.ABIInternal
    )
    
    const (
    	/* mark flags */

    	// WasmImport is stored in Prog.Mark on the Call instruction that
    	// genWasmImportWrapper emits for an imported function.
    	WasmImport = 1 << 0
    )
    
    
    const (
    	// GojsModule is the special wasm module name that, when used as the
    	// module name in //go:wasmimport, causes the generated code to pass the
    	// stack pointer directly to the imported function. In other words, any
    	// function that uses the gojs module understands the internal Go WASM
    	// ABI directly.
    	GojsModule = "gojs"
    )
    
    
    // instinit resolves the runtime symbols that preprocess refers to and
    // caches them in the package-level morestack, morestackNoCtxt and sigpanic
    // variables.
    func instinit(ctxt *obj.Link) {
    	// Operands are evaluated left to right, so the lookup order is unchanged.
    	morestack, morestackNoCtxt = ctxt.Lookup("runtime.morestack"), ctxt.Lookup("runtime.morestack_noctxt")
    	sigpanic = ctxt.LookupABI("runtime.sigpanic", obj.ABIInternal)
    }
    
    func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
    	appendp := func(p *obj.Prog, as obj.As, args ...obj.Addr) *obj.Prog {
    		if p.As != obj.ANOP {
    			p2 := obj.Appendp(p, newprog)
    			p2.Pc = p.Pc
    			p = p2
    		}
    		p.As = as
    		switch len(args) {
    		case 0:
    			p.From = obj.Addr{}
    			p.To = obj.Addr{}
    		case 1:
    			if unaryDst[as] {
    				p.From = obj.Addr{}
    				p.To = args[0]
    			} else {
    				p.From = args[0]
    				p.To = obj.Addr{}
    			}
    		case 2:
    			p.From = args[0]
    			p.To = args[1]
    		default:
    			panic("bad args")
    		}
    		return p
    	}
    
    
    	framesize := s.Func().Text.To.Offset
    
    	if framesize < 0 {
    		panic("bad framesize")
    	}
    
    	s.Func().Args = s.Func().Text.To.Val.(int32)
    	s.Func().Locals = int32(framesize)
    
    	// If the function exists just to call out to a wasmimport, then
    	// generate the code to translate from our internal Go-stack
    	// based call convention to the native webassembly call convention.
    
    	if s.Func().WasmImport != nil {
    		genWasmImportWrapper(s, appendp)
    
    
    		// It should be 0 already, but we'll set it to 0 anyway just to be sure
    		// that the code below which adds frame expansion code to the function body
    		// isn't run. We don't want the frame expansion code because our function
    		// body is just the code to translate and call the imported function.
    		framesize = 0
    	} else if s.Func().Text.From.Sym.Wrapper() {
    
    		// if g._panic != nil && g._panic.argp == FP {
    		//   g._panic.argp = bottom-of-frame
    		// }
    		//
    		// MOVD g_panic(g), R0
    		// Get R0
    		// I64Eqz
    		// Not
    		// If
    		//   Get SP
    
    		//   I64Const $framesize+8
    		//   I64Add
    		//   I64Load panic_argp(R0)
    		//   I64Eq
    		//   If
    		//     MOVD SP, panic_argp(R0)
    		//   End
    		// End
    
    		gpanic := obj.Addr{
    			Type:   obj.TYPE_MEM,
    			Reg:    REGG,
    			Offset: 4 * 8, // g_panic
    		}
    
    		panicargp := obj.Addr{
    			Type:   obj.TYPE_MEM,
    			Reg:    REG_R0,
    			Offset: 0, // panic.argp
    		}
    
    
    		p := s.Func().Text
    
    		p = appendp(p, AMOVD, gpanic, regAddr(REG_R0))
    
    		p = appendp(p, AGet, regAddr(REG_R0))
    		p = appendp(p, AI64Eqz)
    		p = appendp(p, ANot)
    		p = appendp(p, AIf)
    
    		p = appendp(p, AGet, regAddr(REG_SP))
    
    		p = appendp(p, AI64ExtendI32U)
    
    		p = appendp(p, AI64Const, constAddr(framesize+8))
    		p = appendp(p, AI64Add)
    		p = appendp(p, AI64Load, panicargp)
    
    		p = appendp(p, AI64Eq)
    		p = appendp(p, AIf)
    		p = appendp(p, AMOVD, regAddr(REG_SP), panicargp)
    		p = appendp(p, AEnd)
    
    		p = appendp(p, AEnd)
    	}
    
    	if framesize > 0 {
    
    		p := s.Func().Text
    
    		p = appendp(p, AGet, regAddr(REG_SP))
    		p = appendp(p, AI32Const, constAddr(framesize))
    		p = appendp(p, AI32Sub)
    		p = appendp(p, ASet, regAddr(REG_SP))
    		p.Spadj = int32(framesize)
    	}
    
    
    	// If the framesize is 0, then imply nosplit because it's a specially
    	// generated function.
    	needMoreStack := framesize > 0 && !s.Func().Text.From.Sym.NoSplit()
    
    
    	// If the maymorestack debug option is enabled, insert the
    	// call to maymorestack *before* processing resume points so
    	// we can construct a resume point after maymorestack for
    	// morestack to resume at.
    	var pMorestack = s.Func().Text
    	if needMoreStack && ctxt.Flag_maymorestack != "" {
    		p := pMorestack
    
    		// Save REGCTXT on the stack.
    		const tempFrame = 8
    		p = appendp(p, AGet, regAddr(REG_SP))
    		p = appendp(p, AI32Const, constAddr(tempFrame))
    		p = appendp(p, AI32Sub)
    		p = appendp(p, ASet, regAddr(REG_SP))
    		p.Spadj = tempFrame
    		ctxtp := obj.Addr{
    			Type:   obj.TYPE_MEM,
    			Reg:    REG_SP,
    			Offset: 0,
    		}
    		p = appendp(p, AMOVD, regAddr(REGCTXT), ctxtp)
    
    		// maymorestack must not itself preempt because we
    		// don't have full stack information, so this can be
    		// ACALLNORESUME.
    		p = appendp(p, ACALLNORESUME, constAddr(0))
    		// See ../x86/obj6.go
    		sym := ctxt.LookupABI(ctxt.Flag_maymorestack, s.ABI())
    		p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: sym}
    
    		// Restore REGCTXT.
    		p = appendp(p, AMOVD, ctxtp, regAddr(REGCTXT))
    		p = appendp(p, AGet, regAddr(REG_SP))
    		p = appendp(p, AI32Const, constAddr(tempFrame))
    		p = appendp(p, AI32Add)
    		p = appendp(p, ASet, regAddr(REG_SP))
    		p.Spadj = -tempFrame
    
    		// Add an explicit ARESUMEPOINT after maymorestack for
    		// morestack to resume at.
    		pMorestack = appendp(p, ARESUMEPOINT)
    	}
    
    
    	// Introduce resume points for CALL instructions
    	// and collect other explicit resume points.
    	numResumePoints := 0
    	explicitBlockDepth := 0
    	pc := int64(0) // pc is only incremented when necessary, this avoids bloat of the BrTable instruction
    	var tableIdxs []uint64
    	tablePC := int64(0)
    
    	base := ctxt.PosTable.Pos(s.Func().Text.Pos).Base()
    	for p := s.Func().Text; p != nil; p = p.Link {
    
    		prevBase := base
    		base = ctxt.PosTable.Pos(p.Pos).Base()
    		switch p.As {
    		case ABlock, ALoop, AIf:
    			explicitBlockDepth++
    
    		case AEnd:
    			if explicitBlockDepth == 0 {
    				panic("End without block")
    			}
    			explicitBlockDepth--
    
    		case ARESUMEPOINT:
    			if explicitBlockDepth != 0 {
    				panic("RESUME can only be used on toplevel")
    			}
    			p.As = AEnd
    			for tablePC <= pc {
    				tableIdxs = append(tableIdxs, uint64(numResumePoints))
    				tablePC++
    			}
    			numResumePoints++
    			pc++
    
    		case obj.ACALL:
    			if explicitBlockDepth != 0 {
    				panic("CALL can only be used on toplevel, try CALLNORESUME instead")
    			}
    			appendp(p, ARESUMEPOINT)
    		}
    
    		p.Pc = pc
    
    		// Increase pc whenever some pc-value table needs a new entry. Don't increase it
    		// more often to avoid bloat of the BrTable instruction.
    		// The "base != prevBase" condition detects inlined instructions. They are an
    		// implicit call, so entering and leaving this section affects the stack trace.
    
    		if p.As == ACALLNORESUME || p.As == obj.ANOP || p.As == ANop || p.Spadj != 0 || base != prevBase {
    
    			if p.To.Sym == sigpanic {
    				// The panic stack trace expects the PC at the call of sigpanic,
    				// not the next one. However, runtime.Caller subtracts 1 from the
    				// PC. To make both PC and PC-1 work (have the same line number),
    				// we advance the PC by 2 at sigpanic.
    				pc++
    			}
    
    		}
    	}
    	tableIdxs = append(tableIdxs, uint64(numResumePoints))
    	s.Size = pc + 1
    
    
    	if needMoreStack {
    		p := pMorestack
    
    			// small stack: SP <= stackguard
    			// Get SP
    			// Get g
    			// I32WrapI64
    			// I32Load $stackguard0
    			// I32LeU
    
    			p = appendp(p, AGet, regAddr(REG_SP))
    			p = appendp(p, AGet, regAddr(REGG))
    			p = appendp(p, AI32WrapI64)
    			p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0
    			p = appendp(p, AI32LeU)
    		} else {
    			// large stack: SP-framesize <= stackguard-StackSmall
    			//              SP <= stackguard+(framesize-StackSmall)
    			// Get SP
    			// Get g
    			// I32WrapI64
    			// I32Load $stackguard0
    			// I32Const $(framesize-StackSmall)
    			// I32Add
    			// I32LeU
    
    			p = appendp(p, AGet, regAddr(REG_SP))
    			p = appendp(p, AGet, regAddr(REGG))
    			p = appendp(p, AI32WrapI64)
    			p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0
    
    			p = appendp(p, AI32Const, constAddr(framesize-abi.StackSmall))
    
    			p = appendp(p, AI32Add)
    			p = appendp(p, AI32LeU)
    		}
    		// TODO(neelance): handle wraparound case
    
    		p = appendp(p, AIf)
    
    		// This CALL does *not* have a resume point after it
    		// (we already inserted all of the resume points). As
    		// a result, morestack will resume at the *previous*
    		// resume point (typically, the beginning of the
    		// function) and perform the morestack check again.
    		// This is why we don't need an explicit loop like
    		// other architectures.
    
    		p = appendp(p, obj.ACALL, constAddr(0))
    
    		if s.Func().Text.From.Sym.NeedCtxt() {
    
    			p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestack}
    		} else {
    			p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestackNoCtxt}
    		}
    		p = appendp(p, AEnd)
    	}
    
    
    	// record the branches targeting the entry loop and the unwind exit,
    	// their targets will be filled in later
    	var entryPointLoopBranches []*obj.Prog
    	var unwindExitBranches []*obj.Prog
    
    	currentDepth := 0
    
    	for p := s.Func().Text; p != nil; p = p.Link {
    
    		switch p.As {
    		case ABlock, ALoop, AIf:
    			currentDepth++
    		case AEnd:
    			currentDepth--
    		}
    
    		switch p.As {
    		case obj.AJMP:
    			jmp := *p
    			p.As = obj.ANOP
    
    			if jmp.To.Type == obj.TYPE_BRANCH {
    				// jump to basic block
    				p = appendp(p, AI32Const, constAddr(jmp.To.Val.(*obj.Prog).Pc))
    
    				p = appendp(p, ASet, regAddr(REG_PC_B)) // write next basic block to PC_B
    				p = appendp(p, ABr)                     // jump to beginning of entryPointLoop
    				entryPointLoopBranches = append(entryPointLoopBranches, p)
    
    				break
    			}
    
    			// low-level WebAssembly call to function
    			switch jmp.To.Type {
    			case obj.TYPE_MEM:
    
    				if !notUsePC_B[jmp.To.Sym.Name] {
    					// Set PC_B parameter to function entry.
    					p = appendp(p, AI32Const, constAddr(0))
    				}
    
    				p = appendp(p, ACall, jmp.To)
    
    			case obj.TYPE_NONE:
    				// (target PC is on stack)
    				p = appendp(p, AI32WrapI64)
    				p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
    				p = appendp(p, AI32ShrU)
    
    
    				// Set PC_B parameter to function entry.
    				// We need to push this before pushing the target PC_F,
    				// so temporarily pop PC_F, using our REG_PC_B as a
    				// scratch register, and push it back after pushing 0.
    				p = appendp(p, ASet, regAddr(REG_PC_B))
    				p = appendp(p, AI32Const, constAddr(0))
    				p = appendp(p, AGet, regAddr(REG_PC_B))
    
    
    				p = appendp(p, ACallIndirect)
    
    			default:
    				panic("bad target for JMP")
    			}
    
    			p = appendp(p, AReturn)
    
    		case obj.ACALL, ACALLNORESUME:
    			call := *p
    			p.As = obj.ANOP
    
    			pcAfterCall := call.Link.Pc
    			if call.To.Sym == sigpanic {
    				pcAfterCall-- // sigpanic expects to be called without advancing the pc
    			}
    
    			// SP -= 8
    			p = appendp(p, AGet, regAddr(REG_SP))
    			p = appendp(p, AI32Const, constAddr(8))
    			p = appendp(p, AI32Sub)
    			p = appendp(p, ASet, regAddr(REG_SP))
    
    			// write return address to Go stack
    			p = appendp(p, AGet, regAddr(REG_SP))
    			p = appendp(p, AI64Const, obj.Addr{
    				Type:   obj.TYPE_ADDR,
    				Name:   obj.NAME_EXTERN,
    				Sym:    s,           // PC_F
    				Offset: pcAfterCall, // PC_B
    			})
    			p = appendp(p, AI64Store, constAddr(0))
    
    			// low-level WebAssembly call to function
    			switch call.To.Type {
    			case obj.TYPE_MEM:
    
    				if !notUsePC_B[call.To.Sym.Name] {
    					// Set PC_B parameter to function entry.
    					p = appendp(p, AI32Const, constAddr(0))
    				}
    
    				p = appendp(p, ACall, call.To)
    
    			case obj.TYPE_NONE:
    				// (target PC is on stack)
    				p = appendp(p, AI32WrapI64)
    				p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
    				p = appendp(p, AI32ShrU)
    
    
    				// Set PC_B parameter to function entry.
    				// We need to push this before pushing the target PC_F,
    				// so temporarily pop PC_F, using our PC_B as a
    				// scratch register, and push it back after pushing 0.
    				p = appendp(p, ASet, regAddr(REG_PC_B))
    				p = appendp(p, AI32Const, constAddr(0))
    				p = appendp(p, AGet, regAddr(REG_PC_B))
    
    
    				p = appendp(p, ACallIndirect)
    
    			default:
    				panic("bad target for CALL")
    			}
    
    			// return value of call is on the top of the stack, indicating whether to unwind the WebAssembly stack
    
    			if call.As == ACALLNORESUME && call.To.Sym != sigpanic { // sigpanic unwinds the stack, but it never resumes
    
    				// trying to unwind WebAssembly stack but call has no resume point, terminate with error
    
    				p = appendp(p, obj.AUNDEF)
    
    			} else {
    				// unwinding WebAssembly stack to switch goroutine, return 1
    
    				p = appendp(p, ABrIf)
    				unwindExitBranches = append(unwindExitBranches, p)
    
    			ret := *p
    			p.As = obj.ANOP
    
    			if framesize > 0 {
    				// SP += framesize
    				p = appendp(p, AGet, regAddr(REG_SP))
    				p = appendp(p, AI32Const, constAddr(framesize))
    				p = appendp(p, AI32Add)
    				p = appendp(p, ASet, regAddr(REG_SP))
    				// TODO(neelance): This should theoretically set Spadj, but it only works without.
    				// p.Spadj = int32(-framesize)
    			}
    
    			if ret.To.Type == obj.TYPE_MEM {
    
    				// Set PC_B parameter to function entry.
    
    				p = appendp(p, AI32Const, constAddr(0))
    
    				// low-level WebAssembly call to function
    				p = appendp(p, ACall, ret.To)
    				p = appendp(p, AReturn)
    				break
    			}
    
    			// SP += 8
    			p = appendp(p, AGet, regAddr(REG_SP))
    			p = appendp(p, AI32Const, constAddr(8))
    			p = appendp(p, AI32Add)
    			p = appendp(p, ASet, regAddr(REG_SP))
    
    
    			if ret.As == ARETUNWIND {
    				// function needs to unwind the WebAssembly stack, return 1
    				p = appendp(p, AI32Const, constAddr(1))
    				p = appendp(p, AReturn)
    				break
    			}
    
    			// not unwinding the WebAssembly stack, return 0
    
    			p = appendp(p, AI32Const, constAddr(0))
    			p = appendp(p, AReturn)
    		}
    	}
    
    
    	for p := s.Func().Text; p != nil; p = p.Link {
    
    		switch p.From.Name {
    		case obj.NAME_AUTO:
    
    			p.From.Offset += framesize
    
    		case obj.NAME_PARAM:
    			p.From.Reg = REG_SP
    
    			p.From.Offset += framesize + 8 // parameters are after the frame and the 8-byte return address
    
    		}
    
    		switch p.To.Name {
    		case obj.NAME_AUTO:
    
    			p.To.Offset += framesize
    
    		case obj.NAME_PARAM:
    			p.To.Reg = REG_SP
    
    			p.To.Offset += framesize + 8 // parameters are after the frame and the 8-byte return address
    
    		}
    
    		switch p.As {
    		case AGet:
    			if p.From.Type == obj.TYPE_ADDR {
    				get := *p
    				p.As = obj.ANOP
    
    				switch get.From.Name {
    				case obj.NAME_EXTERN:
    					p = appendp(p, AI64Const, get.From)
    				case obj.NAME_AUTO, obj.NAME_PARAM:
    					p = appendp(p, AGet, regAddr(get.From.Reg))
    					if get.From.Reg == REG_SP {
    
    						p = appendp(p, AI64ExtendI32U)
    
    					}
    					if get.From.Offset != 0 {
    						p = appendp(p, AI64Const, constAddr(get.From.Offset))
    						p = appendp(p, AI64Add)
    					}
    				default:
    					panic("bad Get: invalid name")
    				}
    			}
    
    		case AI32Load, AI64Load, AF32Load, AF64Load, AI32Load8S, AI32Load8U, AI32Load16S, AI32Load16U, AI64Load8S, AI64Load8U, AI64Load16S, AI64Load16U, AI64Load32S, AI64Load32U:
    			if p.From.Type == obj.TYPE_MEM {
    				as := p.As
    				from := p.From
    
    				p.As = AGet
    				p.From = regAddr(from.Reg)
    
    				if from.Reg != REG_SP {
    					p = appendp(p, AI32WrapI64)
    				}
    
    				p = appendp(p, as, constAddr(from.Offset))
    			}
    
    		case AMOVB, AMOVH, AMOVW, AMOVD:
    			mov := *p
    			p.As = obj.ANOP
    
    			var loadAs obj.As
    			var storeAs obj.As
    			switch mov.As {
    			case AMOVB:
    				loadAs = AI64Load8U
    				storeAs = AI64Store8
    			case AMOVH:
    				loadAs = AI64Load16U
    				storeAs = AI64Store16
    			case AMOVW:
    				loadAs = AI64Load32U
    				storeAs = AI64Store32
    			case AMOVD:
    				loadAs = AI64Load
    				storeAs = AI64Store
    			}
    
    			appendValue := func() {
    				switch mov.From.Type {
    				case obj.TYPE_CONST:
    					p = appendp(p, AI64Const, constAddr(mov.From.Offset))
    
    				case obj.TYPE_ADDR:
    					switch mov.From.Name {
    					case obj.NAME_NONE, obj.NAME_PARAM, obj.NAME_AUTO:
    						p = appendp(p, AGet, regAddr(mov.From.Reg))
    						if mov.From.Reg == REG_SP {
    
    							p = appendp(p, AI64ExtendI32U)
    
    						}
    						p = appendp(p, AI64Const, constAddr(mov.From.Offset))
    						p = appendp(p, AI64Add)
    					case obj.NAME_EXTERN:
    						p = appendp(p, AI64Const, mov.From)
    					default:
    						panic("bad name for MOV")
    					}
    
    				case obj.TYPE_REG:
    					p = appendp(p, AGet, mov.From)
    					if mov.From.Reg == REG_SP {
    
    						p = appendp(p, AI64ExtendI32U)
    
    					}
    
    				case obj.TYPE_MEM:
    					p = appendp(p, AGet, regAddr(mov.From.Reg))
    					if mov.From.Reg != REG_SP {
    						p = appendp(p, AI32WrapI64)
    					}
    					p = appendp(p, loadAs, constAddr(mov.From.Offset))
    
    				default:
    					panic("bad MOV type")
    				}
    			}
    
    			switch mov.To.Type {
    			case obj.TYPE_REG:
    				appendValue()
    				if mov.To.Reg == REG_SP {
    					p = appendp(p, AI32WrapI64)
    				}
    				p = appendp(p, ASet, mov.To)
    
    			case obj.TYPE_MEM:
    				switch mov.To.Name {
    				case obj.NAME_NONE, obj.NAME_PARAM:
    					p = appendp(p, AGet, regAddr(mov.To.Reg))
    					if mov.To.Reg != REG_SP {
    						p = appendp(p, AI32WrapI64)
    					}
    				case obj.NAME_EXTERN:
    					p = appendp(p, AI32Const, obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN, Sym: mov.To.Sym})
    				default:
    					panic("bad MOV name")
    				}
    				appendValue()
    				p = appendp(p, storeAs, constAddr(mov.To.Offset))
    
    			default:
    				panic("bad MOV type")
    			}
    		}
    
    		p := s.Func().Text
    
    		if len(unwindExitBranches) > 0 {
    			p = appendp(p, ABlock) // unwindExit, used to return 1 when unwinding the stack
    			for _, b := range unwindExitBranches {
    				b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p}
    			}
    		}
    		if len(entryPointLoopBranches) > 0 {
    			p = appendp(p, ALoop) // entryPointLoop, used to jump between basic blocks
    			for _, b := range entryPointLoopBranches {
    				b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p}
    			}
    		}
    		if numResumePoints > 0 {
    			// Add Block instructions for resume points and BrTable to jump to selected resume point.
    			for i := 0; i < numResumePoints+1; i++ {
    				p = appendp(p, ABlock)
    			}
    			p = appendp(p, AGet, regAddr(REG_PC_B)) // read next basic block from PC_B
    			p = appendp(p, ABrTable, obj.Addr{Val: tableIdxs})
    			p = appendp(p, AEnd) // end of Block
    		}
    		for p.Link != nil {
    			p = p.Link // function instructions
    		}
    		if len(entryPointLoopBranches) > 0 {
    			p = appendp(p, AEnd) // end of entryPointLoop
    		}
    		p = appendp(p, obj.AUNDEF)
    		if len(unwindExitBranches) > 0 {
    			p = appendp(p, AEnd) // end of unwindExit
    			p = appendp(p, AI32Const, constAddr(1))
    		}
    	}
    
    	currentDepth = 0
    	blockDepths := make(map[*obj.Prog]int)
    
    	for p := s.Func().Text; p != nil; p = p.Link {
    
    		switch p.As {
    		case ABlock, ALoop, AIf:
    			currentDepth++
    			blockDepths[p] = currentDepth
    		case AEnd:
    			currentDepth--
    		}
    
    		switch p.As {
    		case ABr, ABrIf:
    			if p.To.Type == obj.TYPE_BRANCH {
    				blockDepth, ok := blockDepths[p.To.Val.(*obj.Prog)]
    				if !ok {
    					panic("label not at block")
    				}
    				p.To = constAddr(int64(currentDepth - blockDepth))
    			}
    		}
    
    // genWasmImportWrapper generates the function body of the wrapper for the
    // wasmimport function s, adding instructions after s's Text Prog through
    // appendp. The wrapper must not already have a body. The generated code
    // loads the arguments from the Go stack, calls the import, stores the
    // result (if any) back to the Go stack, and ends with RET.
    func genWasmImportWrapper(s *obj.LSym, appendp func(p *obj.Prog, as obj.As, args ...obj.Addr) *obj.Prog) {
    	imp := s.Func().WasmImport
    	imp.CreateAuxSym()

    	cur := s.Func().Text
    	if cur.Link != nil {
    		panic("wrapper functions for WASM imports should not have a body")
    	}

    	// Operand of the Call instruction: an external memory reference to s.
    	target := obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: s}

    	// The magic "gojs" module means the callee understands the Go
    	// stack-based call convention: it has the signature 'func(sp int)',
    	// reads its params directly off the stack and writes its results into
    	// memory relative to sp. So the stack pointer is all we pass.
    	if imp.Module == GojsModule {
    		cur = appendp(cur, AGet, regAddr(REG_SP))
    		cur = appendp(cur, ACall, target)
    		cur.Mark = WasmImport

    		appendp(cur, obj.ARET)
    		return
    	}

    	resultCount := len(imp.Results)
    	if resultCount > 1 {
    		// TODO(evanphx) implement support for the multi-value proposal:
    		// https://github.com/WebAssembly/multi-value/blob/master/proposals/multi-value/Overview.md
    		panic("invalid results type") // impossible until multi-value proposal has landed
    	}
    	hasResult := resultCount == 1
    	if hasResult {
    		// The result is written to the Go stack relative to the current
    		// stack pointer. Park that sp value on the wasm stack now; the
    		// store emitted after the Call consumes it.
    		cur = appendp(cur, AGet, regAddr(REG_SP))
    	}

    	for _, param := range imp.Params {
    		// Every load consumes one sp value from the wasm stack, and wasm
    		// has no stack dup instruction, so sp is pushed once per param.
    		cur = appendp(cur, AGet, regAddr(REG_SP))

    		// param.Offset is the param's location relative to sp. The return
    		// address is stored as an int64 at the bottom of the frame, i.e.
    		// [return_addr, param3, param2, param1, ...], so skip 8 bytes.
    		offset := constAddr(param.Offset + 8)

    		// Pick the load that moves the value from the Go stack onto the
    		// wasm stack, where the Call picks it up.
    		var load obj.As
    		wrapToI32 := false
    		switch param.Type {
    		case obj.WasmI32:
    			load = AI32Load
    		case obj.WasmI64:
    			load = AI64Load
    		case obj.WasmF32:
    			load = AF32Load
    		case obj.WasmF64:
    			load = AF64Load
    		case obj.WasmPtr:
    			// Loaded as 64 bits from the Go stack, then wrapped to i32.
    			load = AI64Load
    			wrapToI32 = true
    		default:
    			panic("bad param type")
    		}
    		cur = appendp(cur, load, offset)
    		if wrapToI32 {
    			cur = appendp(cur, AI32WrapI64)
    		}
    	}

    	// The call instruction is marked as being for a wasm import so that a
    	// later phase will generate relocation information that allows us to
    	// patch this with the offset of the imported function in the wasm
    	// imports.
    	cur = appendp(cur, ACall, target)
    	cur.Mark = WasmImport

    	if hasResult {
    		res := imp.Results[0]

    		// This is paired with the sp pushed before the params. Using that
    		// cached value keeps the store address independent of anything the
    		// call may have done to sp.
    		var store obj.As
    		extendToI64 := false
    		switch res.Type {
    		case obj.WasmI32:
    			store = AI32Store
    		case obj.WasmI64:
    			store = AI64Store
    		case obj.WasmF32:
    			store = AF32Store
    		case obj.WasmF64:
    			store = AF64Store
    		case obj.WasmPtr:
    			// Zero-extended to i64 before the 64-bit store.
    			store = AI64Store
    			extendToI64 = true
    		default:
    			panic("bad result type")
    		}
    		if extendToI64 {
    			cur = appendp(cur, AI64ExtendI32U)
    		}
    		// As with the params, add 8 to skip the return address.
    		cur = appendp(cur, store, constAddr(res.Offset+8))
    	}

    	appendp(cur, obj.ARET)
    }
    
    
    // constAddr returns an obj.Addr of type TYPE_CONST whose Offset is value.
    func constAddr(value int64) obj.Addr {
    	var a obj.Addr
    	a.Type = obj.TYPE_CONST
    	a.Offset = value
    	return a
    }
    
    // regAddr returns an obj.Addr of type TYPE_REG that refers to register reg.
    func regAddr(reg int16) obj.Addr {
    	var a obj.Addr
    	a.Type = obj.TYPE_REG
    	a.Reg = reg
    	return a
    }
    
    
    // Most Go functions have a single parameter (PC_B) in the Wasm ABI.
    // notUsePC_B lists the exceptions by symbol name: when a JMP or CALL
    // targets one of these symbols, preprocess does not push the PC_B
    // argument before the Call.
    var notUsePC_B = map[string]bool{
    	"_rt0_wasm_js":            true,
    	"_rt0_wasm_wasip1":        true,
    	"wasm_export_run":         true,
    	"wasm_export_resume":      true,
    	"wasm_export_getsp":       true,
    	"wasm_pc_f_loop":          true,
    	"gcWriteBarrier":          true,
    	"runtime.gcWriteBarrier1": true,
    	"runtime.gcWriteBarrier2": true,
    	"runtime.gcWriteBarrier3": true,
    	"runtime.gcWriteBarrier4": true,
    	"runtime.gcWriteBarrier5": true,
    	"runtime.gcWriteBarrier6": true,
    	"runtime.gcWriteBarrier7": true,
    	"runtime.gcWriteBarrier8": true,
    	"runtime.wasmDiv":         true,
    	"runtime.wasmTruncS":      true,
    	"runtime.wasmTruncU":      true,
    	"cmpbody":                 true,
    	"memeqbody":               true,
    	"memcmp":                  true,
    	"memchr":                  true,
    }
    
    func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
    
    	type regVar struct {
    		global bool
    		index  uint64
    	}
    
    	type varDecl struct {
    		count uint64
    		typ   valueType
    	}
    
    	hasLocalSP := false
    
    	regVars := [MAXREG - MINREG]*regVar{
    		REG_SP - MINREG:    {true, 0},
    		REG_CTXT - MINREG:  {true, 1},
    		REG_g - MINREG:     {true, 2},
    		REG_RET0 - MINREG:  {true, 3},
    		REG_RET1 - MINREG:  {true, 4},
    		REG_RET2 - MINREG:  {true, 5},
    		REG_RET3 - MINREG:  {true, 6},
    		REG_PAUSE - MINREG: {true, 7},
    	}
    	var varDecls []*varDecl
    	useAssemblyRegMap := func() {
    		for i := int16(0); i < 16; i++ {
    			regVars[REG_R0+i-MINREG] = &regVar{false, uint64(i)}
    		}
    	}
    
    	// Function starts with declaration of locals: numbers and types.
    
    	// Some functions use a special calling convention.
    
    	switch s.Name {
    
    	case "_rt0_wasm_js", "_rt0_wasm_wasip1", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp",
    		"wasm_pc_f_loop", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
    
    		varDecls = []*varDecl{}
    		useAssemblyRegMap()
    
    	case "memchr", "memcmp":
    
    		varDecls = []*varDecl{{count: 2, typ: i32}}
    		useAssemblyRegMap()
    
    		varDecls = []*varDecl{{count: 2, typ: i64}}
    		useAssemblyRegMap()
    
    	case "gcWriteBarrier":
    		varDecls = []*varDecl{{count: 5, typ: i64}}
    		useAssemblyRegMap()
    	case "runtime.gcWriteBarrier1",
    		"runtime.gcWriteBarrier2",
    		"runtime.gcWriteBarrier3",
    		"runtime.gcWriteBarrier4",
    		"runtime.gcWriteBarrier5",
    		"runtime.gcWriteBarrier6",
    		"runtime.gcWriteBarrier7",
    		"runtime.gcWriteBarrier8":
    		// no locals
    
    		// Normal calling convention: PC_B as WebAssembly parameter. First local variable is local SP cache.
    		regVars[REG_PC_B-MINREG] = &regVar{false, 0}
    
    		hasLocalSP = true
    
    
    		var regUsed [MAXREG - MINREG]bool
    
    		for p := s.Func().Text; p != nil; p = p.Link {