// This file implements the compiler for the arm64 target.
// Please refer to https://developer.arm.com/documentation/102374/latest/
// if unfamiliar with arm64 instructions and semantics.
package compiler
import (
"bytes"
"errors"
"fmt"
"math"
"github.com/tetratelabs/wazero/internal/asm"
"github.com/tetratelabs/wazero/internal/asm/arm64"
"github.com/tetratelabs/wazero/internal/wasm"
"github.com/tetratelabs/wazero/internal/wazeroir"
)
type arm64Compiler struct {
assembler arm64.Assembler
ir *wazeroir.CompilationResult
// locationStack holds the state of the wazeroir virtual stack,
// and each item is either placed in a register or on the actual memory stack.
locationStack *runtimeValueLocationStack
// labels maps a label (e.g. ".L1_then") to *arm64LabelInfo.
labels [wazeroir.LabelKindNum][]arm64LabelInfo
// stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation.
stackPointerCeil uint64
// assignStackPointerCeilNeeded holds an asm.Node whose AssignSourceConstant must be called with the determined stack pointer ceiling.
assignStackPointerCeilNeeded asm.Node
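// compiledTrapTargets caches, per status code, the first emitted exit sequence so that
// subsequent exits with the same status can branch to it instead of duplicating the code.
// See compileExitFromNativeCode.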
compiledTrapTargets [nativeCallStatusModuleClosed]asm.Node
withListener bool
typ *wasm.FunctionType
br *bytes.Reader
// locationStackForEntrypoint is the initial location stack for all functions. To reuse the allocated stack,
// we cache it here, and in the Init method we reset it and assign it to .locationStack.
locationStackForEntrypoint runtimeValueLocationStack
// frameIDMax tracks the maximum value of frame id per function.
frameIDMax int
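// brTableTmp is a scratch slice reused by getSavedTemporaryLocationStack so that compiling
// br_table targets does not allocate a new backing array for the saved location stack.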
brTableTmp []runtimeValueLocation
}
func newArm64Compiler() compiler {
return &arm64Compiler{
assembler: arm64.NewAssembler(arm64ReservedRegisterForTemporary),
locationStackForEntrypoint: newRuntimeValueLocationStack(),
br: bytes.NewReader(nil),
}
}
// Init implements compiler.Init.
func (c *arm64Compiler) Init(typ *wasm.FunctionType, ir *wazeroir.CompilationResult, withListener bool) {
c.assembler.Reset()
c.locationStackForEntrypoint.reset()
c.resetLabels()
*c = arm64Compiler{
ir: ir,
withListener: withListener,
typ: typ,
assembler: c.assembler,
labels: c.labels,
br: c.br,
brTableTmp: c.brTableTmp,
locationStackForEntrypoint: c.locationStackForEntrypoint,
}
// Reuses the initial location stack for the compilation of subsequent functions.
c.locationStack = &c.locationStackForEntrypoint
}
// resetLabels resets the existing content in arm64Compiler.labels so that
// we can reuse the allocated slices and stacks in subsequent compilations.
func (c *arm64Compiler) resetLabels() {
for i := range c.labels {
for j := range c.labels[i] {
if j > c.frameIDMax {
// Only need to reset up to the maximum frame id. This makes compilation faster for large binaries.
break
}
l := &c.labels[i][j]
l.initialInstruction = nil
l.stackInitialized = false
l.initialStack.reset()
}
}
}
var (
arm64UnreservedVectorRegisters = []asm.Register{
arm64.RegV0, arm64.RegV1, arm64.RegV2, arm64.RegV3,
arm64.RegV4, arm64.RegV5, arm64.RegV6, arm64.RegV7, arm64.RegV8,
arm64.RegV9, arm64.RegV10, arm64.RegV11, arm64.RegV12, arm64.RegV13,
arm64.RegV14, arm64.RegV15, arm64.RegV16, arm64.RegV17, arm64.RegV18,
arm64.RegV19, arm64.RegV20, arm64.RegV21, arm64.RegV22, arm64.RegV23,
arm64.RegV24, arm64.RegV25, arm64.RegV26, arm64.RegV27, arm64.RegV28,
arm64.RegV29, arm64.RegV30, arm64.RegV31,
}
// Note (see arm64 section in https://go.dev/doc/asm):
// * RegR18 is reserved as a platform register, and we don't use it in Compiler.
// * RegR28 is reserved for Goroutine by Go runtime, and we don't use it in Compiler.
arm64UnreservedGeneralPurposeRegisters = []asm.Register{ //nolint
arm64.RegR3, arm64.RegR4, arm64.RegR5, arm64.RegR6, arm64.RegR7, arm64.RegR8,
arm64.RegR9, arm64.RegR10, arm64.RegR11, arm64.RegR12, arm64.RegR13,
arm64.RegR14, arm64.RegR15, arm64.RegR16, arm64.RegR17, arm64.RegR19,
arm64.RegR20, arm64.RegR21, arm64.RegR22, arm64.RegR23, arm64.RegR24,
arm64.RegR25, arm64.RegR26, arm64.RegR29, arm64.RegR30,
}
)
const (
// arm64ReservedRegisterForCallEngine holds the pointer to callEngine instance (i.e. *callEngine as uintptr)
arm64ReservedRegisterForCallEngine = arm64.RegR0
// arm64ReservedRegisterForStackBasePointerAddress holds stack base pointer's address (callEngine.stackBasePointer) in the current function call.
arm64ReservedRegisterForStackBasePointerAddress = arm64.RegR1
// arm64ReservedRegisterForMemory holds the pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr).
arm64ReservedRegisterForMemory = arm64.RegR2
// arm64ReservedRegisterForTemporary is the temporary register which is available at any point of execution, but its content is not expected to live beyond a single operation.
// Note: we choose R27 as that is the temporary register used in Go's assembler.
arm64ReservedRegisterForTemporary = arm64.RegR27
)
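// arm64CallingConventionModuleInstanceAddressRegister holds the address of the *wasm.ModuleInstance
// of the function being executed. It is set before branching into a function and when returning to
// the caller (see compileCallImpl and compileReturnFunction).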
var arm64CallingConventionModuleInstanceAddressRegister = arm64.RegR29
const (
// arm64CallEngineArchContextCompilerCallReturnAddressOffset is the offset of archContext.nativeCallReturnAddress in callEngine.
arm64CallEngineArchContextCompilerCallReturnAddressOffset = 144
// arm64CallEngineArchContextMinimum32BitSignedIntOffset is the offset of archContext.minimum32BitSignedIntAddress in callEngine.
arm64CallEngineArchContextMinimum32BitSignedIntOffset = 152
// arm64CallEngineArchContextMinimum64BitSignedIntOffset is the offset of archContext.minimum64BitSignedIntAddress in callEngine.
arm64CallEngineArchContextMinimum64BitSignedIntOffset = 160
)
func isZeroRegister(r asm.Register) bool {
return r == arm64.RegRZR
}
// compileNOP implements compiler.compileNOP for the arm64 architecture.
func (c *arm64Compiler) compileNOP() asm.Node {
return c.assembler.CompileStandAlone(arm64.NOP)
}
// compile implements compiler.compile for the arm64 architecture.
func (c *arm64Compiler) compile(buf asm.Buffer) (stackPointerCeil uint64, err error) {
// c.stackPointerCeil tracks the stack pointer ceiling (max seen) across all runtimeValueLocationStack(s)
// used for all labels (via setLocationStack), excluding the current one.
// Hence, we check here whether the final block's ceiling exceeds the current c.stackPointerCeil.
stackPointerCeil = c.stackPointerCeil
if stackPointerCeil < c.locationStack.stackPointerCeil {
stackPointerCeil = c.locationStack.stackPointerCeil
}
// Now that the stack pointer ceiling is determined, we invoke the callback.
// Note: this must be called before Assemble() below.
c.assignStackPointerCeil(stackPointerCeil)
err = c.assembler.Assemble(buf)
return
}
// arm64LabelInfo holds the information specific to a wazeroir label in this function.
type arm64LabelInfo struct {
// initialInstruction is the initial instruction for this label so that other blocks can branch into it.
initialInstruction asm.Node
// initialStack is the initial value location stack from which we start compiling this label.
initialStack runtimeValueLocationStack
stackInitialized bool
}
// assignStackPointerCeil implements compilerImpl.assignStackPointerCeil for the arm64 architecture.
func (c *arm64Compiler) assignStackPointerCeil(ceil uint64) {
if c.assignStackPointerCeilNeeded != nil {
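// The ceiling is counted in (8-byte) stack slots, so convert it to bytes by shifting left by 3.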
c.assignStackPointerCeilNeeded.AssignSourceConstant(int64(ceil) << 3)
}
}
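// label returns the arm64LabelInfo corresponding to the given wazeroir label,
// growing the backing slice as needed so that entries can be reused across compilations.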
func (c *arm64Compiler) label(label wazeroir.Label) *arm64LabelInfo {
kind := label.Kind()
frames := c.labels[kind]
frameID := label.FrameID()
if c.frameIDMax < frameID {
c.frameIDMax = frameID
}
// If the frameID is not allocated yet, expand the slice so that
// we can reduce allocations in the subsequent compilations.
if diff := frameID - len(frames) + 1; diff > 0 {
for i := 0; i < diff; i++ {
frames = append(frames, arm64LabelInfo{initialStack: newRuntimeValueLocationStack()})
}
c.labels[kind] = frames
}
return &frames[frameID]
}
// runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the arm64 architecture.
func (c *arm64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack {
return c.locationStack
}
// pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for arm64.
func (c *arm64Compiler) pushRuntimeValueLocationOnRegister(reg asm.Register, vt runtimeValueType) (ret *runtimeValueLocation) {
ret = c.locationStack.pushRuntimeValueLocationOnRegister(reg, vt)
c.markRegisterUsed(reg)
return
}
// pushVectorRuntimeValueLocationOnRegister implements compiler.pushVectorRuntimeValueLocationOnRegister for arm64.
func (c *arm64Compiler) pushVectorRuntimeValueLocationOnRegister(reg asm.Register) (lowerBitsLocation *runtimeValueLocation) {
lowerBitsLocation = c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Lo)
c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Hi)
c.markRegisterUsed(reg)
return
}
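// markRegisterUsed marks the given registers as used in the location stack,
// ignoring the zero register and the nil register.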
func (c *arm64Compiler) markRegisterUsed(regs ...asm.Register) {
for _, reg := range regs {
if !isZeroRegister(reg) && reg != asm.NilRegister {
c.locationStack.markRegisterUsed(reg)
}
}
}
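// markRegisterUnused marks the given registers as unused in the location stack,
// ignoring the zero register and the nil register.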
func (c *arm64Compiler) markRegisterUnused(regs ...asm.Register) {
for _, reg := range regs {
if !isZeroRegister(reg) && reg != asm.NilRegister {
c.locationStack.markRegisterUnused(reg)
}
}
}
func (c *arm64Compiler) String() (ret string) { return c.locationStack.String() }
// compilePreamble implements compiler.compilePreamble for the arm64 architecture.
func (c *arm64Compiler) compilePreamble() error {
c.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
defer c.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)
c.locationStack.init(c.typ)
// Check if it's necessary to grow the value stack before entering function body.
if err := c.compileMaybeGrowStack(); err != nil {
return err
}
if err := c.compileModuleContextInitialization(); err != nil {
return err
}
if c.withListener {
if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerBefore); err != nil {
return err
}
}
// We must initialize the stack base pointer register so that we can manipulate the stack properly.
c.compileReservedStackBasePointerRegisterInitialization()
c.compileReservedMemoryRegisterInitialization()
return nil
}
// compileMaybeGrowStack adds instructions to check whether the value stack needs to grow,
// and if so, makes the builtin function call to do so. These instructions are emitted in the
// function's preamble.
func (c *arm64Compiler) compileMaybeGrowStack() error {
tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
if !found {
panic("BUG: all the registers should be free at this point")
}
c.markRegisterUsed(tmpX)
tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
if !found {
panic("BUG: all the registers should be free at this point")
}
c.markRegisterUsed(tmpY)
// "tmpX = len(ce.stack)"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineStackContextStackLenInBytesOffset,
tmpX,
)
// "tmpY = ce.stackBasePointer"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
tmpY,
)
// "tmpX = tmpX - tmpY", in other words "tmpX = len(ce.stack) - ce.stackBasePointer"
c.assembler.CompileRegisterToRegister(
arm64.SUB,
tmpY,
tmpX,
)
// "tmpY = stackPointerCeil"
loadStackPointerCeil := c.assembler.CompileConstToRegister(
arm64.MOVD,
math.MaxInt32,
tmpY,
)
// At this point of compilation, we don't know the value of the stack pointer ceiling,
// so we lazily resolve the value later.
c.assignStackPointerCeilNeeded = loadStackPointerCeil
// Compare tmpX (len(ce.stack) - ce.stackBasePointer) and tmpY (ce.stackPointerCeil)
c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmpX, tmpY)
// If ceil > stackLen - stack base pointer, we need to grow the stack by calling builtin Go function.
brIfStackOK := c.assembler.CompileJump(arm64.BCONDLS)
if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexGrowStack); err != nil {
return err
}
// Otherwise, skip calling it.
c.assembler.SetJumpTargetOnNext(brIfStackOK)
c.markRegisterUnused(tmpX, tmpY)
return nil
}
// compileReturnFunction emits instructions to return from the current function frame.
// If the current frame is the bottom, the code goes back to the Go code with nativeCallStatusCodeReturned status.
// Otherwise, we branch into the caller's return address.
func (c *arm64Compiler) compileReturnFunction() error {
// Release all the registers as our calling convention requires the caller to save them.
if err := c.compileReleaseAllRegistersToStack(); err != nil {
return err
}
if c.withListener {
if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerAfter); err != nil {
return err
}
// After return, we re-initialize the stack base pointer as that is used to return to the caller below.
c.compileReservedStackBasePointerRegisterInitialization()
}
// arm64CallingConventionModuleInstanceAddressRegister holds the module instance's address,
// so mark it used so that it won't be chosen as a free register.
c.locationStack.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
defer c.locationStack.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)
returnAddress, callerStackBasePointerInBytes, callerFunction := c.locationStack.getCallFrameLocations(c.typ)
// If the return address is zero, we return from the entire execution back to the Go code.
returnAddress.setRegister(arm64ReservedRegisterForTemporary)
c.compileLoadValueOnStackToRegister(returnAddress)
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, arm64.RegRZR)
// Exit if the address equals zero; otherwise, branch into the caller's return address below.
c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeReturned)
// Alias for readability.
tmp := arm64CallingConventionModuleInstanceAddressRegister
// First, restore the stackContext.stackBasePointerInBytesOffset from callerStackBasePointerInBytes.
callerStackBasePointerInBytes.setRegister(tmp)
c.compileLoadValueOnStackToRegister(callerStackBasePointerInBytes)
c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)
// Next, restore moduleContext.fn from callerFunction.
callerFunction.setRegister(tmp)
c.compileLoadValueOnStackToRegister(callerFunction)
c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)
// Also, we have to put the caller function's *wasm.ModuleInstance into arm64CallingConventionModuleInstanceAddressRegister.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
tmp, functionModuleInstanceOffset,
arm64CallingConventionModuleInstanceAddressRegister)
c.assembler.CompileJumpToRegister(arm64.B, returnAddress.register)
return nil
}
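// compileMaybeExitFromNativeCode emits an exit with the given status that is skipped when
// skipCondition holds for the flags set by the preceding comparison.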
func (c *arm64Compiler) compileMaybeExitFromNativeCode(skipCondition asm.Instruction, status nativeCallStatusCode) {
skip := c.assembler.CompileJump(skipCondition)
c.compileExitFromNativeCode(status)
c.assembler.SetJumpTargetOnNext(skip)
}
// compileExitFromNativeCode adds instructions to give the control back to ce.exec with the given status code.
func (c *arm64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) {
if target := c.compiledTrapTargets[status]; target != nil {
c.assembler.CompileJump(arm64.B).AssignJumpTarget(target)
return
}
switch status {
case nativeCallStatusCodeReturned:
// Save the target for reuse.
c.compiledTrapTargets[status] = c.compileNOP()
case nativeCallStatusCodeCallGoHostFunction, nativeCallStatusCodeCallBuiltInFunction:
// Read the return address, and write it to callEngine.exitContext.returnAddress.
c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.RET)
c.assembler.CompileRegisterToMemory(
arm64.STRD, arm64ReservedRegisterForTemporary,
arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset,
)
default:
if c.ir.IROperationSourceOffsetsInWasmBinary != nil {
// In this case, the execution traps, and we want the top frame's source position in the stack trace.
// We store the instruction address onto callEngine.returnAddress.
c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.STRD)
c.assembler.CompileRegisterToMemory(
arm64.STRD, arm64ReservedRegisterForTemporary,
arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset,
)
} else {
// We won't use the source position, so just save the target for reuse.
c.compiledTrapTargets[status] = c.compileNOP()
}
}
// Write the current stack pointer to the ce.stackPointer.
c.assembler.CompileConstToRegister(arm64.MOVD, int64(c.locationStack.sp), arm64ReservedRegisterForTemporary)
c.assembler.CompileRegisterToMemory(arm64.STRD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine,
callEngineStackContextStackPointerOffset)
// Write the status to callEngine.exitContext.statusCode.
if status != 0 {
c.assembler.CompileConstToRegister(arm64.MOVW, int64(status), arm64ReservedRegisterForTemporary)
c.assembler.CompileRegisterToMemory(arm64.STRW, arm64ReservedRegisterForTemporary,
arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
} else {
// If the status == 0, we use zero register to store zero.
c.assembler.CompileRegisterToMemory(arm64.STRW, arm64.RegRZR,
arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
}
// The return address to the Go code is stored in archContext.compilerReturnAddress which
// is embedded in ce. We load the value to the tmpRegister, and then
// invoke RET with that register.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, arm64CallEngineArchContextCompilerCallReturnAddressOffset,
arm64ReservedRegisterForTemporary)
c.assembler.CompileJumpToRegister(arm64.RET, arm64ReservedRegisterForTemporary)
}
// compileGoDefinedHostFunction implements compiler.compileGoDefinedHostFunction for the arm64 architecture.
func (c *arm64Compiler) compileGoDefinedHostFunction() error {
// First we must update the location stack to reflect the number of host function inputs.
c.locationStack.init(c.typ)
if c.withListener {
if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction,
builtinFunctionIndexFunctionListenerBefore); err != nil {
return err
}
}
// The host function needs access to the caller's module instance, and the caller's information is stored in the stack
// (as described in the doc of callEngine.stack). Here, we get the caller's *function from the stack,
// derive its *wasm.ModuleInstance, and save it in callEngine.exitContext.callerModuleInstance so we can
// pass it to the host function without sacrificing performance.
c.compileReservedStackBasePointerRegisterInitialization()
// Alias for readability.
tmp := arm64CallingConventionModuleInstanceAddressRegister
// Get the location of the callerFunction (*function) in the stack, which depends on the signature.
_, _, callerFunction := c.locationStack.getCallFrameLocations(c.typ)
// Load the value into the tmp register: tmp = &function{..}
callerFunction.setRegister(tmp)
c.compileLoadValueOnStackToRegister(callerFunction)
// tmp = *(tmp+functionModuleInstanceOffset) = &wasm.ModuleInstance{...}
c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, functionModuleInstanceOffset, tmp)
// Load it onto callEngine.exitContext.callerModuleInstance.
c.assembler.CompileRegisterToMemory(arm64.STRD,
tmp,
arm64ReservedRegisterForCallEngine, callEngineExitContextCallerModuleInstanceOffset)
// Reset the state of callerFunction value location so that we won't mess up subsequent code generation below.
c.locationStack.releaseRegister(callerFunction)
if err := c.compileCallGoFunction(nativeCallStatusCodeCallGoHostFunction, 0); err != nil {
return err
}
// Initializes the reserved stack base pointer which is used to retrieve the call frame stack.
c.compileReservedStackBasePointerRegisterInitialization()
// A Go function can change the module state in arbitrary ways, so we have to force
// the callEngine.moduleContext initialization on the function return. To do so,
// we zero out callEngine.moduleContext.moduleInstance.
c.assembler.CompileRegisterToMemory(arm64.STRD,
arm64.RegRZR,
arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset)
return c.compileReturnFunction()
}
// setLocationStack sets the given runtimeValueLocationStack to the .locationStack field,
// while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks.
// This is called when we branch into a different block.
func (c *arm64Compiler) setLocationStack(newStack *runtimeValueLocationStack) {
if c.stackPointerCeil < c.locationStack.stackPointerCeil {
c.stackPointerCeil = c.locationStack.stackPointerCeil
}
c.locationStack = newStack
}
// compileBuiltinFunctionCheckExitCode implements compiler.compileBuiltinFunctionCheckExitCode for the arm64 architecture.
func (c *arm64Compiler) compileBuiltinFunctionCheckExitCode() error {
if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexCheckExitCode); err != nil {
return err
}
// After return, we re-initialize reserved registers just like preamble of functions.
c.compileReservedStackBasePointerRegisterInitialization()
c.compileReservedMemoryRegisterInitialization()
return nil
}
// compileLabel implements compiler.compileLabel for the arm64 architecture.
func (c *arm64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipThisLabel bool) {
labelKey := wazeroir.Label(o.U1)
labelInfo := c.label(labelKey)
// If initialStack is not set, that means this label has never been reached.
if !labelInfo.stackInitialized {
skipThisLabel = true
return
}
if labelBegin := labelInfo.initialInstruction; labelBegin == nil {
// We use NOP as the beginning of instructions in a label.
// This should eventually be optimized out by the assembler.
labelInfo.initialInstruction = c.assembler.CompileStandAlone(arm64.NOP)
} else {
c.assembler.Add(labelBegin)
}
// Set the initial stack.
c.setLocationStack(&labelInfo.initialStack)
return false
}
// compileUnreachable implements compiler.compileUnreachable for the arm64 architecture.
func (c *arm64Compiler) compileUnreachable() error {
c.compileExitFromNativeCode(nativeCallStatusCodeUnreachable)
return nil
}
// compileSet implements compiler.compileSet for the arm64 architecture.
func (c *arm64Compiler) compileSet(o *wazeroir.UnionOperation) error {
depth := int(o.U1)
isTargetVector := o.B3
setTargetIndex := int(c.locationStack.sp) - 1 - depth
if isTargetVector {
_ = c.locationStack.pop()
}
v := c.locationStack.pop()
if err := c.compileEnsureOnRegister(v); err != nil {
return err
}
targetLocation := &c.locationStack.stack[setTargetIndex]
if targetLocation.onRegister() {
// We no longer need the register previously used by the target location.
c.markRegisterUnused(targetLocation.register)
}
reg := v.register
targetLocation.setRegister(reg)
targetLocation.valueType = v.valueType
if isTargetVector {
hi := &c.locationStack.stack[setTargetIndex+1]
hi.setRegister(reg)
}
return nil
}
// compileGlobalGet implements compiler.compileGlobalGet for the arm64 architecture.
func (c *arm64Compiler) compileGlobalGet(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
index := uint32(o.U1)
wasmValueType := c.ir.Globals[index].ValType
isV128 := wasmValueType == wasm.ValueTypeV128
// Get the address of globals[index] into globalAddressReg.
globalAddressReg, err := c.compileReadGlobalAddress(index)
if err != nil {
return err
}
if isV128 {
resultReg, err := c.allocateRegister(registerTypeVector)
if err != nil {
return err
}
c.assembler.CompileConstToRegister(arm64.ADD, globalInstanceValueOffset, globalAddressReg)
c.assembler.CompileMemoryToVectorRegister(arm64.VMOV, globalAddressReg, 0,
resultReg, arm64.VectorArrangementQ)
c.pushVectorRuntimeValueLocationOnRegister(resultReg)
} else {
ldr := arm64.NOP
var result asm.Register
var vt runtimeValueType
switch wasmValueType {
case wasm.ValueTypeI32:
ldr = arm64.LDRW
vt = runtimeValueTypeI32
result = globalAddressReg
case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
ldr = arm64.LDRD
vt = runtimeValueTypeI64
result = globalAddressReg
case wasm.ValueTypeF32:
result, err = c.allocateRegister(registerTypeVector)
if err != nil {
return err
}
ldr = arm64.FLDRS
vt = runtimeValueTypeF32
case wasm.ValueTypeF64:
result, err = c.allocateRegister(registerTypeVector)
if err != nil {
return err
}
ldr = arm64.FLDRD
vt = runtimeValueTypeF64
}
// "result = [globalAddressReg + globalInstanceValueOffset] (== globals[index].Val)"
c.assembler.CompileMemoryToRegister(
ldr,
globalAddressReg, globalInstanceValueOffset,
result,
)
c.pushRuntimeValueLocationOnRegister(result, vt)
}
return nil
}
// compileGlobalSet implements compiler.compileGlobalSet for the arm64 architecture.
func (c *arm64Compiler) compileGlobalSet(o *wazeroir.UnionOperation) error {
index := uint32(o.U1)
wasmValueType := c.ir.Globals[index].ValType
isV128 := wasmValueType == wasm.ValueTypeV128
var val *runtimeValueLocation
if isV128 {
val = c.locationStack.popV128()
} else {
val = c.locationStack.pop()
}
if err := c.compileEnsureOnRegister(val); err != nil {
return err
}
globalInstanceAddressRegister, err := c.compileReadGlobalAddress(index)
if err != nil {
return err
}
if isV128 {
c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
val.register, globalInstanceAddressRegister, globalInstanceValueOffset,
arm64.VectorArrangementQ)
} else {
var str asm.Instruction
switch c.ir.Globals[index].ValType {
case wasm.ValueTypeI32:
str = arm64.STRW
case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
str = arm64.STRD
case wasm.ValueTypeF32:
str = arm64.FSTRS
case wasm.ValueTypeF64:
str = arm64.FSTRD
}
// At this point "globalInstanceAddressRegister = globals[index]".
// Therefore, this means "globals[index].Val = val.register"
c.assembler.CompileRegisterToMemory(
str,
val.register,
globalInstanceAddressRegister, globalInstanceValueOffset,
)
}
c.markRegisterUnused(val.register)
return nil
}
// compileReadGlobalAddress adds instructions to store the absolute address of the global instance at globalIndex into a register
func (c *arm64Compiler) compileReadGlobalAddress(globalIndex uint32) (destinationRegister asm.Register, err error) {
// TODO: rethink about the type used in store `globals []*GlobalInstance`.
// If we use `[]GlobalInstance` instead, we could reduce one MOV instruction here.
destinationRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return
}
// "destinationRegister = globalIndex * 8"
c.assembler.CompileConstToRegister(
// globalIndex is an index to []*GlobalInstance, therefore
// we have to multiply it by the size of *GlobalInstance == the pointer size == 8.
arm64.MOVD, int64(globalIndex)*8, destinationRegister,
)
// "arm64ReservedRegisterForTemporary = &globals[0]"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
arm64ReservedRegisterForTemporary,
)
// "destinationRegister = [arm64ReservedRegisterForTemporary + destinationRegister] (== globals[globalIndex])".
c.assembler.CompileMemoryWithRegisterOffsetToRegister(
arm64.LDRD,
arm64ReservedRegisterForTemporary, destinationRegister,
destinationRegister,
)
return
}
// compileBr implements compiler.compileBr for the arm64 architecture.
func (c *arm64Compiler) compileBr(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
return c.compileBranchInto(wazeroir.Label(o.U1))
}
// compileBrIf implements compiler.compileBrIf for the arm64 architecture.
func (c *arm64Compiler) compileBrIf(o *wazeroir.UnionOperation) error {
cond := c.locationStack.pop()
var conditionalBR asm.Node
if cond.onConditionalRegister() {
// If the cond is on a conditional register, it corresponds to one of "conditional codes"
// https://developer.arm.com/documentation/dui0801/a/Condition-Codes/Condition-code-suffixes
// Here we represent the conditional codes by using arm64.COND_** registers, and that means the
// conditional jump can be performed if we use arm64.B**.
// For example, if we have arm64.CondEQ on cond, that means we performed compileEq right before
// this compileBrIf and BrIf can be achieved by arm64.BCONDEQ.
var brInst asm.Instruction
switch cond.conditionalRegister {
case arm64.CondEQ:
brInst = arm64.BCONDEQ
case arm64.CondNE:
brInst = arm64.BCONDNE
case arm64.CondHS:
brInst = arm64.BCONDHS
case arm64.CondLO:
brInst = arm64.BCONDLO
case arm64.CondMI:
brInst = arm64.BCONDMI
case arm64.CondHI:
brInst = arm64.BCONDHI
case arm64.CondLS:
brInst = arm64.BCONDLS
case arm64.CondGE:
brInst = arm64.BCONDGE
case arm64.CondLT:
brInst = arm64.BCONDLT
case arm64.CondGT:
brInst = arm64.BCONDGT
case arm64.CondLE:
brInst = arm64.BCONDLE
default:
// BUG: This means that cond.conditionalRegister is set somewhere in this file
// but not covered by the switch above. That shouldn't happen.
return fmt.Errorf("unsupported condition for br_if: %v", cond.conditionalRegister)
}
conditionalBR = c.assembler.CompileJump(brInst)
} else {
// If the value is not on the conditional register, we compare the value with the zero register,
// and then do the conditional BR if the value doesn't equal zero.
if err := c.compileEnsureOnRegister(cond); err != nil {
return err
}
// Compare the value with zero register. Note that the value is ensured to be i32 by function validation phase,
// so we use CMPW (32-bit compare) here.
c.assembler.CompileTwoRegistersToNone(arm64.CMPW, cond.register, arm64.RegRZR)
conditionalBR = c.assembler.CompileJump(arm64.BCONDNE)
c.markRegisterUnused(cond.register)
}
// Emit the code for branching into else branch.
elseTarget := wazeroir.Label(o.U2)
if err := c.compileBranchInto(elseTarget); err != nil {
return err
}
// We branch into here from the original conditional BR (conditionalBR).
c.assembler.SetJumpTargetOnNext(conditionalBR)
thenTarget := wazeroir.Label(o.U1)
if err := compileDropRange(c, o.U3); err != nil {
return err
}
return c.compileBranchInto(thenTarget)
}
func (c *arm64Compiler) compileBranchInto(target wazeroir.Label) error {
if target.IsReturnTarget() {
return c.compileReturnFunction()
} else {
if c.ir.LabelCallers[target] > 1 {
// We can only reuse the register state when there's a single call site.
// If there are multiple call sites, release the existing values on registers to the stack
// so that we have a consistent value location state at the beginning of the label.
if err := c.compileReleaseAllRegistersToStack(); err != nil {
return err
}
}
// Set the initial stack of the target label, so we can start compiling the label
// with the appropriate value locations. Note that we clone the stack here as we may
// manipulate the stack before the compiler reaches the label.
targetLabel := c.label(target)
if !targetLabel.stackInitialized {
targetLabel.initialStack.cloneFrom(*c.locationStack)
targetLabel.stackInitialized = true
}
br := c.assembler.CompileJump(arm64.B)
c.assignBranchTarget(target, br)
return nil
}
}
// assignBranchTarget assigns the given label's initial instruction to the destination of br.
func (c *arm64Compiler) assignBranchTarget(label wazeroir.Label, br asm.Node) {
target := c.label(label)
targetInst := target.initialInstruction
if targetInst == nil {
// If the label isn't compiled yet, allocate a NOP node and set it as the initial instruction.
targetInst = c.assembler.AllocateNOP()
target.initialInstruction = targetInst
}
br.AssignJumpTarget(targetInst)
}
// compileBrTable implements compiler.compileBrTable for the arm64 architecture.
func (c *arm64Compiler) compileBrTable(o *wazeroir.UnionOperation) error {
// If the operation only consists of the default target, we branch into it and return early.
if len(o.Us) == 2 {
loc := c.locationStack.pop()
if loc.onRegister() {
c.markRegisterUnused(loc.register)
}
if err := compileDropRange(c, o.Us[1]); err != nil {
return err
}
return c.compileBranchInto(wazeroir.Label(o.Us[0]))
}
index := c.locationStack.pop()
if err := c.compileEnsureOnRegister(index); err != nil {
return err
}
if isZeroRegister(index.register) {
reg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
index.setRegister(reg)
c.markRegisterUsed(reg)
// Zero the value on a picked register.
c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, reg)
}
tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
// Load the number of the branch table's non-default targets.
// "tmpReg = len(o.Us)/2 - 1 (== number of non-default targets)"
c.assembler.CompileConstToRegister(arm64.MOVW, int64(len(o.Us)/2-1), tmpReg)
// Compare the length with offset.
c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmpReg, index.register)
// If the value exceeds the number of targets, we branch into the default target (the last entry in the table).
brDefaultIndex := c.assembler.CompileJump(arm64.BCONDLO)
c.assembler.CompileRegisterToRegister(arm64.MOVW, tmpReg, index.register)
c.assembler.SetJumpTargetOnNext(brDefaultIndex)
// We prepare the asm.StaticConst which holds the offset of
// each target's first instruction (incl. default)
// relative to the beginning of label tables.
//
// For example, if we have targets=[L0, L1] and default=L_DEFAULT,
// we emit the code like this at [Emit the code for each target and default branch] below.
//
// L0:
// 0x123001: XXXX, ...
// .....
// L1:
// 0x123005: YYY, ...
// .....
// L_DEFAULT:
// 0x123009: ZZZ, ...
//
// then offsetData becomes like [0x0, 0x4, 0x8].
// By using this offset list, we can jump into the label for the index by
// "jmp offsetData[index]+0x123001", where "0x123001" can be acquired by the ADR instruction.
//
// Note: We store each offset as a 32-bit unsigned integer occupying 4 consecutive bytes. So more precisely,
// the above example's offsetData would be [0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0].
//
// Note: this is similar to how GCC implements Switch statements in C.
offsetData := asm.NewStaticConst(make([]byte, 4*(len(o.Us)/2)))
// "tmpReg = &offsetData[0]"
c.assembler.CompileStaticConstToRegister(arm64.ADR, offsetData, tmpReg)
// "index.register = tmpReg + (index.register << 2) (== &offsetData[offset])"
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, index.register, 2, tmpReg, index.register)
// "index.register = *index.register (== offsetData[offset])"
c.assembler.CompileMemoryToRegister(arm64.LDRW, index.register, 0, index.register)
// Now we read the address of the beginning of the jump table.
// In the above example, this corresponds to reading the address of 0x123001.
c.assembler.CompileReadInstructionAddress(tmpReg, arm64.B)
// Now we have the address of L0 in tmp register, and the offset to the target label in the index.register.
// So we could achieve the br_table jump by adding them and jump into the resulting address.
c.assembler.CompileRegisterToRegister(arm64.ADD, tmpReg, index.register)
c.assembler.CompileJumpToRegister(arm64.B, index.register)
// We no longer need the index's register, so mark it unused.
c.markRegisterUnused(index.register)
// [Emit the code for each target and default branch]
labelInitialInstructions := make([]asm.Node, len(o.Us)/2)
// Since we might end up with a different stack state in each branch,
// we need to save the initial stack state here, and use the same initial state
// for each iteration.
initialLocationStack := c.getSavedTemporaryLocationStack()
for i := range labelInitialInstructions {
// Emit the initial instruction of each target where
// we use NOP as we don't yet know the next instruction in each label.
init := c.assembler.CompileStandAlone(arm64.NOP)
labelInitialInstructions[i] = init
targetLabel := wazeroir.Label(o.Us[i*2])
targetToDrop := o.Us[i*2+1]
if err = compileDropRange(c, targetToDrop); err != nil {
return err
}
if err = c.compileBranchInto(targetLabel); err != nil {
return err
}
// After the iteration, reset the stack's state with initialLocationStack.
c.locationStack.cloneFrom(initialLocationStack)
}
c.assembler.BuildJumpTable(offsetData, labelInitialInstructions)
return nil
}
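// getSavedTemporaryLocationStack returns a copy of the current location stack whose
// underlying slice is backed by c.brTableTmp, so that each br_table target can be compiled
// from the same initial state without extra allocations.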
func (c *arm64Compiler) getSavedTemporaryLocationStack() runtimeValueLocationStack {
initialLocationStack := *c.locationStack // Take copy!
// Use c.brTableTmp for the underlying stack so that we can reduce allocations.
if diff := int(initialLocationStack.sp) - len(c.brTableTmp); diff > 0 {
c.brTableTmp = append(c.brTableTmp, make([]runtimeValueLocation, diff)...)
}
copy(c.brTableTmp, initialLocationStack.stack[:initialLocationStack.sp])
initialLocationStack.stack = c.brTableTmp
return initialLocationStack
}
// compileCall implements compiler.compileCall for the arm64 architecture.
func (c *arm64Compiler) compileCall(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
functionIndex := o.U1
tp := &c.ir.Types[c.ir.Functions[functionIndex]]
targetFunctionAddressReg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.markRegisterUsed(targetFunctionAddressReg)
defer c.markRegisterUnused(targetFunctionAddressReg)
// Obtain the address of the target function (ce.functions[functionIndex]); compileCallImpl below
// sets it as callEngine.moduleContext.fn, i.e. the function executed on the new call frame.
//
// First, we read the address of the first item of the ce.functions slice (= &ce.functions[0])
// into targetFunctionAddressReg.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
targetFunctionAddressReg)
c.assembler.CompileConstToRegister(
arm64.ADD,
int64(functionIndex)*functionSize, // functionSize is the size in bytes of each entry in ce.functions.
targetFunctionAddressReg)
return c.compileCallImpl(targetFunctionAddressReg, tp)
}
// compileCallImpl implements compiler.compileCall and compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallImpl(targetFunctionAddressRegister asm.Register, functype *wasm.FunctionType) error {
// Release all the registers as our calling convention requires the caller to save them.
if err := c.compileReleaseAllRegistersToStack(); err != nil {
return err
}
tmp, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
if !ok {
panic("BUG: cannot take a free register")
}
// The stack should look like:
//
// reserved slots for results (if len(results) > len(args))
// | |
// ,arg0, ..., argN, ..., _, .returnAddress, .returnStackBasePointerInBytes, .function, ....
// | | |
// | callFrame{^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^}
// |
// nextStackBasePointerOffset
//
// where callFrame is used to return to this currently executed function.
nextStackBasePointerOffset := int64(c.locationStack.sp) - int64(functype.ParamNumInUint64)
callFrameReturnAddressLoc, callFrameStackBasePointerInBytesLoc, callFrameFunctionLoc := c.locationStack.pushCallFrame(functype)
// Save the current stack base pointer at callFrameStackBasePointerInBytesLoc.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
tmp)
callFrameStackBasePointerInBytesLoc.setRegister(tmp)
c.compileReleaseRegisterToStack(callFrameStackBasePointerInBytesLoc)
// Set callEngine.stackContext.stackBasePointer for the next function.
c.assembler.CompileConstToRegister(arm64.ADD, nextStackBasePointerOffset<<3, tmp)
c.assembler.CompileRegisterToMemory(arm64.STRD,
tmp,
arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)
// Save the currently executed *function (placed at callEngine.moduleContext.fn) into callFrameFunctionLoc.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset,
tmp)
callFrameFunctionLoc.setRegister(tmp)
c.compileReleaseRegisterToStack(callFrameFunctionLoc)
// Set callEngine.moduleContext.fn to the next *function.
c.assembler.CompileRegisterToMemory(arm64.STRD,
targetFunctionAddressRegister,
arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)
// Write the return address into callFrameReturnAddressLoc.
c.assembler.CompileReadInstructionAddress(tmp, arm64.B)
callFrameReturnAddressLoc.setRegister(tmp)
c.compileReleaseRegisterToStack(callFrameReturnAddressLoc)
if targetFunctionAddressRegister == arm64CallingConventionModuleInstanceAddressRegister {
// In this case, we must move the value in targetFunctionAddressRegister to another register; otherwise
// the address (the jump target below) will be modified and result in a segfault.
// See #526.
c.assembler.CompileRegisterToRegister(arm64.MOVD, targetFunctionAddressRegister, tmp)
targetFunctionAddressRegister = tmp
}
// Also, we have to put the target function's *wasm.ModuleInstance into arm64CallingConventionModuleInstanceAddressRegister.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
targetFunctionAddressRegister, functionModuleInstanceOffset,
arm64CallingConventionModuleInstanceAddressRegister,
)
// Then, br into the target function's initial address.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
targetFunctionAddressRegister, functionCodeInitialAddressOffset,
targetFunctionAddressRegister)
c.assembler.CompileJumpToRegister(arm64.B, targetFunctionAddressRegister)
// We consumed the function parameters, the call frame stack and reserved slots during the call.
c.locationStack.sp = uint64(nextStackBasePointerOffset)
// Also, the function results were pushed by the call.
for _, t := range functype.Results {
loc := c.locationStack.pushRuntimeValueLocationOnStack()
switch t {
case wasm.ValueTypeI32:
loc.valueType = runtimeValueTypeI32
case wasm.ValueTypeI64, wasm.ValueTypeFuncref, wasm.ValueTypeExternref:
loc.valueType = runtimeValueTypeI64
case wasm.ValueTypeF32:
loc.valueType = runtimeValueTypeF32
case wasm.ValueTypeF64:
loc.valueType = runtimeValueTypeF64
case wasm.ValueTypeV128:
loc.valueType = runtimeValueTypeV128Lo
hi := c.locationStack.pushRuntimeValueLocationOnStack()
hi.valueType = runtimeValueTypeV128Hi
}
}
if err := c.compileModuleContextInitialization(); err != nil {
return err
}
// On the function return, we initialize the state for this function.
c.compileReservedStackBasePointerRegisterInitialization()
c.compileReservedMemoryRegisterInitialization()
return nil
}
// compileCallIndirect implements compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallIndirect(o *wazeroir.UnionOperation) (err error) {
offset := c.locationStack.pop()
if err = c.compileEnsureOnRegister(offset); err != nil {
return err
}
typeIndex := o.U1
tableIndex := o.U2
offsetReg := offset.register
if isZeroRegister(offsetReg) {
offsetReg, err = c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.markRegisterUsed(offsetReg)
// Zero the value on a picked register.
c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetReg)
}
tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.markRegisterUsed(tmp)
tmp2, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.markRegisterUsed(tmp2)
// First, we need to check that the offset doesn't exceed the length of the table.
// "tmp = &Tables[0]"
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
tmp,
)
// tmp = [tmp + TableIndex*8] = [&Tables[0] + TableIndex*sizeOf(*tableInstance)] = Tables[tableIndex]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
tmp, int64(tableIndex)*8,
tmp,
)
// tmp2 = [tmp + tableInstanceTableLenOffset] = len(Tables[tableIndex])
c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, tableInstanceTableLenOffset, tmp2)
// "cmp tmp2, offset"
c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp2, offsetReg)
// If it exceeds len(table), we trap.
c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)
// Otherwise, we proceed to the function type check.
// We need to obtain the absolute address of the table element.
// "tmp = &Tables[tableIndex].table[0]"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
tmp, tableInstanceTableOffset,
tmp,
)
// "offset = tmp + (offset << pointerSizeLog2) (== &table[offset])"
// Here we left-shift by 3 (pointerSizeLog2) in order to get the offset in bytes,
// as the table element type is uintptr, which is 8 bytes.
c.assembler.CompileLeftShiftedRegisterToRegister(
arm64.ADD,
offsetReg, pointerSizeLog2,
tmp,
offsetReg,
)
// "offset = (*offset) (== table[offset])"
c.assembler.CompileMemoryToRegister(arm64.LDRD, offsetReg, 0, offsetReg)
// Check if the value of table[offset] equals zero, meaning that the target element is uninitialized.
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, offsetReg)
// Skipped if the target is initialized.
c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeInvalidTableAccess)
// Next, we check that the type matches, i.e. table[offset].typeID equals the type ID of the call_indirect's type index.
// "tmp = table[offset].typeID"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
offsetReg, functionTypeIDOffset,
tmp,
)
// "tmp2 = ModuleInstance.TypeIDs[index]"
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset,
tmp2)
c.assembler.CompileMemoryToRegister(arm64.LDRW, tmp2, int64(typeIndex)*4, tmp2)
// Compare these two values, and if they equal, we are ready to make function call.
c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmp, tmp2)
// Skipped if the type matches.
c.compileMaybeExitFromNativeCode(arm64.BCONDEQ, nativeCallStatusCodeTypeMismatchOnIndirectCall)
targetFunctionType := &c.ir.Types[typeIndex]
if err := c.compileCallImpl(offsetReg, targetFunctionType); err != nil {
return err
}
// The offset register should be marked as unused as it was consumed in the function call.
c.markRegisterUnused(offsetReg, tmp, tmp2)
return nil
}
// compileDrop implements compiler.compileDrop for the arm64 architecture.
func (c *arm64Compiler) compileDrop(o *wazeroir.UnionOperation) error {
return compileDropRange(c, o.U1)
}
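// compileSelectV128Impl emits the code for select on v128 operands: if the value in selectorRegister
// is zero, x2 is selected, otherwise x1; the chosen value always ends up in x1's register.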
func (c *arm64Compiler) compileSelectV128Impl(selectorRegister asm.Register) error {
x2 := c.locationStack.popV128()
if err := c.compileEnsureOnRegister(x2); err != nil {
return err
}
x1 := c.locationStack.popV128()
if err := c.compileEnsureOnRegister(x1); err != nil {
return err
}
c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, selectorRegister)
brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)
// In this branch, we select the value of x2, so we move the value into x1.register so that
// we can have the result in x1.register regardless of the selection.
c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
x2.register, x2.register, x1.register, arm64.VectorArrangement16B)
c.assembler.SetJumpTargetOnNext(brIfNotZero)
// As noted, the result exists in x1.register regardless of the selector.
c.pushVectorRuntimeValueLocationOnRegister(x1.register)
// Plus, x2.register is no longer used.
c.markRegisterUnused(x2.register)
return nil
}
// compileSelect implements compiler.compileSelect for the arm64 architecture.
func (c *arm64Compiler) compileSelect(o *wazeroir.UnionOperation) error {
cv, err := c.popValueOnRegister()
if err != nil {
return err
}
isTargetVector := o.B3
if isTargetVector {
return c.compileSelectV128Impl(cv.register)
}
c.markRegisterUsed(cv.register)
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
if isZeroRegister(x1.register) && isZeroRegister(x2.register) {
// If both values are zero, the result is always zero.
c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
c.markRegisterUnused(cv.register)
return nil
}
// In the following, we emit the code so that x1's register contains the chosen value
// no matter which of original x1 or x2 is selected.
//
// If x1 is currently on the zero register, we cannot place the result there because
// a write to arm64.RegRZR is discarded regardless of the value.
// So we explicitly assign a general purpose register to x1 here.
if isZeroRegister(x1.register) {
// Mark x2's and cv's registers as used so they won't be chosen.
c.markRegisterUsed(x2.register)
// Pick the non-zero register for x1.
x1Reg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
x1.setRegister(x1Reg)
// And zero out the picked register.
c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, x1Reg)
}
// At this point, x1 is non-zero register, and x2 is either general purpose or zero register.
c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, cv.register)
brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)
// If cv == 0, we move the value of x2 to the x1.register.
switch x1.valueType {
case runtimeValueTypeI32:
// TODO: use 32-bit mov
c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
case runtimeValueTypeI64:
c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
case runtimeValueTypeF32:
// TODO: use 32-bit mov
c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
case runtimeValueTypeF64:
c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
default:
return errors.New("TODO: implement vector type select")
}
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
// Otherwise, nothing to do for select.
c.assembler.SetJumpTargetOnNext(brIfNotZero)
// Only x1.register is reused.
c.markRegisterUnused(cv.register, x2.register)
return nil
}
// compilePick implements compiler.compilePick for the arm64 architecture.
func (c *arm64Compiler) compilePick(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
depth := o.U1
isTargetVector := o.B3
pickTarget := &c.locationStack.stack[c.locationStack.sp-1-uint64(depth)]
pickedRegister, err := c.allocateRegister(pickTarget.getRegisterType())
if err != nil {
return err
}
if pickTarget.onRegister() { // Copy the value to the pickedRegister.
switch pickTarget.valueType {
case runtimeValueTypeI32:
c.assembler.CompileRegisterToRegister(arm64.MOVW, pickTarget.register, pickedRegister)
case runtimeValueTypeI64:
c.assembler.CompileRegisterToRegister(arm64.MOVD, pickTarget.register, pickedRegister)
case runtimeValueTypeF32:
c.assembler.CompileRegisterToRegister(arm64.FMOVS, pickTarget.register, pickedRegister)
case runtimeValueTypeF64:
c.assembler.CompileRegisterToRegister(arm64.FMOVD, pickTarget.register, pickedRegister)
case runtimeValueTypeV128Lo:
c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
pickTarget.register, pickTarget.register, pickedRegister, arm64.VectorArrangement16B)
case runtimeValueTypeV128Hi:
panic("BUG") // since pick target must point to the lower 64-bits of vectors.
}
} else if pickTarget.onStack() {
// Temporarily assign a register to the pick target, and then load the value.
pickTarget.setRegister(pickedRegister)
c.compileLoadValueOnStackToRegister(pickTarget)
// After the load, we revert the register assignment to the pick target.
pickTarget.setRegister(asm.NilRegister)
if isTargetVector {
hi := &c.locationStack.stack[pickTarget.stackPointer+1]
hi.setRegister(asm.NilRegister)
}
}
// Now we have the value of the target on the pickedRegister,
// so push the location.
c.pushRuntimeValueLocationOnRegister(pickedRegister, pickTarget.valueType)
if isTargetVector {
c.pushRuntimeValueLocationOnRegister(pickedRegister, runtimeValueTypeV128Hi)
}
return nil
}
// compileAdd implements compiler.compileAdd for the arm64 architecture.
func (c *arm64Compiler) compileAdd(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
// Addition can be a nop if one of the operands is zero.
if isZeroRegister(x1.register) {
c.pushRuntimeValueLocationOnRegister(x2.register, x1.valueType)
return nil
} else if isZeroRegister(x2.register) {
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
var inst asm.Instruction
unsignedType := wazeroir.UnsignedType(o.B1)
switch unsignedType {
case wazeroir.UnsignedTypeI32:
inst = arm64.ADDW
case wazeroir.UnsignedTypeI64:
inst = arm64.ADD
case wazeroir.UnsignedTypeF32:
inst = arm64.FADDS
case wazeroir.UnsignedTypeF64:
inst = arm64.FADDD
}
c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
// The result is placed on a register for x1, so record it.
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
// compileSub implements compiler.compileSub for the arm64 architecture.
func (c *arm64Compiler) compileSub(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
// If both registers are zero, this can be a nop, so we just push the zero register.
if isZeroRegister(x1.register) && isZeroRegister(x2.register) {
c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
return nil
}
// At this point, at least one of x1 or x2 registers is non zero.
// Choose the non-zero register as destination.
destinationReg := x1.register
if isZeroRegister(x1.register) {
destinationReg = x2.register
}
var inst asm.Instruction
var vt runtimeValueType
unsignedType := wazeroir.UnsignedType(o.B1)
switch unsignedType {
case wazeroir.UnsignedTypeI32:
inst = arm64.SUBW
vt = runtimeValueTypeI32
case wazeroir.UnsignedTypeI64:
inst = arm64.SUB
vt = runtimeValueTypeI64
case wazeroir.UnsignedTypeF32:
inst = arm64.FSUBS
vt = runtimeValueTypeF32
case wazeroir.UnsignedTypeF64:
inst = arm64.FSUBD
vt = runtimeValueTypeF64
}
c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
c.pushRuntimeValueLocationOnRegister(destinationReg, vt)
return nil
}
// compileMul implements compiler.compileMul for the arm64 architecture.
func (c *arm64Compiler) compileMul(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
// The multiplication result is zero if one of the operands is zero, so we just push the zero register.
if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
return nil
}
var inst asm.Instruction
var vt runtimeValueType
unsignedType := wazeroir.UnsignedType(o.B1)
switch unsignedType {
case wazeroir.UnsignedTypeI32:
inst = arm64.MULW
vt = runtimeValueTypeI32
case wazeroir.UnsignedTypeI64:
inst = arm64.MUL
vt = runtimeValueTypeI64
case wazeroir.UnsignedTypeF32:
inst = arm64.FMULS
vt = runtimeValueTypeF32
case wazeroir.UnsignedTypeF64:
inst = arm64.FMULD
vt = runtimeValueTypeF64
}
c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
// The result is placed on a register for x1, so record it.
c.pushRuntimeValueLocationOnRegister(x1.register, vt)
return nil
}
// compileClz implements compiler.compileClz for the arm64 architecture.
func (c *arm64Compiler) compileClz(o *wazeroir.UnionOperation) error {
v, err := c.popValueOnRegister()
if err != nil {
return err
}
unsignedInt := wazeroir.UnsignedInt(o.B1)
if isZeroRegister(v.register) {
// If the target is the zero register, the result is always 32 (or 64 for 64-bit),
// so we allocate a register and put the constant in it.
reg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
var vt runtimeValueType
if unsignedInt == wazeroir.UnsignedInt32 {
vt = runtimeValueTypeI32
c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg)
} else {
vt = runtimeValueTypeI64
c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg)
}
c.pushRuntimeValueLocationOnRegister(reg, vt)
return nil
}
reg := v.register
var vt runtimeValueType
if unsignedInt == wazeroir.UnsignedInt32 {
vt = runtimeValueTypeI32
c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg)
} else {
vt = runtimeValueTypeI64
c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg)
}
c.pushRuntimeValueLocationOnRegister(reg, vt)
return nil
}
// compileCtz implements compiler.compileCtz for the arm64 architecture.
func (c *arm64Compiler) compileCtz(o *wazeroir.UnionOperation) error {
v, err := c.popValueOnRegister()
if err != nil {
return err
}
unsignedInt := wazeroir.UnsignedInt(o.B1)
reg := v.register
if isZeroRegister(reg) {
// If the target is the zero register, the result is always 32 (or 64 for 64-bit),
// so we allocate a register and put the constant in it.
reg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
var vt runtimeValueType
if unsignedInt == wazeroir.UnsignedInt32 {
vt = runtimeValueTypeI32
c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg)
} else {
vt = runtimeValueTypeI64
c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg)
}
c.pushRuntimeValueLocationOnRegister(reg, vt)
return nil
}
// Since arm64 doesn't have an instruction that directly counts trailing zeros,
// we reverse the bits first and then do CLZ, which is exactly how
// GCC implements __builtin_ctz for arm64.
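// For illustration only (not emitted code), the same identity in Go for a 32-bit input x is
//
//	bits.TrailingZeros32(x) == bits.LeadingZeros32(bits.Reverse32(x))
//
// and the 64-bit case uses bits.Reverse64/bits.LeadingZeros64 analogously.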
var vt runtimeValueType
if unsignedInt == wazeroir.UnsignedInt32 {
vt = runtimeValueTypeI32
c.assembler.CompileRegisterToRegister(arm64.RBITW, reg, reg)
c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg)
} else {
vt = runtimeValueTypeI64
c.assembler.CompileRegisterToRegister(arm64.RBIT, reg, reg)
c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg)
}
c.pushRuntimeValueLocationOnRegister(reg, vt)
return nil
}
// compilePopcnt implements compiler.compilePopcnt for the arm64 architecture.
func (c *arm64Compiler) compilePopcnt(o *wazeroir.UnionOperation) error {
v, err := c.popValueOnRegister()
if err != nil {
return err
}
reg := v.register
if isZeroRegister(reg) {
c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
return nil
}
freg, err := c.allocateRegister(registerTypeVector)
if err != nil {
return err
}
// arm64 doesn't have an instruction for population count on scalar registers,
// so we use the vector one (VCNT).
// This is exactly how the official Go compiler implements bits.OnesCount.
// For example, "func() int { return bits.OnesCount(10) }" is compiled as
//
// MOVD $10, R0 ;; Load 10.
// FMOVD R0, F0
// VCNT V0.B8, V0.B8
// UADDLV V0.B8, V0
//
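// For illustration only (not emitted code), this computes the same result as the Go expression
//
//	bits.OnesCount64(x)
//
// for the 64-bit case: VCNT counts the set bits within each byte lane, and UADDLV sums those lane counts.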
var movInst asm.Instruction
unsignedInt := wazeroir.UnsignedInt(o.B1)
if unsignedInt == wazeroir.UnsignedInt32 {
movInst = arm64.FMOVS
} else {
movInst = arm64.FMOVD
}
c.assembler.CompileRegisterToRegister(movInst, reg, freg)
c.assembler.CompileVectorRegisterToVectorRegister(arm64.VCNT, freg, freg,
arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
c.assembler.CompileVectorRegisterToVectorRegister(arm64.UADDLV, freg, freg, arm64.VectorArrangement8B,
arm64.VectorIndexNone, arm64.VectorIndexNone)
c.assembler.CompileRegisterToRegister(movInst, freg, reg)
c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
return nil
}
// compileDiv implements compiler.compileDiv for the arm64 architecture.
func (c *arm64Compiler) compileDiv(o *wazeroir.UnionOperation) error {
dividend, divisor, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
signedType := wazeroir.SignedType(o.B1)
// If the divisor is the zero register, the division always traps, so exit from the function deterministically.
if isZeroRegister(divisor.register) {
// Push any value so that the subsequent instructions can have a consistent location stack state.
v := c.locationStack.pushRuntimeValueLocationOnStack()
switch signedType {
case wazeroir.SignedTypeInt32, wazeroir.SignedTypeUint32:
v.valueType = runtimeValueTypeI32
case wazeroir.SignedTypeUint64, wazeroir.SignedTypeInt64:
v.valueType = runtimeValueTypeI64
}
c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
return nil
}
var inst asm.Instruction
var vt runtimeValueType
switch signedType {
case wazeroir.SignedTypeUint32:
inst = arm64.UDIVW
if err := c.compileIntegerDivPrecheck(true, false, dividend.register, divisor.register); err != nil {
return err
}
vt = runtimeValueTypeI32
case wazeroir.SignedTypeUint64:
if err := c.compileIntegerDivPrecheck(false, false, dividend.register, divisor.register); err != nil {
return err
}
inst = arm64.UDIV
vt = runtimeValueTypeI64
case wazeroir.SignedTypeInt32:
if err := c.compileIntegerDivPrecheck(true, true, dividend.register, divisor.register); err != nil {
return err
}
inst = arm64.SDIVW
vt = runtimeValueTypeI32
case wazeroir.SignedTypeInt64:
if err := c.compileIntegerDivPrecheck(false, true, dividend.register, divisor.register); err != nil {
return err
}
inst = arm64.SDIV
vt = runtimeValueTypeI64
case wazeroir.SignedTypeFloat32:
inst = arm64.FDIVS
vt = runtimeValueTypeF32
case wazeroir.SignedTypeFloat64:
inst = arm64.FDIVD
vt = runtimeValueTypeF64
}
c.assembler.CompileRegisterToRegister(inst, divisor.register, dividend.register)
c.pushRuntimeValueLocationOnRegister(dividend.register, vt)
return nil
}
// compileIntegerDivPrecheck adds instructions to check that the divisor and dividend are valid for the division operation.
// First, it adds instructions to check whether the divisor equals zero and, if so, exits the function.
// Additionally, for signed divisions, it checks whether the operation would overflow.
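// For illustration only, the emitted checks correspond to the following Go-like pseudocode
// (a sketch; dividend, divisor and trap are not real identifiers here):
//
//	if divisor == 0 {
//	    trap(nativeCallStatusIntegerDivisionByZero)
//	}
//	if isSigned && divisor == -1 && dividend == math.MinInt32 /* or math.MinInt64 */ {
//	    trap(nativeCallStatusIntegerOverflow)
//	}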
func (c *arm64Compiler) compileIntegerDivPrecheck(is32Bit, isSigned bool, dividend, divisor asm.Register) error {
// We check the divisor value equals zero.
var cmpInst, movInst, loadInst asm.Instruction
var minValueOffsetInVM int64
if is32Bit {
cmpInst = arm64.CMPW
movInst = arm64.MOVW
loadInst = arm64.LDRW
minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset
} else {
cmpInst = arm64.CMP
movInst = arm64.MOVD
loadInst = arm64.LDRD
minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset
}
c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisor)
// If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero)
// Otherwise, we proceed.
// If the operation is a signed integer div, we have to do an additional check on overflow.
if isSigned {
// For signed division, we need extra branches for the "math.MinInt{32,64} / -1"
// case, which results in overflow.
// First, we compare the divisor with -1.
c.assembler.CompileConstToRegister(movInst, -1, arm64ReservedRegisterForTemporary)
c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, divisor)
// If they are not equal, we skip the following check.
brIfDivisorNonMinusOne := c.assembler.CompileJump(arm64.BCONDNE)
// Otherwise, we further check if the dividend equals math.MinInt32 or MinInt64.
c.assembler.CompileMemoryToRegister(
loadInst,
arm64ReservedRegisterForCallEngine, minValueOffsetInVM,
arm64ReservedRegisterForTemporary,
)
c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, dividend)
// If they are not equal, we are safe to execute the division.
// Otherwise, we raise an integer overflow error.
c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerOverflow)
c.assembler.SetJumpTargetOnNext(brIfDivisorNonMinusOne)
}
return nil
}
// compileRem implements compiler.compileRem for the arm64 architecture.
func (c *arm64Compiler) compileRem(o *wazeroir.UnionOperation) error {
dividend, divisor, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
dividendReg := dividend.register
divisorReg := divisor.register
// If the divisor is the zero register, the division always traps, so exit from the function deterministically.
if isZeroRegister(divisor.register) {
// Push any value so that the subsequent instructions can have a consistent location stack state.
v := c.locationStack.pushRuntimeValueLocationOnStack()
v.valueType = runtimeValueTypeI32
c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
return nil
}
var divInst, msubInst, cmpInst asm.Instruction
signedInt := wazeroir.SignedInt(o.B1)
switch signedInt {
case wazeroir.SignedUint32:
divInst = arm64.UDIVW
msubInst = arm64.MSUBW
cmpInst = arm64.CMPW
case wazeroir.SignedUint64:
divInst = arm64.UDIV
msubInst = arm64.MSUB
cmpInst = arm64.CMP
case wazeroir.SignedInt32:
divInst = arm64.SDIVW
msubInst = arm64.MSUBW
cmpInst = arm64.CMPW
case wazeroir.SignedInt64:
divInst = arm64.SDIV
msubInst = arm64.MSUB
cmpInst = arm64.CMP
}
// We check the divisor value equals zero.
c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisorReg)
// If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero)
// Otherwise, we proceed.
// Temporarily mark them used to allocate a result register while keeping these values.
c.markRegisterUsed(dividend.register, divisor.register)
resultReg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
// arm64 doesn't have a remainder instruction, so we calculate it with two instructions: UDIV (SDIV for signed) and MSUB.
// This is exactly the same code that Clang emits.
// [input: x0=dividend, x1=divisor]
// >> UDIV x2, x0, x1
// >> MSUB x3, x2, x1, x0
// [result: x2=quotient, x3=remainder]
//
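// For illustration only (not emitted code), the pair of instructions computes the remainder as in Go:
//
//	quotient := dividend / divisor
//	remainder := dividend - quotient*divisor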
c.assembler.CompileTwoRegistersToRegister(divInst, divisorReg, dividendReg, resultReg)
// ResultReg = dividendReg - (divisorReg * resultReg)
c.assembler.CompileThreeRegistersToRegister(msubInst, divisorReg, dividendReg, resultReg, resultReg)
c.markRegisterUnused(dividend.register, divisor.register)
c.pushRuntimeValueLocationOnRegister(resultReg, dividend.valueType)
return nil
}
// compileAnd implements compiler.compileAnd for the arm64 architecture.
func (c *arm64Compiler) compileAnd(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
// If either of the registers x1 or x2 is zero,
// the result will always be zero.
if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
return nil
}
// At this point, at least one of x1 or x2 registers is non zero.
// Choose the non-zero register as destination.
destinationReg := x1.register
if isZeroRegister(x1.register) {
destinationReg = x2.register
}
var inst asm.Instruction
unsignedInt := wazeroir.UnsignedInt(o.B1)
switch unsignedInt {
case wazeroir.UnsignedInt32:
inst = arm64.ANDW
case wazeroir.UnsignedInt64:
inst = arm64.AND
}
c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
// compileOr implements compiler.compileOr for the arm64 architecture.
func (c *arm64Compiler) compileOr(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
if isZeroRegister(x1.register) {
c.pushRuntimeValueLocationOnRegister(x2.register, x2.valueType)
return nil
}
if isZeroRegister(x2.register) {
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
var inst asm.Instruction
unsignedInt := wazeroir.UnsignedInt(o.B1)
switch unsignedInt {
case wazeroir.UnsignedInt32:
inst = arm64.ORRW
case wazeroir.UnsignedInt64:
inst = arm64.ORR
}
c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
// compileXor implements compiler.compileXor for the arm64 architecture.
func (c *arm64Compiler) compileXor(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
// Choose a non-zero register as the destination if one exists.
// (If both x1 and x2 are the zero register, the result is zero and pushing the zero register below is still correct.)
destinationReg := x1.register
if isZeroRegister(x1.register) {
destinationReg = x2.register
}
var inst asm.Instruction
unsignedInt := wazeroir.UnsignedInt(o.B1)
switch unsignedInt {
case wazeroir.UnsignedInt32:
inst = arm64.EORW
case wazeroir.UnsignedInt64:
inst = arm64.EOR
}
c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
c.pushRuntimeValueLocationOnRegister(destinationReg, x1.valueType)
return nil
}
// compileShl implements compiler.compileShl for the arm64 architecture.
func (c *arm64Compiler) compileShl(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
var inst asm.Instruction
unsignedInt := wazeroir.UnsignedInt(o.B1)
switch unsignedInt {
case wazeroir.UnsignedInt32:
inst = arm64.LSLW
case wazeroir.UnsignedInt64:
inst = arm64.LSL
}
c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
// compileShr implements compiler.compileShr for the arm64 architecture.
func (c *arm64Compiler) compileShr(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
var inst asm.Instruction
signedInt := wazeroir.SignedInt(o.B1)
switch signedInt {
case wazeroir.SignedInt32:
inst = arm64.ASRW
case wazeroir.SignedInt64:
inst = arm64.ASR
case wazeroir.SignedUint32:
inst = arm64.LSRW
case wazeroir.SignedUint64:
inst = arm64.LSR
}
c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
// compileRotl implements compiler.compileRotl for the arm64 architecture.
func (c *arm64Compiler) compileRotl(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
var inst, neginst asm.Instruction
unsignedInt := wazeroir.UnsignedInt(o.B1)
switch unsignedInt {
case wazeroir.UnsignedInt32:
inst = arm64.RORW
neginst = arm64.NEGW
case wazeroir.UnsignedInt64:
inst = arm64.ROR
neginst = arm64.NEG
}
// arm64 doesn't have a rotate-left instruction.
// The shift amount needs to be negated so we can use ROR instead, similar to the assembly Go emits for bits.RotateLeft.
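// For illustration only: rotating left by n bits equals rotating right by (width-n) bits, i.e. in Go-like pseudocode
//
//	rotl(x, n) == ror(x, (-n) & (width-1))
//
// where width is 32 or 64, so negating the shift amount and using ROR suffices.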
c.assembler.CompileRegisterToRegister(neginst, x2.register, x2.register)
c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
// compileRotr implements compiler.compileRotr for the arm64 architecture.
func (c *arm64Compiler) compileRotr(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
var inst asm.Instruction
unsignedInt := wazeroir.UnsignedInt(o.B1)
switch unsignedInt {
case wazeroir.UnsignedInt32:
inst = arm64.RORW
case wazeroir.UnsignedInt64:
inst = arm64.ROR
}
c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
// compileAbs implements compiler.compileAbs for the arm64 architecture.
func (c *arm64Compiler) compileAbs(o *wazeroir.UnionOperation) error {
if wazeroir.Float(o.B1) == wazeroir.Float32 {
return c.compileSimpleUnop(arm64.FABSS, runtimeValueTypeF32)
} else {
return c.compileSimpleUnop(arm64.FABSD, runtimeValueTypeF64)
}
}
// compileNeg implements compiler.compileNeg for the arm64 architecture.
func (c *arm64Compiler) compileNeg(o *wazeroir.UnionOperation) error {
if wazeroir.Float(o.B1) == wazeroir.Float32 {
return c.compileSimpleUnop(arm64.FNEGS, runtimeValueTypeF32)
} else {
return c.compileSimpleUnop(arm64.FNEGD, runtimeValueTypeF64)
}
}
// compileCeil implements compiler.compileCeil for the arm64 architecture.
func (c *arm64Compiler) compileCeil(o *wazeroir.UnionOperation) error {
if wazeroir.Float(o.B1) == wazeroir.Float32 {
return c.compileSimpleUnop(arm64.FRINTPS, runtimeValueTypeF32)
} else {
return c.compileSimpleUnop(arm64.FRINTPD, runtimeValueTypeF64)
}
}
// compileFloor implements compiler.compileFloor for the arm64 architecture.
func (c *arm64Compiler) compileFloor(o *wazeroir.UnionOperation) error {
if wazeroir.Float(o.B1) == wazeroir.Float32 {
return c.compileSimpleUnop(arm64.FRINTMS, runtimeValueTypeF32)
} else {
return c.compileSimpleUnop(arm64.FRINTMD, runtimeValueTypeF64)
}
}
// compileTrunc implements compiler.compileTrunc for the arm64 architecture.
func (c *arm64Compiler) compileTrunc(o *wazeroir.UnionOperation) error {
if wazeroir.Float(o.B1) == wazeroir.Float32 {
return c.compileSimpleUnop(arm64.FRINTZS, runtimeValueTypeF32)
} else {
return c.compileSimpleUnop(arm64.FRINTZD, runtimeValueTypeF64)
}
}
// compileNearest implements compiler.compileNearest for the arm64 architecture.
func (c *arm64Compiler) compileNearest(o *wazeroir.UnionOperation) error {
if wazeroir.Float(o.B1) == wazeroir.Float32 {
return c.compileSimpleUnop(arm64.FRINTNS, runtimeValueTypeF32)
} else {
return c.compileSimpleUnop(arm64.FRINTND, runtimeValueTypeF64)
}
}
// compileSqrt implements compiler.compileSqrt for the arm64 architecture.
func (c *arm64Compiler) compileSqrt(o *wazeroir.UnionOperation) error {
if wazeroir.Float(o.B1) == wazeroir.Float32 {
return c.compileSimpleUnop(arm64.FSQRTS, runtimeValueTypeF32)
} else {
return c.compileSimpleUnop(arm64.FSQRTD, runtimeValueTypeF64)
}
}
// compileMin implements compiler.compileMin for the arm64 architecture.
func (c *arm64Compiler) compileMin(o *wazeroir.UnionOperation) error {
if wazeroir.Float(o.B1) == wazeroir.Float32 {
return c.compileSimpleFloatBinop(arm64.FMINS)
} else {
return c.compileSimpleFloatBinop(arm64.FMIND)
}
}
// compileMax implements compiler.compileMax for the arm64 architecture.
func (c *arm64Compiler) compileMax(o *wazeroir.UnionOperation) error {
if wazeroir.Float(o.B1) == wazeroir.Float32 {
return c.compileSimpleFloatBinop(arm64.FMAXS)
} else {
return c.compileSimpleFloatBinop(arm64.FMAXD)
}
}
func (c *arm64Compiler) compileSimpleFloatBinop(inst asm.Instruction) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
// compileCopysign implements compiler.compileCopysign for the arm64 architecture.
func (c *arm64Compiler) compileCopysign(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
var ldr asm.Instruction
var minValueOffsetInVM int64
if wazeroir.Float(o.B1) == wazeroir.Float32 {
ldr = arm64.FLDRS
minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset
} else {
ldr = arm64.FLDRD
minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset
}
c.markRegisterUsed(x1.register, x2.register)
freg, err := c.allocateRegister(registerTypeVector)
if err != nil {
return err
}
// This is exactly the same code emitted by GCC for "__builtin_copysign":
//
// mov x0, -9223372036854775808
// fmov d2, x0
// vbit v0.8b, v1.8b, v2.8b
//
// "ldr freg, ce.minimum64BitSignedInt" (ce.minimum32BitSignedInt for the f32 case), i.e. load the sign-bit mask into freg.
c.assembler.CompileMemoryToRegister(
ldr,
arm64ReservedRegisterForCallEngine, minValueOffsetInVM,
freg,
)
// VBIT inserts each bit from the first operand into the destination if the corresponding bit of the second operand is 1,
// otherwise it leaves the destination bit unchanged.
// See https://developer.arm.com/documentation/dui0801/g/Advanced-SIMD-Instructions--32-bit-/VBIT
//
// "vbit vreg.8b, x2vreg.8b, x1vreg.8b" == "insert the sign bit (the most significant bit) of x2 into x1".
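// For illustration only (not emitted code), for the f64 case the result equals the Go expression
//
//	math.Float64frombits(math.Float64bits(x1)&^(1<<63) | math.Float64bits(x2)&(1<<63))
//
// i.e. x1's magnitude combined with x2's sign; the f32 case is analogous with a 1<<31 mask.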
c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VBIT,
freg, x2.register, x1.register, arm64.VectorArrangement16B)
c.markRegisterUnused(x2.register)
c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
return nil
}
// compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the arm64 architecture.
func (c *arm64Compiler) compileI32WrapFromI64() error {
return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI32)
}
// compileITruncFromF implements compiler.compileITruncFromF for the arm64 architecture.
func (c *arm64Compiler) compileITruncFromF(o *wazeroir.UnionOperation) error {
// Clear the floating point status register (FPSR).
c.assembler.CompileRegisterToRegister(arm64.MSR, arm64.RegRZR, arm64.RegFPSR)
var vt runtimeValueType
var convinst asm.Instruction
inputType := wazeroir.Float(o.B1)
outputType := wazeroir.SignedInt(o.B2)
nonTrapping := o.B3
is32bitFloat := inputType == wazeroir.Float32
if is32bitFloat && outputType == wazeroir.SignedInt32 {
convinst = arm64.FCVTZSSW
vt = runtimeValueTypeI32
} else if is32bitFloat && outputType == wazeroir.SignedInt64 {
convinst = arm64.FCVTZSS
vt = runtimeValueTypeI64
} else if !is32bitFloat && outputType == wazeroir.SignedInt32 {
convinst = arm64.FCVTZSDW
vt = runtimeValueTypeI32
} else if !is32bitFloat && outputType == wazeroir.SignedInt64 {
convinst = arm64.FCVTZSD
vt = runtimeValueTypeI64
} else if is32bitFloat && outputType == wazeroir.SignedUint32 {
convinst = arm64.FCVTZUSW
vt = runtimeValueTypeI32
} else if is32bitFloat && outputType == wazeroir.SignedUint64 {
convinst = arm64.FCVTZUS
vt = runtimeValueTypeI64
} else if !is32bitFloat && outputType == wazeroir.SignedUint32 {
convinst = arm64.FCVTZUDW
vt = runtimeValueTypeI32
} else if !is32bitFloat && outputType == wazeroir.SignedUint64 {
convinst = arm64.FCVTZUD
vt = runtimeValueTypeI64
}
source, err := c.popValueOnRegister()
if err != nil {
return err
}
sourceReg := source.register
destinationReg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.assembler.CompileRegisterToRegister(convinst, sourceReg, destinationReg)
c.pushRuntimeValueLocationOnRegister(destinationReg, vt)
if !nonTrapping {
// Obtain the floating point status register value into the general purpose register,
// so that we can check if the conversion resulted in undefined behavior.
c.assembler.CompileRegisterToRegister(arm64.MRS, arm64.RegFPSR, arm64ReservedRegisterForTemporary)
// Check if the conversion was undefined by comparing the status with 1.
// See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register
c.assembler.CompileRegisterAndConstToNone(arm64.CMP, arm64ReservedRegisterForTemporary, 1)
brOK := c.assembler.CompileJump(arm64.BCONDNE)
// If so, exit the execution with errors depending on whether or not the source value is NaN.
var floatcmp asm.Instruction
if is32bitFloat {
floatcmp = arm64.FCMPS
} else {
floatcmp = arm64.FCMPD
}
c.assembler.CompileTwoRegistersToNone(floatcmp, sourceReg, sourceReg)
// The VS flag is set if at least one of the FCMP operands is NaN.
// https://developer.arm.com/documentation/dui0801/g/Condition-Codes/Comparison-of-condition-code-meanings-in-integer-and-floating-point-code
// If the source value is not NaN, the operation was an overflow.
c.compileMaybeExitFromNativeCode(arm64.BCONDVS, nativeCallStatusIntegerOverflow)
// Otherwise, the operation was invalid as this is trying to convert NaN to integer.
c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion)
// Otherwise, we branch into the next instruction.
c.assembler.SetJumpTargetOnNext(brOK)
}
return nil
}
// compileFConvertFromI implements compiler.compileFConvertFromI for the arm64 architecture.
func (c *arm64Compiler) compileFConvertFromI(o *wazeroir.UnionOperation) error {
var convinst asm.Instruction
inputType := wazeroir.SignedInt(o.B1)
outputType := wazeroir.Float(o.B2)
if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt32 {
convinst = arm64.SCVTFWS
} else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt64 {
convinst = arm64.SCVTFS
} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt32 {
convinst = arm64.SCVTFWD
} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt64 {
convinst = arm64.SCVTFD
} else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint32 {
convinst = arm64.UCVTFWS
} else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint64 {
convinst = arm64.UCVTFS
} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint32 {
convinst = arm64.UCVTFWD
} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint64 {
convinst = arm64.UCVTFD
}
var vt runtimeValueType
if outputType == wazeroir.Float32 {
vt = runtimeValueTypeF32
} else {
vt = runtimeValueTypeF64
}
return c.compileSimpleConversion(convinst, registerTypeVector, vt)
}
// compileF32DemoteFromF64 implements compiler.compileF32DemoteFromF64 for the arm64 architecture.
func (c *arm64Compiler) compileF32DemoteFromF64() error {
return c.compileSimpleUnop(arm64.FCVTDS, runtimeValueTypeF32)
}
// compileF64PromoteFromF32 implements compiler.compileF64PromoteFromF32 for the arm64 architecture.
func (c *arm64Compiler) compileF64PromoteFromF32() error {
return c.compileSimpleUnop(arm64.FCVTSD, runtimeValueTypeF64)
}
// compileI32ReinterpretFromF32 implements compiler.compileI32ReinterpretFromF32 for the arm64 architecture.
func (c *arm64Compiler) compileI32ReinterpretFromF32() error {
if peek := c.locationStack.peek(); peek.onStack() {
// If the value is on the stack, this is a no-op since reinterpretation does not change the bit pattern.
peek.valueType = runtimeValueTypeI32
return nil
}
return c.compileSimpleConversion(arm64.FMOVS, registerTypeGeneralPurpose, runtimeValueTypeI32)
}
// compileI64ReinterpretFromF64 implements compiler.compileI64ReinterpretFromF64 for the arm64 architecture.
func (c *arm64Compiler) compileI64ReinterpretFromF64() error {
if peek := c.locationStack.peek(); peek.onStack() {
// If the value is on the stack, this is a no-op since reinterpretation does not change the bit pattern.
peek.valueType = runtimeValueTypeI64
return nil
}
return c.compileSimpleConversion(arm64.FMOVD, registerTypeGeneralPurpose, runtimeValueTypeI64)
}
// compileF32ReinterpretFromI32 implements compiler.compileF32ReinterpretFromI32 for the arm64 architecture.
func (c *arm64Compiler) compileF32ReinterpretFromI32() error {
if peek := c.locationStack.peek(); peek.onStack() {
// If the value is on the stack, this is a no-op since reinterpretation does not change the bit pattern.
peek.valueType = runtimeValueTypeF32
return nil
}
return c.compileSimpleConversion(arm64.FMOVS, registerTypeVector, runtimeValueTypeF32)
}
// compileF64ReinterpretFromI64 implements compiler.compileF64ReinterpretFromI64 for the arm64 architecture.
func (c *arm64Compiler) compileF64ReinterpretFromI64() error {
if peek := c.locationStack.peek(); peek.onStack() {
// If the value is on the stack, this is a no-op since reinterpretation does not change the bit pattern.
peek.valueType = runtimeValueTypeF64
return nil
}
return c.compileSimpleConversion(arm64.FMOVD, registerTypeVector, runtimeValueTypeF64)
}
func (c *arm64Compiler) compileSimpleConversion(inst asm.Instruction, destinationRegType registerType, resultRuntimeValueType runtimeValueType) error {
source, err := c.popValueOnRegister()
if err != nil {
return err
}
destinationReg, err := c.allocateRegister(destinationRegType)
if err != nil {
return err
}
c.assembler.CompileRegisterToRegister(inst, source.register, destinationReg)
c.pushRuntimeValueLocationOnRegister(destinationReg, resultRuntimeValueType)
return nil
}
// compileExtend implements compiler.compileExtend for the arm64 architecture.
func (c *arm64Compiler) compileExtend(o *wazeroir.UnionOperation) error {
signed := o.B1 != 0
if signed {
return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64)
} else {
return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI64)
}
}
// compileSignExtend32From8 implements compiler.compileSignExtend32From8 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend32From8() error {
return c.compileSimpleUnop(arm64.SXTBW, runtimeValueTypeI32)
}
// compileSignExtend32From16 implements compiler.compileSignExtend32From16 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend32From16() error {
return c.compileSimpleUnop(arm64.SXTHW, runtimeValueTypeI32)
}
// compileSignExtend64From8 implements compiler.compileSignExtend64From8 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend64From8() error {
return c.compileSimpleUnop(arm64.SXTB, runtimeValueTypeI64)
}
// compileSignExtend64From16 implements compiler.compileSignExtend64From16 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend64From16() error {
return c.compileSimpleUnop(arm64.SXTH, runtimeValueTypeI64)
}
// compileSignExtend64From32 implements compiler.compileSignExtend64From32 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend64From32() error {
return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64)
}
func (c *arm64Compiler) compileSimpleUnop(inst asm.Instruction, resultRuntimeValueType runtimeValueType) error {
v, err := c.popValueOnRegister()
if err != nil {
return err
}
reg := v.register
c.assembler.CompileRegisterToRegister(inst, reg, reg)
c.pushRuntimeValueLocationOnRegister(reg, resultRuntimeValueType)
return nil
}
// compileEq implements compiler.compileEq for the arm64 architecture.
func (c *arm64Compiler) compileEq(o *wazeroir.UnionOperation) error {
return c.emitEqOrNe(true, wazeroir.UnsignedType(o.B1))
}
// compileNe implements compiler.compileNe for the arm64 architecture.
func (c *arm64Compiler) compileNe(o *wazeroir.UnionOperation) error {
return c.emitEqOrNe(false, wazeroir.UnsignedType(o.B1))
}
// emitEqOrNe implements compiler.compileEq and compiler.compileNe for the arm64 architecture.
func (c *arm64Compiler) emitEqOrNe(isEq bool, unsignedType wazeroir.UnsignedType) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
var inst asm.Instruction
switch unsignedType {
case wazeroir.UnsignedTypeI32:
inst = arm64.CMPW
case wazeroir.UnsignedTypeI64:
inst = arm64.CMP
case wazeroir.UnsignedTypeF32:
inst = arm64.FCMPS
case wazeroir.UnsignedTypeF64:
inst = arm64.FCMPD
}
c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
// Push the comparison result as a conditional register value.
cond := arm64.CondNE
if isEq {
cond = arm64.CondEQ
}
c.locationStack.pushRuntimeValueLocationOnConditionalRegister(cond)
return nil
}
// compileEqz implements compiler.compileEqz for the arm64 architecture.
func (c *arm64Compiler) compileEqz(o *wazeroir.UnionOperation) error {
x1, err := c.popValueOnRegister()
if err != nil {
return err
}
var inst asm.Instruction
unsignedInt := wazeroir.UnsignedInt(o.B1)
switch unsignedInt {
case wazeroir.UnsignedInt32:
inst = arm64.CMPW
case wazeroir.UnsignedInt64:
inst = arm64.CMP
}
c.assembler.CompileTwoRegistersToNone(inst, arm64.RegRZR, x1.register)
// Push the comparison result as a conditional register value.
c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ)
return nil
}
// compileLt implements compiler.compileLt for the arm64 architecture.
func (c *arm64Compiler) compileLt(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
var inst asm.Instruction
var conditionalRegister asm.ConditionalRegisterState
signedType := wazeroir.SignedType(o.B1)
switch signedType {
case wazeroir.SignedTypeUint32:
inst = arm64.CMPW
conditionalRegister = arm64.CondLO
case wazeroir.SignedTypeUint64:
inst = arm64.CMP
conditionalRegister = arm64.CondLO
case wazeroir.SignedTypeInt32:
inst = arm64.CMPW
conditionalRegister = arm64.CondLT
case wazeroir.SignedTypeInt64:
inst = arm64.CMP
conditionalRegister = arm64.CondLT
case wazeroir.SignedTypeFloat32:
inst = arm64.FCMPS
conditionalRegister = arm64.CondMI
case wazeroir.SignedTypeFloat64:
inst = arm64.FCMPD
conditionalRegister = arm64.CondMI
}
c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
// Push the comparison result as a conditional register value.
c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
return nil
}
// compileGt implements compiler.compileGt for the arm64 architecture.
func (c *arm64Compiler) compileGt(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
var inst asm.Instruction
var conditionalRegister asm.ConditionalRegisterState
signedType := wazeroir.SignedType(o.B1)
switch signedType {
case wazeroir.SignedTypeUint32:
inst = arm64.CMPW
conditionalRegister = arm64.CondHI
case wazeroir.SignedTypeUint64:
inst = arm64.CMP
conditionalRegister = arm64.CondHI
case wazeroir.SignedTypeInt32:
inst = arm64.CMPW
conditionalRegister = arm64.CondGT
case wazeroir.SignedTypeInt64:
inst = arm64.CMP
conditionalRegister = arm64.CondGT
case wazeroir.SignedTypeFloat32:
inst = arm64.FCMPS
conditionalRegister = arm64.CondGT
case wazeroir.SignedTypeFloat64:
inst = arm64.FCMPD
conditionalRegister = arm64.CondGT
}
c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
// Push the comparison result as a conditional register value.
c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
return nil
}
// compileLe implements compiler.compileLe for the arm64 architecture.
func (c *arm64Compiler) compileLe(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
var inst asm.Instruction
var conditionalRegister asm.ConditionalRegisterState
signedType := wazeroir.SignedType(o.B1)
switch signedType {
case wazeroir.SignedTypeUint32:
inst = arm64.CMPW
conditionalRegister = arm64.CondLS
case wazeroir.SignedTypeUint64:
inst = arm64.CMP
conditionalRegister = arm64.CondLS
case wazeroir.SignedTypeInt32:
inst = arm64.CMPW
conditionalRegister = arm64.CondLE
case wazeroir.SignedTypeInt64:
inst = arm64.CMP
conditionalRegister = arm64.CondLE
case wazeroir.SignedTypeFloat32:
inst = arm64.FCMPS
conditionalRegister = arm64.CondLS
case wazeroir.SignedTypeFloat64:
inst = arm64.FCMPD
conditionalRegister = arm64.CondLS
}
c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
// Push the comparison result as a conditional register value.
c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
return nil
}
// compileGe implements compiler.compileGe for the arm64 architecture.
func (c *arm64Compiler) compileGe(o *wazeroir.UnionOperation) error {
x1, x2, err := c.popTwoValuesOnRegisters()
if err != nil {
return err
}
var inst asm.Instruction
var conditionalRegister asm.ConditionalRegisterState
signedType := wazeroir.SignedType(o.B1)
switch signedType {
case wazeroir.SignedTypeUint32:
inst = arm64.CMPW
conditionalRegister = arm64.CondHS
case wazeroir.SignedTypeUint64:
inst = arm64.CMP
conditionalRegister = arm64.CondHS
case wazeroir.SignedTypeInt32:
inst = arm64.CMPW
conditionalRegister = arm64.CondGE
case wazeroir.SignedTypeInt64:
inst = arm64.CMP
conditionalRegister = arm64.CondGE
case wazeroir.SignedTypeFloat32:
inst = arm64.FCMPS
conditionalRegister = arm64.CondGE
case wazeroir.SignedTypeFloat64:
inst = arm64.FCMPD
conditionalRegister = arm64.CondGE
}
c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
// Push the comparison result as a conditional register value.
c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
return nil
}
// compileLoad implements compiler.compileLoad for the arm64 architecture.
func (c *arm64Compiler) compileLoad(o *wazeroir.UnionOperation) error {
var (
isFloat bool
loadInst asm.Instruction
targetSizeInBytes int64
vt runtimeValueType
)
unsignedType := wazeroir.UnsignedType(o.B1)
offset := uint32(o.U2)
switch unsignedType {
case wazeroir.UnsignedTypeI32:
loadInst = arm64.LDRW
targetSizeInBytes = 32 / 8
vt = runtimeValueTypeI32
case wazeroir.UnsignedTypeI64:
loadInst = arm64.LDRD
targetSizeInBytes = 64 / 8
vt = runtimeValueTypeI64
case wazeroir.UnsignedTypeF32:
loadInst = arm64.FLDRS
isFloat = true
targetSizeInBytes = 32 / 8
vt = runtimeValueTypeF32
case wazeroir.UnsignedTypeF64:
loadInst = arm64.FLDRD
isFloat = true
targetSizeInBytes = 64 / 8
vt = runtimeValueTypeF64
}
return c.compileLoadImpl(offset, loadInst, targetSizeInBytes, isFloat, vt)
}
// compileLoad8 implements compiler.compileLoad8 for the arm64 architecture.
func (c *arm64Compiler) compileLoad8(o *wazeroir.UnionOperation) error {
var loadInst asm.Instruction
var vt runtimeValueType
signedInt := wazeroir.SignedInt(o.B1)
offset := uint32(o.U2)
switch signedInt {
case wazeroir.SignedInt32:
loadInst = arm64.LDRSBW
vt = runtimeValueTypeI32
case wazeroir.SignedInt64:
loadInst = arm64.LDRSBD
vt = runtimeValueTypeI64
case wazeroir.SignedUint32:
loadInst = arm64.LDRB
vt = runtimeValueTypeI32
case wazeroir.SignedUint64:
loadInst = arm64.LDRB
vt = runtimeValueTypeI64
}
return c.compileLoadImpl(offset, loadInst, 1, false, vt)
}
// compileLoad16 implements compiler.compileLoad16 for the arm64 architecture.
func (c *arm64Compiler) compileLoad16(o *wazeroir.UnionOperation) error {
var loadInst asm.Instruction
var vt runtimeValueType
signedInt := wazeroir.SignedInt(o.B1)
offset := uint32(o.U2)
switch signedInt {
case wazeroir.SignedInt32:
loadInst = arm64.LDRSHW
vt = runtimeValueTypeI32
case wazeroir.SignedInt64:
loadInst = arm64.LDRSHD
vt = runtimeValueTypeI64
case wazeroir.SignedUint32:
loadInst = arm64.LDRH
vt = runtimeValueTypeI32
case wazeroir.SignedUint64:
loadInst = arm64.LDRH
vt = runtimeValueTypeI64
}
return c.compileLoadImpl(offset, loadInst, 16/8, false, vt)
}
// compileLoad32 implements compiler.compileLoad32 for the arm64 architecture.
func (c *arm64Compiler) compileLoad32(o *wazeroir.UnionOperation) error {
var loadInst asm.Instruction
signed := o.B1 == 1
offset := uint32(o.U2)
if signed {
loadInst = arm64.LDRSW
} else {
loadInst = arm64.LDRW
}
return c.compileLoadImpl(offset, loadInst, 32/8, false, runtimeValueTypeI64)
}
// compileLoadImpl is the common implementation of the compileLoad* variants for the arm64 architecture.
func (c *arm64Compiler) compileLoadImpl(offsetArg uint32, loadInst asm.Instruction,
targetSizeInBytes int64, isFloat bool, resultRuntimeValueType runtimeValueType,
) error {
offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
if err != nil {
return err
}
resultRegister := offsetReg
if isFloat {
resultRegister, err = c.allocateRegister(registerTypeVector)
if err != nil {
return err
}
}
// "resultRegister = [arm64ReservedRegisterForMemory + offsetReg]"
// In other words, "resultRegister = memory.Buffer[offset: offset+targetSizeInBytes]"
c.assembler.CompileMemoryWithRegisterOffsetToRegister(
loadInst,
arm64ReservedRegisterForMemory, offsetReg,
resultRegister,
)
c.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType)
return nil
}
// compileStore implements compiler.compileStore for the arm64 architecture.
func (c *arm64Compiler) compileStore(o *wazeroir.UnionOperation) error {
var movInst asm.Instruction
var targetSizeInBytes int64
unsignedType := wazeroir.UnsignedType(o.B1)
offset := uint32(o.U2)
switch unsignedType {
case wazeroir.UnsignedTypeI32:
movInst = arm64.STRW
targetSizeInBytes = 32 / 8
case wazeroir.UnsignedTypeI64:
movInst = arm64.STRD
targetSizeInBytes = 64 / 8
case wazeroir.UnsignedTypeF32:
movInst = arm64.FSTRS
targetSizeInBytes = 32 / 8
case wazeroir.UnsignedTypeF64:
movInst = arm64.FSTRD
targetSizeInBytes = 64 / 8
}
return c.compileStoreImpl(offset, movInst, targetSizeInBytes)
}
// compileStore8 implements compiler.compileStore8 for the arm64 architecture.
func (c *arm64Compiler) compileStore8(o *wazeroir.UnionOperation) error {
return c.compileStoreImpl(uint32(o.U2), arm64.STRB, 1)
}
// compileStore16 implements compiler.compileStore16 for the arm64 architecture.
func (c *arm64Compiler) compileStore16(o *wazeroir.UnionOperation) error {
return c.compileStoreImpl(uint32(o.U2), arm64.STRH, 16/8)
}
// compileStore32 implements compiler.compileStore32 for the arm64 architecture.
func (c *arm64Compiler) compileStore32(o *wazeroir.UnionOperation) error {
return c.compileStoreImpl(uint32(o.U2), arm64.STRW, 32/8)
}
// compileStoreImpl is the common implementation of the compileStore* variants for the arm64 architecture.
func (c *arm64Compiler) compileStoreImpl(offsetArg uint32, storeInst asm.Instruction, targetSizeInBytes int64) error {
val, err := c.popValueOnRegister()
if err != nil {
return err
}
// Mark temporarily used as compileMemoryAccessOffsetSetup might try allocating register.
c.markRegisterUsed(val.register)
offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
if err != nil {
return err
}
// "[arm64ReservedRegisterForMemory + offsetReg] = val.register"
// In other words, "memory.Buffer[offset: offset+targetSizeInBytes] = val.register"
c.assembler.CompileRegisterToMemoryWithRegisterOffset(
storeInst, val.register,
arm64ReservedRegisterForMemory, offsetReg,
)
c.markRegisterUnused(val.register)
return nil
}
// compileMemoryAccessOffsetSetup pops the top value from the stack (called "base"), stores "base + offsetArg"
// into a register, and returns the stored register. We call the result "offset" because we access the memory
// as memory.Buffer[offset: offset+targetSizeInBytes].
//
// Note: this also emits the instructions to check for out-of-bounds memory access.
// In other words, if offset+targetSizeInBytes exceeds the memory size, the code exits with the nativeCallStatusCodeMemoryOutOfBounds status.
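// For illustration only, the emitted code corresponds to the following Go-like pseudocode
// (a sketch; base denotes the popped value and trap is not a real identifier):
//
//	ceil := uint64(base) + uint64(offsetArg) + uint64(targetSizeInBytes)
//	if ceil > uint64(len(memory.Buffer)) {
//	    trap(nativeCallStatusCodeMemoryOutOfBounds)
//	}
//	offset := ceil - uint64(targetSizeInBytes) // the access then reads/writes memory.Buffer[offset:ceil]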
func (c *arm64Compiler) compileMemoryAccessOffsetSetup(offsetArg uint32, targetSizeInBytes int64) (offsetRegister asm.Register, err error) {
base, err := c.popValueOnRegister()
if err != nil {
return 0, err
}
offsetRegister = base.register
if isZeroRegister(base.register) {
offsetRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return
}
c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetRegister)
}
if offsetConst := int64(offsetArg) + targetSizeInBytes; offsetConst <= math.MaxUint32 {
// "offsetRegister = base + offsetArg + targetSizeInBytes"
c.assembler.CompileConstToRegister(arm64.ADD, offsetConst, offsetRegister)
} else {
// If the offset const is too large, we exit with nativeCallStatusCodeMemoryOutOfBounds.
c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
return
}
// "arm64ReservedRegisterForTemporary = len(memory.Buffer)"
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
arm64ReservedRegisterForTemporary)
// Check if offsetRegister(= base+offsetArg+targetSizeInBytes) > len(memory.Buffer).
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, offsetRegister)
// If offsetRegister(= base+offsetArg+targetSizeInBytes) exceeds the memory length,
// we exit the function with nativeCallStatusCodeMemoryOutOfBounds.
c.compileMaybeExitFromNativeCode(arm64.BCONDLS, nativeCallStatusCodeMemoryOutOfBounds)
// Otherwise, we subtract targetSizeInBytes from offsetRegister.
c.assembler.CompileConstToRegister(arm64.SUB, targetSizeInBytes, offsetRegister)
return offsetRegister, nil
}
// compileMemoryGrow implements compiler.compileMemoryGrow for the arm64 architecture.
func (c *arm64Compiler) compileMemoryGrow() error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexMemoryGrow); err != nil {
return err
}
// After return, we re-initialize reserved registers just like preamble of functions.
c.compileReservedStackBasePointerRegisterInitialization()
c.compileReservedMemoryRegisterInitialization()
return nil
}
// compileMemorySize implements compiler.compileMemorySize for the arm64 architecture.
func (c *arm64Compiler) compileMemorySize() error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
reg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
// "reg = len(memory.Buffer)"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
reg,
)
// memory.size returns the memory size in pages, so we have to divide the byte length by the page size.
// "reg = reg >> wasm.MemoryPageSizeInBits (== reg / wasm.MemoryPageSize)"
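// For illustration only: with the standard 64 KiB Wasm page size, this is equivalent to the Go expression
//
//	pages := uint32(len(memory.Buffer) / 65536) // 65536 == 1 << wasm.MemoryPageSizeInBits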
c.assembler.CompileConstToRegister(
arm64.LSR,
wasm.MemoryPageSizeInBits,
reg,
)
c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32)
return nil
}
// compileCallGoFunction adds instructions to call a Go function identified by the builtinFunction index.
// compilerStatus is set before making the call, and it should be either nativeCallStatusCodeCallBuiltInFunction or
// nativeCallStatusCodeCallGoHostFunction.
func (c *arm64Compiler) compileCallGoFunction(compilerStatus nativeCallStatusCode, builtinFunction wasm.Index) error {
// Release all the registers, as our calling convention is caller-save.
if err := c.compileReleaseAllRegistersToStack(); err != nil {
return err
}
if compilerStatus == nativeCallStatusCodeCallBuiltInFunction {
// Set the target built-in function index in ce.builtinFunctionCallIndex.
// "tmp = $index"
c.assembler.CompileConstToRegister(
arm64.MOVD,
int64(builtinFunction),
arm64ReservedRegisterForTemporary,
)
// "[arm64ReservedRegisterForCallEngine + callEngineExitContextBuiltinFunctionCallIndexOffset] = tmp"
// In other words, "ce.builtinFunctionCallIndex = tmp (== $index)"
c.assembler.CompileRegisterToMemory(
arm64.STRW,
arm64ReservedRegisterForTemporary,
arm64ReservedRegisterForCallEngine, callEngineExitContextBuiltinFunctionCallIndexOffset,
)
}
c.compileExitFromNativeCode(compilerStatus)
return nil
}
// compileConstI32 implements compiler.compileConstI32 for the arm64 architecture.
func (c *arm64Compiler) compileConstI32(o *wazeroir.UnionOperation) error {
return c.compileIntConstant(true, o.U1)
}
// compileConstI64 implements compiler.compileConstI64 for the arm64 architecture.
func (c *arm64Compiler) compileConstI64(o *wazeroir.UnionOperation) error {
return c.compileIntConstant(false, o.U1)
}
// compileIntConstant adds instructions to load an integer constant.
// is32bit is true if the target value is originally a 32-bit constant, false otherwise.
// value holds the constant to load (zero-extended in the 32-bit case).
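// For illustration only: a zero constant is pushed as the zero register without emitting any instruction,
// while a non-zero constant such as 5 is loaded with "MOVW/MOVD $5, reg" into a newly allocated register.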
func (c *arm64Compiler) compileIntConstant(is32bit bool, value uint64) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
var inst asm.Instruction
var vt runtimeValueType
if is32bit {
inst = arm64.MOVW
vt = runtimeValueTypeI32
} else {
inst = arm64.MOVD
vt = runtimeValueTypeI64
}
if value == 0 {
c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, vt)
} else {
// Take a register to load the value.
reg, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.assembler.CompileConstToRegister(inst, int64(value), reg)
c.pushRuntimeValueLocationOnRegister(reg, vt)
}
return nil
}
// compileConstF32 implements compiler.compileConstF32 for the arm64 architecture.
func (c *arm64Compiler) compileConstF32(o *wazeroir.UnionOperation) error {
return c.compileFloatConstant(true, o.U1 /*uint64(math.Float32bits(o.Value))*/)
}
// compileConstF64 implements compiler.compileConstF64 for the arm64 architecture.
func (c *arm64Compiler) compileConstF64(o *wazeroir.UnionOperation) error {
return c.compileFloatConstant(false, o.U1 /*math.Float64bits(o.Value)*/)
}
// compileFloatConstant adds instructions to load a float constant.
// is32bit is true if the target value is originally a 32-bit constant, false otherwise.
// value holds the bit representation of the float constant to load (zero-extended in the 32-bit case).
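// For illustration only: an f32 constant 1.0 arrives here as value == uint64(math.Float32bits(1.0)) == 0x3f800000
// and is materialized as "MOVW $0x3f800000, tmp" followed by "FMOVS tmp, freg".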
func (c *arm64Compiler) compileFloatConstant(is32bit bool, value uint64) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
// Take a register to load the value.
reg, err := c.allocateRegister(registerTypeVector)
if err != nil {
return err
}
tmpReg := arm64.RegRZR
if value != 0 {
tmpReg = arm64ReservedRegisterForTemporary
var inst asm.Instruction
if is32bit {
inst = arm64.MOVW
} else {
inst = arm64.MOVD
}
c.assembler.CompileConstToRegister(inst, int64(value), tmpReg)
}
// Use FMOV instruction to move the value on integer register into the float one.
var inst asm.Instruction
var vt runtimeValueType
if is32bit {
vt = runtimeValueTypeF32
inst = arm64.FMOVS
} else {
vt = runtimeValueTypeF64
inst = arm64.FMOVD
}
c.assembler.CompileRegisterToRegister(inst, tmpReg, reg)
c.pushRuntimeValueLocationOnRegister(reg, vt)
return nil
}
// compileMemoryInit implements compiler.compileMemoryInit for the arm64 architecture.
func (c *arm64Compiler) compileMemoryInit(o *wazeroir.UnionOperation) error {
dataIndex := uint32(o.U1)
return c.compileInitImpl(false, dataIndex, 0)
}
// compileInitImpl implements compileTableInit and compileMemoryInit.
//
// TODO: the compiled code in this function should be reused and compiled only once, as
// the code is independent of any module.
func (c *arm64Compiler) compileInitImpl(isTable bool, index, tableIndex uint32) error {
outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
if isTable {
outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
}
copySize, err := c.popValueOnRegister()
if err != nil {
return err
}
c.markRegisterUsed(copySize.register)
sourceOffset, err := c.popValueOnRegister()
if err != nil {
return err
}
if isZeroRegister(sourceOffset.register) {
sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
}
c.markRegisterUsed(sourceOffset.register)
destinationOffset, err := c.popValueOnRegister()
if err != nil {
return err
}
if isZeroRegister(destinationOffset.register) {
destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
}
c.markRegisterUsed(destinationOffset.register)
tableInstanceAddressReg := asm.NilRegister
if isTable {
tableInstanceAddressReg, err = c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.markRegisterUsed(tableInstanceAddressReg)
}
if !isZeroRegister(copySize.register) {
// sourceOffset += size.
c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
// destinationOffset += size.
c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
}
instanceAddr, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
if isTable {
c.compileLoadElemInstanceAddress(index, instanceAddr)
} else {
c.compileLoadDataInstanceAddress(index, instanceAddr)
}
// Check the source offset against the data (or element) instance bounds.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
instanceAddr, 8, // Both DataInstance and ElementInstance store their length at offset 8.
arm64ReservedRegisterForTemporary)
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
// If not, raise out of bounds memory access error.
c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
// Otherwise, ready to copy the value from source to destination.
// Check destination bounds.
if isTable {
// arm64ReservedRegisterForTemporary = &tables[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
arm64ReservedRegisterForTemporary)
// tableInstanceAddressReg = arm64ReservedRegisterForTemporary + tableIndex*8
// = &tables[0] + tableIndex*8
// = &tables[tableIndex]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
tableInstanceAddressReg)
// arm64ReservedRegisterForTemporary = [tableInstanceAddressReg+tableInstanceTableLenOffset] = len(tables[tableIndex])
c.assembler.CompileMemoryToRegister(arm64.LDRD,
tableInstanceAddressReg, tableInstanceTableLenOffset,
arm64ReservedRegisterForTemporary)
} else {
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
arm64ReservedRegisterForTemporary)
}
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
// If not, raise out of bounds memory access error.
c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
// Otherwise, ready to copy the value from source to destination.
if !isZeroRegister(copySize.register) {
// If the size equals zero, we can skip the entire copy loop below.
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)
var ldr, str asm.Instruction
var movSize int64
if isTable {
ldr, str = arm64.LDRD, arm64.STRD
movSize = 8
// arm64ReservedRegisterForTemporary = &Table[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD, tableInstanceAddressReg,
tableInstanceTableOffset, arm64ReservedRegisterForTemporary)
// destinationOffset = (destinationOffset<< pointerSizeLog2) + arm64ReservedRegisterForTemporary
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
destinationOffset.register, pointerSizeLog2,
arm64ReservedRegisterForTemporary, destinationOffset.register)
// arm64ReservedRegisterForTemporary = &ElementInstance.References[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
// sourceOffset = (sourceOffset<< pointerSizeLog2) + arm64ReservedRegisterForTemporary
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
sourceOffset.register, pointerSizeLog2,
arm64ReservedRegisterForTemporary, sourceOffset.register)
// copySize = copySize << pointerSizeLog2
c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
} else {
ldr, str = arm64.LDRB, arm64.STRB
movSize = 1
// destinationOffset += memory buffer's absolute address.
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
// sourceOffset += data buffer's absolute address.
c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, sourceOffset.register)
}
// Negate the counter.
c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)
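// For illustration only (Go-like pseudocode of the loop below): since both offsets were advanced by copySize,
// a negative counter walks both regions forward until it reaches zero:
//
//	for i := -copySize; i < 0; i += movSize {
//	    destination[i] = source[i] // indices relative to the advanced offsets
//	}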
beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
// arm64ReservedRegisterForTemporary = [sourceOffset + (size.register)]
c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
sourceOffset.register, copySize.register,
arm64ReservedRegisterForTemporary)
// [destinationOffset + (size.register)] = arm64ReservedRegisterForTemporary.
c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
arm64ReservedRegisterForTemporary,
destinationOffset.register, copySize.register,
)
// Advance the negative counter by movSize and, if the value is still negative, continue the loop.
c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)
c.assembler.SetJumpTargetOnNext(skipCopyJump)
}
c.markRegisterUnused(copySize.register, sourceOffset.register,
destinationOffset.register, instanceAddr, tableInstanceAddressReg)
return nil
}
// compileDataDrop implements compiler.compileDataDrop for the arm64 architecture.
func (c *arm64Compiler) compileDataDrop(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
dataIndex := uint32(o.U1)
c.compileLoadDataInstanceAddress(dataIndex, tmp)
// Clears the content of DataInstances[dataIndex] (a []byte).
c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
return nil
}
func (c *arm64Compiler) compileLoadDataInstanceAddress(dataIndex uint32, dst asm.Register) {
// dst = dataIndex * dataInstanceStructSize
c.assembler.CompileConstToRegister(arm64.MOVD, int64(dataIndex)*dataInstanceStructSize, dst)
// arm64ReservedRegisterForTemporary = &moduleInstance.DataInstances[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
arm64ReservedRegisterForTemporary,
)
// dst = arm64ReservedRegisterForTemporary + dst
// = &moduleInstance.DataInstances[0] + dataIndex*dataInstanceStructSize
// = &moduleInstance.DataInstances[dataIndex]
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
}
// compileMemoryCopy implements compiler.compileMemoryCopy for the arm64 architecture.
func (c *arm64Compiler) compileMemoryCopy() error {
return c.compileCopyImpl(false, 0, 0)
}
// compileCopyImpl implements compileTableCopy and compileMemoryCopy.
//
// TODO: the compiled code in this function should be reused and compiled only once, as
// the code is independent of any module.
func (c *arm64Compiler) compileCopyImpl(isTable bool, srcTableIndex, dstTableIndex uint32) error {
outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
if isTable {
outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
}
copySize, err := c.popValueOnRegister()
if err != nil {
return err
}
c.markRegisterUsed(copySize.register)
sourceOffset, err := c.popValueOnRegister()
if err != nil {
return err
}
if isZeroRegister(sourceOffset.register) {
sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
}
c.markRegisterUsed(sourceOffset.register)
destinationOffset, err := c.popValueOnRegister()
if err != nil {
return err
}
if isZeroRegister(destinationOffset.register) {
destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
}
c.markRegisterUsed(destinationOffset.register)
if !isZeroRegister(copySize.register) {
// sourceOffset += size.
c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
// destinationOffset += size.
c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
}
if isTable {
// arm64ReservedRegisterForTemporary = &tables[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
arm64ReservedRegisterForTemporary)
// arm64ReservedRegisterForTemporary = arm64ReservedRegisterForTemporary + srcTableIndex*8
// = &tables[0] + srcTableIndex*sizeOf(*tableInstance)
// = &tables[srcTableIndex]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
arm64ReservedRegisterForTemporary)
// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[srcTableIndex])
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
arm64ReservedRegisterForTemporary)
} else {
// arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
arm64ReservedRegisterForTemporary)
}
// Check len(memory buffer or source table) >= sourceOffset (== original source offset + copy size).
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
// If not, raise the out of bounds error (memory access or table access, depending on the target).
c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
// Otherwise, check len(memory buffer or destination table) >= destinationOffset.
if isTable {
// arm64ReservedRegisterForTemporary = &tables[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
arm64ReservedRegisterForTemporary)
// arm64ReservedRegisterForTemporary = arm64ReservedRegisterForTemporary + dstTableIndex*8
// = &tables[0] + dstTableIndex*sizeOf(*tableInstance)
// = &tables[dstTableIndex]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
arm64ReservedRegisterForTemporary)
// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[dstTableIndex])
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
arm64ReservedRegisterForTemporary)
}
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
// If not, raise the out of bounds error (memory access or table access, depending on the target).
c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
// Otherwise, ready to copy the value from source to destination.
var ldr, str asm.Instruction
var movSize int64
if isTable {
ldr, str = arm64.LDRD, arm64.STRD
movSize = 8
} else {
ldr, str = arm64.LDRB, arm64.STRB
movSize = 1
}
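// ldr/str and movSize chosen above set the width of each element handled by the loops
// below: an 8-byte reference per table element, or a single byte for linear memory.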
// If the size equals zero, we can skip all of the instructions below.
if !isZeroRegister(copySize.register) {
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)
// If source offset < destination offset: for (i = size-1; i >= 0; i--) dst[i] = src[i];
c.assembler.CompileTwoRegistersToNone(arm64.CMP, sourceOffset.register, destinationOffset.register)
destLowerThanSourceJump := c.assembler.CompileJump(arm64.BCONDLS)
var endJump asm.Node
{
// sourceOffset -= size.
c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, sourceOffset.register)
// destinationOffset -= size.
c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, destinationOffset.register)
if isTable {
// arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
arm64ReservedRegisterForTemporary)
// destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0]
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
destinationOffset.register, pointerSizeLog2,
arm64ReservedRegisterForTemporary, destinationOffset.register)
// arm64ReservedRegisterForTemporary = &Tables[srcTableIndex].Table[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
arm64ReservedRegisterForTemporary)
// sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0]
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
sourceOffset.register, pointerSizeLog2,
arm64ReservedRegisterForTemporary, sourceOffset.register)
// copySize = copySize << pointerSizeLog2 as each table element is 8 bytes and we copy them one by one.
c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
} else {
// sourceOffset += memory buffer's absolute address.
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
// destinationOffset += memory buffer's absolute address.
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
}
beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
// copySize -= movSize (1 byte for memory, 8 bytes for tables).
c.assembler.CompileConstToRegister(arm64.SUBS, movSize, copySize.register)
// arm64ReservedRegisterForTemporary = [sourceOffset + (size.register)]
c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
sourceOffset.register, copySize.register,
arm64ReservedRegisterForTemporary)
// [destinationOffset + (size.register)] = arm64ReservedRegisterForTemporary.
c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
arm64ReservedRegisterForTemporary,
destinationOffset.register, copySize.register,
)
// If the value in copySize.register has not reached zero yet, continue the loop.
c.assembler.CompileJump(arm64.BCONDNE).AssignJumpTarget(beginCopyLoop)
// Otherwise, exit the loop.
endJump = c.assembler.CompileJump(arm64.B)
}
// Else (destination offset <= source offset): for (i = 0; i < size; i++) dst[i] = src[i];
c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump)
{
if isTable {
// arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
arm64ReservedRegisterForTemporary)
// destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0]
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
destinationOffset.register, pointerSizeLog2,
arm64ReservedRegisterForTemporary, destinationOffset.register)
// arm64ReservedRegisterForTemporary = &Tables[srcTableIndex].Table[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
arm64ReservedRegisterForTemporary)
// sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0]
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
sourceOffset.register, pointerSizeLog2,
arm64ReservedRegisterForTemporary, sourceOffset.register)
// copySize = copySize << pointerSizeLog2 as each table element is 8 bytes and we copy them one by one.
c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
} else {
// sourceOffset += memory buffer's absolute address.
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
// destinationOffset += memory buffer's absolute address.
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
}
// Negate the counter.
c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)
beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
// arm64ReservedRegisterForTemporary = [sourceOffset + (size.register)]
c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
sourceOffset.register, copySize.register,
arm64ReservedRegisterForTemporary)
// [destinationOffset + (size.register)] = arm64ReservedRegisterForTemporary.
c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
arm64ReservedRegisterForTemporary,
destinationOffset.register, copySize.register,
)
// copySize += movSize (1 byte for memory, 8 bytes for tables); while the counter is still negative, continue the loop.
c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)
}
c.assembler.SetJumpTargetOnNext(skipCopyJump)
c.assembler.SetJumpTargetOnNext(endJump)
}
// Mark all of the operand registers as unused.
c.markRegisterUnused(copySize.register, sourceOffset.register, destinationOffset.register)
return nil
}
// compileMemoryFill implements compiler.compileMemoryFill for the arm64 architecture.
func (c *arm64Compiler) compileMemoryFill() error {
return c.compileFillImpl(false, 0)
}
// compileFillImpl implements compileTableFill and compileMemoryFill.
//
// TODO: the compiled code in this function should be reused and compiled only once,
// as the code is independent of any module.
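//
// The emitted code is roughly equivalent to the following Go sketch (illustrative only;
// dst stands for the destination region, and the bound is checked before the loop runs):
//
//	for i := 0; i < size; i++ {
//		dst[destinationOffset+i] = value
//	}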
func (c *arm64Compiler) compileFillImpl(isTable bool, tableIndex uint32) error {
outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
if isTable {
outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
}
fillSize, err := c.popValueOnRegister()
if err != nil {
return err
}
c.markRegisterUsed(fillSize.register)
value, err := c.popValueOnRegister()
if err != nil {
return err
}
c.markRegisterUsed(value.register)
destinationOffset, err := c.popValueOnRegister()
if err != nil {
return err
}
if isZeroRegister(destinationOffset.register) {
destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
}
c.markRegisterUsed(destinationOffset.register)
// destinationOffset += size.
c.assembler.CompileRegisterToRegister(arm64.ADD, fillSize.register, destinationOffset.register)
if isTable {
// arm64ReservedRegisterForTemporary = &tables[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
arm64ReservedRegisterForTemporary)
// arm64ReservedRegisterForTemporary = arm64ReservedRegisterForTemporary + tableIndex*8
// = &tables[0] + tableIndex*sizeOf(*tableInstance)
// = &tables[tableIndex]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
arm64ReservedRegisterForTemporary)
// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[tableIndex])
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
arm64ReservedRegisterForTemporary)
} else {
// arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
arm64ReservedRegisterForTemporary)
}
// Check len >= destinationOffset.
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
// If not, raise the runtime error.
c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
// Otherwise, we are ready to fill the destination region with the value.
// If the size equals zero, we can skip the entire instructions below.
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, fillSize.register)
skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)
// destinationOffset -= size.
c.assembler.CompileRegisterToRegister(arm64.SUB, fillSize.register, destinationOffset.register)
var str asm.Instruction
var movSize int64
if isTable {
str = arm64.STRD
movSize = 8
// arm64ReservedRegisterForTemporary = &Tables[tableIndex].Table[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
arm64ReservedRegisterForTemporary)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
arm64ReservedRegisterForTemporary)
// destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[tableIndex].Table[0]
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
destinationOffset.register, pointerSizeLog2,
arm64ReservedRegisterForTemporary, destinationOffset.register)
// fillSize = fillSize << pointerSizeLog2 as each table element is 8 bytes and we fill them one by one.
c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, fillSize.register)
} else {
str = arm64.STRB
movSize = 1
// destinationOffset += memory buffer's absolute address.
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
}
// Naively implement the fill with a "for loop" that writes the value one element at a time (a single byte for memory, an 8-byte reference for tables).
beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
// fillSize -= movSize (1 byte for memory, 8 bytes for tables).
c.assembler.CompileConstToRegister(arm64.SUBS, movSize, fillSize.register)
// [destinationOffset + (fillSize.register)] = value.register.
c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
value.register,
destinationOffset.register, fillSize.register,
)
// If the value in fillSize.register has not reached zero yet, continue the loop.
continueJump := c.assembler.CompileJump(arm64.BCONDNE)
continueJump.AssignJumpTarget(beginCopyLoop)
// Mark all of the operand registers as unused.
c.markRegisterUnused(fillSize.register, value.register, destinationOffset.register)
c.assembler.SetJumpTargetOnNext(skipCopyJump)
return nil
}
// compileTableInit implements compiler.compileTableInit for the arm64 architecture.
func (c *arm64Compiler) compileTableInit(o *wazeroir.UnionOperation) error {
elemIndex := uint32(o.U1)
tableIndex := uint32(o.U2)
return c.compileInitImpl(true, elemIndex, tableIndex)
}
// compileTableCopy implements compiler.compileTableCopy for the arm64 architecture.
func (c *arm64Compiler) compileTableCopy(o *wazeroir.UnionOperation) error {
return c.compileCopyImpl(true, uint32(o.U1), uint32(o.U2))
}
// compileElemDrop implements compiler.compileElemDrop for the arm64 architecture.
func (c *arm64Compiler) compileElemDrop(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
elemIndex := uint32(o.U1)
c.compileLoadElemInstanceAddress(elemIndex, tmp)
// Clears the content of ElementInstances[elemIndex] (== []interface{} type).
c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
return nil
}
func (c *arm64Compiler) compileLoadElemInstanceAddress(elemIndex uint32, dst asm.Register) {
// dst = elemIndex * elementInstanceStructSize
c.assembler.CompileConstToRegister(arm64.MOVD, int64(elemIndex)*elementInstanceStructSize, dst)
// arm64ReservedRegisterForTemporary = &moduleInstance.ElementInstances[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
arm64ReservedRegisterForTemporary,
)
// dst = arm64ReservedRegisterForTemporary + dst
// = &moduleInstance.ElementInstances[0] + elemIndex*elementInstanceStructSize
// = &moduleInstance.ElementInstances[elemIndex]
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
}
// compileRefFunc implements compiler.compileRefFunc for the arm64 architecture.
func (c *arm64Compiler) compileRefFunc(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
ref, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForCallEngine + callEngineModuleContextFunctionsElement0AddressOffset]
// = &moduleEngine.functions[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
ref)
// ref = ref + functionIndex*sizeOf(function)
// = &moduleEngine.functions[functionIndex]
functionIndex := int64(o.U1)
c.assembler.CompileConstToRegister(arm64.ADD,
functionIndex*functionSize,
ref,
)
c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64)
return nil
}
// compileTableGet implements compiler.compileTableGet for the arm64 architecture.
func (c *arm64Compiler) compileTableGet(o *wazeroir.UnionOperation) error {
ref, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.markRegisterUsed(ref)
offset, err := c.popValueOnRegister()
if err != nil {
return err
}
// arm64ReservedRegisterForTemporary = &tables[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
arm64ReservedRegisterForTemporary)
// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
// = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
// = [&tables[TableIndex]] = tables[TableIndex].
tableIndex := int64(o.U1)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableIndex*8,
arm64ReservedRegisterForTemporary)
// Out of bounds check.
// ref = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
ref,
)
// "cmp ref, offset"
c.assembler.CompileTwoRegistersToNone(arm64.CMP, ref, offset.register)
// If it exceeds len(table), we exit the execution.
c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)
// ref = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
ref,
)
// ref = (offset << pointerSizeLog2) + ref
// = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset
// = &tables[TableIndex].References[offset]
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
offset.register, pointerSizeLog2, ref, ref)
// ref = [&tables[TableIndex].References[offset]] = load the Reference's pointer as uint64.
c.assembler.CompileMemoryToRegister(arm64.LDRD, ref, 0, ref)
c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) // table elements are opaque 64-bit at runtime.
return nil
}
// compileTableSet implements compiler.compileTableSet for the arm64 architecture.
func (c *arm64Compiler) compileTableSet(o *wazeroir.UnionOperation) error {
ref := c.locationStack.pop()
if err := c.compileEnsureOnRegister(ref); err != nil {
return err
}
offset := c.locationStack.pop()
if err := c.compileEnsureOnRegister(offset); err != nil {
return err
}
tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
// arm64ReservedRegisterForTemporary = &tables[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
arm64ReservedRegisterForTemporary)
// arm64ReservedRegisterForTemporary = arm64ReservedRegisterForTemporary + TableIndex*8
// = &tables[0] + TableIndex*sizeOf(*tableInstance)
// = &tables[TableIndex]
tableIndex := int64(o.U1)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableIndex*8,
arm64ReservedRegisterForTemporary)
// Out of bounds check.
// tmp = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
tmp,
)
// "cmp tmp, offset"
c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp, offset.register)
// If it exceeds len(table), we exit the execution.
c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)
// tmp = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
tmp,
)
// tmp = (offset << pointerSizeLog2) + tmp
// = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset
// = &tables[TableIndex].References[offset]
c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, offset.register, pointerSizeLog2, tmp, tmp)
// Set the reference's raw pointer.
c.assembler.CompileRegisterToMemory(arm64.STRD, ref.register, tmp, 0)
c.markRegisterUnused(offset.register, ref.register, tmp)
return nil
}
// compileTableGrow implements compiler.compileTableGrow for the arm64 architecture.
func (c *arm64Compiler) compileTableGrow(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
// Pushes the table index.
tableIndex := o.U1
if err := c.compileIntConstant(true, tableIndex); err != nil {
return err
}
// Table grow cannot be done in assembly, just like memory grow, as it involves allocation in Go.
// Therefore, we call out to the built-in function for this purpose.
if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexTableGrow); err != nil {
return err
}
// TableGrow consumes three values (table index, number of items, initial value).
for i := 0; i < 3; i++ {
c.locationStack.pop()
}
// Then, the previous table length is pushed as the result.
v := c.locationStack.pushRuntimeValueLocationOnStack()
v.valueType = runtimeValueTypeI32
// After return, we re-initialize the reserved registers just like in the preamble of functions.
c.compileReservedStackBasePointerRegisterInitialization()
c.compileReservedMemoryRegisterInitialization()
return nil
}
// compileTableSize implements compiler.compileTableSize for the arm64 architecture.
func (c *arm64Compiler) compileTableSize(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
return err
}
result, err := c.allocateRegister(registerTypeGeneralPurpose)
if err != nil {
return err
}
c.markRegisterUsed(result)
// arm64ReservedRegisterForTemporary = &tables[0]
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
arm64ReservedRegisterForTemporary)
// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
// = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
// = [&tables[TableIndex]] = tables[TableIndex].
tableIndex := int64(o.U1)
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableIndex*8,
arm64ReservedRegisterForTemporary)
// result = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
result,
)
c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
return nil
}
// compileTableFill implements compiler.compileTableFill for the arm64 architecture.
func (c *arm64Compiler) compileTableFill(o *wazeroir.UnionOperation) error {
tableIndex := uint32(o.U1)
return c.compileFillImpl(true, tableIndex)
}
// popTwoValuesOnRegisters pops two values from the location stack, ensures
// these two values are located on registers, and marks them unused.
//
// TODO: we'd usually prefix this with compileXXX as this might end up emitting instructions,
// but the name seems awkward.
func (c *arm64Compiler) popTwoValuesOnRegisters() (x1, x2 *runtimeValueLocation, err error) {
x2 = c.locationStack.pop()
if err = c.compileEnsureOnRegister(x2); err != nil {
return
}
x1 = c.locationStack.pop()
if err = c.compileEnsureOnRegister(x1); err != nil {
return
}
c.markRegisterUnused(x2.register)
c.markRegisterUnused(x1.register)
return
}
// popValueOnRegister pops one value from the location stack, ensures
// that it is located on a register, and marks it unused.
//
// TODO: we'd usually prefix this with compileXXX as this might end up emitting instructions,
// but the name seems awkward.
func (c *arm64Compiler) popValueOnRegister() (v *runtimeValueLocation, err error) {
v = c.locationStack.pop()
if err = c.compileEnsureOnRegister(v); err != nil {
return
}
c.markRegisterUnused(v.register)
return
}
// compileEnsureOnRegister emits instructions to ensure that a value is located on a register.
func (c *arm64Compiler) compileEnsureOnRegister(loc *runtimeValueLocation) (err error) {
if loc.onStack() {
reg, err := c.allocateRegister(loc.getRegisterType())
if err != nil {
return err
}
// Record that the value holds the register and the register is marked used.
loc.setRegister(reg)
c.markRegisterUsed(reg)
c.compileLoadValueOnStackToRegister(loc)
} else if loc.onConditionalRegister() {
err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
}
return
}
// maybeCompileMoveTopConditionalToGeneralPurposeRegister moves the top value on the stack
// to a general purpose register if the value is located on a conditional register.
//
// This is usually called at the beginning of methods on the compiler interface where we possibly
// compile instructions without saving the conditional register value.
// Any compile* function that does not call this is expected to save the conditional
// value to the stack or a register itself, e.g. by invoking compileEnsureOnRegister for the top value.
func (c *arm64Compiler) maybeCompileMoveTopConditionalToGeneralPurposeRegister() (err error) {
if c.locationStack.sp > 0 {
if loc := c.locationStack.peek(); loc.onConditionalRegister() {
err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
}
}
return
}
// compileLoadConditionalRegisterToGeneralPurposeRegister saves the conditional register value
// to a general purpose register.
func (c *arm64Compiler) compileLoadConditionalRegisterToGeneralPurposeRegister(loc *runtimeValueLocation) error {
reg, err := c.allocateRegister(loc.getRegisterType())
if err != nil {
return err
}
c.markRegisterUsed(reg)
c.assembler.CompileConditionalRegisterSet(loc.conditionalRegister, reg)
// Record that now the value is located on a general purpose register.
loc.setRegister(reg)
return nil
}
// compileLoadValueOnStackToRegister implements compiler.compileLoadValueOnStackToRegister for arm64.
func (c *arm64Compiler) compileLoadValueOnStackToRegister(loc *runtimeValueLocation) {
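// Each virtual stack slot is 8 bytes wide, hence the int64(loc.stackPointer)*8 offsets below;
// a v128 value occupies two consecutive slots (lo and hi).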
switch loc.valueType {
case runtimeValueTypeI32:
c.assembler.CompileMemoryToRegister(arm64.LDRW, arm64ReservedRegisterForStackBasePointerAddress,
int64(loc.stackPointer)*8, loc.register)
case runtimeValueTypeI64:
c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForStackBasePointerAddress,
int64(loc.stackPointer)*8, loc.register)
case runtimeValueTypeF32:
c.assembler.CompileMemoryToRegister(arm64.FLDRS, arm64ReservedRegisterForStackBasePointerAddress,
int64(loc.stackPointer)*8, loc.register)
case runtimeValueTypeF64:
c.assembler.CompileMemoryToRegister(arm64.FLDRD, arm64ReservedRegisterForStackBasePointerAddress,
int64(loc.stackPointer)*8, loc.register)
case runtimeValueTypeV128Lo:
c.assembler.CompileMemoryToVectorRegister(arm64.VMOV,
arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register,
arm64.VectorArrangementQ)
// Higher 64-bits are loaded as well ^^.
hi := &c.locationStack.stack[loc.stackPointer+1]
hi.setRegister(loc.register)
case runtimeValueTypeV128Hi:
panic("BUG: V128Hi must be be loaded to a register along with V128Lo")
}
}
// allocateRegister implements compiler.allocateRegister for arm64.
func (c *arm64Compiler) allocateRegister(t registerType) (reg asm.Register, err error) {
var ok bool
// Try to get the unused register.
reg, ok = c.locationStack.takeFreeRegister(t)
if ok {
return
}
// If not found, we have to steal the register.
stealTarget, ok := c.locationStack.takeStealTargetFromUsedRegister(t)
if !ok {
err = fmt.Errorf("cannot steal register")
return
}
// Release the steal target register value onto stack location.
reg = stealTarget.register
c.compileReleaseRegisterToStack(stealTarget)
return
}
// compileReleaseAllRegistersToStack adds instructions to store all the values located on
// either general purpose or conditional registers onto the memory stack.
// See compileReleaseRegisterToStack.
func (c *arm64Compiler) compileReleaseAllRegistersToStack() (err error) {
for i := uint64(0); i < c.locationStack.sp; i++ {
if loc := &c.locationStack.stack[i]; loc.onRegister() {
c.compileReleaseRegisterToStack(loc)
} else if loc.onConditionalRegister() {
if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil {
return
}
c.compileReleaseRegisterToStack(loc)
}
}
return
}
// compileReleaseRegisterToStack adds an instruction to write the value on a register back to the memory stack region.
func (c *arm64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) {
switch loc.valueType {
case runtimeValueTypeI32:
c.assembler.CompileRegisterToMemory(arm64.STRW, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
case runtimeValueTypeI64:
c.assembler.CompileRegisterToMemory(arm64.STRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
case runtimeValueTypeF32:
c.assembler.CompileRegisterToMemory(arm64.FSTRS, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
case runtimeValueTypeF64:
c.assembler.CompileRegisterToMemory(arm64.FSTRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
case runtimeValueTypeV128Lo:
c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8,
arm64.VectorArrangementQ)
// Higher 64-bits are released as well ^^.
hi := &c.locationStack.stack[loc.stackPointer+1]
c.locationStack.releaseRegister(hi)
case runtimeValueTypeV128Hi:
panic("BUG: V128Hi must be released to the stack along with V128Lo")
default:
panic("BUG")
}
// Mark the register as free.
c.locationStack.releaseRegister(loc)
}
// compileReservedStackBasePointerRegisterInitialization adds instructions to initialize arm64ReservedRegisterForStackBasePointerAddress
// so that it points to the absolute address of the stack base for this function.
func (c *arm64Compiler) compileReservedStackBasePointerRegisterInitialization() {
// First, load the address of the first element in the value stack into arm64ReservedRegisterForStackBasePointerAddress temporarily.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineStackContextStackElement0AddressOffset,
arm64ReservedRegisterForStackBasePointerAddress)
// Next, we load the stack base pointer in bytes (ce.stackBasePointerInBytes) into arm64ReservedRegisterForTemporary.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
arm64ReservedRegisterForTemporary)
// Finally, we calculate "arm64ReservedRegisterForStackBasePointerAddress (== &stack[0]) + arm64ReservedRegisterForTemporary (== stackBasePointerInBytes)",
// which is the absolute address of the stack base for this function.
c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForStackBasePointerAddress)
}
func (c *arm64Compiler) compileReservedMemoryRegisterInitialization() {
if c.ir.HasMemory || c.ir.UsesMemory {
// "arm64ReservedRegisterForMemory = ce.MemoryElement0Address"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
arm64ReservedRegisterForMemory,
)
}
}
// compileModuleContextInitialization adds instructions to initialize ce.moduleContext's fields based on
// ce.moduleContext.ModuleInstanceAddress.
// This is called in two cases: in function preamble, and on the return from (non-Go) function calls.
func (c *arm64Compiler) compileModuleContextInitialization() error {
tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
if !found {
panic("BUG: all the registers should be free at this point")
}
c.markRegisterUsed(tmpX)
tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
if !found {
panic("BUG: all the registers should be free at this point")
}
c.markRegisterUsed(tmpY)
// "tmpX = ce.ModuleInstanceAddress"
c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset, tmpX)
// If the module instance address stays the same, we could skip the entire code below.
c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64CallingConventionModuleInstanceAddressRegister, tmpX)
brIfModuleUnchanged := c.assembler.CompileJump(arm64.BCONDEQ)
// Otherwise, update the moduleEngine.moduleContext.ModuleInstanceAddress.
c.assembler.CompileRegisterToMemory(arm64.STRD,
arm64CallingConventionModuleInstanceAddressRegister,
arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset,
)
// Also, we have to update the following fields:
// * callEngine.moduleContext.globalElement0Address
// * callEngine.moduleContext.memoryElement0Address
// * callEngine.moduleContext.memorySliceLen
// * callEngine.moduleContext.memoryInstance
// * callEngine.moduleContext.tableElement0Address
// * callEngine.moduleContext.tableSliceLen
// * callEngine.moduleContext.functionsElement0Address
// * callEngine.moduleContext.typeIDsElement0Address
// * callEngine.moduleContext.dataInstancesElement0Address
// * callEngine.moduleContext.elementInstancesElement0Address
// Update globalElement0Address.
//
// Note: if there's a global.get or global.set instruction in the function, the existence of the globals
// is ensured by function validation at module instantiation phase, and that's why it is ok to
// skip the initialization if the module's globals slice is empty.
if len(c.ir.Globals) > 0 {
// "tmpX = &moduleInstance.Globals[0]"
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceGlobalsOffset,
tmpX,
)
// "ce.GlobalElement0Address = tmpX (== &moduleInstance.Globals[0])"
c.assembler.CompileRegisterToMemory(
arm64.STRD, tmpX,
arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
)
}
// Update memoryElement0Address and memorySliceLen.
//
// Note: if there's a memory instruction in the function, the memory instance must be non-nil.
// That is ensured by function validation at module instantiation phase, and that's
// why it is ok to skip the initialization if the module's memory instance is nil.
if c.ir.HasMemory {
// "tmpX = moduleInstance.Memory"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceMemoryOffset,
tmpX,
)
// First, set ce.memoryInstance
c.assembler.CompileRegisterToMemory(
arm64.STRD,
tmpX,
arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset,
)
// Next, we write the memory length into ce.MemorySliceLen.
//
// "tmpY = [tmpX + memoryInstanceBufferLenOffset] (== len(memory.Buffer))"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
tmpX, memoryInstanceBufferLenOffset,
tmpY,
)
// "ce.MemorySliceLen = tmpY".
c.assembler.CompileRegisterToMemory(
arm64.STRD,
tmpY,
arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
)
// Finally, we write ce.memoryElement0Address.
//
// "tmpY = *tmpX (== &memory.Buffer[0])"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
tmpX, memoryInstanceBufferOffset,
tmpY,
)
// "ce.memoryElement0Address = tmpY".
c.assembler.CompileRegisterToMemory(
arm64.STRD,
tmpY,
arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
)
}
// Update tableElement0Address, tableSliceLen and typeIDsElement0Address.
//
// Note: if there's a table instruction in the function, the existence of the table
// is ensured by function validation at module instantiation phase, and that's
// why it is ok to skip the initialization if the module's table doesn't exist.
if c.ir.HasTable {
// "tmpX = &tables[0] (type of **wasm.Table)"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTablesOffset,
tmpX,
)
// Update ce.tableElement0Address.
// "ce.tableElement0Address = tmpX".
c.assembler.CompileRegisterToMemory(
arm64.STRD,
tmpX,
arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
)
// Finally, we put &ModuleInstance.TypeIDs[0] into moduleContext.typeIDsElement0Address.
c.assembler.CompileMemoryToRegister(arm64.LDRD,
arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTypeIDsOffset, tmpX)
c.assembler.CompileRegisterToMemory(arm64.STRD,
tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset)
}
// Update callEngine.moduleContext.functionsElement0Address
{
// "tmpX = [moduleInstanceAddressRegister + moduleInstanceEngineOffset + interfaceDataOffset] (== *moduleEngine)"
//
// Go's interface is laid out in memory as two quad words, as in struct {tab, data uintptr},
// where tab points to the interface table, and data points to the actual
// implementation of the interface. In this case, we extract the "data" pointer as *moduleEngine.
// See the following references for detail:
// * https://research.swtch.com/interfaces
// * https://github.com/golang/go/blob/release-branch.go1.20/src/runtime/runtime2.go#L207-L210
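//
// Illustratively (not the actual runtime declaration), the layout is:
//
//	type iface struct {
//		tab  uintptr // pointer to the interface/type metadata (itab)
//		data uintptr // pointer to the concrete value; here, the *moduleEngine
//	}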
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceEngineOffset+interfaceDataOffset,
tmpX,
)
// "tmpY = [tmpX + moduleEngineFunctionsOffset] (== &moduleEngine.functions[0])"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
tmpX, moduleEngineFunctionsOffset,
tmpY,
)
// "callEngine.moduleContext.functionsElement0Address = tmpY".
c.assembler.CompileRegisterToMemory(
arm64.STRD,
tmpY,
arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
)
}
// Update dataInstancesElement0Address.
if c.ir.HasDataInstances {
// "tmpX = &moduleInstance.DataInstances[0]"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset,
tmpX,
)
// "callEngine.moduleContext.dataInstancesElement0Address = tmpX".
c.assembler.CompileRegisterToMemory(
arm64.STRD,
tmpX,
arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
)
}
// Update callEngine.moduleContext.elementInstancesElement0Address
if c.ir.HasElementInstances {
// "tmpX = &moduleInstance.DataInstances[0]"
c.assembler.CompileMemoryToRegister(
arm64.LDRD,
arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset,
tmpX,
)
// "callEngine.moduleContext.dataInstancesElement0Address = tmpX".
c.assembler.CompileRegisterToMemory(
arm64.STRD,
tmpX,
arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
)
}
c.assembler.SetJumpTargetOnNext(brIfModuleUnchanged)
c.markRegisterUnused(tmpX, tmpY)
return nil
}