compiler: reuses allocated runtimeValueLocation stacks (#1348)

Signed-off-by: Takeshi Yoneda <takeshi@tetrate.io>
Takeshi Yoneda
2023-04-09 22:58:47 -07:00
committed by GitHub
parent f167939c88
commit b6d19696da
9 changed files with 506 additions and 138 deletions
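The core of this change: rather than allocating a fresh runtimeValueLocationStack for every function compilation, the compiler resets and reuses the one allocated previously. A minimal standalone sketch of that reuse pattern follows; valueLoc and valueStack are illustrative stand-ins, not the actual wazero types.

package main

import "fmt"

// valueLoc and valueStack are simplified stand-ins for wazero's
// runtimeValueLocation and runtimeValueLocationStack.
type valueLoc struct{ register int }

type valueStack struct {
	sp    uint64
	stack []valueLoc
}

// reset truncates the stack to zero length while keeping the backing
// array, so the next compilation reuses the existing allocation.
func (s *valueStack) reset() {
	stack := s.stack[:0] // length 0, capacity preserved
	*s = valueStack{stack: stack}
}

func main() {
	s := &valueStack{sp: 5, stack: make([]valueLoc, 100)}
	before := cap(s.stack)
	s.reset()
	fmt.Println(s.sp == 0 && cap(s.stack) == before) // true: allocation reused
}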

View File

@@ -6,6 +6,7 @@ import (
"github.com/tetratelabs/wazero/internal/asm"
"github.com/tetratelabs/wazero/internal/testing/require"
"github.com/tetratelabs/wazero/internal/wasm"
"github.com/tetratelabs/wazero/internal/wazeroir"
)
@@ -19,6 +20,7 @@ func Test_compileDropRange(t *testing.T) {
t.Run("start at the top", func(t *testing.T) {
c := newCompiler()
c.Init(&wasm.FunctionType{}, nil, false)
// Use up all unreserved registers.
for _, reg := range unreservedGeneralPurposeRegisters {
@@ -93,6 +95,7 @@ func Test_getTemporariesForStackedLiveValues(t *testing.T) {
t.Run("no stacked values", func(t *testing.T) {
liveValues := []runtimeValueLocation{{register: 1}, {register: 2}}
c := newCompiler()
c.Init(&wasm.FunctionType{}, nil, false)
gpTmp, vecTmp, err := getTemporariesForStackedLiveValues(c, liveValues)
require.NoError(t, err)
@@ -111,6 +114,7 @@ func Test_getTemporariesForStackedLiveValues(t *testing.T) {
{valueType: runtimeValueTypeI64},
}
c := newCompiler()
c.Init(&wasm.FunctionType{}, nil, false)
if !freeRegisterExists {
// Use up all the unreserved gp registers.
@@ -151,6 +155,7 @@ func Test_getTemporariesForStackedLiveValues(t *testing.T) {
{valueType: runtimeValueTypeV128Hi},
}
c := newCompiler()
c.Init(&wasm.FunctionType{}, nil, false)
if !freeRegisterExists {
// Use up all the unreserved gp registers.
@@ -185,6 +190,7 @@ func Test_migrateLiveValue(t *testing.T) {
t.Run("already on register", func(t *testing.T) {
// In this case, we don't use tmp registers.
c := newCompiler()
c.Init(&wasm.FunctionType{}, nil, false)
// Push the dummy values.
for i := 0; i < 10; i++ {

View File

@@ -34,7 +34,7 @@ func TestCompiler_releaseRegisterToStack(t *testing.T) {
require.NoError(t, err)
// Set up the location stack so that we push the const at the specified height.
s := runtimeValueLocationStack{
s := &runtimeValueLocationStack{
sp: tc.stackPointer,
stack: make([]runtimeValueLocation, tc.stackPointer),
unreservedVectorRegisters: unreservedVectorRegisters,
@@ -527,6 +527,11 @@ func TestCompiler_compileSelect(t *testing.T) {
t.Run(fmt.Sprintf("x1=0x%x,x2=0x%x", vals[0], vals[1]), func(t *testing.T) {
env := newCompilerEnvironment()
compiler := env.requireNewCompiler(t, &wasm.FunctionType{}, newCompiler, nil)
// To make the assertion below stable, we preallocate the underlying stack,
// so that the pointer to the entry remains stable.
compiler.runtimeValueLocationStack().stack = make([]runtimeValueLocation, 100)
err := compiler.compilePreamble()
require.NoError(t, err)
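For context on why these tests preallocate 100 entries: in Go, appending past a slice's capacity reallocates the backing array, so any pointer taken into the old array goes stale. A small sketch of that behavior, using plain ints rather than anything wazero-specific:

package main

import "fmt"

func main() {
	s := make([]int, 1, 1) // full: the next append must reallocate
	p := &s[0]
	s = append(s, 2)        // the backing array is replaced
	fmt.Println(p == &s[0]) // false: the old pointer is stale

	s2 := make([]int, 1, 100) // preallocated: appends stay in place
	p2 := &s2[0]
	s2 = append(s2, 2)
	fmt.Println(p2 == &s2[0]) // true: the pointer stays valid
}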

View File

@@ -247,7 +247,7 @@ type compilerImpl interface {
assignStackPointerCeil(uint64)
setStackPointerCeil(uint64)
compileReleaseRegisterToStack(loc *runtimeValueLocation)
setRuntimeValueLocationStack(runtimeValueLocationStack)
setRuntimeValueLocationStack(*runtimeValueLocationStack)
compileEnsureOnRegister(loc *runtimeValueLocation) error
compileModuleContextInitialization() error
}
@@ -277,6 +277,8 @@ func requireRuntimeLocationStackPointerEqual(t *testing.T, expSP uint64, c compi
// TestCompileI32WrapFromI64 is the regression test for https://github.com/tetratelabs/wazero/issues/1008
func TestCompileI32WrapFromI64(t *testing.T) {
c := newCompiler()
c.Init(&wasm.FunctionType{}, nil, false)
// Push the original i64 value.
loc := c.runtimeValueLocationStack().pushRuntimeValueLocationOnStack()
loc.valueType = runtimeValueTypeI64

View File

@@ -111,7 +111,6 @@ func (v *runtimeValueLocation) String() string {
func newRuntimeValueLocationStack() runtimeValueLocationStack {
return runtimeValueLocationStack{
stack: make([]runtimeValueLocation, 10),
unreservedVectorRegisters: unreservedVectorRegisters,
unreservedGeneralPurposeRegisters: unreservedGeneralPurposeRegisters,
}
@@ -141,14 +140,13 @@ type runtimeValueLocationStack struct {
unreservedGeneralPurposeRegisters, unreservedVectorRegisters []asm.Register
}
func (v *runtimeValueLocationStack) initialized() bool {
return len(v.unreservedGeneralPurposeRegisters) > 0
}
func (v *runtimeValueLocationStack) reset() {
v.stackPointerCeil, v.sp = 0, 0
v.stack = v.stack[:0]
v.usedRegisters = usedRegistersMask(0)
stack := v.stack[:0]
*v = runtimeValueLocationStack{
unreservedVectorRegisters: unreservedVectorRegisters,
unreservedGeneralPurposeRegisters: unreservedGeneralPurposeRegisters,
stack: stack,
}
}
func (v *runtimeValueLocationStack) String() string {
@@ -160,16 +158,19 @@ func (v *runtimeValueLocationStack) String() string {
return fmt.Sprintf("sp=%d, stack=[%s], used_registers=[%s]", v.sp, strings.Join(stackStr, ","), strings.Join(usedRegisters, ","))
}
func (v *runtimeValueLocationStack) clone() runtimeValueLocationStack {
ret := runtimeValueLocationStack{}
ret.sp = v.sp
ret.usedRegisters = v.usedRegisters
ret.stack = make([]runtimeValueLocation, len(v.stack))
copy(ret.stack, v.stack)
ret.stackPointerCeil = v.stackPointerCeil
ret.unreservedGeneralPurposeRegisters = v.unreservedGeneralPurposeRegisters
ret.unreservedVectorRegisters = v.unreservedVectorRegisters
return ret
// cloneFrom clones the values of `from` into self, except for the .stack slice.
// The contents of .stack are copied from the origin into self, growing the underlying
// slice if necessary.
func (v *runtimeValueLocationStack) cloneFrom(from runtimeValueLocationStack) {
// Assign the same values to all fields except for the stack, which we want to reuse.
prev := v.stack
*v = from
v.stack = prev[:cap(prev)] // Expand the length to the capacity so that we can minimize "diff" below.
// Copy the content in the stack.
if diff := int(from.sp) - len(v.stack); diff > 0 {
v.stack = append(v.stack, make([]runtimeValueLocation, diff)...)
}
copy(v.stack, from.stack[:from.sp])
}
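The grow-only-if-needed copy in cloneFrom is a general idiom: reuse the destination's backing array and extend it only when the source's live portion is longer. A sketch over plain ints; copyInto is an illustrative helper, not part of wazero:

package main

import "fmt"

// copyInto copies src into dst, reusing dst's backing array and
// growing it only when src is longer, mirroring cloneFrom's handling
// of the .stack field.
func copyInto(dst, src []int) []int {
	dst = dst[:cap(dst)] // expand to capacity to minimize the growth below
	if diff := len(src) - len(dst); diff > 0 {
		dst = append(dst, make([]int, diff)...)
	}
	copy(dst, src)
	return dst
}

func main() {
	dst := make([]int, 2, 8)
	out := copyInto(dst, []int{1, 2, 3})
	fmt.Println(out[:3], cap(out)) // [1 2 3] 8: no reallocation happened
}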
// pushRuntimeValueLocationOnRegister creates a new runtimeValueLocation with a given register and pushes onto

View File

@@ -2,7 +2,6 @@ package compiler
import (
"testing"
"unsafe"
"github.com/tetratelabs/wazero/internal/asm"
"github.com/tetratelabs/wazero/internal/testing/require"
@@ -42,17 +41,6 @@ func TestRuntimeValueLocationStack_basic(t *testing.T) {
s.releaseRegister(loc)
require.False(t, s.usedRegisters.exist(loc.register))
require.Equal(t, asm.NilRegister, loc.register)
// Clone.
cloned := s.clone()
require.Equal(t, s.usedRegisters, cloned.usedRegisters)
require.Equal(t, s.unreservedGeneralPurposeRegisters, cloned.unreservedGeneralPurposeRegisters)
require.Equal(t, s.unreservedVectorRegisters, cloned.unreservedVectorRegisters)
require.Equal(t, len(s.stack), len(cloned.stack))
require.Equal(t, s.sp, cloned.sp)
for i := 0; i < int(s.sp); i++ {
actual, exp := &s.stack[i], &cloned.stack[i]
require.NotEqual(t, uintptr(unsafe.Pointer(exp)), uintptr(unsafe.Pointer(actual)))
}
// Check the max stack pointer.
for i := 0; i < 1000; i++ {
s.pushRuntimeValueLocationOnStack()
@@ -207,7 +195,9 @@ func TestRuntimeValueLocation_pushCallFrame(t *testing.T) {
t.Run(sig.String(), func(t *testing.T) {
s := newRuntimeValueLocationStack()
// pushCallFrame assumes that the parameters are already pushed.
s.sp += uint64(sig.ParamNumInUint64)
for i := 0; i < sig.ParamNumInUint64; i++ {
_ = s.pushRuntimeValueLocationOnStack()
}
retAddr, stackBasePointer, fn := s.pushCallFrame(sig)
@@ -230,3 +220,50 @@ func Test_usedRegistersMask(t *testing.T) {
require.False(t, mask.exist(r))
}
}
func TestRuntimeValueLocation_cloneFrom(t *testing.T) {
t.Run("sp<cap", func(t *testing.T) {
v := runtimeValueLocationStack{sp: 7, stack: make([]runtimeValueLocation, 5, 10)}
orig := v.stack
v.cloneFrom(runtimeValueLocationStack{sp: 3, usedRegisters: 0xffff, stack: []runtimeValueLocation{
{register: 3}, {register: 2}, {register: 1},
}})
require.Equal(t, uint64(3), v.sp)
require.Equal(t, usedRegistersMask(0xffff), v.usedRegisters)
// Underlying stack shouldn't have changed since sp=3 < cap(v.stack).
require.Equal(t, &orig[0], &v.stack[0])
require.Equal(t, v.stack[0].register, asm.Register(3))
require.Equal(t, v.stack[1].register, asm.Register(2))
require.Equal(t, v.stack[2].register, asm.Register(1))
})
t.Run("sp=cap", func(t *testing.T) {
v := runtimeValueLocationStack{stack: make([]runtimeValueLocation, 0, 3)}
orig := v.stack[:cap(v.stack)]
v.cloneFrom(runtimeValueLocationStack{sp: 3, usedRegisters: 0xffff, stack: []runtimeValueLocation{
{register: 3}, {register: 2}, {register: 1},
}})
require.Equal(t, uint64(3), v.sp)
require.Equal(t, usedRegistersMask(0xffff), v.usedRegisters)
// Underlying stack shouldn't have changed since sp=3==cap(v.stack).
require.Equal(t, &orig[0], &v.stack[0])
require.Equal(t, v.stack[0].register, asm.Register(3))
require.Equal(t, v.stack[1].register, asm.Register(2))
require.Equal(t, v.stack[2].register, asm.Register(1))
})
t.Run("sp>cap", func(t *testing.T) {
v := runtimeValueLocationStack{stack: make([]runtimeValueLocation, 0, 3)}
orig := v.stack[:cap(v.stack)]
v.cloneFrom(runtimeValueLocationStack{sp: 5, usedRegisters: 0xffff, stack: []runtimeValueLocation{
{register: 5}, {register: 4}, {register: 3}, {register: 2}, {register: 1},
}})
require.Equal(t, uint64(5), v.sp)
require.Equal(t, usedRegistersMask(0xffff), v.usedRegisters)
// Underlying stack should have changed since sp=5>cap(v.stack).
require.NotEqual(t, &orig[0], &v.stack[0])
require.Equal(t, v.stack[0].register, asm.Register(5))
require.Equal(t, v.stack[1].register, asm.Register(4))
require.Equal(t, v.stack[2].register, asm.Register(3))
require.Equal(t, v.stack[3].register, asm.Register(2))
require.Equal(t, v.stack[4].register, asm.Register(1))
})
}

View File

@@ -87,7 +87,7 @@ type amd64Compiler struct {
cpuFeatures platform.CpuFeatureFlags
// locationStack holds the state of the wazeroir virtual stack,
// where each item is placed either in a register or on the actual memory stack.
locationStack runtimeValueLocationStack
locationStack *runtimeValueLocationStack
// labels hold per wazeroir label specific information in this function.
labels [wazeroir.LabelKindNum][]amd64LabelInfo
// stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation.
@@ -97,47 +97,71 @@ type amd64Compiler struct {
withListener bool
typ *wasm.FunctionType
br *bytes.Reader
// locationStackForEntrypoint is the initial location stack for all functions. To reuse the allocated stack,
// we cache it here, then reset it and assign it to .locationStack in the Init method.
locationStackForEntrypoint runtimeValueLocationStack
// frameIDMax tracks the maximum frame ID seen per function.
frameIDMax int
brTableTmp []runtimeValueLocation
}
func newAmd64Compiler() compiler {
c := &amd64Compiler{
assembler: amd64.NewAssembler(),
locationStack: newRuntimeValueLocationStack(),
cpuFeatures: platform.CpuFeatures,
br: bytes.NewReader(nil),
assembler: amd64.NewAssembler(),
locationStackForEntrypoint: newRuntimeValueLocationStack(),
cpuFeatures: platform.CpuFeatures,
br: bytes.NewReader(nil),
}
return c
}
// Init implements compiler.Init.
func (c *amd64Compiler) Init(typ *wasm.FunctionType, ir *wazeroir.CompilationResult, withListener bool) {
assembler, locationStack := c.assembler, c.locationStack
assembler.Reset()
locationStack.reset()
for i := range c.labels {
c.labels[i] = c.labels[i][:0]
}
c.assembler.Reset()
c.locationStackForEntrypoint.reset()
c.resetLabels()
*c = amd64Compiler{
ir: ir,
assembler: assembler,
locationStack: locationStack,
cpuFeatures: c.cpuFeatures,
withListener: withListener,
labels: c.labels,
typ: typ,
br: c.br,
ir: ir,
withListener: withListener,
typ: typ,
assembler: c.assembler,
cpuFeatures: c.cpuFeatures,
labels: c.labels,
br: c.br,
locationStackForEntrypoint: c.locationStackForEntrypoint,
brTableTmp: c.brTableTmp,
}
// Reuses the initial location stack for the compilation of subsequent functions.
c.locationStack = &c.locationStackForEntrypoint
}
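Init here uses a struct-reset idiom: reassign the whole struct literal so every per-function field is zeroed, while explicitly carrying over the fields whose allocations should survive. A minimal sketch under simplified, illustrative types:

package main

import "fmt"

// comp is an illustrative stand-in for the compiler struct.
type comp struct {
	ir  string // per-compilation state, reset each time
	tmp []int  // reusable scratch allocation
}

func (c *comp) Init(ir string) {
	// Zero everything, then carry over the allocations we keep.
	*c = comp{
		ir:  ir,
		tmp: c.tmp,
	}
}

func main() {
	c := &comp{tmp: make([]int, 0, 64)}
	c.Init("func0")
	fmt.Println(c.ir, cap(c.tmp)) // func0 64: the buffer outlives the reset
}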
// resetLabels resets the existing content in amd64Compiler.labels so that
// we can reuse the allocated slices and stacks in subsequent compilations.
func (c *amd64Compiler) resetLabels() {
for i := range c.labels {
for j := range c.labels[i] {
if j > c.frameIDMax {
// We only need to reset up to the maximum frame ID; this makes compilation faster for large binaries.
break
}
l := &c.labels[i][j]
l.initialInstruction = nil
l.stackInitialized = false
l.initialStack.reset()
}
}
}
// runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the amd64 architecture.
func (c *amd64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack {
return &c.locationStack
return c.locationStack
}
// setLocationStack sets the given runtimeValueLocationStack to .locationStack field,
// while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks.
// This is called when we branch into a different block.
func (c *amd64Compiler) setLocationStack(newStack runtimeValueLocationStack) {
func (c *amd64Compiler) setLocationStack(newStack *runtimeValueLocationStack) {
if c.stackPointerCeil < c.locationStack.stackPointerCeil {
c.stackPointerCeil = c.locationStack.stackPointerCeil
}
@@ -163,17 +187,23 @@ type amd64LabelInfo struct {
// initialInstruction is the initial instruction for this label so other block can jump into it.
initialInstruction asm.Node
// initialStack is the initial value location stack from which we start compiling this label.
initialStack runtimeValueLocationStack
initialStack runtimeValueLocationStack
stackInitialized bool
}
func (c *amd64Compiler) label(label wazeroir.Label) *amd64LabelInfo {
kind := label.Kind()
frames := c.labels[kind]
frameID := label.FrameID()
if c.frameIDMax < frameID {
c.frameIDMax = frameID
}
// If the frameID is not allocated yet, expand the slice,
// so that we can reduce allocations in subsequent compilations.
if diff := frameID - len(frames) + 1; diff > 0 {
frames = append(frames, make([]amd64LabelInfo, diff*2)...)
for i := 0; i < diff; i++ {
frames = append(frames, amd64LabelInfo{initialStack: newRuntimeValueLocationStack()})
}
c.labels[kind] = frames
}
return &frames[frameID]
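When label() sees an unallocated frameID, it now appends new entries each seeded with its own pre-allocated stack, so later compilations can reset those stacks instead of reallocating them. A sketch with simplified types; labelInfo and growLabels here are illustrative:

package main

import "fmt"

type labelInfo struct{ initialStack []int }

// growLabels makes index id addressable, seeding each new slot with
// its own pre-allocated stack for later reuse.
func growLabels(frames []labelInfo, id int) []labelInfo {
	if diff := id - len(frames) + 1; diff > 0 {
		for i := 0; i < diff; i++ {
			frames = append(frames, labelInfo{initialStack: make([]int, 0, 10)})
		}
	}
	return frames
}

func main() {
	frames := growLabels(nil, 4)
	fmt.Println(len(frames), cap(frames[4].initialStack)) // 5 10
}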
@@ -444,10 +474,9 @@ func (c *amd64Compiler) branchInto(target wazeroir.Label) error {
// with the appropriate value locations. Note we clone the stack here as we may
// manipulate the stack before the compiler reaches the label.
targetLabel := c.label(target)
if !targetLabel.initialStack.initialized() {
// It seems unnecessary to clone as branchInto is always the tail of the current block.
// TODO: verify ^^.
targetLabel.initialStack = c.locationStack.clone()
if !targetLabel.stackInitialized {
targetLabel.initialStack.cloneFrom(*c.locationStack)
targetLabel.stackInitialized = true
}
jmp := c.assembler.CompileJump(amd64.JMP)
c.assignJumpTarget(target, jmp)
@@ -522,38 +551,24 @@ func (c *amd64Compiler) compileBrIf(o *wazeroir.UnionOperation) error {
// Note that the .Else branch doesn't have ToDrop, as .Else in reality
// corresponds to either If's Else block or Br_if's else block in Wasm.
// Emit for else branches
saved := c.locationStack
c.setLocationStack(saved.clone())
// Emit the else branch.
if elseTarget.IsReturnTarget() {
if err := c.compileReturnFunction(); err != nil {
return err
}
} else {
elseLabel := elseTarget
if c.ir.LabelCallers[elseLabel] > 1 {
// We can only re-use register state when there's a single call-site.
// Release existing values on registers to the stack if there are multiple call-sites,
// to have a consistent value location state at the beginning of the label.
if err := c.compileReleaseAllRegistersToStack(); err != nil {
return err
}
}
// Set the initial stack of the target label, so we can start compiling the label
// with the appropriate value locations. Note we clone the stack here as we may
// manipulate the stack before the compiler reaches the label.
labelInfo := c.label(elseLabel)
if !labelInfo.initialStack.initialized() {
labelInfo.initialStack = c.locationStack
labelInfo := c.label(elseTarget)
if !labelInfo.stackInitialized {
labelInfo.initialStack.cloneFrom(*c.locationStack)
labelInfo.stackInitialized = true
}
elseJmp := c.assembler.CompileJump(amd64.JMP)
c.assignJumpTarget(elseLabel, elseJmp)
c.assignJumpTarget(elseTarget, elseJmp)
}
// Handle then branch.
c.assembler.SetJumpTargetOnNext(jmpWithCond)
c.setLocationStack(saved)
if err := compileDropRange(c, thenToDrop); err != nil {
return err
}
@@ -573,8 +588,9 @@ func (c *amd64Compiler) compileBrIf(o *wazeroir.UnionOperation) error {
// with the appropriate value locations. Note we clone the stack here as we may
// manipulate the stack before the compiler reaches the label.
labelInfo := c.label(thenLabel)
if !labelInfo.initialStack.initialized() {
labelInfo.initialStack = c.locationStack
if !labelInfo.stackInitialized {
labelInfo.initialStack.cloneFrom(*c.locationStack)
labelInfo.stackInitialized = true
}
thenJmp := c.assembler.CompileJump(amd64.JMP)
c.assignJumpTarget(thenLabel, thenJmp)
@@ -670,7 +686,12 @@ func (c *amd64Compiler) compileBrTable(o *wazeroir.UnionOperation) error {
// [Emit the code for each targets and default branch]
labelInitialInstructions := make([]asm.Node, len(o.Us)/2)
saved := c.locationStack
// Since we might end up with a different stack state in each branch,
// we save the initial stack state here, and use the same initial state
// for each iteration.
initialLocationStack := c.getSavedTemporaryLocationStack()
for i := range labelInitialInstructions {
// Emit the initial instruction of each target.
// We use NOP as we don't yet know the next instruction in each label.
@@ -679,27 +700,31 @@ func (c *amd64Compiler) compileBrTable(o *wazeroir.UnionOperation) error {
targetLabel := wazeroir.Label(o.Us[i*2])
targetToDrop := o.Us[i*2+1]
if i < len(labelInitialInstructions)-1 {
// Clone the location stack so the branch-specific code doesn't
// affect others.
c.setLocationStack(saved.clone())
} else {
// If this is the default branch, we use the original one
// as this is the last code in this block.
c.setLocationStack(saved)
}
if err = compileDropRange(c, targetToDrop); err != nil {
return err
}
if err = c.branchInto(targetLabel); err != nil {
return err
}
// After each iteration, reset the stack's state to initialLocationStack.
c.locationStack.cloneFrom(initialLocationStack)
}
c.assembler.BuildJumpTable(offsetData, labelInitialInstructions)
return nil
}
func (c *amd64Compiler) getSavedTemporaryLocationStack() runtimeValueLocationStack {
initialLocationStack := *c.locationStack // Take copy!
// Use c.brTableTmp for the underlying stack so that we can reduce allocations.
if diff := int(initialLocationStack.sp) - len(c.brTableTmp); diff > 0 {
c.brTableTmp = append(c.brTableTmp, make([]runtimeValueLocation, diff)...)
}
copy(c.brTableTmp, initialLocationStack.stack[:initialLocationStack.sp])
initialLocationStack.stack = c.brTableTmp
return initialLocationStack
}
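getSavedTemporaryLocationStack snapshots the live portion of the current location stack into the compiler-owned brTableTmp scratch slice, so the per-branch resets in compileBrTable don't allocate. A sketch of the same pattern with simplified types; intStack and snapshot are illustrative, not the actual wazero API:

package main

import "fmt"

// intStack is an illustrative stand-in for runtimeValueLocationStack.
type intStack struct {
	sp    int
	stack []int
}

// snapshot copies the live portion of cur into the reusable scratch
// buffer and returns a stack header pointing at that buffer.
func snapshot(cur intStack, scratch *[]int) intStack {
	saved := cur // copy the header fields
	if diff := cur.sp - len(*scratch); diff > 0 {
		*scratch = append(*scratch, make([]int, diff)...)
	}
	copy(*scratch, cur.stack[:cur.sp])
	saved.stack = *scratch
	return saved
}

func main() {
	var scratch []int
	cur := intStack{sp: 3, stack: []int{7, 8, 9, 0}}
	s := snapshot(cur, &scratch)
	fmt.Println(s.stack[:s.sp]) // [7 8 9]; scratch is reused on later calls
}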
func (c *amd64Compiler) assignJumpTarget(label wazeroir.Label, jmpInstruction asm.Node) {
jmpTargetLabel := c.label(label)
targetInst := jmpTargetLabel.initialInstruction
@@ -717,7 +742,7 @@ func (c *amd64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipLabel bool
labelInfo := c.label(label)
// If initialStack is not set, that means this label has never been reached.
if !labelInfo.initialStack.initialized() {
if !labelInfo.stackInitialized {
skipLabel = true
return
}
@@ -732,7 +757,7 @@ func (c *amd64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipLabel bool
}
// Set the initial stack.
c.setLocationStack(labelInfo.initialStack)
c.setLocationStack(&labelInfo.initialStack)
return
}

View File

@@ -135,6 +135,11 @@ func TestAmd64Compiler_compile_Mul_Div_Rem(t *testing.T) {
const dxValue uint64 = 111111
compiler := env.requireNewCompiler(t, &wasm.FunctionType{}, newAmd64Compiler, nil).(*amd64Compiler)
// To make the assertion below stable, we preallocate the underlying stack,
// so that the pointer to the entry remains stable.
compiler.runtimeValueLocationStack().stack = make([]runtimeValueLocation, 100)
err := compiler.compilePreamble()
require.NoError(t, err)
@@ -261,6 +266,11 @@ func TestAmd64Compiler_compile_Mul_Div_Rem(t *testing.T) {
env := newCompilerEnvironment()
compiler := env.requireNewCompiler(t, &wasm.FunctionType{}, newAmd64Compiler, nil).(*amd64Compiler)
// To make the assertion below stable, we preallocate the underlying stack,
// so that the pointer to the entry remains stable.
compiler.runtimeValueLocationStack().stack = make([]runtimeValueLocation, 100)
err := compiler.compilePreamble()
require.NoError(t, err)
@@ -583,6 +593,130 @@ func (c *amd64Compiler) setStackPointerCeil(v uint64) {
}
// setRuntimeValueLocationStack implements compilerImpl.setRuntimeValueLocationStack for the amd64 architecture.
func (c *amd64Compiler) setRuntimeValueLocationStack(s runtimeValueLocationStack) {
func (c *amd64Compiler) setRuntimeValueLocationStack(s *runtimeValueLocationStack) {
c.locationStack = s
}
func TestAmd64Compiler_label(t *testing.T) {
c := &amd64Compiler{}
c.label(wazeroir.NewLabel(wazeroir.LabelKindContinuation, 100))
require.Equal(t, 100, c.frameIDMax)
require.Equal(t, 101, len(c.labels[wazeroir.LabelKindContinuation]))
// frameIDMax is shared across all LabelKinds, so this shouldn't change frameIDMax.
c.label(wazeroir.NewLabel(wazeroir.LabelKindHeader, 2))
require.Equal(t, 100, c.frameIDMax)
require.Equal(t, 3, len(c.labels[wazeroir.LabelKindHeader]))
}
func TestAmd64Compiler_Init(t *testing.T) {
c := &amd64Compiler{
locationStackForEntrypoint: newRuntimeValueLocationStack(),
assembler: amd64.NewAssembler(),
}
const stackCap = 12345
c.locationStackForEntrypoint.stack = make([]runtimeValueLocation, stackCap)
c.locationStackForEntrypoint.sp = 5555
c.Init(&wasm.FunctionType{}, nil, false)
// locationStack is the pointer to locationStackForEntrypoint after init.
require.Equal(t, c.locationStack, &c.locationStackForEntrypoint)
// And the underlying stack must be reused (the capacity preserved).
require.Equal(t, stackCap, cap(c.locationStack.stack))
require.Equal(t, stackCap, cap(c.locationStackForEntrypoint.stack))
}
func TestAmd64Compiler_resetLabels(t *testing.T) {
c := newAmd64Compiler().(*amd64Compiler)
nop := c.compileNOP()
const (
frameIDMax = 50
capacity = 12345
)
c.frameIDMax = frameIDMax
for i := range c.labels {
ifs := make([]amd64LabelInfo, frameIDMax*2)
c.labels[i] = ifs
for j := 0; j <= frameIDMax; j++ {
ifs[j].stackInitialized = true
ifs[j].initialInstruction = nop
ifs[j].initialStack = newRuntimeValueLocationStack()
ifs[j].initialStack.sp = 5555 // should be cleared via runtimeValueLocationStack.reset().
ifs[j].initialStack.stack = make([]runtimeValueLocation, 0, capacity)
}
}
c.resetLabels()
for i := range c.labels {
for j := 0; j < len(c.labels[i]); j++ {
l := &c.labels[i][j]
require.False(t, l.stackInitialized)
require.Nil(t, l.initialInstruction)
require.Equal(t, 0, len(l.initialStack.stack))
if j > frameIDMax {
require.Equal(t, 0, cap(l.initialStack.stack))
} else {
require.Equal(t, capacity, cap(l.initialStack.stack))
}
require.Equal(t, uint64(0), l.initialStack.sp)
}
}
}
func TestAmd64Compiler_getSavedTemporaryLocationStack(t *testing.T) {
t.Run("len(brTableTmp)<len(current)", func(t *testing.T) {
st := newRuntimeValueLocationStack()
c := &amd64Compiler{locationStack: &st}
c.locationStack.sp = 3
c.locationStack.stack = []runtimeValueLocation{{stackPointer: 150}, {stackPointer: 200}, {stackPointer: 300}}
actual := c.getSavedTemporaryLocationStack()
require.Equal(t, uint64(3), actual.sp)
require.Equal(t, 3, len(actual.stack))
require.Equal(t, c.locationStack.stack[:3], actual.stack)
})
t.Run("len(brTableTmp)==len(current)", func(t *testing.T) {
st := newRuntimeValueLocationStack()
c := &amd64Compiler{locationStack: &st, brTableTmp: make([]runtimeValueLocation, 3)}
initSlicePtr := &c.brTableTmp
c.locationStack.sp = 3
c.locationStack.stack = []runtimeValueLocation{{stackPointer: 150}, {stackPointer: 200}, {stackPointer: 300}}
actual := c.getSavedTemporaryLocationStack()
require.Equal(t, uint64(3), actual.sp)
require.Equal(t, 3, len(actual.stack))
require.Equal(t, c.locationStack.stack[:3], actual.stack)
// The underlying temporary slice shouldn't be changed.
require.Equal(t, initSlicePtr, &c.brTableTmp)
})
t.Run("len(brTableTmp)>len(current)", func(t *testing.T) {
const temporarySliceSize = 100
st := newRuntimeValueLocationStack()
c := &amd64Compiler{locationStack: &st, brTableTmp: make([]runtimeValueLocation, temporarySliceSize)}
c.locationStack.sp = 3
c.locationStack.stack = []runtimeValueLocation{
{stackPointer: 150},
{stackPointer: 200},
{stackPointer: 300},
{},
{},
{},
{},
{stackPointer: 1231455}, // Entries here shouldn't be copied as they are above sp.
}
actual := c.getSavedTemporaryLocationStack()
require.Equal(t, uint64(3), actual.sp)
require.Equal(t, temporarySliceSize, len(actual.stack))
require.Equal(t, c.locationStack.stack[:3], actual.stack[:3])
for i := int(actual.sp); i < len(actual.stack); i++ {
// Above the stack pointer, the values must not be copied.
require.Zero(t, actual.stack[i].stackPointer)
}
})
}

View File

@@ -21,7 +21,7 @@ type arm64Compiler struct {
ir *wazeroir.CompilationResult
// locationStack holds the state of the wazeroir virtual stack,
// where each item is placed either in a register or on the actual memory stack.
locationStack runtimeValueLocationStack
locationStack *runtimeValueLocationStack
// labels maps a label (e.g. ".L1_then") to *arm64LabelInfo.
labels [wazeroir.LabelKindNum][]arm64LabelInfo
// stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation.
@@ -31,29 +31,57 @@ type arm64Compiler struct {
withListener bool
typ *wasm.FunctionType
br *bytes.Reader
// locationStackForEntrypoint is the initial location stack for all functions. To reuse the allocated stack,
// we cache it here, then reset it and assign it to .locationStack in the Init method.
locationStackForEntrypoint runtimeValueLocationStack
// frameIDMax tracks the maximum frame ID seen per function.
frameIDMax int
brTableTmp []runtimeValueLocation
}
func newArm64Compiler() compiler {
return &arm64Compiler{
assembler: arm64.NewAssembler(arm64ReservedRegisterForTemporary),
locationStack: newRuntimeValueLocationStack(),
br: bytes.NewReader(nil),
assembler: arm64.NewAssembler(arm64ReservedRegisterForTemporary),
locationStackForEntrypoint: newRuntimeValueLocationStack(),
br: bytes.NewReader(nil),
}
}
// Init implements compiler.Init.
func (c *arm64Compiler) Init(typ *wasm.FunctionType, ir *wazeroir.CompilationResult, withListener bool) {
assembler, locationStack := c.assembler, c.locationStack
assembler.Reset()
locationStack.reset()
for i := range c.labels {
c.labels[i] = c.labels[i][:0]
}
c.assembler.Reset()
c.locationStackForEntrypoint.reset()
c.resetLabels()
*c = arm64Compiler{
assembler: assembler, locationStack: locationStack,
ir: ir, withListener: withListener, labels: c.labels,
typ: typ,
br: c.br,
ir: ir,
withListener: withListener,
typ: typ,
assembler: c.assembler,
labels: c.labels,
br: c.br,
brTableTmp: c.brTableTmp,
locationStackForEntrypoint: c.locationStackForEntrypoint,
}
// Reuses the initial location stack for the compilation of subsequent functions.
c.locationStack = &c.locationStackForEntrypoint
}
// resetLabels resets the existing content in arm64Compiler.labels so that
// we can reuse the allocated slices and stacks in subsequent compilations.
func (c *arm64Compiler) resetLabels() {
for i := range c.labels {
for j := range c.labels[i] {
if j > c.frameIDMax {
// We only need to reset up to the maximum frame ID; this makes compilation faster for large binaries.
break
}
l := &c.labels[i][j]
l.initialInstruction = nil
l.stackInitialized = false
l.initialStack.reset()
}
}
}
@@ -142,7 +170,8 @@ type arm64LabelInfo struct {
// initialInstruction is the initial instruction for this label so other block can branch into it.
initialInstruction asm.Node
// initialStack is the initial value location stack from which we start compiling this label.
initialStack runtimeValueLocationStack
initialStack runtimeValueLocationStack
stackInitialized bool
}
// assignStackPointerCeil implements compilerImpl.assignStackPointerCeil for the arm64 architecture.
@@ -156,10 +185,15 @@ func (c *arm64Compiler) label(label wazeroir.Label) *arm64LabelInfo {
kind := label.Kind()
frames := c.labels[kind]
frameID := label.FrameID()
if c.frameIDMax < frameID {
c.frameIDMax = frameID
}
// If the frameID is not allocated yet, expand the slice,
// so that we can reduce allocations in subsequent compilations.
if diff := frameID - len(frames) + 1; diff > 0 {
frames = append(frames, make([]arm64LabelInfo, diff*2)...)
for i := 0; i < diff; i++ {
frames = append(frames, arm64LabelInfo{initialStack: newRuntimeValueLocationStack()})
}
c.labels[kind] = frames
}
return &frames[frameID]
@@ -167,7 +201,7 @@ func (c *arm64Compiler) label(label wazeroir.Label) *arm64LabelInfo {
// runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the arm64 architecture.
func (c *arm64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack {
return &c.locationStack
return c.locationStack
}
// pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for arm64.
@@ -455,7 +489,7 @@ func (c *arm64Compiler) compileGoDefinedHostFunction() error {
// setLocationStack sets the given runtimeValueLocationStack to .locationStack field,
// while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks.
// This is called when we branch into a different block.
func (c *arm64Compiler) setLocationStack(newStack runtimeValueLocationStack) {
func (c *arm64Compiler) setLocationStack(newStack *runtimeValueLocationStack) {
if c.stackPointerCeil < c.locationStack.stackPointerCeil {
c.stackPointerCeil = c.locationStack.stackPointerCeil
}
@@ -480,7 +514,7 @@ func (c *arm64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipThisLabel
labelInfo := c.label(labelKey)
// If initialStack is not set, that means this label has never been reached.
if !labelInfo.initialStack.initialized() {
if !labelInfo.stackInitialized {
skipThisLabel = true
return
}
@@ -494,7 +528,7 @@ func (c *arm64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipThisLabel
}
// Set the initial stack.
c.setLocationStack(labelInfo.initialStack)
c.setLocationStack(&labelInfo.initialStack)
return false
}
@@ -753,18 +787,10 @@ func (c *arm64Compiler) compileBrIf(o *wazeroir.UnionOperation) error {
}
// Emit the code for branching into else branch.
// We save and clone the location stack because we might end up modifying it inside of branchInto,
// and we have to avoid affecting the code generation for Then branch afterwards.
saved := c.locationStack
c.setLocationStack(saved.clone())
elseTarget := wazeroir.Label(o.U2)
if err := c.compileBranchInto(elseTarget); err != nil {
return err
}
// Now ready to emit the code for branching into then branch.
// Retrieve the original value location stack so that the code below won't be affected by the Else branch ^^.
c.setLocationStack(saved)
// We branch into here from the original conditional BR (conditionalBR).
c.assembler.SetJumpTargetOnNext(conditionalBR)
thenTarget := wazeroir.Label(o.U1)
@@ -790,8 +816,9 @@ func (c *arm64Compiler) compileBranchInto(target wazeroir.Label) error {
// with the appropriate value locations. Note we clone the stack here as we may
// manipulate the stack before the compiler reaches the label.
targetLabel := c.label(target)
if !targetLabel.initialStack.initialized() {
targetLabel.initialStack = c.locationStack.clone()
if !targetLabel.stackInitialized {
targetLabel.initialStack.cloneFrom(*c.locationStack)
targetLabel.stackInitialized = true
}
br := c.assembler.CompileJump(arm64.B)
@@ -910,38 +937,45 @@ func (c *arm64Compiler) compileBrTable(o *wazeroir.UnionOperation) error {
// [Emit the code for each targets and default branch]
labelInitialInstructions := make([]asm.Node, len(o.Us)/2)
saved := c.locationStack
// Since we might end up with a different stack state in each branch,
// we save the initial stack state here, and use the same initial state
// for each iteration.
initialLocationStack := c.getSavedTemporaryLocationStack()
for i := range labelInitialInstructions {
// Emit the initial instruction of each target where
// we use NOP as we don't yet know the next instruction in each label.
init := c.assembler.CompileStandAlone(arm64.NOP)
labelInitialInstructions[i] = init
var locationStack runtimeValueLocationStack
targetLabel := wazeroir.Label(o.Us[i*2])
targetToDrop := o.Us[i*2+1]
if i < len(labelInitialInstructions)-1 {
// Clone the location stack so the branch-specific code doesn't
// affect others.
locationStack = saved.clone()
} else {
// If this is the default branch, we use the original one
// as this is the last code in this block.
locationStack = saved
}
c.setLocationStack(locationStack)
if err = compileDropRange(c, targetToDrop); err != nil {
return err
}
if err = c.compileBranchInto(targetLabel); err != nil {
return err
}
// After each iteration, reset the stack's state to initialLocationStack.
c.locationStack.cloneFrom(initialLocationStack)
}
c.assembler.BuildJumpTable(offsetData, labelInitialInstructions)
return nil
}
func (c *arm64Compiler) getSavedTemporaryLocationStack() runtimeValueLocationStack {
initialLocationStack := *c.locationStack // Take copy!
// Use c.brTableTmp for the underlying stack so that we can reduce allocations.
if diff := int(initialLocationStack.sp) - len(c.brTableTmp); diff > 0 {
c.brTableTmp = append(c.brTableTmp, make([]runtimeValueLocation, diff)...)
}
copy(c.brTableTmp, initialLocationStack.stack[:initialLocationStack.sp])
initialLocationStack.stack = c.brTableTmp
return initialLocationStack
}
// compileCall implements compiler.compileCall for the arm64 architecture.
func (c *arm64Compiler) compileCall(o *wazeroir.UnionOperation) error {
if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {

View File

@@ -100,12 +100,136 @@ func TestArm64Compiler_readInstructionAddress(t *testing.T) {
require.Equal(t, nativeCallStatusCodeReturned, env.compilerStatus())
}
func TestArm64Compiler_label(t *testing.T) {
c := &arm64Compiler{}
c.label(wazeroir.NewLabel(wazeroir.LabelKindContinuation, 100))
require.Equal(t, 100, c.frameIDMax)
require.Equal(t, 101, len(c.labels[wazeroir.LabelKindContinuation]))
// frameIDMax is shared across all LabelKinds, so this shouldn't change frameIDMax.
c.label(wazeroir.NewLabel(wazeroir.LabelKindHeader, 2))
require.Equal(t, 100, c.frameIDMax)
require.Equal(t, 3, len(c.labels[wazeroir.LabelKindHeader]))
}
func TestArm64Compiler_Init(t *testing.T) {
c := &arm64Compiler{
locationStackForEntrypoint: newRuntimeValueLocationStack(),
assembler: arm64.NewAssembler(0),
}
const stackCap = 12345
c.locationStackForEntrypoint.stack = make([]runtimeValueLocation, stackCap)
c.locationStackForEntrypoint.sp = 5555
c.Init(&wasm.FunctionType{}, nil, false)
// locationStack is the pointer to locationStackForEntrypoint after init.
require.Equal(t, c.locationStack, &c.locationStackForEntrypoint)
// And the underlying stack must be reused (the capacity preserved).
require.Equal(t, stackCap, cap(c.locationStack.stack))
require.Equal(t, stackCap, cap(c.locationStackForEntrypoint.stack))
}
func TestArm64Compiler_resetLabels(t *testing.T) {
c := newArm64Compiler().(*arm64Compiler)
nop := c.compileNOP()
const (
frameIDMax = 50
capacity = 12345
)
c.frameIDMax = frameIDMax
for i := range c.labels {
ifs := make([]arm64LabelInfo, frameIDMax*2)
c.labels[i] = ifs
for j := 0; j <= frameIDMax; j++ {
ifs[j].stackInitialized = true
ifs[j].initialInstruction = nop
ifs[j].initialStack = newRuntimeValueLocationStack()
ifs[j].initialStack.sp = 5555 // should be cleared via runtimeValueLocationStack.reset().
ifs[j].initialStack.stack = make([]runtimeValueLocation, 0, capacity)
}
}
c.resetLabels()
for i := range c.labels {
for j := 0; j < len(c.labels[i]); j++ {
l := &c.labels[i][j]
require.False(t, l.stackInitialized)
require.Nil(t, l.initialInstruction)
require.Equal(t, 0, len(l.initialStack.stack))
if j > frameIDMax {
require.Equal(t, 0, cap(l.initialStack.stack))
} else {
require.Equal(t, capacity, cap(l.initialStack.stack))
}
require.Equal(t, uint64(0), l.initialStack.sp)
}
}
}
func TestArm64Compiler_getSavedTemporaryLocationStack(t *testing.T) {
t.Run("len(brTableTmp)<len(current)", func(t *testing.T) {
st := newRuntimeValueLocationStack()
c := &arm64Compiler{locationStack: &st}
c.locationStack.sp = 3
c.locationStack.stack = []runtimeValueLocation{{stackPointer: 150}, {stackPointer: 200}, {stackPointer: 300}}
actual := c.getSavedTemporaryLocationStack()
require.Equal(t, uint64(3), actual.sp)
require.Equal(t, 3, len(actual.stack))
require.Equal(t, c.locationStack.stack[:3], actual.stack)
})
t.Run("len(brTableTmp)==len(current)", func(t *testing.T) {
st := newRuntimeValueLocationStack()
c := &arm64Compiler{locationStack: &st, brTableTmp: make([]runtimeValueLocation, 3)}
initSlicePtr := &c.brTableTmp
c.locationStack.sp = 3
c.locationStack.stack = []runtimeValueLocation{{stackPointer: 150}, {stackPointer: 200}, {stackPointer: 300}}
actual := c.getSavedTemporaryLocationStack()
require.Equal(t, uint64(3), actual.sp)
require.Equal(t, 3, len(actual.stack))
require.Equal(t, c.locationStack.stack[:3], actual.stack)
// The underlying temporary slice shouldn't be changed.
require.Equal(t, initSlicePtr, &c.brTableTmp)
})
t.Run("len(brTableTmp)>len(current)", func(t *testing.T) {
const temporarySliceSize = 100
st := newRuntimeValueLocationStack()
c := &arm64Compiler{locationStack: &st, brTableTmp: make([]runtimeValueLocation, temporarySliceSize)}
c.locationStack.sp = 3
c.locationStack.stack = []runtimeValueLocation{
{stackPointer: 150},
{stackPointer: 200},
{stackPointer: 300},
{},
{},
{},
{},
{stackPointer: 1231455}, // Entries here shouldn't be copied as they are above sp.
}
actual := c.getSavedTemporaryLocationStack()
require.Equal(t, uint64(3), actual.sp)
require.Equal(t, temporarySliceSize, len(actual.stack))
require.Equal(t, c.locationStack.stack[:3], actual.stack[:3])
for i := int(actual.sp); i < len(actual.stack); i++ {
// Above the stack pointer, the values must not be copied.
require.Zero(t, actual.stack[i].stackPointer)
}
})
}
// setStackPointerCeil implements compilerImpl.setStackPointerCeil for the arm64 architecture.
func (c *arm64Compiler) setStackPointerCeil(v uint64) {
c.stackPointerCeil = v
}
// setRuntimeValueLocationStack implements compilerImpl.setRuntimeValueLocationStack for the arm64 architecture.
func (c *arm64Compiler) setRuntimeValueLocationStack(s runtimeValueLocationStack) {
func (c *arm64Compiler) setRuntimeValueLocationStack(s *runtimeValueLocationStack) {
c.locationStack = s
}