interpreter: removes unneeded stack walks (#2274)

This makes the interpreter's compilation faster by removing the 
unnecessary stack walks. As a result, for some binary, we observe
93% drops in the compilation performance(!) with an additional 
but negligible memory usage.

```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero
                      │   old.txt    │              new.txt               │
                      │    sec/op    │   sec/op     vs base               │
Compilation/wazero-10    130.1m ± 1%   124.7m ± 1%   -4.12% (p=0.002 n=6)
Compilation/zig-10      9097.2m ± 0%   549.0m ± 1%  -93.96% (p=0.002 n=6)
Compilation/zz-10         1.159 ± 5%    1.145 ± 3%   -1.20% (p=0.041 n=6)
TinyGo/Compile/container_heap.test-10     16.54m ± 1%   15.60m ± 4%  -5.66% (p=0.002 n=6)
TinyGo/Compile/container_list.test-10     16.36m ± 1%   15.42m ± 1%  -5.74% (p=0.002 n=6)
TinyGo/Compile/container_ring.test-10     16.08m ± 1%   15.17m ± 0%  -5.64% (p=0.002 n=6)
TinyGo/Compile/crypto_des.test-10         16.79m ± 1%   15.84m ± 0%  -5.68% (p=0.002 n=6)
TinyGo/Compile/crypto_md5.test-10         16.61m ± 1%   15.63m ± 0%  -5.88% (p=0.002 n=6)
TinyGo/Compile/crypto_rc4.test-10         15.93m ± 0%   15.05m ± 1%  -5.54% (p=0.002 n=6)
TinyGo/Compile/crypto_sha1.test-10        16.63m ± 0%   15.69m ± 0%  -5.66% (p=0.002 n=6)
TinyGo/Compile/crypto_sha256.test-10      17.15m ± 1%   16.13m ± 1%  -6.00% (p=0.002 n=6)
TinyGo/Compile/crypto_sha512.test-10      17.43m ± 2%   16.30m ± 1%  -6.49% (p=0.002 n=6)
TinyGo/Compile/encoding_ascii85.test-10   16.57m ± 1%   15.66m ± 1%  -5.52% (p=0.002 n=6)
```

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
Takeshi Yoneda
2024-06-27 09:43:47 -07:00
committed by GitHub
parent 6eb6894b9f
commit f9373112f4

View File

@@ -26,11 +26,14 @@ const (
type (
controlFrame struct {
frameID uint32
// originalStackLen holds the number of values on the stack
// originalStackLenWithoutParam holds the number of values on the stack
// when Start executing this control frame minus params for the block.
originalStackLenWithoutParam int
blockType *wasm.FunctionType
kind controlFrameKind
// originalStackLenWithoutParamUint64 is almost the same as originalStackLenWithoutParam
// except that it holds the number of values on the stack in uint64.
originalStackLenWithoutParamUint64 int
blockType *wasm.FunctionType
kind controlFrameKind
}
controlFrames struct{ frames []controlFrame }
)
@@ -157,9 +160,11 @@ type compiler struct {
enabledFeatures api.CoreFeatures
callFrameStackSizeInUint64 int
stack []unsignedType
currentFrameID uint32
controlFrames controlFrames
unreachableState struct {
// stackLenInUint64 is the length of the stack in uint64.
stackLenInUint64 int
currentFrameID uint32
controlFrames controlFrames
unreachableState struct {
on bool
depth int
}
@@ -341,6 +346,7 @@ func (c *compiler) Next() (*compilationResult, error) {
c.pc = 0
c.currentOpPC = 0
c.currentFrameID = 0
c.stackLenInUint64 = 0
c.unreachableState.on, c.unreachableState.depth = false, 0
if err := c.compile(sig, code.Body, code.LocalTypes, code.BodyOffsetInCodeSection); err != nil {
@@ -449,10 +455,11 @@ operatorSwitch:
// Create a new frame -- entering this block.
frame := controlFrame{
frameID: c.nextFrameID(),
originalStackLenWithoutParam: len(c.stack) - len(bt.Params),
kind: controlFrameKindBlockWithoutContinuationLabel,
blockType: bt,
frameID: c.nextFrameID(),
originalStackLenWithoutParam: len(c.stack) - len(bt.Params),
originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64,
kind: controlFrameKindBlockWithoutContinuationLabel,
blockType: bt,
}
c.controlFrames.push(frame)
@@ -473,10 +480,11 @@ operatorSwitch:
// Create a new frame -- entering loop.
frame := controlFrame{
frameID: c.nextFrameID(),
originalStackLenWithoutParam: len(c.stack) - len(bt.Params),
kind: controlFrameKindLoop,
blockType: bt,
frameID: c.nextFrameID(),
originalStackLenWithoutParam: len(c.stack) - len(bt.Params),
originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64,
kind: controlFrameKindLoop,
blockType: bt,
}
c.controlFrames.push(frame)
@@ -515,8 +523,9 @@ operatorSwitch:
// Create a new frame -- entering if.
frame := controlFrame{
frameID: c.nextFrameID(),
originalStackLenWithoutParam: len(c.stack) - len(bt.Params),
frameID: c.nextFrameID(),
originalStackLenWithoutParam: len(c.stack) - len(bt.Params),
originalStackLenWithoutParamUint64: c.stackLenInUint64 - bt.ParamNumInUint64,
// Note this will be set to controlFrameKindIfWithElse
// when else opcode found later.
kind: controlFrameKindIfWithoutElse,
@@ -543,7 +552,7 @@ operatorSwitch:
// If it is currently in unreachable, and the non-nested if,
// reset the stack so we can correctly handle the else block.
top := c.controlFrames.top()
c.stack = c.stack[:top.originalStackLenWithoutParam]
c.stackSwitchAt(top)
top.kind = controlFrameKindIfWithElse
// Re-push the parameters to the if block so that else block can use them.
@@ -572,7 +581,7 @@ operatorSwitch:
// Reset the stack manipulated by the then block, and re-push the block param types to the stack.
c.stack = c.stack[:frame.originalStackLenWithoutParam]
c.stackSwitchAt(frame)
for _, t := range frame.blockType.Params {
c.stackPush(wasmValueTypeTounsignedType(t))
}
@@ -601,7 +610,7 @@ operatorSwitch:
return nil
}
c.stack = c.stack[:frame.originalStackLenWithoutParam]
c.stackSwitchAt(frame)
for _, t := range frame.blockType.Results {
c.stackPush(wasmValueTypeTounsignedType(t))
}
@@ -628,7 +637,7 @@ operatorSwitch:
// We need to reset the stack so that
// the values pushed inside the block.
dropOp := newOperationDrop(c.getFrameDropRange(frame, true))
c.stack = c.stack[:frame.originalStackLenWithoutParam]
c.stackSwitchAt(frame)
// Push the result types onto the stack.
for _, t := range frame.blockType.Results {
@@ -3505,6 +3514,11 @@ func (c *compiler) stackPeek() (ret unsignedType) {
return
}
func (c *compiler) stackSwitchAt(frame *controlFrame) {
c.stack = c.stack[:frame.originalStackLenWithoutParam]
c.stackLenInUint64 = frame.originalStackLenWithoutParamUint64
}
func (c *compiler) stackPop() (ret unsignedType) {
// No need to check stack bound
// as we can assume that all the operations
@@ -3512,11 +3526,13 @@ func (c *compiler) stackPop() (ret unsignedType) {
// at module validation phase.
ret = c.stack[len(c.stack)-1]
c.stack = c.stack[:len(c.stack)-1]
c.stackLenInUint64 -= 1 + int(unsignedTypeV128&ret>>2)
return
}
func (c *compiler) stackPush(ts unsignedType) {
c.stack = append(c.stack, ts)
c.stackLenInUint64 += 1 + int(unsignedTypeV128&ts>>2)
}
// emit adds the operations into the result.
@@ -3565,7 +3581,7 @@ func (c *compiler) emitDefaultValue(t wasm.ValueType) {
// of the n-th local.
func (c *compiler) localDepth(index wasm.Index) int {
height := c.localIndexToStackHeightInUint64[index]
return c.stackLenInUint64(len(c.stack)) - 1 - int(height)
return c.stackLenInUint64 - 1 - height
}
func (c *compiler) localType(index wasm.Index) (t wasm.ValueType) {
@@ -3592,14 +3608,7 @@ func (c *compiler) getFrameDropRange(frame *controlFrame, isEnd bool) inclusiveR
} else {
start = frame.blockType.ResultNumInUint64
}
var end int
if frame.kind == controlFrameKindFunction {
// On the function return, we eliminate all the contents on the stack
// including locals (existing below of frame.originalStackLen)
end = c.stackLenInUint64(len(c.stack)) - 1
} else {
end = c.stackLenInUint64(len(c.stack)) - 1 - c.stackLenInUint64(frame.originalStackLenWithoutParam)
}
end := c.stackLenInUint64 - 1 - frame.originalStackLenWithoutParamUint64
if start <= end {
return inclusiveRange{Start: int32(start), End: int32(end)}
} else {
@@ -3607,17 +3616,6 @@ func (c *compiler) getFrameDropRange(frame *controlFrame, isEnd bool) inclusiveR
}
}
func (c *compiler) stackLenInUint64(ceil int) (ret int) {
for i := 0; i < ceil; i++ {
if c.stack[i] == unsignedTypeV128 {
ret += 2
} else {
ret++
}
}
return
}
func (c *compiler) readMemoryArg(tag string) (memoryArg, error) {
c.result.UsesMemory = true
alignment, num, err := leb128.LoadUint32(c.body[c.pc+1:])