ssa: memory usage optimization around lastDefinitions (#2233)
This avoids unnecessary duplicate definitions of the zero initial value
for Wasm function locals. As a result, for binaries that have a huge
number of locals, such as the Zig standard library tests, we see a
1~10% improvement in compilation performance, without impacting other
cases, as shown in the benchmark results below.
### Zig Stdlib tests
```
│ old_zig.txt │ new_zig.txt │
│ sec/op │ sec/op vs base │
Zig/Compile/test-opt.wasm-10 4.466 ± 2% 4.399 ± 0% -1.50% (p=0.007 n=7)
Zig/Run/test-opt.wasm-10 18.86 ± 0% 18.84 ± 0% ~ (p=0.535 n=7)
Zig/Compile/test.wasm-10 5.684 ± 1% 5.084 ± 1% -10.55% (p=0.001 n=7)
Zig/Run/test.wasm-10 19.28 ± 1% 19.25 ± 1% ~ (p=0.535 n=7)
geomean 9.802 9.490 -3.18%
│ old_zig.txt │ new_zig.txt │
│ B/op │ B/op vs base │
Zig/Compile/test-opt.wasm-10 395.4Mi ± 0% 396.7Mi ± 0% +0.32% (p=0.001 n=7)
Zig/Run/test-opt.wasm-10 741.7Mi ± 0% 741.7Mi ± 0% ~ (p=0.941 n=7)
Zig/Compile/test.wasm-10 671.6Mi ± 0% 660.1Mi ± 0% -1.72% (p=0.001 n=7)
Zig/Run/test.wasm-10 1.296Gi ± 0% 1.296Gi ± 0% ~ (p=0.363 n=7)
geomean 715.1Mi 712.6Mi -0.35%
│ old_zig.txt │ new_zig.txt │
│ allocs/op │ allocs/op vs base │
Zig/Compile/test-opt.wasm-10 363.1k ± 0% 363.2k ± 0% ~ (p=0.456 n=7)
Zig/Run/test-opt.wasm-10 51.58k ± 0% 51.58k ± 0% ~ (p=0.812 n=7)
Zig/Compile/test.wasm-10 516.3k ± 0% 515.5k ± 0% -0.16% (p=0.001 n=7)
Zig/Run/test.wasm-10 2.156M ± 0% 2.156M ± 0% ~ (p=0.171 n=7)
geomean 380.0k 379.8k -0.03%
```
### wazero compiled as wasip1 binary
```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
Compilation-10 2.418 ± 0% 2.421 ± 0% ~ (p=0.383 n=7)
│ old.txt │ new.txt │
│ B/op │ B/op vs base │
Compilation-10 339.9Mi ± 0% 339.9Mi ± 0% -0.01% (p=0.001 n=7)
│ old.txt │ new.txt │
│ allocs/op │ allocs/op vs base │
Compilation-10 603.9k ± 0% 604.0k ± 0% ~ (p=0.620 n=7)
```
### TinyGo
```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero/internal/integration_test/stdlibs
│ old_tinygo.txt │ new_tinygo.txt │
│ sec/op │ sec/op vs base │
TinyGo/Compile/container_heap.test-10 414.1m ± 0% 413.1m ± 1% ~ (p=0.165 n=7)
TinyGo/Run/container_heap.test-10 14.48m ± 1% 14.54m ± 1% ~ (p=0.383 n=7)
TinyGo/Compile/container_list.test-10 413.1m ± 1% 412.1m ± 1% ~ (p=0.620 n=7)
TinyGo/Run/container_list.test-10 14.29m ± 0% 14.33m ± 1% ~ (p=0.259 n=7)
TinyGo/Compile/container_ring.test-10 406.8m ± 1% 407.1m ± 1% ~ (p=0.902 n=7)
TinyGo/Run/container_ring.test-10 14.28m ± 1% 14.29m ± 1% ~ (p=0.318 n=7)
TinyGo/Compile/crypto_des.test-10 421.3m ± 0% 422.4m ± 1% ~ (p=0.209 n=7)
TinyGo/Run/crypto_des.test-10 18.32m ± 1% 18.38m ± 2% ~ (p=0.209 n=7)
TinyGo/Compile/crypto_md5.test-10 418.7m ± 1% 419.8m ± 0% ~ (p=0.318 n=7)
TinyGo/Run/crypto_md5.test-10 21.78m ± 5% 20.65m ± 2% ~ (p=0.165 n=7)
TinyGo/Compile/crypto_rc4.test-10 421.8m ± 4% 404.3m ± 1% -4.16% (p=0.001 n=7)
TinyGo/Run/crypto_rc4.test-10 162.6m ± 1% 162.3m ± 1% ~ (p=0.318 n=7)
TinyGo/Compile/crypto_sha1.test-10 420.5m ± 1% 419.4m ± 1% ~ (p=0.535 n=7)
TinyGo/Run/crypto_sha1.test-10 16.04m ± 1% 16.12m ± 1% +0.50% (p=0.038 n=7)
TinyGo/Compile/crypto_sha256.test-10 426.8m ± 0% 427.4m ± 1% ~ (p=1.000 n=7)
TinyGo/Run/crypto_sha256.test-10 16.24m ± ∞ ¹ 16.38m ± ∞ ¹ ~ (p=1.000 n=1) ²
geomean 95.31m 94.88m -0.45%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05
│ old_tinygo.txt │ new_tinygo.txt │
│ B/op │ B/op vs base │
TinyGo/Compile/container_heap.test-10 48.58Mi ± 0% 48.55Mi ± 0% -0.06% (p=0.001 n=7)
TinyGo/Run/container_heap.test-10 16.63Mi ± 0% 16.63Mi ± 0% ~ (p=0.620 n=7)
TinyGo/Compile/container_list.test-10 48.56Mi ± 0% 48.54Mi ± 0% -0.05% (p=0.001 n=7)
TinyGo/Run/container_list.test-10 16.40Mi ± 0% 16.40Mi ± 0% ~ (p=0.535 n=7)
TinyGo/Compile/container_ring.test-10 47.81Mi ± 0% 47.78Mi ± 0% -0.05% (p=0.001 n=7)
TinyGo/Run/container_ring.test-10 16.30Mi ± 0% 16.30Mi ± 0% ~ (p=0.871 n=7)
TinyGo/Compile/crypto_des.test-10 48.70Mi ± 0% 48.67Mi ± 0% -0.05% (p=0.001 n=7)
TinyGo/Run/crypto_des.test-10 16.76Mi ± 0% 16.76Mi ± 0% ~ (p=0.119 n=7)
TinyGo/Compile/crypto_md5.test-10 48.75Mi ± 0% 48.73Mi ± 0% -0.03% (p=0.001 n=7)
TinyGo/Run/crypto_md5.test-10 44.97Mi ± 0% 44.97Mi ± 0% ~ (p=0.333 n=7)
TinyGo/Compile/crypto_rc4.test-10 47.79Mi ± 0% 47.76Mi ± 0% -0.06% (p=0.001 n=7)
TinyGo/Run/crypto_rc4.test-10 29.28Mi ± 0% 29.28Mi ± 0% ~ (p=0.274 n=7)
TinyGo/Compile/crypto_sha1.test-10 48.99Mi ± 0% 48.97Mi ± 0% -0.03% (p=0.001 n=7)
TinyGo/Run/crypto_sha1.test-10 17.44Mi ± 0% 17.44Mi ± 0% ~ (p=0.456 n=7)
TinyGo/Compile/crypto_sha256.test-10 48.82Mi ± 0% 48.81Mi ± 0% -0.03% (p=0.004 n=7)
TinyGo/Run/crypto_sha256.test-10 17.53Mi ± ∞ ¹ 17.53Mi ± ∞ ¹ ~ (p=1.000 n=1) ²
geomean 31.45Mi 31.45Mi -0.02%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05
│ old_tinygo.txt │ new_tinygo.txt │
│ allocs/op │ allocs/op vs base │
TinyGo/Compile/container_heap.test-10 83.64k ± 0% 83.63k ± 0% ~ (p=0.365 n=7)
TinyGo/Run/container_heap.test-10 374.9k ± 0% 374.9k ± 0% ~ (p=1.000 n=7)
TinyGo/Compile/container_list.test-10 83.38k ± 0% 83.39k ± 0% ~ (p=0.805 n=7)
TinyGo/Run/container_list.test-10 370.0k ± 0% 370.0k ± 0% ~ (p=0.633 n=7)
TinyGo/Compile/container_ring.test-10 83.30k ± 0% 83.35k ± 0% ~ (p=1.000 n=7)
TinyGo/Run/container_ring.test-10 367.6k ± 0% 367.6k ± 0% ~ (p=0.617 n=7)
TinyGo/Compile/crypto_des.test-10 83.67k ± 0% 83.70k ± 0% ~ (p=0.805 n=7)
TinyGo/Run/crypto_des.test-10 378.1k ± 0% 378.1k ± 0% ~ (p=0.078 n=7)
TinyGo/Compile/crypto_md5.test-10 83.81k ± 0% 83.84k ± 0% ~ (p=0.805 n=7)
TinyGo/Run/crypto_md5.test-10 393.3k ± 0% 393.3k ± 0% ~ (p=0.690 n=7)
TinyGo/Compile/crypto_rc4.test-10 83.39k ± 0% 83.33k ± 0% ~ (p=0.097 n=7)
TinyGo/Run/crypto_rc4.test-10 367.1k ± 0% 367.1k ± 0% ~ (p=0.232 n=7)
TinyGo/Compile/crypto_sha1.test-10 84.00k ± 0% 84.06k ± 0% ~ (p=0.154 n=7)
TinyGo/Run/crypto_sha1.test-10 392.7k ± 0% 392.7k ± 0% ~ (p=1.000 n=7)
TinyGo/Compile/crypto_sha256.test-10 83.85k ± 0% 83.86k ± 0% ~ (p=0.620 n=7)
TinyGo/Run/crypto_sha256.test-10 394.5k ± ∞ ¹ 394.5k ± ∞ ¹ ~ (p=1.000 n=1) ²
```
Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
@@ -458,11 +458,11 @@ L2 (SSA Block: blk2):
|
||||
name: "single_predecessor_local_refs", m: testcases.SinglePredecessorLocalRefs.Module,
|
||||
afterLoweringARM64: `
|
||||
L1 (SSA Block: blk0):
|
||||
mov x132?, xzr
|
||||
cbz w132?, (L2)
|
||||
L3 (SSA Block: blk1):
|
||||
mov x131?, xzr
|
||||
mov x0, x131?
|
||||
cbz w131?, (L2)
|
||||
L3 (SSA Block: blk1):
|
||||
mov x130?, xzr
|
||||
mov x0, x130?
|
||||
ret
|
||||
L2 (SSA Block: blk2):
|
||||
L4 (SSA Block: blk3):
|
||||
|
||||
@@ -301,26 +301,7 @@ func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) {
|
||||
st := WasmTypeToSSAType(typ)
|
||||
variable := c.ssaBuilder.DeclareVariable(st)
|
||||
c.setWasmLocalVariable(wasm.Index(i)+localCount, variable)
|
||||
|
||||
zeroInst := c.ssaBuilder.AllocateInstruction()
|
||||
switch st {
|
||||
case ssa.TypeI32:
|
||||
zeroInst.AsIconst32(0)
|
||||
case ssa.TypeI64:
|
||||
zeroInst.AsIconst64(0)
|
||||
case ssa.TypeF32:
|
||||
zeroInst.AsF32const(0)
|
||||
case ssa.TypeF64:
|
||||
zeroInst.AsF64const(0)
|
||||
case ssa.TypeV128:
|
||||
zeroInst.AsVconst(0, 0)
|
||||
default:
|
||||
panic("TODO: " + wasm.ValueTypeName(typ))
|
||||
}
|
||||
|
||||
c.ssaBuilder.InsertInstruction(zeroInst)
|
||||
value := zeroInst.Return()
|
||||
c.ssaBuilder.DefineVariable(variable, value, entry)
|
||||
c.ssaBuilder.InsertZeroValue(st)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -375,13 +375,11 @@ blk3: () <-- (blk1)
|
||||
exp: `
|
||||
blk0: (exec_ctx:i64, module_ctx:i64)
|
||||
v2:i32 = Iconst_32 0x0
|
||||
v3:i32 = Iconst_32 0x0
|
||||
v4:i32 = Iconst_32 0x0
|
||||
Brz v2, blk2
|
||||
Jump blk1
|
||||
|
||||
blk1: () <-- (blk0)
|
||||
Return v4
|
||||
Return v2
|
||||
|
||||
blk2: () <-- (blk0)
|
||||
Jump blk3
|
||||
@@ -392,12 +390,11 @@ blk3: () <-- (blk2)
|
||||
expAfterPasses: `
|
||||
blk0: (exec_ctx:i64, module_ctx:i64)
|
||||
v2:i32 = Iconst_32 0x0
|
||||
v4:i32 = Iconst_32 0x0
|
||||
Brz v2, blk2
|
||||
Jump fallthrough
|
||||
|
||||
blk1: () <-- (blk0)
|
||||
Return v4
|
||||
Return v2
|
||||
|
||||
blk2: () <-- (blk0)
|
||||
Jump fallthrough
|
||||
|
||||
@@ -127,7 +127,11 @@ type Builder interface {
|
||||
// Idom returns the immediate dominator of the given BasicBlock.
|
||||
Idom(blk BasicBlock) BasicBlock
|
||||
|
||||
// VarLengthPool returns the VarLengthPool of Value.
|
||||
VarLengthPool() *wazevoapi.VarLengthPool[Value]
|
||||
|
||||
// InsertZeroValue inserts a zero value constant instruction of the given type.
|
||||
InsertZeroValue(t Type)
|
||||
}
|
||||
|
||||
// NewBuilder returns a new Builder implementation.
|
||||
@@ -203,6 +207,32 @@ type builder struct {
|
||||
donePostBlockLayoutPasses bool
|
||||
|
||||
currentSourceOffset SourceOffset
|
||||
|
||||
// zeros are the zero value constants for each type.
|
||||
zeros [typeEnd]Value
|
||||
}
|
||||
|
||||
// InsertZeroValue implements Builder.InsertZeroValue.
|
||||
func (b *builder) InsertZeroValue(t Type) {
|
||||
if b.zeros[t].Valid() {
|
||||
return
|
||||
}
|
||||
zeroInst := b.AllocateInstruction()
|
||||
switch t {
|
||||
case TypeI32:
|
||||
zeroInst.AsIconst32(0)
|
||||
case TypeI64:
|
||||
zeroInst.AsIconst64(0)
|
||||
case TypeF32:
|
||||
zeroInst.AsF32const(0)
|
||||
case TypeF64:
|
||||
zeroInst.AsF64const(0)
|
||||
case TypeV128:
|
||||
zeroInst.AsVconst(0, 0)
|
||||
default:
|
||||
panic("TODO: " + t.String())
|
||||
}
|
||||
b.zeros[t] = zeroInst.Insert(b).Return()
|
||||
}
|
||||
|
||||
func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] {
|
||||
@@ -218,6 +248,7 @@ func (b *builder) ReturnBlock() BasicBlock {
|
||||
func (b *builder) Init(s *Signature) {
|
||||
b.nextVariable = 0
|
||||
b.currentSignature = s
|
||||
b.zeros = [typeEnd]Value{ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid}
|
||||
resetBasicBlock(b.returnBlk)
|
||||
b.instructionsPool.Reset()
|
||||
b.basicBlocksPool.Reset()
|
||||
@@ -486,6 +517,9 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value
|
||||
value: value,
|
||||
})
|
||||
return value
|
||||
} else if blk.EntryBlock() {
|
||||
// If this is the entry block, we reach the uninitialized variable which has zero value.
|
||||
return b.zeros[b.definedVariableType(variable)]
|
||||
}
|
||||
|
||||
if pred := blk.singlePred; pred != nil {
|
||||
|
||||
@@ -21,6 +21,9 @@ const (
|
||||
|
||||
// TypeV128 represents 128-bit SIMD vectors.
|
||||
TypeV128
|
||||
|
||||
// -- Do not add new types after this line. ----
|
||||
typeEnd
|
||||
)
|
||||
|
||||
// String implements fmt.Stringer.
|
||||
|
||||
Reference in New Issue
Block a user