wazevo: aligns globals at 16 byte (#2042)

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
Takeshi Yoneda
2024-02-11 15:55:19 -08:00
committed by GitHub
parent 7d1818c227
commit 6d5aba90f7
8 changed files with 114 additions and 78 deletions

View File

@@ -1941,18 +1941,18 @@ L2:
L1 (SSA Block: blk0):
mov x128?, x0
mov x129?, x1
ldr w130?, [x129?, #0x8]
ldr x131?, [x129?, #0x18]
ldr s132?, [x129?, #0x28]
ldr d133?, [x129?, #0x38]
ldr w130?, [x129?, #0x10]
ldr x131?, [x129?, #0x20]
ldr s132?, [x129?, #0x30]
ldr d133?, [x129?, #0x40]
str x129?, [x128?, #0x8]
mov x0, x128?
mov x1, x129?
bl f1
ldr w134?, [x129?, #0x8]
ldr x135?, [x129?, #0x18]
ldr s136?, [x129?, #0x28]
ldr d137?, [x129?, #0x38]
ldr w134?, [x129?, #0x10]
ldr x135?, [x129?, #0x20]
ldr s136?, [x129?, #0x30]
ldr d137?, [x129?, #0x40]
mov v3.8b, v137?.8b
mov v2.8b, v136?.8b
mov x3, x135?
@@ -1970,21 +1970,21 @@ L1 (SSA Block: blk0):
orr x27, xzr, #0x20
str x27, [sp, #-0x10]!
str x1, [sp, #0x10]
ldr w8, [x1, #0x8]
ldr w8, [x1, #0x10]
str w8, [sp, #0x2c]
ldr x9, [x1, #0x18]
ldr x9, [x1, #0x20]
str x9, [sp, #0x24]
ldr s8, [x1, #0x28]
ldr s8, [x1, #0x30]
str s8, [sp, #0x20]
ldr d9, [x1, #0x38]
ldr d9, [x1, #0x40]
str d9, [sp, #0x18]
str x1, [x0, #0x8]
bl f1
ldr x8, [sp, #0x10]
ldr w9, [x8, #0x8]
ldr x10, [x8, #0x18]
ldr s8, [x8, #0x28]
ldr d9, [x8, #0x38]
ldr w9, [x8, #0x10]
ldr x10, [x8, #0x20]
ldr s8, [x8, #0x30]
ldr d9, [x8, #0x40]
mov v3.8b, v9.8b
mov v2.8b, v8.8b
mov x3, x10
@@ -2011,13 +2011,13 @@ L1 (SSA Block: blk0):
L1 (SSA Block: blk0):
mov x129?, x1
orr w137?, wzr, #0x1
str w137?, [x129?, #0x8]
str w137?, [x129?, #0x10]
orr x136?, xzr, #0x2
str x136?, [x129?, #0x18]
str x136?, [x129?, #0x20]
ldr s135?, #8; b 8; data.f32 3.000000
str s135?, [x129?, #0x28]
str s135?, [x129?, #0x30]
ldr d134?, #8; b 16; data.f64 4.000000
str d134?, [x129?, #0x38]
str d134?, [x129?, #0x40]
ret
`,
afterFinalizeARM64: `
@@ -2025,13 +2025,13 @@ L1 (SSA Block: blk0):
stp x30, xzr, [sp, #-0x10]!
str xzr, [sp, #-0x10]!
orr w8, wzr, #0x1
str w8, [x1, #0x8]
str w8, [x1, #0x10]
orr x8, xzr, #0x2
str x8, [x1, #0x18]
str x8, [x1, #0x20]
ldr s8, #8; b 8; data.f32 3.000000
str s8, [x1, #0x28]
str s8, [x1, #0x30]
ldr d8, #8; b 16; data.f64 4.000000
str d8, [x1, #0x38]
str d8, [x1, #0x40]
add sp, sp, #0x10
ldr x30, [sp], #0x10
ret

View File

@@ -569,7 +569,7 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.
me.opaquePtr = &me.opaque[0]
} else {
if size := compiled.offsets.TotalSize; size != 0 {
opaque := make([]byte, size)
opaque := newAlignedOpaque(size)
me.opaque = opaque
me.opaquePtr = &opaque[0]
}

View File

@@ -1183,11 +1183,11 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32)
m: testcases.GlobalsGet.Module,
exp: `
blk0: (exec_ctx:i64, module_ctx:i64)
v2:i32 = Load module_ctx, 0x8
v3:i64 = Load module_ctx, 0x18
v4:f32 = Load module_ctx, 0x28
v5:f64 = Load module_ctx, 0x38
v6:v128 = Load module_ctx, 0x48
v2:i32 = Load module_ctx, 0x10
v3:i64 = Load module_ctx, 0x20
v4:f32 = Load module_ctx, 0x30
v5:f64 = Load module_ctx, 0x40
v6:v128 = Load module_ctx, 0x50
Jump blk_ret, v2, v3, v4, v5, v6
`,
},
@@ -1197,15 +1197,15 @@ blk0: (exec_ctx:i64, module_ctx:i64)
exp: `
blk0: (exec_ctx:i64, module_ctx:i64)
v2:i32 = Iconst_32 0x1
Store v2, module_ctx, 0x8
Store v2, module_ctx, 0x10
v3:i64 = Iconst_64 0x2
Store v3, module_ctx, 0x18
Store v3, module_ctx, 0x20
v4:f32 = F32const 3.000000
Store v4, module_ctx, 0x28
Store v4, module_ctx, 0x30
v5:f64 = F64const 4.000000
Store v5, module_ctx, 0x38
Store v5, module_ctx, 0x40
v6:v128 = Vconst 000000000000000a 0000000000000014
Store v6, module_ctx, 0x48
Store v6, module_ctx, 0x50
Jump blk_ret, v2, v3, v4, v5, v6
`,
},
@@ -1217,16 +1217,16 @@ signatures:
sig1: i64i64_v
blk0: (exec_ctx:i64, module_ctx:i64)
v2:i32 = Load module_ctx, 0x8
v3:i64 = Load module_ctx, 0x18
v4:f32 = Load module_ctx, 0x28
v5:f64 = Load module_ctx, 0x38
v2:i32 = Load module_ctx, 0x10
v3:i64 = Load module_ctx, 0x20
v4:f32 = Load module_ctx, 0x30
v5:f64 = Load module_ctx, 0x40
Store module_ctx, exec_ctx, 0x8
Call f1:sig1, exec_ctx, module_ctx
v6:i32 = Load module_ctx, 0x8
v7:i64 = Load module_ctx, 0x18
v8:f32 = Load module_ctx, 0x28
v9:f64 = Load module_ctx, 0x38
v6:i32 = Load module_ctx, 0x10
v7:i64 = Load module_ctx, 0x20
v8:f32 = Load module_ctx, 0x30
v9:f64 = Load module_ctx, 0x40
Jump blk_ret, v2, v3, v4, v5, v6, v7, v8, v9
`,
expAfterOpt: `
@@ -1234,16 +1234,16 @@ signatures:
sig1: i64i64_v
blk0: (exec_ctx:i64, module_ctx:i64)
v2:i32 = Load module_ctx, 0x8
v3:i64 = Load module_ctx, 0x18
v4:f32 = Load module_ctx, 0x28
v5:f64 = Load module_ctx, 0x38
v2:i32 = Load module_ctx, 0x10
v3:i64 = Load module_ctx, 0x20
v4:f32 = Load module_ctx, 0x30
v5:f64 = Load module_ctx, 0x40
Store module_ctx, exec_ctx, 0x8
Call f1:sig1, exec_ctx, module_ctx
v6:i32 = Load module_ctx, 0x8
v7:i64 = Load module_ctx, 0x18
v8:f32 = Load module_ctx, 0x28
v9:f64 = Load module_ctx, 0x38
v6:i32 = Load module_ctx, 0x10
v7:i64 = Load module_ctx, 0x20
v8:f32 = Load module_ctx, 0x30
v9:f64 = Load module_ctx, 0x40
Jump blk_ret, v2, v3, v4, v5, v6, v7, v8, v9
`,
},

View File

@@ -11,7 +11,7 @@ import (
func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionListener) moduleContextOpaque {
size := len(m.CodeSection)*16 + 32
ret := make(moduleContextOpaque, size)
ret := newAlignedOpaque(size)
binary.LittleEndian.PutUint64(ret[0:], uint64(uintptr(unsafe.Pointer(m))))

View File

@@ -74,6 +74,17 @@ type (
moduleContextOpaque []byte
)
// newAlignedOpaque allocates a zero-filled moduleContextOpaque of exactly size
// bytes (length and capacity) whose first byte sits at a 16-byte aligned address.
//
// size must be a multiple of 16, otherwise this panics. A size of zero yields an
// empty, non-nil opaque instead of indexing into an empty allocation.
func newAlignedOpaque(size int) moduleContextOpaque {
	const alignment = 16
	// Check if the size is a multiple of 16.
	if size%alignment != 0 {
		panic("size must be a multiple of 16")
	}
	if size == 0 {
		// There is no first element whose address could be aligned.
		return moduleContextOpaque{}
	}
	// A [16]byte element type only carries 1-byte alignment in Go's type system,
	// so allocating a slice of them does not by itself promise a 16-byte aligned
	// base address. Over-allocate by alignment-1 bytes and start the returned
	// slice at the first aligned address inside the allocation instead.
	// This relies on the backing array staying at the sampled address; the
	// slice escapes to the heap, which the Go runtime does not relocate.
	raw := make([]byte, size+alignment-1)
	pad := int(-uintptr(unsafe.Pointer(&raw[0])) & (alignment - 1))
	// Cap the capacity at size so the padding bytes are never reachable via append.
	return moduleContextOpaque(raw[pad : pad+size : pad+size])
}
func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) {
s := uint64(len(mem.Buffer))
var b uint64

View File

@@ -331,3 +331,12 @@ func Test_getTypeIDOf(t *testing.T) {
require.Equal(t, wasm.FunctionTypeID(222), getTypeIDOf(2, m))
require.Equal(t, wasm.FunctionTypeID(111), getTypeIDOf(3, m))
}
// Test_newAlignedOpaque checks, for 100 sizes that are multiples of 16
// (160 through 1744 bytes), that newAlignedOpaque returns a buffer of the
// requested length whose first byte lies on a 16-byte boundary.
func Test_newAlignedOpaque(t *testing.T) {
	const alignMask = 15
	for n := 10; n < 110; n++ {
		size := 16 * n
		opaque := newAlignedOpaque(size)
		require.Equal(t, size, len(opaque))
		addr := uintptr(unsafe.Pointer(&opaque[0]))
		require.Equal(t, 0, int(addr&alignMask))
	}
}

View File

@@ -142,6 +142,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
}
if m.ImportMemoryCount > 0 {
offset = align8(offset)
// *wasm.MemoryInstance + imported memory's owner (moduleContextOpaque)
const importedMemorySizeInOpaqueModuleContext = 16
ret.ImportedMemoryBegin = offset
@@ -152,6 +153,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
}
if m.ImportFunctionCount > 0 {
offset = align8(offset)
ret.ImportedFunctionsBegin = offset
// Each function is stored wazevo.functionInstance.
size := int(m.ImportFunctionCount) * FunctionInstanceSize
@@ -161,6 +163,8 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
}
if globals := int(m.ImportGlobalCount) + len(m.GlobalSection); globals > 0 {
// Align to 16 bytes for globals, as f32/f64/v128 might be loaded via SIMD instructions.
offset = align16(offset)
ret.GlobalsBegin = offset
// Pointers to *wasm.GlobalInstance.
offset += Offset(globals) * 16
@@ -169,6 +173,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
}
if tables := len(m.TableSection) + int(m.ImportTableCount); tables > 0 {
offset = align8(offset)
ret.TypeIDs1stElement = offset
offset += 8 // First element of TypeIDs.
@@ -181,6 +186,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
}
if withListener {
offset = align8(offset)
ret.BeforeListenerTrampolines1stElement = offset
offset += 8 // First element of BeforeListenerTrampolines.
@@ -197,6 +203,14 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
ret.ElementInstances1stElement = offset
offset += 8 // First element of ElementInstances.
ret.TotalSize = int(offset)
ret.TotalSize = int(align16(offset))
return ret
}
// align16 rounds o up to the nearest multiple of 16, leaving values that are
// already 16-byte aligned unchanged.
func align16(o Offset) Offset {
	const mask = 16 - 1
	return (o + mask) &^ mask
}
// align8 rounds o up to the nearest multiple of 8, leaving values that are
// already 8-byte aligned unchanged.
func align8(o Offset) Offset {
	const mask = 8 - 1
	return (o + mask) &^ mask
}

View File

@@ -28,7 +28,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
AfterListenerTrampolines1stElement: -1,
DataInstances1stElement: 8,
ElementInstances1stElement: 16,
TotalSize: 24,
TotalSize: 32, // 16 byte alignment.
},
},
{
@@ -45,7 +45,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
AfterListenerTrampolines1stElement: -1,
DataInstances1stElement: 24,
ElementInstances1stElement: 32,
TotalSize: 40,
TotalSize: 48, // 16 byte alignment.
},
},
{
@@ -62,7 +62,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
AfterListenerTrampolines1stElement: -1,
DataInstances1stElement: 24,
ElementInstances1stElement: 32,
TotalSize: 40,
TotalSize: 48, // 16 byte alignment.
},
},
{
@@ -79,7 +79,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
AfterListenerTrampolines1stElement: -1,
DataInstances1stElement: 10*FunctionInstanceSize + 8,
ElementInstances1stElement: 10*FunctionInstanceSize + 16,
TotalSize: 10*FunctionInstanceSize + 24,
TotalSize: int(align16(Offset(10*FunctionInstanceSize + 24))),
},
},
{
@@ -96,7 +96,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
AfterListenerTrampolines1stElement: -1,
DataInstances1stElement: 10*FunctionInstanceSize + 24,
ElementInstances1stElement: 10*FunctionInstanceSize + 32,
TotalSize: 10*FunctionInstanceSize + 40,
TotalSize: int(align16(Offset(10*FunctionInstanceSize + 40))),
},
},
{
@@ -110,17 +110,18 @@ func TestNewModuleContextOffsetData(t *testing.T) {
GlobalSection: make([]wasm.Global, 20),
},
exp: ModuleContextOffsetData{
LocalMemoryBegin: 8,
ImportedMemoryBegin: -1,
ImportedFunctionsBegin: 24,
GlobalsBegin: 24 + 10*FunctionInstanceSize,
TypeIDs1stElement: 24 + 10*FunctionInstanceSize + 16*30,
TablesBegin: 24 + 10*FunctionInstanceSize + 16*30 + 8,
LocalMemoryBegin: 8,
ImportedMemoryBegin: -1,
ImportedFunctionsBegin: 24,
// Align to 16 bytes for globals.
GlobalsBegin: 32 + 10*FunctionInstanceSize,
TypeIDs1stElement: 32 + 10*FunctionInstanceSize + 16*30,
TablesBegin: 32 + 10*FunctionInstanceSize + 16*30 + 8,
BeforeListenerTrampolines1stElement: -1,
AfterListenerTrampolines1stElement: -1,
DataInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15,
ElementInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8,
TotalSize: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16,
DataInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15,
ElementInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8,
TotalSize: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16,
},
},
{
@@ -135,17 +136,18 @@ func TestNewModuleContextOffsetData(t *testing.T) {
},
withListener: true,
exp: ModuleContextOffsetData{
LocalMemoryBegin: 8,
ImportedMemoryBegin: -1,
ImportedFunctionsBegin: 24,
GlobalsBegin: 24 + 10*FunctionInstanceSize,
TypeIDs1stElement: 24 + 10*FunctionInstanceSize + 16*30,
TablesBegin: 24 + 10*FunctionInstanceSize + 16*30 + 8,
BeforeListenerTrampolines1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15,
AfterListenerTrampolines1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8,
DataInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16,
ElementInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 24,
TotalSize: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 32,
LocalMemoryBegin: 8,
ImportedMemoryBegin: -1,
ImportedFunctionsBegin: 24,
// Align to 16 bytes for globals.
GlobalsBegin: 32 + 10*FunctionInstanceSize,
TypeIDs1stElement: 32 + 10*FunctionInstanceSize + 16*30,
TablesBegin: 32 + 10*FunctionInstanceSize + 16*30 + 8,
BeforeListenerTrampolines1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15,
AfterListenerTrampolines1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8,
DataInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16,
ElementInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 24,
TotalSize: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 32,
},
},
} {