wazevo: aligns globals at 16 byte (#2042)
Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
@@ -1941,18 +1941,18 @@ L2:
|
||||
L1 (SSA Block: blk0):
|
||||
mov x128?, x0
|
||||
mov x129?, x1
|
||||
ldr w130?, [x129?, #0x8]
|
||||
ldr x131?, [x129?, #0x18]
|
||||
ldr s132?, [x129?, #0x28]
|
||||
ldr d133?, [x129?, #0x38]
|
||||
ldr w130?, [x129?, #0x10]
|
||||
ldr x131?, [x129?, #0x20]
|
||||
ldr s132?, [x129?, #0x30]
|
||||
ldr d133?, [x129?, #0x40]
|
||||
str x129?, [x128?, #0x8]
|
||||
mov x0, x128?
|
||||
mov x1, x129?
|
||||
bl f1
|
||||
ldr w134?, [x129?, #0x8]
|
||||
ldr x135?, [x129?, #0x18]
|
||||
ldr s136?, [x129?, #0x28]
|
||||
ldr d137?, [x129?, #0x38]
|
||||
ldr w134?, [x129?, #0x10]
|
||||
ldr x135?, [x129?, #0x20]
|
||||
ldr s136?, [x129?, #0x30]
|
||||
ldr d137?, [x129?, #0x40]
|
||||
mov v3.8b, v137?.8b
|
||||
mov v2.8b, v136?.8b
|
||||
mov x3, x135?
|
||||
@@ -1970,21 +1970,21 @@ L1 (SSA Block: blk0):
|
||||
orr x27, xzr, #0x20
|
||||
str x27, [sp, #-0x10]!
|
||||
str x1, [sp, #0x10]
|
||||
ldr w8, [x1, #0x8]
|
||||
ldr w8, [x1, #0x10]
|
||||
str w8, [sp, #0x2c]
|
||||
ldr x9, [x1, #0x18]
|
||||
ldr x9, [x1, #0x20]
|
||||
str x9, [sp, #0x24]
|
||||
ldr s8, [x1, #0x28]
|
||||
ldr s8, [x1, #0x30]
|
||||
str s8, [sp, #0x20]
|
||||
ldr d9, [x1, #0x38]
|
||||
ldr d9, [x1, #0x40]
|
||||
str d9, [sp, #0x18]
|
||||
str x1, [x0, #0x8]
|
||||
bl f1
|
||||
ldr x8, [sp, #0x10]
|
||||
ldr w9, [x8, #0x8]
|
||||
ldr x10, [x8, #0x18]
|
||||
ldr s8, [x8, #0x28]
|
||||
ldr d9, [x8, #0x38]
|
||||
ldr w9, [x8, #0x10]
|
||||
ldr x10, [x8, #0x20]
|
||||
ldr s8, [x8, #0x30]
|
||||
ldr d9, [x8, #0x40]
|
||||
mov v3.8b, v9.8b
|
||||
mov v2.8b, v8.8b
|
||||
mov x3, x10
|
||||
@@ -2011,13 +2011,13 @@ L1 (SSA Block: blk0):
|
||||
L1 (SSA Block: blk0):
|
||||
mov x129?, x1
|
||||
orr w137?, wzr, #0x1
|
||||
str w137?, [x129?, #0x8]
|
||||
str w137?, [x129?, #0x10]
|
||||
orr x136?, xzr, #0x2
|
||||
str x136?, [x129?, #0x18]
|
||||
str x136?, [x129?, #0x20]
|
||||
ldr s135?, #8; b 8; data.f32 3.000000
|
||||
str s135?, [x129?, #0x28]
|
||||
str s135?, [x129?, #0x30]
|
||||
ldr d134?, #8; b 16; data.f64 4.000000
|
||||
str d134?, [x129?, #0x38]
|
||||
str d134?, [x129?, #0x40]
|
||||
ret
|
||||
`,
|
||||
afterFinalizeARM64: `
|
||||
@@ -2025,13 +2025,13 @@ L1 (SSA Block: blk0):
|
||||
stp x30, xzr, [sp, #-0x10]!
|
||||
str xzr, [sp, #-0x10]!
|
||||
orr w8, wzr, #0x1
|
||||
str w8, [x1, #0x8]
|
||||
str w8, [x1, #0x10]
|
||||
orr x8, xzr, #0x2
|
||||
str x8, [x1, #0x18]
|
||||
str x8, [x1, #0x20]
|
||||
ldr s8, #8; b 8; data.f32 3.000000
|
||||
str s8, [x1, #0x28]
|
||||
str s8, [x1, #0x30]
|
||||
ldr d8, #8; b 16; data.f64 4.000000
|
||||
str d8, [x1, #0x38]
|
||||
str d8, [x1, #0x40]
|
||||
add sp, sp, #0x10
|
||||
ldr x30, [sp], #0x10
|
||||
ret
|
||||
|
||||
@@ -569,7 +569,7 @@ func (e *engine) NewModuleEngine(m *wasm.Module, mi *wasm.ModuleInstance) (wasm.
|
||||
me.opaquePtr = &me.opaque[0]
|
||||
} else {
|
||||
if size := compiled.offsets.TotalSize; size != 0 {
|
||||
opaque := make([]byte, size)
|
||||
opaque := newAlignedOpaque(size)
|
||||
me.opaque = opaque
|
||||
me.opaquePtr = &opaque[0]
|
||||
}
|
||||
|
||||
@@ -1183,11 +1183,11 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32)
|
||||
m: testcases.GlobalsGet.Module,
|
||||
exp: `
|
||||
blk0: (exec_ctx:i64, module_ctx:i64)
|
||||
v2:i32 = Load module_ctx, 0x8
|
||||
v3:i64 = Load module_ctx, 0x18
|
||||
v4:f32 = Load module_ctx, 0x28
|
||||
v5:f64 = Load module_ctx, 0x38
|
||||
v6:v128 = Load module_ctx, 0x48
|
||||
v2:i32 = Load module_ctx, 0x10
|
||||
v3:i64 = Load module_ctx, 0x20
|
||||
v4:f32 = Load module_ctx, 0x30
|
||||
v5:f64 = Load module_ctx, 0x40
|
||||
v6:v128 = Load module_ctx, 0x50
|
||||
Jump blk_ret, v2, v3, v4, v5, v6
|
||||
`,
|
||||
},
|
||||
@@ -1197,15 +1197,15 @@ blk0: (exec_ctx:i64, module_ctx:i64)
|
||||
exp: `
|
||||
blk0: (exec_ctx:i64, module_ctx:i64)
|
||||
v2:i32 = Iconst_32 0x1
|
||||
Store v2, module_ctx, 0x8
|
||||
Store v2, module_ctx, 0x10
|
||||
v3:i64 = Iconst_64 0x2
|
||||
Store v3, module_ctx, 0x18
|
||||
Store v3, module_ctx, 0x20
|
||||
v4:f32 = F32const 3.000000
|
||||
Store v4, module_ctx, 0x28
|
||||
Store v4, module_ctx, 0x30
|
||||
v5:f64 = F64const 4.000000
|
||||
Store v5, module_ctx, 0x38
|
||||
Store v5, module_ctx, 0x40
|
||||
v6:v128 = Vconst 000000000000000a 0000000000000014
|
||||
Store v6, module_ctx, 0x48
|
||||
Store v6, module_ctx, 0x50
|
||||
Jump blk_ret, v2, v3, v4, v5, v6
|
||||
`,
|
||||
},
|
||||
@@ -1217,16 +1217,16 @@ signatures:
|
||||
sig1: i64i64_v
|
||||
|
||||
blk0: (exec_ctx:i64, module_ctx:i64)
|
||||
v2:i32 = Load module_ctx, 0x8
|
||||
v3:i64 = Load module_ctx, 0x18
|
||||
v4:f32 = Load module_ctx, 0x28
|
||||
v5:f64 = Load module_ctx, 0x38
|
||||
v2:i32 = Load module_ctx, 0x10
|
||||
v3:i64 = Load module_ctx, 0x20
|
||||
v4:f32 = Load module_ctx, 0x30
|
||||
v5:f64 = Load module_ctx, 0x40
|
||||
Store module_ctx, exec_ctx, 0x8
|
||||
Call f1:sig1, exec_ctx, module_ctx
|
||||
v6:i32 = Load module_ctx, 0x8
|
||||
v7:i64 = Load module_ctx, 0x18
|
||||
v8:f32 = Load module_ctx, 0x28
|
||||
v9:f64 = Load module_ctx, 0x38
|
||||
v6:i32 = Load module_ctx, 0x10
|
||||
v7:i64 = Load module_ctx, 0x20
|
||||
v8:f32 = Load module_ctx, 0x30
|
||||
v9:f64 = Load module_ctx, 0x40
|
||||
Jump blk_ret, v2, v3, v4, v5, v6, v7, v8, v9
|
||||
`,
|
||||
expAfterOpt: `
|
||||
@@ -1234,16 +1234,16 @@ signatures:
|
||||
sig1: i64i64_v
|
||||
|
||||
blk0: (exec_ctx:i64, module_ctx:i64)
|
||||
v2:i32 = Load module_ctx, 0x8
|
||||
v3:i64 = Load module_ctx, 0x18
|
||||
v4:f32 = Load module_ctx, 0x28
|
||||
v5:f64 = Load module_ctx, 0x38
|
||||
v2:i32 = Load module_ctx, 0x10
|
||||
v3:i64 = Load module_ctx, 0x20
|
||||
v4:f32 = Load module_ctx, 0x30
|
||||
v5:f64 = Load module_ctx, 0x40
|
||||
Store module_ctx, exec_ctx, 0x8
|
||||
Call f1:sig1, exec_ctx, module_ctx
|
||||
v6:i32 = Load module_ctx, 0x8
|
||||
v7:i64 = Load module_ctx, 0x18
|
||||
v8:f32 = Load module_ctx, 0x28
|
||||
v9:f64 = Load module_ctx, 0x38
|
||||
v6:i32 = Load module_ctx, 0x10
|
||||
v7:i64 = Load module_ctx, 0x20
|
||||
v8:f32 = Load module_ctx, 0x30
|
||||
v9:f64 = Load module_ctx, 0x40
|
||||
Jump blk_ret, v2, v3, v4, v5, v6, v7, v8, v9
|
||||
`,
|
||||
},
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
|
||||
func buildHostModuleOpaque(m *wasm.Module, listeners []experimental.FunctionListener) moduleContextOpaque {
|
||||
size := len(m.CodeSection)*16 + 32
|
||||
ret := make(moduleContextOpaque, size)
|
||||
ret := newAlignedOpaque(size)
|
||||
|
||||
binary.LittleEndian.PutUint64(ret[0:], uint64(uintptr(unsafe.Pointer(m))))
|
||||
|
||||
|
||||
@@ -74,6 +74,17 @@ type (
|
||||
moduleContextOpaque []byte
|
||||
)
|
||||
|
||||
func newAlignedOpaque(size int) moduleContextOpaque {
|
||||
// Check if the size is a multiple of 16.
|
||||
if size%16 != 0 {
|
||||
panic("size must be a multiple of 16")
|
||||
}
|
||||
type _16 [16]byte
|
||||
buf := make([]_16, size/16)
|
||||
slice := unsafe.Slice(&buf[0][0], size)
|
||||
return *(*moduleContextOpaque)(&slice)
|
||||
}
|
||||
|
||||
func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) {
|
||||
s := uint64(len(mem.Buffer))
|
||||
var b uint64
|
||||
|
||||
@@ -331,3 +331,12 @@ func Test_getTypeIDOf(t *testing.T) {
|
||||
require.Equal(t, wasm.FunctionTypeID(222), getTypeIDOf(2, m))
|
||||
require.Equal(t, wasm.FunctionTypeID(111), getTypeIDOf(3, m))
|
||||
}
|
||||
|
||||
func Test_newAlignedOpaque(t *testing.T) {
|
||||
for i := 0; i < 100; i++ {
|
||||
s := 16 * (i + 10)
|
||||
buf := newAlignedOpaque(s)
|
||||
require.Equal(t, s, len(buf))
|
||||
require.Equal(t, 0, int(uintptr(unsafe.Pointer(&buf[0]))&15))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,6 +142,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
|
||||
}
|
||||
|
||||
if m.ImportMemoryCount > 0 {
|
||||
offset = align8(offset)
|
||||
// *wasm.MemoryInstance + imported memory's owner (moduleContextOpaque)
|
||||
const importedMemorySizeInOpaqueModuleContext = 16
|
||||
ret.ImportedMemoryBegin = offset
|
||||
@@ -152,6 +153,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
|
||||
}
|
||||
|
||||
if m.ImportFunctionCount > 0 {
|
||||
offset = align8(offset)
|
||||
ret.ImportedFunctionsBegin = offset
|
||||
// Each function is stored wazevo.functionInstance.
|
||||
size := int(m.ImportFunctionCount) * FunctionInstanceSize
|
||||
@@ -161,6 +163,8 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
|
||||
}
|
||||
|
||||
if globals := int(m.ImportGlobalCount) + len(m.GlobalSection); globals > 0 {
|
||||
// Align to 16 bytes for globals, as f32/f64/v128 might be loaded via SIMD instructions.
|
||||
offset = align16(offset)
|
||||
ret.GlobalsBegin = offset
|
||||
// Pointers to *wasm.GlobalInstance.
|
||||
offset += Offset(globals) * 16
|
||||
@@ -169,6 +173,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
|
||||
}
|
||||
|
||||
if tables := len(m.TableSection) + int(m.ImportTableCount); tables > 0 {
|
||||
offset = align8(offset)
|
||||
ret.TypeIDs1stElement = offset
|
||||
offset += 8 // First element of TypeIDs.
|
||||
|
||||
@@ -181,6 +186,7 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
|
||||
}
|
||||
|
||||
if withListener {
|
||||
offset = align8(offset)
|
||||
ret.BeforeListenerTrampolines1stElement = offset
|
||||
offset += 8 // First element of BeforeListenerTrampolines.
|
||||
|
||||
@@ -197,6 +203,14 @@ func NewModuleContextOffsetData(m *wasm.Module, withListener bool) ModuleContext
|
||||
ret.ElementInstances1stElement = offset
|
||||
offset += 8 // First element of ElementInstances.
|
||||
|
||||
ret.TotalSize = int(offset)
|
||||
ret.TotalSize = int(align16(offset))
|
||||
return ret
|
||||
}
|
||||
|
||||
func align16(o Offset) Offset {
|
||||
return (o + 15) &^ 15
|
||||
}
|
||||
|
||||
func align8(o Offset) Offset {
|
||||
return (o + 7) &^ 7
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
|
||||
AfterListenerTrampolines1stElement: -1,
|
||||
DataInstances1stElement: 8,
|
||||
ElementInstances1stElement: 16,
|
||||
TotalSize: 24,
|
||||
TotalSize: 32, // 16 byte alignment.
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -45,7 +45,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
|
||||
AfterListenerTrampolines1stElement: -1,
|
||||
DataInstances1stElement: 24,
|
||||
ElementInstances1stElement: 32,
|
||||
TotalSize: 40,
|
||||
TotalSize: 48, // 16 byte alignment.
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -62,7 +62,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
|
||||
AfterListenerTrampolines1stElement: -1,
|
||||
DataInstances1stElement: 24,
|
||||
ElementInstances1stElement: 32,
|
||||
TotalSize: 40,
|
||||
TotalSize: 48, // 16 byte alignment.
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -79,7 +79,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
|
||||
AfterListenerTrampolines1stElement: -1,
|
||||
DataInstances1stElement: 10*FunctionInstanceSize + 8,
|
||||
ElementInstances1stElement: 10*FunctionInstanceSize + 16,
|
||||
TotalSize: 10*FunctionInstanceSize + 24,
|
||||
TotalSize: int(align16(Offset(10*FunctionInstanceSize + 24))),
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -96,7 +96,7 @@ func TestNewModuleContextOffsetData(t *testing.T) {
|
||||
AfterListenerTrampolines1stElement: -1,
|
||||
DataInstances1stElement: 10*FunctionInstanceSize + 24,
|
||||
ElementInstances1stElement: 10*FunctionInstanceSize + 32,
|
||||
TotalSize: 10*FunctionInstanceSize + 40,
|
||||
TotalSize: int(align16(Offset(10*FunctionInstanceSize + 40))),
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -110,17 +110,18 @@ func TestNewModuleContextOffsetData(t *testing.T) {
|
||||
GlobalSection: make([]wasm.Global, 20),
|
||||
},
|
||||
exp: ModuleContextOffsetData{
|
||||
LocalMemoryBegin: 8,
|
||||
ImportedMemoryBegin: -1,
|
||||
ImportedFunctionsBegin: 24,
|
||||
GlobalsBegin: 24 + 10*FunctionInstanceSize,
|
||||
TypeIDs1stElement: 24 + 10*FunctionInstanceSize + 16*30,
|
||||
TablesBegin: 24 + 10*FunctionInstanceSize + 16*30 + 8,
|
||||
LocalMemoryBegin: 8,
|
||||
ImportedMemoryBegin: -1,
|
||||
ImportedFunctionsBegin: 24,
|
||||
// Align to 16 bytes for globals.
|
||||
GlobalsBegin: 32 + 10*FunctionInstanceSize,
|
||||
TypeIDs1stElement: 32 + 10*FunctionInstanceSize + 16*30,
|
||||
TablesBegin: 32 + 10*FunctionInstanceSize + 16*30 + 8,
|
||||
BeforeListenerTrampolines1stElement: -1,
|
||||
AfterListenerTrampolines1stElement: -1,
|
||||
DataInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15,
|
||||
ElementInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8,
|
||||
TotalSize: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16,
|
||||
DataInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15,
|
||||
ElementInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8,
|
||||
TotalSize: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -135,17 +136,18 @@ func TestNewModuleContextOffsetData(t *testing.T) {
|
||||
},
|
||||
withListener: true,
|
||||
exp: ModuleContextOffsetData{
|
||||
LocalMemoryBegin: 8,
|
||||
ImportedMemoryBegin: -1,
|
||||
ImportedFunctionsBegin: 24,
|
||||
GlobalsBegin: 24 + 10*FunctionInstanceSize,
|
||||
TypeIDs1stElement: 24 + 10*FunctionInstanceSize + 16*30,
|
||||
TablesBegin: 24 + 10*FunctionInstanceSize + 16*30 + 8,
|
||||
BeforeListenerTrampolines1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15,
|
||||
AfterListenerTrampolines1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8,
|
||||
DataInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16,
|
||||
ElementInstances1stElement: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 24,
|
||||
TotalSize: 24 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 32,
|
||||
LocalMemoryBegin: 8,
|
||||
ImportedMemoryBegin: -1,
|
||||
ImportedFunctionsBegin: 24,
|
||||
// Align to 16 bytes for globals.
|
||||
GlobalsBegin: 32 + 10*FunctionInstanceSize,
|
||||
TypeIDs1stElement: 32 + 10*FunctionInstanceSize + 16*30,
|
||||
TablesBegin: 32 + 10*FunctionInstanceSize + 16*30 + 8,
|
||||
BeforeListenerTrampolines1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15,
|
||||
AfterListenerTrampolines1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 8,
|
||||
DataInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 16,
|
||||
ElementInstances1stElement: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 24,
|
||||
TotalSize: 32 + 10*FunctionInstanceSize + 16*30 + 8 + 8*15 + 32,
|
||||
},
|
||||
},
|
||||
} {
|
||||
|
||||
Reference in New Issue
Block a user