From d776d8c0250f5eed70670743359c8552d43a8b1c Mon Sep 17 00:00:00 2001
From: Takeshi Yoneda
Date: Mon, 11 Sep 2023 14:02:08 +0900
Subject: [PATCH] wazevo: fixes extended register operands for arm64, imported
 memory size type (#1694)

Signed-off-by: Takeshi Yoneda
---
 .../backend/isa/arm64/instr_encoding.go       |  52 ++++++++
 .../backend/isa/arm64/instr_encoding_test.go  |  42 ++++++
 .../backend/isa/arm64/lower_instr_operands.go |  14 +-
 .../isa/arm64/lower_instr_operands_test.go    | 122 ++++++++++++++++--
 .../engine/wazevo/frontend/frontend_test.go   |  16 +--
 internal/engine/wazevo/frontend/lower.go      |   2 +-
 6 files changed, 226 insertions(+), 22 deletions(-)

diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
index af784a0f..2ebab5c2 100644
--- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
+++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
@@ -144,6 +144,16 @@ func (i *instruction) encode(c backend.Compiler) {
 			i.u3 == 1,
 			rn == sp,
 		))
+	case aluRRRExtend:
+		rm, exo, to := i.rm.er()
+		c.Emit4Bytes(encodeAluRRRExtend(
+			aluOp(i.u1),
+			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rn.realReg()],
+			regNumberInEncoding[rm.RealReg()],
+			exo,
+			to,
+		))
 	case aluRRRShift:
 		r, amt, sop := i.rm.sr()
 		c.Emit4Bytes(encodeAluRRRShift(
@@ -1062,6 +1072,48 @@
 	return opc<<29 | n<<21 | _31to24<<24 | shift<<22 | rm<<16 | (amount << 10) | (rn << 5) | rd
 }
 
+// "Add/subtract (extended register)" in
+// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_ext
+func encodeAluRRRExtend(ao aluOp, rd, rn, rm uint32, extOp extendOp, to byte) uint32 {
+	var s, op uint32
+	switch ao {
+	case aluOpAdd:
+		op = 0b0
+	case aluOpAddS:
+		op, s = 0b0, 0b1
+	case aluOpSub:
+		op = 0b1
+	case aluOpSubS:
+		op, s = 0b1, 0b1
+	default:
+		panic("BUG: extended register operand can be used only for add/sub")
+	}
+
+	var sf uint32
+	if to == 64 {
+		sf = 0b1
+	}
+
+	var option uint32
+	switch extOp {
+	case extendOpUXTB:
+		option = 0b000
+	case extendOpUXTH:
+		option = 0b001
+	case extendOpUXTW:
+		option = 0b010
+	case extendOpSXTB:
+		option = 0b100
+	case extendOpSXTH:
+		option = 0b101
+	case extendOpSXTW:
+		option = 0b110
+	case extendOpSXTX, extendOpUXTX:
+		panic(fmt.Sprintf("%s is essentially a no-op, and should be handled much earlier than encoding", extOp.String()))
+	}
+	return sf<<31 | op<<30 | s<<29 | 0b1011001<<21 | rm<<16 | option<<13 | rn<<5 | rd
+}
+
 // encodeAluRRR encodes as Data Processing (register), depending on aluOp.
 // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
 func encodeAluRRR(op aluOp, rd, rn, rm uint32, _64bit, isRnSp bool) uint32 {
diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go
index 9bec716b..00846c98 100644
--- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go
+++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go
@@ -302,6 +302,48 @@ func TestInstruction_encode(t *testing.T) {
 		{want: "f20300b2", setup: func(i *instruction) { i.asALUBitmaskImm(aluOpOrr, xzrVReg, x18VReg, 0x100000001, true) }},
 		{want: "f21fbf0e", setup: func(i *instruction) { i.asFpuMov64(v18VReg, v31VReg) }},
 		{want: "f21fbf4e", setup: func(i *instruction) { i.asFpuMov128(v18VReg, v31VReg) }},
+		{want: "40a034ab", setup: func(i *instruction) {
+			i.asALU(aluOpAddS, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTH, 64), false)
+		}},
+		{want: "4080348b", setup: func(i *instruction) {
+			i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTB, 64), false)
+		}},
+		{want: "40a0348b", setup: func(i *instruction) {
+			i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTH, 64), false)
+		}},
+		{want: "40c0348b", setup: func(i *instruction) {
+			i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTW, 64), false)
+		}},
+		{want: "4080340b", setup: func(i *instruction) {
+			i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTB, 32), false)
+		}},
+		{want: "40a0340b", setup: func(i *instruction) {
+			i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTH, 32), false)
+		}},
+		{want: "40c0340b", setup: func(i *instruction) {
+			i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTW, 32), false)
+		}},
+		{want: "400034eb", setup: func(i *instruction) {
+			i.asALU(aluOpSubS, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTB, 64), false)
+		}},
+		{want: "400034cb", setup: func(i *instruction) {
+			i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTB, 64), false)
+		}},
+		{want: "402034cb", setup: func(i *instruction) {
+			i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTH, 64), false)
+		}},
+		{want: "404034cb", setup: func(i *instruction) {
+			i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTW, 64), false)
+		}},
+		{want: "4000344b", setup: func(i *instruction) {
+			i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTB, 32), false)
+		}},
+		{want: "4020344b", setup: func(i *instruction) {
+			i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTH, 32), false)
+		}},
+		{want: "4040344b", setup: func(i *instruction) {
+			i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTW, 32), false)
+		}},
 		{want: "4000140b", setup: func(i *instruction) {
 			i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), false)
 		}},
diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go
index c0804ce3..202d4887 100644
--- a/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go
+++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands.go
@@ -74,12 +74,15 @@ func (o operand) nr() regalloc.VReg {
 
 // operandER encodes the given VReg as an operand of operandKindER.
 func operandER(r regalloc.VReg, eop extendOp, to byte) operand {
+	if to < 32 {
+		panic("TODO?BUG?: when would we need to extend to less than 32 bits?")
+	}
 	return operand{kind: operandKindER, data: uint64(r), data2: uint64(eop)<<32 | uint64(to)}
 }
 
 // er decodes the underlying VReg, extend operation, and the target size assuming the operand is of operandKindER.
 func (o operand) er() (r regalloc.VReg, eop extendOp, to byte) {
-	return regalloc.VReg(o.data), extendOp(o.data2>>32) & 0xff, byte(o.data) & 0xff
+	return regalloc.VReg(o.data), extendOp(o.data2>>32) & 0xff, byte(o.data2 & 0xff)
 }
 
 // operandSR encodes the given VReg as an operand of operandKindSR.
@@ -215,15 +218,16 @@ func (m *machine) getOperand_ER_SR_NR(def *backend.SSAValueDefinition, mode extM
 		signed := extInstr.Opcode() == ssa.OpcodeSExtend
 		innerExtFromBits, innerExtToBits := extInstr.ExtendFromToBits()
 
-		if mode == extModeNone {
+		modeBits, modeSigned := mode.bits(), mode.signed()
+		if mode == extModeNone || innerExtToBits == modeBits {
 			eop := extendOpFrom(signed, innerExtFromBits)
-			op = operandER(m.compiler.VRegOf(extInstr.Arg()), eop, innerExtToBits)
+			extArg := m.getOperand_NR(m.compiler.ValueDefinition(extInstr.Arg()), extModeNone)
+			op = operandER(extArg.nr(), eop, innerExtToBits)
 			m.compiler.MarkLowered(extInstr) // We merged the instruction in the operand.
 			return
 		}
 
-		modeBits, modeSigned := mode.bits(), mode.signed()
-		if innerExtToBits >= modeBits {
+		if innerExtToBits > modeBits {
 			panic("BUG?TODO?: need the results of inner extension to be larger than the mode")
 		}
 
diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands_test.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands_test.go
index 8b18e7e7..374f73da 100644
--- a/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands_test.go
+++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr_operands_test.go
@@ -232,6 +232,7 @@ func TestMachine_getOperand_SR_NR(t *testing.T) {
 }
 
 func TestMachine_getOperand_ER_SR_NR(t *testing.T) {
+	const nextVReg = 100
 	type testCase struct {
 		setup func(*mockCompiler, ssa.Builder, *machine) (def *backend.SSAValueDefinition, mode extMode, verify func(t *testing.T))
 		exp   operand
@@ -239,6 +240,7 @@ func TestMachine_getOperand_ER_SR_NR(t *testing.T) {
 	}
 	runner := func(tc testCase) {
 		ctx, b, m := newSetupWithMockContext()
+		ctx.vRegCounter = nextVReg - 1
 		def, mode, verify := tc.setup(ctx, b, m)
 		actual := m.getOperand_ER_SR_NR(def, mode)
 		require.Equal(t, tc.exp, actual)
@@ -286,7 +288,9 @@ func TestMachine_getOperand_ER_SR_NR(t *testing.T) {
 			ext.AsUExtend(v, c.from, c.to)
 		}
 		builder.InsertInstruction(ext)
+		extArg := ext.Arg()
 		ctx.vRegMap[ext.Arg()] = regalloc.VReg(10)
+		ctx.definitions[v] = &backend.SSAValueDefinition{BlkParamVReg: regalloc.VReg(10), BlockParamValue: extArg}
 		def = &backend.SSAValueDefinition{Instr: ext, N: 0}
 		return def, extModeNone, func(t *testing.T) {
 			_, ok := ctx.lowered[ext]
@@ -301,12 +305,14 @@ func TestMachine_getOperand_ER_SR_NR(t *testing.T) {
 	t.Run("valid mode", func(t *testing.T) {
 		const argVReg, resultVReg = regalloc.VReg(10), regalloc.VReg(11)
 		for _, c := range []struct {
-			name     string
-			from, to byte
-			signed   bool
-			mode     extMode
-			exp      operand
-			lowered  bool
+			name         string
+			from, to     byte
+			signed       bool
+			mode         extMode
+			exp          operand
+			lowered      bool
+			extArgConst  bool
+			instructions []string
 		}{
 			{
 				name: "8->16->32: signed",
@@ -356,6 +362,93 @@ func TestMachine_getOperand_ER_SR_NR(t *testing.T) {
 				exp: operandER(argVReg, extendOpUXTB, 64),
 				lowered: true,
 			},
+			{
+				name: "8(VReg)->64->64: unsigned",
+				from: 8, to: 64, signed: false, mode: extModeZeroExtend64,
+				exp: operandER(argVReg, extendOpUXTB, 64),
+				lowered: true,
+			},
+			{
+				name: "8(VReg,Const)->64->64: unsigned",
+				from: 8, to: 64, signed: false, mode: extModeZeroExtend64,
+				exp: operandER(regalloc.VReg(nextVReg).SetRegType(regalloc.RegTypeInt), extendOpUXTB, 64),
+				lowered: true,
+				extArgConst: true,
+				instructions: []string{"movz w100?, #0xffff, lsl 0"},
+			},
+			{
+				name: "16(VReg)->64->64: unsigned",
+				from: 16, to: 64, signed: false, mode: extModeZeroExtend64,
+				exp: operandER(argVReg, extendOpUXTH, 64),
+				lowered: true,
+			},
+			{
+				name: "16(VReg,Const)->64->64: unsigned",
+				from: 16, to: 64, signed: false, mode: extModeZeroExtend64,
+				exp: operandER(regalloc.VReg(nextVReg).SetRegType(regalloc.RegTypeInt), extendOpUXTH, 64),
+				lowered: true,
+				extArgConst: true,
+				instructions: []string{"movz w100?, #0xffff, lsl 0"},
+			},
+			{
+				name: "32(VReg)->64->64: unsigned",
+				from: 32, to: 64, signed: false, mode: extModeZeroExtend64,
+				exp: operandER(argVReg, extendOpUXTW, 64),
+				lowered: true,
+			},
+			{
+				name: "32(VReg,Const)->64->64: unsigned",
+				from: 32, to: 64, signed: false, mode: extModeZeroExtend64,
+				exp: operandER(regalloc.VReg(nextVReg).SetRegType(regalloc.RegTypeInt), extendOpUXTW, 64),
+				lowered: true,
+				extArgConst: true,
+				instructions: []string{"movz w100?, #0xffff, lsl 0"},
+			},
+
+			///////////
+			{
+				name: "8(VReg)->64->64: signed",
+				from: 8, to: 64, signed: true, mode: extModeZeroExtend64,
+				exp: operandER(argVReg, extendOpSXTB, 64),
+				lowered: true,
+			},
+			{
+				name: "8(VReg,Const)->64->64: signed",
+				from: 8, to: 64, signed: true, mode: extModeZeroExtend64,
+				exp: operandER(regalloc.VReg(nextVReg).SetRegType(regalloc.RegTypeInt), extendOpSXTB, 64),
+				lowered: true,
+				extArgConst: true,
+				instructions: []string{"movz w100?, #0xffff, lsl 0"},
+			},
+			{
+				name: "16(VReg)->64->64: signed",
+				from: 16, to: 64, signed: true, mode: extModeZeroExtend64,
+				exp: operandER(argVReg, extendOpSXTH, 64),
+				lowered: true,
+			},
+			{
+				name: "16(VReg,Const)->64->64: signed",
+				from: 16, to: 64, signed: true, mode: extModeZeroExtend64,
+				exp: operandER(regalloc.VReg(nextVReg).SetRegType(regalloc.RegTypeInt), extendOpSXTH, 64),
+				lowered: true,
+				extArgConst: true,
+				instructions: []string{"movz w100?, #0xffff, lsl 0"},
+			},
+			{
+				name: "32(VReg)->64->64: signed",
+				from: 32, to: 64, signed: true, mode: extModeZeroExtend64,
+				exp: operandER(argVReg, extendOpSXTW, 64),
+				lowered: true,
+			},
+			{
+				name: "32(VReg,Const)->64->64: signed",
+				from: 32, to: 64, signed: true, mode: extModeZeroExtend64,
+				exp: operandER(regalloc.VReg(nextVReg).SetRegType(regalloc.RegTypeInt), extendOpSXTW, 64),
+				lowered: true,
+				extArgConst: true,
+				instructions: []string{"movz w100?, #0xffff, lsl 0"},
+			},
+			// Not lowered cases.
 			{
 				name: "8-signed->16-zero->64",
@@ -406,15 +499,28 @@ func TestMachine_getOperand_ER_SR_NR(t *testing.T) {
 					ext.AsUExtend(v, c.from, c.to)
 				}
 				builder.InsertInstruction(ext)
-				ctx.vRegMap[ext.Arg()] = argVReg
+				extArg := ext.Arg()
+				ctx.vRegMap[extArg] = argVReg
 				ctx.vRegMap[ext.Return()] = resultVReg
+				if c.extArgConst {
+					iconst := builder.AllocateInstruction().AsIconst32(0xffff).Insert(builder)
+					m.compiler.(*mockCompiler).definitions[extArg] = &backend.SSAValueDefinition{
+						Instr: iconst,
+					}
+				} else {
+					m.compiler.(*mockCompiler).definitions[extArg] = &backend.SSAValueDefinition{
+						BlkParamVReg:    argVReg,
+						BlockParamValue: extArg,
+					}
+				}
 				def = &backend.SSAValueDefinition{Instr: ext, N: 0}
 				return def, c.mode, func(t *testing.T) {
 					_, ok := ctx.lowered[ext]
 					require.Equal(t, c.lowered, ok)
 				}
 			},
-			exp: c.exp,
+			exp:          c.exp,
+			instructions: c.instructions,
 		})
 	})
 }
diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go
index 53f6482f..8b3a04ce 100644
--- a/internal/engine/wazevo/frontend/frontend_test.go
+++ b/internal/engine/wazevo/frontend/frontend_test.go
@@ -1382,9 +1382,9 @@ blk0: (exec_ctx:i64, module_ctx:i64)
 	v7:i64 = Load module_ctx, 0x8
 	v8:i64 = Load v7, 0x8
 	v9:i64 = Load module_ctx, 0x8
-	v10:i64 = Load v9, 0x8
+	v10:i32 = Load v9, 0x8
 	v11:i32 = Iconst_32 0x10
-	v12:i64 = Ushr v10, v11
+	v12:i32 = Ushr v10, v11
 	v13:i32 = Iconst_32 0xa
 	Store module_ctx, exec_ctx, 0x8
 	v14:i64 = Load exec_ctx, 0x48
@@ -1402,9 +1402,9 @@ blk0: (exec_ctx:i64, module_ctx:i64)
 	v25:i64 = Load module_ctx, 0x8
 	v26:i64 = Load v25, 0x8
 	v27:i64 = Load module_ctx, 0x8
-	v28:i64 = Load v27, 0x8
+	v28:i32 = Load v27, 0x8
 	v29:i32 = Iconst_32 0x10
-	v30:i64 = Ushr v28, v29
+	v30:i32 = Ushr v28, v29
 	Jump blk_ret, v4, v12, v22, v30
 `,
 		expAfterOpt: `
@@ -1418,9 +1418,9 @@ blk0: (exec_ctx:i64, module_ctx:i64)
 	v3:i64 = Load module_ctx, 0x20
 	v4:i32 = CallIndirect v2:sig0, exec_ctx, v3
 	v9:i64 = Load module_ctx, 0x8
-	v10:i64 = Load v9, 0x8
+	v10:i32 = Load v9, 0x8
 	v11:i32 = Iconst_32 0x10
-	v12:i64 = Ushr v10, v11
+	v12:i32 = Ushr v10, v11
 	v13:i32 = Iconst_32 0xa
 	Store module_ctx, exec_ctx, 0x8
 	v14:i64 = Load exec_ctx, 0x48
@@ -1430,9 +1430,9 @@ blk0: (exec_ctx:i64, module_ctx:i64)
 	v21:i64 = Load module_ctx, 0x20
 	v22:i32 = CallIndirect v20:sig0, exec_ctx, v21
 	v27:i64 = Load module_ctx, 0x8
-	v28:i64 = Load v27, 0x8
+	v28:i32 = Load v27, 0x8
 	v29:i32 = Iconst_32 0x10
-	v30:i64 = Ushr v28, v29
+	v30:i32 = Ushr v28, v29
 	Jump blk_ret, v4, v12, v22, v30
 `,
 		},
diff --git a/internal/engine/wazevo/frontend/lower.go b/internal/engine/wazevo/frontend/lower.go
index e249c09d..1b1dc931 100644
--- a/internal/engine/wazevo/frontend/lower.go
+++ b/internal/engine/wazevo/frontend/lower.go
@@ -769,7 +769,7 @@ func (c *Compiler) lowerCurrentOpcode() {
 			Return()
 
 		memSizeInBytes = builder.AllocateInstruction().
-			AsLoad(memInstPtr, memoryInstanceBufSizeOffset, ssa.TypeI64).
+			AsLoad(memInstPtr, memoryInstanceBufSizeOffset, ssa.TypeI32).
 			Insert(builder).
 			Return()
 	} else {
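
The new "Add/subtract (extended register)" encoder can be sanity-checked against the test vectors above. The standalone sketch below is not part of the patch: encodeAddExtReg is an invented helper that hardcodes the 64-bit ADD case with a zero imm3 shift amount, mirroring encodeAluRRRExtend's bit layout, and prints the little-endian hex string used by the test table:

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	// encodeAddExtReg mirrors encodeAluRRRExtend for 64-bit ADD:
	// sf=1, op=0, S=0, the fixed pattern 0b01011001 in bits [28:21],
	// then rm, the extend option, a zero imm3 shift, rn, and rd.
	func encodeAddExtReg(rd, rn, rm, option uint32) uint32 {
		const sf, op, s = 1, 0, 0
		return sf<<31 | op<<30 | s<<29 | 0b1011001<<21 | rm<<16 | option<<13 | rn<<5 | rd
	}

	func main() {
		// add x0, x2, w20, sxth -- option 0b101 selects SXTH.
		word := encodeAddExtReg(0, 2, 20, 0b101)

		// Instructions are emitted little-endian, matching the "want"
		// strings in TestInstruction_encode.
		var buf [4]byte
		binary.LittleEndian.PutUint32(buf[:], word)
		fmt.Printf("%x\n", buf[:]) // 40a0348b, the aluOpAdd/SXTH/64 vector above
	}

Running this prints 40a0348b, the aluOpAdd/extendOpSXTH/64 entry in the test table, which is how the vectors above can be checked by hand.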
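On the frontend side, the lower.go change loads the memory instance's buffer size as i32 rather than i64, which is why v10/v12 and v28/v30 in the expected SSA become i32: the size computation stays in the 32-bit type that Wasm's memory.size returns. The Iconst_32 0x10 / Ushr pair then converts bytes to pages, since a Wasm page is 64 KiB. A minimal sketch of that arithmetic (the values are invented for illustration):

	package main

	import "fmt"

	func main() {
		// pages = sizeInBytes >> 16, the same shift as the
		// Iconst_32 0x10 / Ushr pair in the SSA above.
		const wasmPageSizeLog2 = 16
		var memSizeInBytes uint32 = 10 * 65536 // a hypothetical 10-page memory
		fmt.Println(memSizeInBytes >> wasmPageSizeLog2) // prints 10
	}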