wazevo(arm64): use tmp regs to store values used during branches (#1830)

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
Takeshi Yoneda
2023-11-09 14:17:52 +09:00
committed by GitHub
parent d3b83a596c
commit a4b4801d53
3 changed files with 331 additions and 280 deletions

View File

@@ -1095,181 +1095,201 @@ L1 (SSA Block: blk0):
stp x30, xzr, [sp, #-0x10]!
str xzr, [sp, #-0x10]!
mov x8, x0
mov x10.8b, v0.8b
msr fpsr, xzr
fcvtzs x0, d0
fcvtzs x0, d10
mrs x9 fpsr
subs xzr, x9, #0x1
b.ne #0x6c, (L17)
fcmp d0, d0
mov x9, x8
b.ne #0x70, (L17)
fcmp x10, x10
mov x10, x9
b.vc #0x34, (L16)
movz x9, #0xc, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x11, #0xc, lsl 0
str w11, [x10]
mov x11, sp
str x11, [x10, #0x38]
adr x11, #0x0
str x11, [x10, #0x30]
exit_sequence x10
L16:
movz x9, #0xb, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x10, #0xb, lsl 0
str w10, [x9]
mov x10, sp
str x10, [x9, #0x38]
adr x10, #0x0
str x10, [x9, #0x30]
exit_sequence x9
L17:
msr fpsr, xzr
fcvtzs x1, s1
mrs x9 fpsr
subs xzr, x9, #0x1
b.ne #0x6c, (L15)
fcmp s1, s1
mov x9, x8
mov x10, d1
b.ne #0x70, (L15)
fcmp w10, w10
mov x10, x9
b.vc #0x34, (L14)
movz x9, #0xc, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x11, #0xc, lsl 0
str w11, [x10]
mov x11, sp
str x11, [x10, #0x38]
adr x11, #0x0
str x11, [x10, #0x30]
exit_sequence x10
L14:
movz x9, #0xb, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x10, #0xb, lsl 0
str w10, [x9]
mov x10, sp
str x10, [x9, #0x38]
adr x10, #0x0
str x10, [x9, #0x30]
exit_sequence x9
L15:
msr fpsr, xzr
fcvtzs w2, d0
fcvtzs w2, d10
mrs x9 fpsr
subs xzr, x9, #0x1
b.ne #0x6c, (L13)
fcmp d0, d0
mov x9, x8
b.ne #0x70, (L13)
fcmp x10, x10
mov x10, x9
b.vc #0x34, (L12)
movz x9, #0xc, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x11, #0xc, lsl 0
str w11, [x10]
mov x11, sp
str x11, [x10, #0x38]
adr x11, #0x0
str x11, [x10, #0x30]
exit_sequence x10
L12:
movz x9, #0xb, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x10, #0xb, lsl 0
str w10, [x9]
mov x10, sp
str x10, [x9, #0x38]
adr x10, #0x0
str x10, [x9, #0x30]
exit_sequence x9
L13:
msr fpsr, xzr
fcvtzs w3, s1
mrs x9 fpsr
subs xzr, x9, #0x1
b.ne #0x6c, (L11)
fcmp s1, s1
mov x9, x8
mov x10, d1
b.ne #0x70, (L11)
fcmp w10, w10
mov x10, x9
b.vc #0x34, (L10)
movz x9, #0xc, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x11, #0xc, lsl 0
str w11, [x10]
mov x11, sp
str x11, [x10, #0x38]
adr x11, #0x0
str x11, [x10, #0x30]
exit_sequence x10
L10:
movz x9, #0xb, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x10, #0xb, lsl 0
str w10, [x9]
mov x10, sp
str x10, [x9, #0x38]
adr x10, #0x0
str x10, [x9, #0x30]
exit_sequence x9
L11:
msr fpsr, xzr
fcvtzu x4, d0
fcvtzu x4, d10
mrs x9 fpsr
subs xzr, x9, #0x1
b.ne #0x6c, (L9)
fcmp d0, d0
mov x9, x8
b.ne #0x70, (L9)
fcmp x10, x10
mov x10, x9
b.vc #0x34, (L8)
movz x9, #0xc, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x11, #0xc, lsl 0
str w11, [x10]
mov x11, sp
str x11, [x10, #0x38]
adr x11, #0x0
str x11, [x10, #0x30]
exit_sequence x10
L8:
movz x9, #0xb, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x10, #0xb, lsl 0
str w10, [x9]
mov x10, sp
str x10, [x9, #0x38]
adr x10, #0x0
str x10, [x9, #0x30]
exit_sequence x9
L9:
msr fpsr, xzr
fcvtzu x5, s1
mrs x9 fpsr
subs xzr, x9, #0x1
b.ne #0x6c, (L7)
fcmp s1, s1
mov x9, x8
mov x10, d1
b.ne #0x70, (L7)
fcmp w10, w10
mov x10, x9
b.vc #0x34, (L6)
movz x9, #0xc, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x11, #0xc, lsl 0
str w11, [x10]
mov x11, sp
str x11, [x10, #0x38]
adr x11, #0x0
str x11, [x10, #0x30]
exit_sequence x10
L6:
movz x9, #0xb, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x10, #0xb, lsl 0
str w10, [x9]
mov x10, sp
str x10, [x9, #0x38]
adr x10, #0x0
str x10, [x9, #0x30]
exit_sequence x9
L7:
msr fpsr, xzr
fcvtzu w6, d0
fcvtzu w6, d10
mrs x9 fpsr
subs xzr, x9, #0x1
b.ne #0x6c, (L5)
fcmp d0, d0
mov x9, x8
b.ne #0x70, (L5)
fcmp x10, x10
mov x10, x9
b.vc #0x34, (L4)
movz x9, #0xc, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x11, #0xc, lsl 0
str w11, [x10]
mov x11, sp
str x11, [x10, #0x38]
adr x11, #0x0
str x11, [x10, #0x30]
exit_sequence x10
L4:
movz x9, #0xb, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x10, #0xb, lsl 0
str w10, [x9]
mov x10, sp
str x10, [x9, #0x38]
adr x10, #0x0
str x10, [x9, #0x30]
exit_sequence x9
L5:
msr fpsr, xzr
fcvtzu w7, s1
mrs x9 fpsr
subs xzr, x9, #0x1
b.ne #0x6c, (L3)
fcmp s1, s1
mov x9, d1
b.ne #0x70, (L3)
fcmp w9, w9
mov x9, x8
b.vc #0x34, (L2)
movz x9, #0xc, lsl 0
str w9, [x8]
mov x9, sp
str x9, [x8, #0x38]
adr x9, #0x0
str x9, [x8, #0x30]
exit_sequence x8
movz x10, #0xc, lsl 0
str w10, [x9]
mov x10, sp
str x10, [x9, #0x38]
adr x10, #0x0
str x10, [x9, #0x30]
exit_sequence x9
L2:
movz x9, #0xb, lsl 0
str w9, [x8]
@@ -1279,7 +1299,7 @@ L2:
str x9, [x8, #0x30]
exit_sequence x8
L3:
fcvt s0, d0
fcvt s0, x10
fcvt d1, s1
add sp, sp, #0x10
ldr x30, [sp], #0x10
@@ -1645,14 +1665,15 @@ L1 (SSA Block: blk0):
ldr w133?, [x129?, #0x10]
add x134?, x132?, #0x4
subs xzr, x133?, x134?
mov x140?, x128?
b.hs L2
movz x140?, #0x4, lsl 0
str w140?, [x128?]
mov x141?, sp
str x141?, [x128?, #0x38]
adr x142?, #0x0
str x142?, [x128?, #0x30]
exit_sequence x128?
movz x141?, #0x4, lsl 0
str w141?, [x140?]
mov x142?, sp
str x142?, [x140?, #0x38]
adr x143?, #0x0
str x143?, [x140?, #0x30]
exit_sequence x140?
L2:
ldr x136?, [x129?, #0x8]
add x139?, x136?, x132?
@@ -1696,14 +1717,15 @@ L1 (SSA Block: blk0):
ldr w8, [x1, #0x10]
add x9, x10, #0x4
subs xzr, x8, x9
mov x9, x0
b.hs #0x34, (L10)
movz x9, #0x4, lsl 0
str w9, [x0]
mov x9, sp
str x9, [x0, #0x38]
adr x9, #0x0
str x9, [x0, #0x30]
exit_sequence x0
movz x11, #0x4, lsl 0
str w11, [x9]
mov x11, sp
str x11, [x9, #0x38]
adr x11, #0x0
str x11, [x9, #0x30]
exit_sequence x9
L10:
ldr x9, [x1, #0x8]
add x10, x9, x10
@@ -1712,14 +1734,15 @@ L10:
uxtw x10, w10
add x11, x10, #0x8
subs xzr, x8, x11
mov x11, x0
b.hs #0x34, (L9)
movz x11, #0x4, lsl 0
str w11, [x0]
mov x11, sp
str x11, [x0, #0x38]
adr x11, #0x0
str x11, [x0, #0x30]
exit_sequence x0
movz x12, #0x4, lsl 0
str w12, [x11]
mov x12, sp
str x12, [x11, #0x38]
adr x12, #0x0
str x12, [x11, #0x30]
exit_sequence x11
L9:
add x10, x9, x10
str x3, [x10]
@@ -1727,14 +1750,15 @@ L9:
uxtw x10, w10
add x11, x10, #0x4
subs xzr, x8, x11
mov x11, x0
b.hs #0x34, (L8)
movz x11, #0x4, lsl 0
str w11, [x0]
mov x11, sp
str x11, [x0, #0x38]
adr x11, #0x0
str x11, [x0, #0x30]
exit_sequence x0
movz x12, #0x4, lsl 0
str w12, [x11]
mov x12, sp
str x12, [x11, #0x38]
adr x12, #0x0
str x12, [x11, #0x30]
exit_sequence x11
L8:
add x10, x9, x10
str s0, [x10]
@@ -1742,14 +1766,15 @@ L8:
uxtw x10, w10
add x11, x10, #0x8
subs xzr, x8, x11
mov x11, x0
b.hs #0x34, (L7)
movz x11, #0x4, lsl 0
str w11, [x0]
mov x11, sp
str x11, [x0, #0x38]
adr x11, #0x0
str x11, [x0, #0x30]
exit_sequence x0
movz x12, #0x4, lsl 0
str w12, [x11]
mov x12, sp
str x12, [x11, #0x38]
adr x12, #0x0
str x12, [x11, #0x30]
exit_sequence x11
L7:
add x10, x9, x10
str d1, [x10]
@@ -1757,14 +1782,15 @@ L7:
uxtw x10, w10
add x11, x10, #0x1
subs xzr, x8, x11
mov x11, x0
b.hs #0x34, (L6)
movz x11, #0x4, lsl 0
str w11, [x0]
mov x11, sp
str x11, [x0, #0x38]
adr x11, #0x0
str x11, [x0, #0x30]
exit_sequence x0
movz x12, #0x4, lsl 0
str w12, [x11]
mov x12, sp
str x12, [x11, #0x38]
adr x12, #0x0
str x12, [x11, #0x30]
exit_sequence x11
L6:
add x10, x9, x10
strb w2, [x10]
@@ -1772,14 +1798,15 @@ L6:
uxtw x10, w10
add x11, x10, #0x2
subs xzr, x8, x11
mov x11, x0
b.hs #0x34, (L5)
movz x11, #0x4, lsl 0
str w11, [x0]
mov x11, sp
str x11, [x0, #0x38]
adr x11, #0x0
str x11, [x0, #0x30]
exit_sequence x0
movz x12, #0x4, lsl 0
str w12, [x11]
mov x12, sp
str x12, [x11, #0x38]
adr x12, #0x0
str x12, [x11, #0x30]
exit_sequence x11
L5:
add x10, x9, x10
strh w2, [x10]
@@ -1787,14 +1814,15 @@ L5:
uxtw x10, w10
add x11, x10, #0x1
subs xzr, x8, x11
mov x11, x0
b.hs #0x34, (L4)
movz x11, #0x4, lsl 0
str w11, [x0]
mov x11, sp
str x11, [x0, #0x38]
adr x11, #0x0
str x11, [x0, #0x30]
exit_sequence x0
movz x12, #0x4, lsl 0
str w12, [x11]
mov x12, sp
str x12, [x11, #0x38]
adr x12, #0x0
str x12, [x11, #0x30]
exit_sequence x11
L4:
add x10, x9, x10
strb w3, [x10]
@@ -1802,14 +1830,15 @@ L4:
uxtw x10, w10
add x11, x10, #0x2
subs xzr, x8, x11
mov x11, x0
b.hs #0x34, (L3)
movz x11, #0x4, lsl 0
str w11, [x0]
mov x11, sp
str x11, [x0, #0x38]
adr x11, #0x0
str x11, [x0, #0x30]
exit_sequence x0
movz x12, #0x4, lsl 0
str w12, [x11]
mov x12, sp
str x12, [x11, #0x38]
adr x12, #0x0
str x12, [x11, #0x30]
exit_sequence x11
L3:
add x10, x9, x10
strh w3, [x10]

View File

@@ -1189,9 +1189,11 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
// If `c` (cond type) is a register, `cond64bit` must be chosen to indicate whether the register is 32-bit or 64-bit.
// Otherwise, `cond64bit` is ignored.
func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, code wazevoapi.ExitCode) {
execCtxTmp := m.copyToTmp(execCtxVReg, ssa.TypeI64)
cbr := m.allocateInstr()
m.insert(cbr)
m.lowerExitWithCode(execCtxVReg, code)
m.lowerExitWithCode(execCtxTmp, code)
// Conditional branch target is after exit.
l := m.insertBrTargetLabel()
cbr.asCondBr(c, l, cond64bit)
@@ -1315,6 +1317,9 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true)
m.insert(alu)
execCtx := m.copyToTmp(ctx, ssa.TypeI64)
_rn := operandNR(m.copyToTmp(rn.nr(), ssa.TypeI64))
// If it is not undefined, we can return the result.
ok := m.allocateInstr()
m.insert(ok)
@@ -1323,12 +1328,12 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
// Comparing itself to check if it is a NaN.
fpuCmp := m.allocateInstr()
fpuCmp.asFpuCmp(rn, rn, src64bit)
fpuCmp.asFpuCmp(_rn, _rn, src64bit)
m.insert(fpuCmp)
// If the VC flag is not set (== VS flag is set), it is a NaN.
m.exitIfNot(ctx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger)
m.exitIfNot(execCtx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger)
// Otherwise, it is an overflow.
m.lowerExitWithCode(ctx, wazevoapi.ExitCodeIntegerOverflow)
m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow)
// Conditional branch target is after exit.
l := m.insertBrTargetLabel()
@@ -1838,10 +1843,12 @@ func (m *machine) lowerExitIfTrueWithCode(execCtxVReg regalloc.VReg, cond ssa.Va
signed := c.Signed()
m.lowerIcmpToFlag(x, y, signed)
execCtxTmp := m.copyToTmp(execCtxVReg, ssa.TypeI64)
// We have to skip the entire exit sequence if the condition is false.
cbr := m.allocateInstr()
m.insert(cbr)
m.lowerExitWithCode(execCtxVReg, code)
m.lowerExitWithCode(execCtxTmp, code)
// conditional branch target is after exit.
l := m.insertBrTargetLabel()
cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), l, false /* ignored */)
@@ -1904,31 +1911,38 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
}
func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
// Declare and insert the conditional branch here; it jumps to the label `ifNonZero` below,
// but we cannot forward-reference the label yet.
cbr := m.allocateInstr()
m.insert(cbr)
// First we clear the unnecessary bits of rc by ANDing it with 1.
one := m.compiler.AllocateVReg(ssa.TypeI32)
m.lowerConstantI32(one, 1)
and := m.allocateInstr()
oneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
and.asALU(aluOpAnd, oneOrZero, rc, operandNR(one), false)
m.insert(and)
// If rc is zero, mov rd, rm then jump to end.
mov0 := m.allocateInstr()
mov0.asFpuMov128(rd.nr(), rm.nr())
m.insert(mov0)
// Sets all bits to 1 if rc is not zero.
allOneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
alu := m.allocateInstr()
alu.asALU(aluOpSub, allOneOrZero, operandNR(xzrVReg), oneOrZero, true)
m.insert(alu)
// Declare and insert the unconditional jump to the label `end` below;
// again, we cannot forward-reference the label yet.
br := m.allocateInstr()
m.insert(br)
// Then move the bits to the result vector register.
dup := m.allocateInstr()
dup.asVecDup(rd, allOneOrZero, vecArrangement2D)
m.insert(dup)
// Create and insert the label, and update `cbr` to the real instruction.
ifNonZero := m.insertBrTargetLabel()
cbr.asCondBr(registerAsRegNotZeroCond(rc.nr()), ifNonZero, true)
// If rc is non-zero, set mov rd, rn.
mov := m.allocateInstr()
mov.asFpuMov128(rd.nr(), rn.nr())
m.insert(mov)
// Create and insert the label, and update `br` to the real instruction.
end := m.insertBrTargetLabel()
br.asBr(end)
// Now that `rd` has either all bits one or zero depending on `rc`,
// we can use bsl to select between `rn` and `rm`.
ins := m.allocateInstr()
ins.asVecRRR(vecOpBsl, rd, rn, rm, vecArrangement16B)
m.insert(ins)
}
// copyToTmp copies the given regalloc.VReg into a newly allocated virtual register of type `typ`
// and returns that new register.
//
// This is called before a conditional branch (cbr) to avoid register-allocation issues, e.g. a reload
// being inserted in the middle of the exit sequence, which the normal execution path does not run.
//
// The move instruction is chosen from `typ`: integer types use a 64-bit general-purpose move, and
// every other type uses the FPU/vector register move, so the temporary's register class always
// matches the instruction that writes it.
func (m *machine) copyToTmp(v regalloc.VReg, typ ssa.Type) regalloc.VReg {
	mov := m.allocateInstr()
	tmp := m.compiler.AllocateVReg(typ)
	switch typ {
	case ssa.TypeI32, ssa.TypeI64:
		mov.asMove64(tmp, v)
	default:
		// Non-integer values must not go through the integer move.
		// NOTE(review): assumes `v` is itself a float/vector register whenever `typ` is non-integer — confirm at call sites.
		mov.asFpuMov128(tmp, v)
	}
	m.insert(mov)
	return tmp
}

View File

@@ -297,73 +297,79 @@ func TestMachine_lowerIDiv(t *testing.T) {
name: "32bit unsigned", _64bit: false, signed: false,
exp: `
udiv w1?, w2?, w3?
mov x1?, x65535?
cbnz w3?, L1
movz x1?, #0xa, lsl 0
str w1?, [x65535?]
mov x2?, sp
str x2?, [x65535?, #0x38]
adr x3?, #0x0
str x3?, [x65535?, #0x30]
exit_sequence x65535?
movz x2?, #0xa, lsl 0
str w2?, [x1?]
mov x3?, sp
str x3?, [x1?, #0x38]
adr x4?, #0x0
str x4?, [x1?, #0x30]
exit_sequence x1?
L1:
`,
},
{name: "32bit signed", _64bit: false, signed: true, exp: `
sdiv w1?, w2?, w3?
mov x1?, x65535?
cbnz w3?, L1
movz x1?, #0xa, lsl 0
str w1?, [x65535?]
mov x2?, sp
str x2?, [x65535?, #0x38]
adr x3?, #0x0
str x3?, [x65535?, #0x30]
exit_sequence x65535?
movz x2?, #0xa, lsl 0
str w2?, [x1?]
mov x3?, sp
str x3?, [x1?, #0x38]
adr x4?, #0x0
str x4?, [x1?, #0x30]
exit_sequence x1?
L1:
adds wzr, w3?, #0x1
ccmp w2?, #0x1, #0x0, eq
mov x5?, x65535?
b.vc L2
movz x4?, #0xb, lsl 0
str w4?, [x65535?]
mov x5?, sp
str x5?, [x65535?, #0x38]
adr x6?, #0x0
str x6?, [x65535?, #0x30]
exit_sequence x65535?
movz x6?, #0xb, lsl 0
str w6?, [x5?]
mov x7?, sp
str x7?, [x5?, #0x38]
adr x8?, #0x0
str x8?, [x5?, #0x30]
exit_sequence x5?
L2:
`},
{name: "64bit unsigned", _64bit: true, signed: false, exp: `
udiv x1?, x2?, x3?
mov x1?, x65535?
cbnz x3?, L1
movz x1?, #0xa, lsl 0
str w1?, [x65535?]
mov x2?, sp
str x2?, [x65535?, #0x38]
adr x3?, #0x0
str x3?, [x65535?, #0x30]
exit_sequence x65535?
movz x2?, #0xa, lsl 0
str w2?, [x1?]
mov x3?, sp
str x3?, [x1?, #0x38]
adr x4?, #0x0
str x4?, [x1?, #0x30]
exit_sequence x1?
L1:
`},
{name: "64bit signed", _64bit: true, signed: true, exp: `
sdiv x1?, x2?, x3?
mov x1?, x65535?
cbnz x3?, L1
movz x1?, #0xa, lsl 0
str w1?, [x65535?]
mov x2?, sp
str x2?, [x65535?, #0x38]
adr x3?, #0x0
str x3?, [x65535?, #0x30]
exit_sequence x65535?
movz x2?, #0xa, lsl 0
str w2?, [x1?]
mov x3?, sp
str x3?, [x1?, #0x38]
adr x4?, #0x0
str x4?, [x1?, #0x30]
exit_sequence x1?
L1:
adds xzr, x3?, #0x1
ccmp x2?, #0x1, #0x0, eq
mov x5?, x65535?
b.vc L2
movz x4?, #0xb, lsl 0
str w4?, [x65535?]
mov x5?, sp
str x5?, [x65535?, #0x38]
adr x6?, #0x0
str x6?, [x65535?, #0x30]
exit_sequence x65535?
movz x6?, #0xb, lsl 0
str w6?, [x5?]
mov x7?, sp
str x7?, [x5?, #0x38]
adr x8?, #0x0
str x8?, [x5?, #0x30]
exit_sequence x5?
L2:
`},
} {
@@ -409,24 +415,27 @@ msr fpsr, xzr
fcvtzu w1, s2
mrs x1? fpsr
subs xzr, x1?, #0x1
mov x2?, x15
mov x3?, x2
b.ne L2
fcmp w2, w2
fcmp w3?, w3?
mov x4?, x2?
b.vc L1
movz x2?, #0xc, lsl 0
str w2?, [x15]
mov x3?, sp
str x3?, [x15, #0x38]
adr x4?, #0x0
str x4?, [x15, #0x30]
exit_sequence x15
L1:
movz x5?, #0xb, lsl 0
str w5?, [x15]
movz x5?, #0xc, lsl 0
str w5?, [x4?]
mov x6?, sp
str x6?, [x15, #0x38]
str x6?, [x4?, #0x38]
adr x7?, #0x0
str x7?, [x15, #0x30]
exit_sequence x15
str x7?, [x4?, #0x30]
exit_sequence x4?
L1:
movz x8?, #0xb, lsl 0
str w8?, [x2?]
mov x9?, sp
str x9?, [x2?, #0x38]
adr x10?, #0x0
str x10?, [x2?, #0x30]
exit_sequence x2?
L2:
`,
},
@@ -842,12 +851,11 @@ func TestMachine_lowerSelectVec(t *testing.T) {
m.lowerSelectVec(c, rn, rm, rd)
require.Equal(t, `
cbnz x1?, L1
mov v4?.16b, v3?.16b
b L2
L1:
mov v4?.16b, v2?.16b
L2:
orr w5?, wzr, #0x1
and w6?, w1?, w5?
sub x7?, xzr, x6?
dup v4?.2d, x7?
bsl v4?.16b, v2?.16b, v3?.16b
`, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
}