diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 92b5fc8c..4bc614fa 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -1095,181 +1095,201 @@ L1 (SSA Block: blk0): stp x30, xzr, [sp, #-0x10]! str xzr, [sp, #-0x10]! mov x8, x0 + mov x10.8b, v0.8b msr fpsr, xzr - fcvtzs x0, d0 + fcvtzs x0, d10 mrs x9 fpsr subs xzr, x9, #0x1 - b.ne #0x6c, (L17) - fcmp d0, d0 + mov x9, x8 + b.ne #0x70, (L17) + fcmp x10, x10 + mov x10, x9 b.vc #0x34, (L16) - movz x9, #0xc, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x11, #0xc, lsl 0 + str w11, [x10] + mov x11, sp + str x11, [x10, #0x38] + adr x11, #0x0 + str x11, [x10, #0x30] + exit_sequence x10 L16: - movz x9, #0xb, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x10, #0xb, lsl 0 + str w10, [x9] + mov x10, sp + str x10, [x9, #0x38] + adr x10, #0x0 + str x10, [x9, #0x30] + exit_sequence x9 L17: msr fpsr, xzr fcvtzs x1, s1 mrs x9 fpsr subs xzr, x9, #0x1 - b.ne #0x6c, (L15) - fcmp s1, s1 + mov x9, x8 + mov x10, d1 + b.ne #0x70, (L15) + fcmp w10, w10 + mov x10, x9 b.vc #0x34, (L14) - movz x9, #0xc, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x11, #0xc, lsl 0 + str w11, [x10] + mov x11, sp + str x11, [x10, #0x38] + adr x11, #0x0 + str x11, [x10, #0x30] + exit_sequence x10 L14: - movz x9, #0xb, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x10, #0xb, lsl 0 + str w10, [x9] + mov x10, sp + str x10, [x9, #0x38] + adr x10, #0x0 + str x10, [x9, #0x30] + exit_sequence x9 L15: msr fpsr, xzr - fcvtzs w2, d0 + fcvtzs w2, d10 mrs x9 fpsr subs xzr, x9, #0x1 - b.ne #0x6c, (L13) - fcmp d0, d0 + mov x9, x8 + b.ne #0x70, (L13) + fcmp x10, 
x10 + mov x10, x9 b.vc #0x34, (L12) - movz x9, #0xc, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x11, #0xc, lsl 0 + str w11, [x10] + mov x11, sp + str x11, [x10, #0x38] + adr x11, #0x0 + str x11, [x10, #0x30] + exit_sequence x10 L12: - movz x9, #0xb, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x10, #0xb, lsl 0 + str w10, [x9] + mov x10, sp + str x10, [x9, #0x38] + adr x10, #0x0 + str x10, [x9, #0x30] + exit_sequence x9 L13: msr fpsr, xzr fcvtzs w3, s1 mrs x9 fpsr subs xzr, x9, #0x1 - b.ne #0x6c, (L11) - fcmp s1, s1 + mov x9, x8 + mov x10, d1 + b.ne #0x70, (L11) + fcmp w10, w10 + mov x10, x9 b.vc #0x34, (L10) - movz x9, #0xc, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x11, #0xc, lsl 0 + str w11, [x10] + mov x11, sp + str x11, [x10, #0x38] + adr x11, #0x0 + str x11, [x10, #0x30] + exit_sequence x10 L10: - movz x9, #0xb, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x10, #0xb, lsl 0 + str w10, [x9] + mov x10, sp + str x10, [x9, #0x38] + adr x10, #0x0 + str x10, [x9, #0x30] + exit_sequence x9 L11: msr fpsr, xzr - fcvtzu x4, d0 + fcvtzu x4, d10 mrs x9 fpsr subs xzr, x9, #0x1 - b.ne #0x6c, (L9) - fcmp d0, d0 + mov x9, x8 + b.ne #0x70, (L9) + fcmp x10, x10 + mov x10, x9 b.vc #0x34, (L8) - movz x9, #0xc, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x11, #0xc, lsl 0 + str w11, [x10] + mov x11, sp + str x11, [x10, #0x38] + adr x11, #0x0 + str x11, [x10, #0x30] + exit_sequence x10 L8: - movz x9, #0xb, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x10, #0xb, lsl 0 + str w10, [x9] + mov x10, sp + str x10, [x9, #0x38] + adr x10, #0x0 + str 
x10, [x9, #0x30] + exit_sequence x9 L9: msr fpsr, xzr fcvtzu x5, s1 mrs x9 fpsr subs xzr, x9, #0x1 - b.ne #0x6c, (L7) - fcmp s1, s1 + mov x9, x8 + mov x10, d1 + b.ne #0x70, (L7) + fcmp w10, w10 + mov x10, x9 b.vc #0x34, (L6) - movz x9, #0xc, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x11, #0xc, lsl 0 + str w11, [x10] + mov x11, sp + str x11, [x10, #0x38] + adr x11, #0x0 + str x11, [x10, #0x30] + exit_sequence x10 L6: - movz x9, #0xb, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x10, #0xb, lsl 0 + str w10, [x9] + mov x10, sp + str x10, [x9, #0x38] + adr x10, #0x0 + str x10, [x9, #0x30] + exit_sequence x9 L7: msr fpsr, xzr - fcvtzu w6, d0 + fcvtzu w6, d10 mrs x9 fpsr subs xzr, x9, #0x1 - b.ne #0x6c, (L5) - fcmp d0, d0 + mov x9, x8 + b.ne #0x70, (L5) + fcmp x10, x10 + mov x10, x9 b.vc #0x34, (L4) - movz x9, #0xc, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x11, #0xc, lsl 0 + str w11, [x10] + mov x11, sp + str x11, [x10, #0x38] + adr x11, #0x0 + str x11, [x10, #0x30] + exit_sequence x10 L4: - movz x9, #0xb, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x10, #0xb, lsl 0 + str w10, [x9] + mov x10, sp + str x10, [x9, #0x38] + adr x10, #0x0 + str x10, [x9, #0x30] + exit_sequence x9 L5: msr fpsr, xzr fcvtzu w7, s1 mrs x9 fpsr subs xzr, x9, #0x1 - b.ne #0x6c, (L3) - fcmp s1, s1 + mov x9, d1 + b.ne #0x70, (L3) + fcmp w9, w9 + mov x9, x8 b.vc #0x34, (L2) - movz x9, #0xc, lsl 0 - str w9, [x8] - mov x9, sp - str x9, [x8, #0x38] - adr x9, #0x0 - str x9, [x8, #0x30] - exit_sequence x8 + movz x10, #0xc, lsl 0 + str w10, [x9] + mov x10, sp + str x10, [x9, #0x38] + adr x10, #0x0 + str x10, [x9, #0x30] + exit_sequence x9 L2: movz x9, #0xb, lsl 0 str w9, [x8] @@ -1279,7 +1299,7 @@ L2: str x9, 
[x8, #0x30] exit_sequence x8 L3: - fcvt s0, d0 + fcvt s0, x10 fcvt d1, s1 add sp, sp, #0x10 ldr x30, [sp], #0x10 @@ -1645,14 +1665,15 @@ L1 (SSA Block: blk0): ldr w133?, [x129?, #0x10] add x134?, x132?, #0x4 subs xzr, x133?, x134? + mov x140?, x128? b.hs L2 - movz x140?, #0x4, lsl 0 - str w140?, [x128?] - mov x141?, sp - str x141?, [x128?, #0x38] - adr x142?, #0x0 - str x142?, [x128?, #0x30] - exit_sequence x128? + movz x141?, #0x4, lsl 0 + str w141?, [x140?] + mov x142?, sp + str x142?, [x140?, #0x38] + adr x143?, #0x0 + str x143?, [x140?, #0x30] + exit_sequence x140? L2: ldr x136?, [x129?, #0x8] add x139?, x136?, x132? @@ -1696,14 +1717,15 @@ L1 (SSA Block: blk0): ldr w8, [x1, #0x10] add x9, x10, #0x4 subs xzr, x8, x9 + mov x9, x0 b.hs #0x34, (L10) - movz x9, #0x4, lsl 0 - str w9, [x0] - mov x9, sp - str x9, [x0, #0x38] - adr x9, #0x0 - str x9, [x0, #0x30] - exit_sequence x0 + movz x11, #0x4, lsl 0 + str w11, [x9] + mov x11, sp + str x11, [x9, #0x38] + adr x11, #0x0 + str x11, [x9, #0x30] + exit_sequence x9 L10: ldr x9, [x1, #0x8] add x10, x9, x10 @@ -1712,14 +1734,15 @@ L10: uxtw x10, w10 add x11, x10, #0x8 subs xzr, x8, x11 + mov x11, x0 b.hs #0x34, (L9) - movz x11, #0x4, lsl 0 - str w11, [x0] - mov x11, sp - str x11, [x0, #0x38] - adr x11, #0x0 - str x11, [x0, #0x30] - exit_sequence x0 + movz x12, #0x4, lsl 0 + str w12, [x11] + mov x12, sp + str x12, [x11, #0x38] + adr x12, #0x0 + str x12, [x11, #0x30] + exit_sequence x11 L9: add x10, x9, x10 str x3, [x10] @@ -1727,14 +1750,15 @@ L9: uxtw x10, w10 add x11, x10, #0x4 subs xzr, x8, x11 + mov x11, x0 b.hs #0x34, (L8) - movz x11, #0x4, lsl 0 - str w11, [x0] - mov x11, sp - str x11, [x0, #0x38] - adr x11, #0x0 - str x11, [x0, #0x30] - exit_sequence x0 + movz x12, #0x4, lsl 0 + str w12, [x11] + mov x12, sp + str x12, [x11, #0x38] + adr x12, #0x0 + str x12, [x11, #0x30] + exit_sequence x11 L8: add x10, x9, x10 str s0, [x10] @@ -1742,14 +1766,15 @@ L8: uxtw x10, w10 add x11, x10, #0x8 subs xzr, x8, x11 + mov x11, x0 
b.hs #0x34, (L7) - movz x11, #0x4, lsl 0 - str w11, [x0] - mov x11, sp - str x11, [x0, #0x38] - adr x11, #0x0 - str x11, [x0, #0x30] - exit_sequence x0 + movz x12, #0x4, lsl 0 + str w12, [x11] + mov x12, sp + str x12, [x11, #0x38] + adr x12, #0x0 + str x12, [x11, #0x30] + exit_sequence x11 L7: add x10, x9, x10 str d1, [x10] @@ -1757,14 +1782,15 @@ L7: uxtw x10, w10 add x11, x10, #0x1 subs xzr, x8, x11 + mov x11, x0 b.hs #0x34, (L6) - movz x11, #0x4, lsl 0 - str w11, [x0] - mov x11, sp - str x11, [x0, #0x38] - adr x11, #0x0 - str x11, [x0, #0x30] - exit_sequence x0 + movz x12, #0x4, lsl 0 + str w12, [x11] + mov x12, sp + str x12, [x11, #0x38] + adr x12, #0x0 + str x12, [x11, #0x30] + exit_sequence x11 L6: add x10, x9, x10 strb w2, [x10] @@ -1772,14 +1798,15 @@ L6: uxtw x10, w10 add x11, x10, #0x2 subs xzr, x8, x11 + mov x11, x0 b.hs #0x34, (L5) - movz x11, #0x4, lsl 0 - str w11, [x0] - mov x11, sp - str x11, [x0, #0x38] - adr x11, #0x0 - str x11, [x0, #0x30] - exit_sequence x0 + movz x12, #0x4, lsl 0 + str w12, [x11] + mov x12, sp + str x12, [x11, #0x38] + adr x12, #0x0 + str x12, [x11, #0x30] + exit_sequence x11 L5: add x10, x9, x10 strh w2, [x10] @@ -1787,14 +1814,15 @@ L5: uxtw x10, w10 add x11, x10, #0x1 subs xzr, x8, x11 + mov x11, x0 b.hs #0x34, (L4) - movz x11, #0x4, lsl 0 - str w11, [x0] - mov x11, sp - str x11, [x0, #0x38] - adr x11, #0x0 - str x11, [x0, #0x30] - exit_sequence x0 + movz x12, #0x4, lsl 0 + str w12, [x11] + mov x12, sp + str x12, [x11, #0x38] + adr x12, #0x0 + str x12, [x11, #0x30] + exit_sequence x11 L4: add x10, x9, x10 strb w3, [x10] @@ -1802,14 +1830,15 @@ L4: uxtw x10, w10 add x11, x10, #0x2 subs xzr, x8, x11 + mov x11, x0 b.hs #0x34, (L3) - movz x11, #0x4, lsl 0 - str w11, [x0] - mov x11, sp - str x11, [x0, #0x38] - adr x11, #0x0 - str x11, [x0, #0x30] - exit_sequence x0 + movz x12, #0x4, lsl 0 + str w12, [x11] + mov x12, sp + str x12, [x11, #0x38] + adr x12, #0x0 + str x12, [x11, #0x30] + exit_sequence x11 L3: add x10, x9, x10 strh w3, 
[x10] diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 9ed74844..f3f7fccf 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -1189,9 +1189,11 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi // If `c` (cond type) is a register, `cond64bit` must be chosen to indicate whether the register is 32-bit or 64-bit. // Otherwise, `cond64bit` is ignored. func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, code wazevoapi.ExitCode) { + execCtxTmp := m.copyToTmp(execCtxVReg, ssa.TypeI64) + cbr := m.allocateInstr() m.insert(cbr) - m.lowerExitWithCode(execCtxVReg, code) + m.lowerExitWithCode(execCtxTmp, code) // Conditional branch target is after exit. l := m.insertBrTargetLabel() cbr.asCondBr(c, l, cond64bit) @@ -1315,6 +1317,9 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64 alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true) m.insert(alu) + execCtx := m.copyToTmp(ctx, ssa.TypeI64) + _rn := operandNR(m.copyToTmp(rn.nr(), ssa.TypeI64)) + // If it is not undefined, we can return the result. ok := m.allocateInstr() m.insert(ok) @@ -1323,12 +1328,12 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64 // Comparing itself to check if it is a NaN. fpuCmp := m.allocateInstr() - fpuCmp.asFpuCmp(rn, rn, src64bit) + fpuCmp.asFpuCmp(_rn, _rn, src64bit) m.insert(fpuCmp) // If the VC flag is not set (== VS flag is set), it is a NaN. - m.exitIfNot(ctx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger) + m.exitIfNot(execCtx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger) // Otherwise, it is an overflow. 
- m.lowerExitWithCode(ctx, wazevoapi.ExitCodeIntegerOverflow) + m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow) // Conditional branch target is after exit. l := m.insertBrTargetLabel() @@ -1838,10 +1843,12 @@ func (m *machine) lowerExitIfTrueWithCode(execCtxVReg regalloc.VReg, cond ssa.Va signed := c.Signed() m.lowerIcmpToFlag(x, y, signed) + execCtxTmp := m.copyToTmp(execCtxVReg, ssa.TypeI64) + // We have to skip the entire exit sequence if the condition is false. cbr := m.allocateInstr() m.insert(cbr) - m.lowerExitWithCode(execCtxVReg, code) + m.lowerExitWithCode(execCtxTmp, code) // conditional branch target is after exit. l := m.insertBrTargetLabel() cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), l, false /* ignored */) @@ -1904,31 +1911,38 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { } func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { - // Declare and insert the conditional branch here jump to label `ifNonZero` below: - // but we cannot forward reference the label. - cbr := m.allocateInstr() - m.insert(cbr) + // First we clear the unnecessary bits of rc by ANDing it with 1. + one := m.compiler.AllocateVReg(ssa.TypeI32) + m.lowerConstantI32(one, 1) + and := m.allocateInstr() + oneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + and.asALU(aluOpAnd, oneOrZero, rc, operandNR(one), false) + m.insert(and) - // If rc is zero, mov rd, rm then jump to end. - mov0 := m.allocateInstr() - mov0.asFpuMov128(rd.nr(), rm.nr()) - m.insert(mov0) + // Sets all bits to 1 if rc is not zero. + allOneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + alu := m.allocateInstr() + alu.asALU(aluOpSub, allOneOrZero, operandNR(xzrVReg), oneOrZero, true) + m.insert(alu) - // Declared and insert the non-conditional jump to label `end` below: - // again, we cannot forward reference the label. - br := m.allocateInstr() - m.insert(br) + // Then move the bits to the result vector register. 
+ dup := m.allocateInstr() + dup.asVecDup(rd, allOneOrZero, vecArrangement2D) + m.insert(dup) - // Create and insert the label, and update `cbr` to the real instruction. - ifNonZero := m.insertBrTargetLabel() - cbr.asCondBr(registerAsRegNotZeroCond(rc.nr()), ifNonZero, true) - - // If rc is non-zero, set mov rd, rn. - mov := m.allocateInstr() - mov.asFpuMov128(rd.nr(), rn.nr()) - m.insert(mov) - - // Create and insert the label, and update `br` to the real instruction. - end := m.insertBrTargetLabel() - br.asBr(end) + // Now that `rd` has either all bits one or zero depending on `rc`, + // we can use bsl to select between `rn` and `rm`. + ins := m.allocateInstr() + ins.asVecRRR(vecOpBsl, rd, rn, rm, vecArrangement16B) + m.insert(ins) +} + +// copyToTmp copies the given regalloc.VReg to a temporary register. This is called before cbr to avoid regalloc issues, +// e.g. a reload being inserted in the middle of the exit sequence, which is not on the path that normal execution takes. +func (m *machine) copyToTmp(v regalloc.VReg, typ ssa.Type) regalloc.VReg { + mov := m.allocateInstr() + tmp := m.compiler.AllocateVReg(typ) + mov.asMove64(tmp, v) + m.insert(mov) + return tmp } diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go index eeed0799..5358879b 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go @@ -297,73 +297,79 @@ func TestMachine_lowerIDiv(t *testing.T) { name: "32bit unsigned", _64bit: false, signed: false, exp: ` udiv w1?, w2?, w3? +mov x1?, x65535? cbnz w3?, L1 -movz x1?, #0xa, lsl 0 -str w1?, [x65535?] -mov x2?, sp -str x2?, [x65535?, #0x38] -adr x3?, #0x0 -str x3?, [x65535?, #0x30] -exit_sequence x65535? +movz x2?, #0xa, lsl 0 +str w2?, [x1?] +mov x3?, sp +str x3?, [x1?, #0x38] +adr x4?, #0x0 +str x4?, [x1?, #0x30] +exit_sequence x1? 
L1: `, }, {name: "32bit signed", _64bit: false, signed: true, exp: ` sdiv w1?, w2?, w3? +mov x1?, x65535? cbnz w3?, L1 -movz x1?, #0xa, lsl 0 -str w1?, [x65535?] -mov x2?, sp -str x2?, [x65535?, #0x38] -adr x3?, #0x0 -str x3?, [x65535?, #0x30] -exit_sequence x65535? +movz x2?, #0xa, lsl 0 +str w2?, [x1?] +mov x3?, sp +str x3?, [x1?, #0x38] +adr x4?, #0x0 +str x4?, [x1?, #0x30] +exit_sequence x1? L1: adds wzr, w3?, #0x1 ccmp w2?, #0x1, #0x0, eq +mov x5?, x65535? b.vc L2 -movz x4?, #0xb, lsl 0 -str w4?, [x65535?] -mov x5?, sp -str x5?, [x65535?, #0x38] -adr x6?, #0x0 -str x6?, [x65535?, #0x30] -exit_sequence x65535? +movz x6?, #0xb, lsl 0 +str w6?, [x5?] +mov x7?, sp +str x7?, [x5?, #0x38] +adr x8?, #0x0 +str x8?, [x5?, #0x30] +exit_sequence x5? L2: `}, {name: "64bit unsigned", _64bit: true, signed: false, exp: ` udiv x1?, x2?, x3? +mov x1?, x65535? cbnz x3?, L1 -movz x1?, #0xa, lsl 0 -str w1?, [x65535?] -mov x2?, sp -str x2?, [x65535?, #0x38] -adr x3?, #0x0 -str x3?, [x65535?, #0x30] -exit_sequence x65535? +movz x2?, #0xa, lsl 0 +str w2?, [x1?] +mov x3?, sp +str x3?, [x1?, #0x38] +adr x4?, #0x0 +str x4?, [x1?, #0x30] +exit_sequence x1? L1: `}, {name: "64bit signed", _64bit: true, signed: true, exp: ` sdiv x1?, x2?, x3? +mov x1?, x65535? cbnz x3?, L1 -movz x1?, #0xa, lsl 0 -str w1?, [x65535?] -mov x2?, sp -str x2?, [x65535?, #0x38] -adr x3?, #0x0 -str x3?, [x65535?, #0x30] -exit_sequence x65535? +movz x2?, #0xa, lsl 0 +str w2?, [x1?] +mov x3?, sp +str x3?, [x1?, #0x38] +adr x4?, #0x0 +str x4?, [x1?, #0x30] +exit_sequence x1? L1: adds xzr, x3?, #0x1 ccmp x2?, #0x1, #0x0, eq +mov x5?, x65535? b.vc L2 -movz x4?, #0xb, lsl 0 -str w4?, [x65535?] -mov x5?, sp -str x5?, [x65535?, #0x38] -adr x6?, #0x0 -str x6?, [x65535?, #0x30] -exit_sequence x65535? +movz x6?, #0xb, lsl 0 +str w6?, [x5?] +mov x7?, sp +str x7?, [x5?, #0x38] +adr x8?, #0x0 +str x8?, [x5?, #0x30] +exit_sequence x5? L2: `}, } { @@ -409,24 +415,27 @@ msr fpsr, xzr fcvtzu w1, s2 mrs x1? 
fpsr subs xzr, x1?, #0x1 +mov x2?, x15 +mov x3?, x2 b.ne L2 -fcmp w2, w2 +fcmp w3?, w3? +mov x4?, x2? b.vc L1 -movz x2?, #0xc, lsl 0 -str w2?, [x15] -mov x3?, sp -str x3?, [x15, #0x38] -adr x4?, #0x0 -str x4?, [x15, #0x30] -exit_sequence x15 -L1: -movz x5?, #0xb, lsl 0 -str w5?, [x15] +movz x5?, #0xc, lsl 0 +str w5?, [x4?] mov x6?, sp -str x6?, [x15, #0x38] +str x6?, [x4?, #0x38] adr x7?, #0x0 -str x7?, [x15, #0x30] -exit_sequence x15 +str x7?, [x4?, #0x30] +exit_sequence x4? +L1: +movz x8?, #0xb, lsl 0 +str w8?, [x2?] +mov x9?, sp +str x9?, [x2?, #0x38] +adr x10?, #0x0 +str x10?, [x2?, #0x30] +exit_sequence x2? L2: `, }, @@ -842,12 +851,11 @@ func TestMachine_lowerSelectVec(t *testing.T) { m.lowerSelectVec(c, rn, rm, rd) require.Equal(t, ` -cbnz x1?, L1 -mov v4?.16b, v3?.16b -b L2 -L1: -mov v4?.16b, v2?.16b -L2: +orr w5?, wzr, #0x1 +and w6?, w1?, w5? +sub x7?, xzr, x6? +dup v4?.2d, x7? +bsl v4?.16b, v2?.16b, v3?.16b `, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") }