diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go
index 92b5fc8c..4bc614fa 100644
--- a/internal/engine/wazevo/backend/backend_test.go
+++ b/internal/engine/wazevo/backend/backend_test.go
@@ -1095,181 +1095,201 @@ L1 (SSA Block: blk0):
 	stp x30, xzr, [sp, #-0x10]!
 	str xzr, [sp, #-0x10]!
 	mov x8, x0
+	mov x10.8b, v0.8b
 	msr fpsr, xzr
-	fcvtzs x0, d0
+	fcvtzs x0, d10
 	mrs x9 fpsr
 	subs xzr, x9, #0x1
-	b.ne #0x6c, (L17)
-	fcmp d0, d0
+	mov x9, x8
+	b.ne #0x70, (L17)
+	fcmp x10, x10
+	mov x10, x9
 	b.vc #0x34, (L16)
-	movz x9, #0xc, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x11, #0xc, lsl 0
+	str w11, [x10]
+	mov x11, sp
+	str x11, [x10, #0x38]
+	adr x11, #0x0
+	str x11, [x10, #0x30]
+	exit_sequence x10
 L16:
-	movz x9, #0xb, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x10, #0xb, lsl 0
+	str w10, [x9]
+	mov x10, sp
+	str x10, [x9, #0x38]
+	adr x10, #0x0
+	str x10, [x9, #0x30]
+	exit_sequence x9
 L17:
 	msr fpsr, xzr
 	fcvtzs x1, s1
 	mrs x9 fpsr
 	subs xzr, x9, #0x1
-	b.ne #0x6c, (L15)
-	fcmp s1, s1
+	mov x9, x8
+	mov x10, d1
+	b.ne #0x70, (L15)
+	fcmp w10, w10
+	mov x10, x9
 	b.vc #0x34, (L14)
-	movz x9, #0xc, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x11, #0xc, lsl 0
+	str w11, [x10]
+	mov x11, sp
+	str x11, [x10, #0x38]
+	adr x11, #0x0
+	str x11, [x10, #0x30]
+	exit_sequence x10
 L14:
-	movz x9, #0xb, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x10, #0xb, lsl 0
+	str w10, [x9]
+	mov x10, sp
+	str x10, [x9, #0x38]
+	adr x10, #0x0
+	str x10, [x9, #0x30]
+	exit_sequence x9
 L15:
 	msr fpsr, xzr
-	fcvtzs w2, d0
+	fcvtzs w2, d10
 	mrs x9 fpsr
 	subs xzr, x9, #0x1
-	b.ne #0x6c, (L13)
-	fcmp d0, d0
+	mov x9, x8
+	b.ne #0x70, (L13)
+	fcmp x10, x10
+	mov x10, x9
 	b.vc #0x34, (L12)
-	movz x9, #0xc, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x11, #0xc, lsl 0
+	str w11, [x10]
+	mov x11, sp
+	str x11, [x10, #0x38]
+	adr x11, #0x0
+	str x11, [x10, #0x30]
+	exit_sequence x10
 L12:
-	movz x9, #0xb, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x10, #0xb, lsl 0
+	str w10, [x9]
+	mov x10, sp
+	str x10, [x9, #0x38]
+	adr x10, #0x0
+	str x10, [x9, #0x30]
+	exit_sequence x9
 L13:
 	msr fpsr, xzr
 	fcvtzs w3, s1
 	mrs x9 fpsr
 	subs xzr, x9, #0x1
-	b.ne #0x6c, (L11)
-	fcmp s1, s1
+	mov x9, x8
+	mov x10, d1
+	b.ne #0x70, (L11)
+	fcmp w10, w10
+	mov x10, x9
 	b.vc #0x34, (L10)
-	movz x9, #0xc, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x11, #0xc, lsl 0
+	str w11, [x10]
+	mov x11, sp
+	str x11, [x10, #0x38]
+	adr x11, #0x0
+	str x11, [x10, #0x30]
+	exit_sequence x10
 L10:
-	movz x9, #0xb, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x10, #0xb, lsl 0
+	str w10, [x9]
+	mov x10, sp
+	str x10, [x9, #0x38]
+	adr x10, #0x0
+	str x10, [x9, #0x30]
+	exit_sequence x9
 L11:
 	msr fpsr, xzr
-	fcvtzu x4, d0
+	fcvtzu x4, d10
 	mrs x9 fpsr
 	subs xzr, x9, #0x1
-	b.ne #0x6c, (L9)
-	fcmp d0, d0
+	mov x9, x8
+	b.ne #0x70, (L9)
+	fcmp x10, x10
+	mov x10, x9
 	b.vc #0x34, (L8)
-	movz x9, #0xc, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x11, #0xc, lsl 0
+	str w11, [x10]
+	mov x11, sp
+	str x11, [x10, #0x38]
+	adr x11, #0x0
+	str x11, [x10, #0x30]
+	exit_sequence x10
 L8:
-	movz x9, #0xb, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x10, #0xb, lsl 0
+	str w10, [x9]
+	mov x10, sp
+	str x10, [x9, #0x38]
+	adr x10, #0x0
+	str x10, [x9, #0x30]
+	exit_sequence x9
 L9:
 	msr fpsr, xzr
 	fcvtzu x5, s1
 	mrs x9 fpsr
 	subs xzr, x9, #0x1
-	b.ne #0x6c, (L7)
-	fcmp s1, s1
+	mov x9, x8
+	mov x10, d1
+	b.ne #0x70, (L7)
+	fcmp w10, w10
+	mov x10, x9
 	b.vc #0x34, (L6)
-	movz x9, #0xc, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x11, #0xc, lsl 0
+	str w11, [x10]
+	mov x11, sp
+	str x11, [x10, #0x38]
+	adr x11, #0x0
+	str x11, [x10, #0x30]
+	exit_sequence x10
 L6:
-	movz x9, #0xb, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x10, #0xb, lsl 0
+	str w10, [x9]
+	mov x10, sp
+	str x10, [x9, #0x38]
+	adr x10, #0x0
+	str x10, [x9, #0x30]
+	exit_sequence x9
 L7:
 	msr fpsr, xzr
-	fcvtzu w6, d0
+	fcvtzu w6, d10
 	mrs x9 fpsr
 	subs xzr, x9, #0x1
-	b.ne #0x6c, (L5)
-	fcmp d0, d0
+	mov x9, x8
+	b.ne #0x70, (L5)
+	fcmp x10, x10
+	mov x10, x9
 	b.vc #0x34, (L4)
-	movz x9, #0xc, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x11, #0xc, lsl 0
+	str w11, [x10]
+	mov x11, sp
+	str x11, [x10, #0x38]
+	adr x11, #0x0
+	str x11, [x10, #0x30]
+	exit_sequence x10
 L4:
-	movz x9, #0xb, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x10, #0xb, lsl 0
+	str w10, [x9]
+	mov x10, sp
+	str x10, [x9, #0x38]
+	adr x10, #0x0
+	str x10, [x9, #0x30]
+	exit_sequence x9
 L5:
 	msr fpsr, xzr
 	fcvtzu w7, s1
 	mrs x9 fpsr
 	subs xzr, x9, #0x1
-	b.ne #0x6c, (L3)
-	fcmp s1, s1
+	mov x9, d1
+	b.ne #0x70, (L3)
+	fcmp w9, w9
+	mov x9, x8
 	b.vc #0x34, (L2)
-	movz x9, #0xc, lsl 0
-	str w9, [x8]
-	mov x9, sp
-	str x9, [x8, #0x38]
-	adr x9, #0x0
-	str x9, [x8, #0x30]
-	exit_sequence x8
+	movz x10, #0xc, lsl 0
+	str w10, [x9]
+	mov x10, sp
+	str x10, [x9, #0x38]
+	adr x10, #0x0
+	str x10, [x9, #0x30]
+	exit_sequence x9
 L2:
 	movz x9, #0xb, lsl 0
 	str w9, [x8]
@@ -1279,7 +1299,7 @@ L2:
 	str x9, [x8, #0x30]
 	exit_sequence x8
 L3:
-	fcvt s0, d0
+	fcvt s0, x10
 	fcvt d1, s1
 	add sp, sp, #0x10
 	ldr x30, [sp], #0x10
@@ -1645,14 +1665,15 @@ L1 (SSA Block: blk0):
 	ldr w133?, [x129?, #0x10]
 	add x134?, x132?, #0x4
 	subs xzr, x133?, x134?
+	mov x140?, x128?
 	b.hs L2
-	movz x140?, #0x4, lsl 0
-	str w140?, [x128?]
-	mov x141?, sp
-	str x141?, [x128?, #0x38]
-	adr x142?, #0x0
-	str x142?, [x128?, #0x30]
-	exit_sequence x128?
+	movz x141?, #0x4, lsl 0
+	str w141?, [x140?]
+	mov x142?, sp
+	str x142?, [x140?, #0x38]
+	adr x143?, #0x0
+	str x143?, [x140?, #0x30]
+	exit_sequence x140?
 L2:
 	ldr x136?, [x129?, #0x8]
 	add x139?, x136?, x132?
@@ -1696,14 +1717,15 @@ L1 (SSA Block: blk0):
 	ldr w8, [x1, #0x10]
 	add x9, x10, #0x4
 	subs xzr, x8, x9
+	mov x9, x0
 	b.hs #0x34, (L10)
-	movz x9, #0x4, lsl 0
-	str w9, [x0]
-	mov x9, sp
-	str x9, [x0, #0x38]
-	adr x9, #0x0
-	str x9, [x0, #0x30]
-	exit_sequence x0
+	movz x11, #0x4, lsl 0
+	str w11, [x9]
+	mov x11, sp
+	str x11, [x9, #0x38]
+	adr x11, #0x0
+	str x11, [x9, #0x30]
+	exit_sequence x9
 L10:
 	ldr x9, [x1, #0x8]
 	add x10, x9, x10
@@ -1712,14 +1734,15 @@ L10:
 	uxtw x10, w10
 	add x11, x10, #0x8
 	subs xzr, x8, x11
+	mov x11, x0
 	b.hs #0x34, (L9)
-	movz x11, #0x4, lsl 0
-	str w11, [x0]
-	mov x11, sp
-	str x11, [x0, #0x38]
-	adr x11, #0x0
-	str x11, [x0, #0x30]
-	exit_sequence x0
+	movz x12, #0x4, lsl 0
+	str w12, [x11]
+	mov x12, sp
+	str x12, [x11, #0x38]
+	adr x12, #0x0
+	str x12, [x11, #0x30]
+	exit_sequence x11
 L9:
 	add x10, x9, x10
 	str x3, [x10]
@@ -1727,14 +1750,15 @@ L9:
 	uxtw x10, w10
 	add x11, x10, #0x4
 	subs xzr, x8, x11
+	mov x11, x0
 	b.hs #0x34, (L8)
-	movz x11, #0x4, lsl 0
-	str w11, [x0]
-	mov x11, sp
-	str x11, [x0, #0x38]
-	adr x11, #0x0
-	str x11, [x0, #0x30]
-	exit_sequence x0
+	movz x12, #0x4, lsl 0
+	str w12, [x11]
+	mov x12, sp
+	str x12, [x11, #0x38]
+	adr x12, #0x0
+	str x12, [x11, #0x30]
+	exit_sequence x11
 L8:
 	add x10, x9, x10
 	str s0, [x10]
@@ -1742,14 +1766,15 @@ L8:
 	uxtw x10, w10
 	add x11, x10, #0x8
 	subs xzr, x8, x11
+	mov x11, x0
 	b.hs #0x34, (L7)
-	movz x11, #0x4, lsl 0
-	str w11, [x0]
-	mov x11, sp
-	str x11, [x0, #0x38]
-	adr x11, #0x0
-	str x11, [x0, #0x30]
-	exit_sequence x0
+	movz x12, #0x4, lsl 0
+	str w12, [x11]
+	mov x12, sp
+	str x12, [x11, #0x38]
+	adr x12, #0x0
+	str x12, [x11, #0x30]
+	exit_sequence x11
 L7:
 	add x10, x9, x10
 	str d1, [x10]
@@ -1757,14 +1782,15 @@ L7:
 	uxtw x10, w10
 	add x11, x10, #0x1
 	subs xzr, x8, x11
+	mov x11, x0
 	b.hs #0x34, (L6)
-	movz x11, #0x4, lsl 0
-	str w11, [x0]
-	mov x11, sp
-	str x11, [x0, #0x38]
-	adr x11, #0x0
-	str x11, [x0, #0x30]
-	exit_sequence x0
+	movz x12, #0x4, lsl 0
+	str w12, [x11]
+	mov x12, sp
+	str x12, [x11, #0x38]
+	adr x12, #0x0
+	str x12, [x11, #0x30]
+	exit_sequence x11
 L6:
 	add x10, x9, x10
 	strb w2, [x10]
@@ -1772,14 +1798,15 @@ L6:
 	uxtw x10, w10
 	add x11, x10, #0x2
 	subs xzr, x8, x11
+	mov x11, x0
 	b.hs #0x34, (L5)
-	movz x11, #0x4, lsl 0
-	str w11, [x0]
-	mov x11, sp
-	str x11, [x0, #0x38]
-	adr x11, #0x0
-	str x11, [x0, #0x30]
-	exit_sequence x0
+	movz x12, #0x4, lsl 0
+	str w12, [x11]
+	mov x12, sp
+	str x12, [x11, #0x38]
+	adr x12, #0x0
+	str x12, [x11, #0x30]
+	exit_sequence x11
 L5:
 	add x10, x9, x10
 	strh w2, [x10]
@@ -1787,14 +1814,15 @@ L5:
 	uxtw x10, w10
 	add x11, x10, #0x1
 	subs xzr, x8, x11
+	mov x11, x0
 	b.hs #0x34, (L4)
-	movz x11, #0x4, lsl 0
-	str w11, [x0]
-	mov x11, sp
-	str x11, [x0, #0x38]
-	adr x11, #0x0
-	str x11, [x0, #0x30]
-	exit_sequence x0
+	movz x12, #0x4, lsl 0
+	str w12, [x11]
+	mov x12, sp
+	str x12, [x11, #0x38]
+	adr x12, #0x0
+	str x12, [x11, #0x30]
+	exit_sequence x11
 L4:
 	add x10, x9, x10
 	strb w3, [x10]
@@ -1802,14 +1830,15 @@ L4:
 	uxtw x10, w10
 	add x11, x10, #0x2
 	subs xzr, x8, x11
+	mov x11, x0
 	b.hs #0x34, (L3)
-	movz x11, #0x4, lsl 0
-	str w11, [x0]
-	mov x11, sp
-	str x11, [x0, #0x38]
-	adr x11, #0x0
-	str x11, [x0, #0x30]
-	exit_sequence x0
+	movz x12, #0x4, lsl 0
+	str w12, [x11]
+	mov x12, sp
+	str x12, [x11, #0x38]
+	adr x12, #0x0
+	str x12, [x11, #0x30]
+	exit_sequence x11
 L3:
 	add x10, x9, x10
 	strh w3, [x10]
diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
index 9ed74844..f3f7fccf 100644
--- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
+++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
@@ -1189,9 +1189,11 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
 // If `c` (cond type) is a register, `cond64bit` must be chosen to indicate whether the register is 32-bit or 64-bit.
 // Otherwise, `cond64bit` is ignored.
 func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, code wazevoapi.ExitCode) {
+	execCtxTmp := m.copyToTmp(execCtxVReg, ssa.TypeI64)
+
 	cbr := m.allocateInstr()
 	m.insert(cbr)
-	m.lowerExitWithCode(execCtxVReg, code)
+	m.lowerExitWithCode(execCtxTmp, code)
 	// Conditional branch target is after exit.
 	l := m.insertBrTargetLabel()
 	cbr.asCondBr(c, l, cond64bit)
@@ -1315,6 +1317,9 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
 		alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true)
 		m.insert(alu)
 
+		execCtx := m.copyToTmp(ctx, ssa.TypeI64)
+		_rn := operandNR(m.copyToTmp(rn.nr(), ssa.TypeI64))
+
 		// If it is not undefined, we can return the result.
 		ok := m.allocateInstr()
 		m.insert(ok)
@@ -1323,12 +1328,12 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
 
 		// Comparing itself to check if it is a NaN.
 		fpuCmp := m.allocateInstr()
-		fpuCmp.asFpuCmp(rn, rn, src64bit)
+		fpuCmp.asFpuCmp(_rn, _rn, src64bit)
 		m.insert(fpuCmp)
 		// If the VC flag is not set (== VS flag is set), it is a NaN.
-		m.exitIfNot(ctx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger)
+		m.exitIfNot(execCtx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger)
 		// Otherwise, it is an overflow.
-		m.lowerExitWithCode(ctx, wazevoapi.ExitCodeIntegerOverflow)
+		m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow)
 
 		// Conditional branch target is after exit.
 		l := m.insertBrTargetLabel()
@@ -1838,10 +1843,12 @@ func (m *machine) lowerExitIfTrueWithCode(execCtxVReg regalloc.VReg, cond ssa.Va
 	signed := c.Signed()
 	m.lowerIcmpToFlag(x, y, signed)
 
+	execCtxTmp := m.copyToTmp(execCtxVReg, ssa.TypeI64)
+
 	// We have to skip the entire exit sequence if the condition is false.
 	cbr := m.allocateInstr()
 	m.insert(cbr)
-	m.lowerExitWithCode(execCtxVReg, code)
+	m.lowerExitWithCode(execCtxTmp, code)
 	// conditional branch target is after exit.
 	l := m.insertBrTargetLabel()
 	cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), l, false /* ignored */)
@@ -1904,31 +1911,38 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
 }
 
 func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
-	// Declare and insert the conditional branch here jump to label `ifNonZero` below:
-	// but we cannot forward reference the label.
-	cbr := m.allocateInstr()
-	m.insert(cbr)
+	// Mask rc down to bit 0 (rc AND 1). NOTE(review): the branch being replaced tested rc != 0, so a non-zero rc with bit 0 clear would now select rm; confirm rc is always 0 or 1 here.
+	one := m.compiler.AllocateVReg(ssa.TypeI32)
+	m.lowerConstantI32(one, 1)
+	and := m.allocateInstr()
+	oneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+	and.asALU(aluOpAnd, oneOrZero, rc, operandNR(one), false)
+	m.insert(and)
 
-	// If rc is zero, mov rd, rm then jump to end.
-	mov0 := m.allocateInstr()
-	mov0.asFpuMov128(rd.nr(), rm.nr())
-	m.insert(mov0)
+	// Compute 0 - oneOrZero (xzr minus the masked bit): all bits one when the masked bit is set, zero otherwise.
+	allOneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+	alu := m.allocateInstr()
+	alu.asALU(aluOpSub, allOneOrZero, operandNR(xzrVReg), oneOrZero, true)
+	m.insert(alu)
 
-	// Declared and insert the non-conditional jump to label `end` below:
-	// again, we cannot forward reference the label.
-	br := m.allocateInstr()
-	m.insert(br)
+	// Broadcast that mask into the result vector register `rd` (dup with the 2D arrangement).
+	dup := m.allocateInstr()
+	dup.asVecDup(rd, allOneOrZero, vecArrangement2D)
+	m.insert(dup)
 
-	// Create and insert the label, and update `cbr` to the real instruction.
-	ifNonZero := m.insertBrTargetLabel()
-	cbr.asCondBr(registerAsRegNotZeroCond(rc.nr()), ifNonZero, true)
-
-	// If rc is non-zero, set mov rd, rn.
-	mov := m.allocateInstr()
-	mov.asFpuMov128(rd.nr(), rn.nr())
-	m.insert(mov)
-
-	// Create and insert the label, and update `br` to the real instruction.
-	end := m.insertBrTargetLabel()
-	br.asBr(end)
+	// `rd` now holds an all-ones or all-zeros mask derived from `rc`,
+	// so a single bsl (16B arrangement) selects between `rn` and `rm` without any branch.
+	ins := m.allocateInstr()
+	ins.asVecRRR(vecOpBsl, rd, rn, rm, vecArrangement16B)
+	m.insert(ins)
+}
+
+// copyToTmp inserts a move of v into a newly allocated virtual register of type typ and returns the new register.
+// It is called before a conditional branch so that regalloc does not place a reload inside the exit sequence, which the normal (non-exiting) path never executes.
+func (m *machine) copyToTmp(v regalloc.VReg, typ ssa.Type) regalloc.VReg {
+	mov := m.allocateInstr()
+	tmp := m.compiler.AllocateVReg(typ)
+	mov.asMove64(tmp, v) // NOTE(review): always a 64-bit integer move; lowerFpuToInt also passes its FPU operand here (typed ssa.TypeI64) — confirm this is intended.
+	m.insert(mov)
+	return tmp
 }
diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go
index eeed0799..5358879b 100644
--- a/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go
+++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go
@@ -297,73 +297,79 @@ func TestMachine_lowerIDiv(t *testing.T) {
 			name: "32bit unsigned", _64bit: false, signed: false,
 			exp: `
 udiv w1?, w2?, w3?
+mov x1?, x65535?
 cbnz w3?, L1
-movz x1?, #0xa, lsl 0
-str w1?, [x65535?]
-mov x2?, sp
-str x2?, [x65535?, #0x38]
-adr x3?, #0x0
-str x3?, [x65535?, #0x30]
-exit_sequence x65535?
+movz x2?, #0xa, lsl 0
+str w2?, [x1?]
+mov x3?, sp
+str x3?, [x1?, #0x38]
+adr x4?, #0x0
+str x4?, [x1?, #0x30]
+exit_sequence x1?
 L1:
 `,
 		},
 		{name: "32bit signed", _64bit: false, signed: true, exp: `
 sdiv w1?, w2?, w3?
+mov x1?, x65535?
 cbnz w3?, L1
-movz x1?, #0xa, lsl 0
-str w1?, [x65535?]
-mov x2?, sp
-str x2?, [x65535?, #0x38]
-adr x3?, #0x0
-str x3?, [x65535?, #0x30]
-exit_sequence x65535?
+movz x2?, #0xa, lsl 0
+str w2?, [x1?]
+mov x3?, sp
+str x3?, [x1?, #0x38]
+adr x4?, #0x0
+str x4?, [x1?, #0x30]
+exit_sequence x1?
 L1:
 adds wzr, w3?, #0x1
 ccmp w2?, #0x1, #0x0, eq
+mov x5?, x65535?
 b.vc L2
-movz x4?, #0xb, lsl 0
-str w4?, [x65535?]
-mov x5?, sp
-str x5?, [x65535?, #0x38]
-adr x6?, #0x0
-str x6?, [x65535?, #0x30]
-exit_sequence x65535?
+movz x6?, #0xb, lsl 0
+str w6?, [x5?]
+mov x7?, sp
+str x7?, [x5?, #0x38]
+adr x8?, #0x0
+str x8?, [x5?, #0x30]
+exit_sequence x5?
 L2:
 `},
 		{name: "64bit unsigned", _64bit: true, signed: false, exp: `
 udiv x1?, x2?, x3?
+mov x1?, x65535?
 cbnz x3?, L1
-movz x1?, #0xa, lsl 0
-str w1?, [x65535?]
-mov x2?, sp
-str x2?, [x65535?, #0x38]
-adr x3?, #0x0
-str x3?, [x65535?, #0x30]
-exit_sequence x65535?
+movz x2?, #0xa, lsl 0
+str w2?, [x1?]
+mov x3?, sp
+str x3?, [x1?, #0x38]
+adr x4?, #0x0
+str x4?, [x1?, #0x30]
+exit_sequence x1?
 L1:
 `},
 		{name: "64bit signed", _64bit: true, signed: true, exp: `
 sdiv x1?, x2?, x3?
+mov x1?, x65535?
 cbnz x3?, L1
-movz x1?, #0xa, lsl 0
-str w1?, [x65535?]
-mov x2?, sp
-str x2?, [x65535?, #0x38]
-adr x3?, #0x0
-str x3?, [x65535?, #0x30]
-exit_sequence x65535?
+movz x2?, #0xa, lsl 0
+str w2?, [x1?]
+mov x3?, sp
+str x3?, [x1?, #0x38]
+adr x4?, #0x0
+str x4?, [x1?, #0x30]
+exit_sequence x1?
 L1:
 adds xzr, x3?, #0x1
 ccmp x2?, #0x1, #0x0, eq
+mov x5?, x65535?
 b.vc L2
-movz x4?, #0xb, lsl 0
-str w4?, [x65535?]
-mov x5?, sp
-str x5?, [x65535?, #0x38]
-adr x6?, #0x0
-str x6?, [x65535?, #0x30]
-exit_sequence x65535?
+movz x6?, #0xb, lsl 0
+str w6?, [x5?]
+mov x7?, sp
+str x7?, [x5?, #0x38]
+adr x8?, #0x0
+str x8?, [x5?, #0x30]
+exit_sequence x5?
 L2:
 `},
 	} {
@@ -409,24 +415,27 @@ msr fpsr, xzr
 fcvtzu w1, s2
 mrs x1? fpsr
 subs xzr, x1?, #0x1
+mov x2?, x15
+mov x3?, x2
 b.ne L2
-fcmp w2, w2
+fcmp w3?, w3?
+mov x4?, x2?
 b.vc L1
-movz x2?, #0xc, lsl 0
-str w2?, [x15]
-mov x3?, sp
-str x3?, [x15, #0x38]
-adr x4?, #0x0
-str x4?, [x15, #0x30]
-exit_sequence x15
-L1:
-movz x5?, #0xb, lsl 0
-str w5?, [x15]
+movz x5?, #0xc, lsl 0
+str w5?, [x4?]
 mov x6?, sp
-str x6?, [x15, #0x38]
+str x6?, [x4?, #0x38]
 adr x7?, #0x0
-str x7?, [x15, #0x30]
-exit_sequence x15
+str x7?, [x4?, #0x30]
+exit_sequence x4?
+L1:
+movz x8?, #0xb, lsl 0
+str w8?, [x2?]
+mov x9?, sp
+str x9?, [x2?, #0x38]
+adr x10?, #0x0
+str x10?, [x2?, #0x30]
+exit_sequence x2?
 L2:
 `,
 		},
@@ -842,12 +851,11 @@ func TestMachine_lowerSelectVec(t *testing.T) {
 
 	m.lowerSelectVec(c, rn, rm, rd)
 	require.Equal(t, `
-cbnz x1?, L1
-mov v4?.16b, v3?.16b
-b L2
-L1:
-mov v4?.16b, v2?.16b
-L2:
+orr w5?, wzr, #0x1
+and w6?, w1?, w5?
+sub x7?, xzr, x6?
+dup v4?.2d, x7?
+bsl v4?.16b, v2?.16b, v3?.16b
 `, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
 }