wazevo(arm64): use tmp regs to store values used during branches (#1830)
Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
@@ -1095,181 +1095,201 @@ L1 (SSA Block: blk0):
|
||||
stp x30, xzr, [sp, #-0x10]!
|
||||
str xzr, [sp, #-0x10]!
|
||||
mov x8, x0
|
||||
mov x10.8b, v0.8b
|
||||
msr fpsr, xzr
|
||||
fcvtzs x0, d0
|
||||
fcvtzs x0, d10
|
||||
mrs x9 fpsr
|
||||
subs xzr, x9, #0x1
|
||||
b.ne #0x6c, (L17)
|
||||
fcmp d0, d0
|
||||
mov x9, x8
|
||||
b.ne #0x70, (L17)
|
||||
fcmp x10, x10
|
||||
mov x10, x9
|
||||
b.vc #0x34, (L16)
|
||||
movz x9, #0xc, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x11, #0xc, lsl 0
|
||||
str w11, [x10]
|
||||
mov x11, sp
|
||||
str x11, [x10, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x10, #0x30]
|
||||
exit_sequence x10
|
||||
L16:
|
||||
movz x9, #0xb, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x10, #0xb, lsl 0
|
||||
str w10, [x9]
|
||||
mov x10, sp
|
||||
str x10, [x9, #0x38]
|
||||
adr x10, #0x0
|
||||
str x10, [x9, #0x30]
|
||||
exit_sequence x9
|
||||
L17:
|
||||
msr fpsr, xzr
|
||||
fcvtzs x1, s1
|
||||
mrs x9 fpsr
|
||||
subs xzr, x9, #0x1
|
||||
b.ne #0x6c, (L15)
|
||||
fcmp s1, s1
|
||||
mov x9, x8
|
||||
mov x10, d1
|
||||
b.ne #0x70, (L15)
|
||||
fcmp w10, w10
|
||||
mov x10, x9
|
||||
b.vc #0x34, (L14)
|
||||
movz x9, #0xc, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x11, #0xc, lsl 0
|
||||
str w11, [x10]
|
||||
mov x11, sp
|
||||
str x11, [x10, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x10, #0x30]
|
||||
exit_sequence x10
|
||||
L14:
|
||||
movz x9, #0xb, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x10, #0xb, lsl 0
|
||||
str w10, [x9]
|
||||
mov x10, sp
|
||||
str x10, [x9, #0x38]
|
||||
adr x10, #0x0
|
||||
str x10, [x9, #0x30]
|
||||
exit_sequence x9
|
||||
L15:
|
||||
msr fpsr, xzr
|
||||
fcvtzs w2, d0
|
||||
fcvtzs w2, d10
|
||||
mrs x9 fpsr
|
||||
subs xzr, x9, #0x1
|
||||
b.ne #0x6c, (L13)
|
||||
fcmp d0, d0
|
||||
mov x9, x8
|
||||
b.ne #0x70, (L13)
|
||||
fcmp x10, x10
|
||||
mov x10, x9
|
||||
b.vc #0x34, (L12)
|
||||
movz x9, #0xc, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x11, #0xc, lsl 0
|
||||
str w11, [x10]
|
||||
mov x11, sp
|
||||
str x11, [x10, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x10, #0x30]
|
||||
exit_sequence x10
|
||||
L12:
|
||||
movz x9, #0xb, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x10, #0xb, lsl 0
|
||||
str w10, [x9]
|
||||
mov x10, sp
|
||||
str x10, [x9, #0x38]
|
||||
adr x10, #0x0
|
||||
str x10, [x9, #0x30]
|
||||
exit_sequence x9
|
||||
L13:
|
||||
msr fpsr, xzr
|
||||
fcvtzs w3, s1
|
||||
mrs x9 fpsr
|
||||
subs xzr, x9, #0x1
|
||||
b.ne #0x6c, (L11)
|
||||
fcmp s1, s1
|
||||
mov x9, x8
|
||||
mov x10, d1
|
||||
b.ne #0x70, (L11)
|
||||
fcmp w10, w10
|
||||
mov x10, x9
|
||||
b.vc #0x34, (L10)
|
||||
movz x9, #0xc, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x11, #0xc, lsl 0
|
||||
str w11, [x10]
|
||||
mov x11, sp
|
||||
str x11, [x10, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x10, #0x30]
|
||||
exit_sequence x10
|
||||
L10:
|
||||
movz x9, #0xb, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x10, #0xb, lsl 0
|
||||
str w10, [x9]
|
||||
mov x10, sp
|
||||
str x10, [x9, #0x38]
|
||||
adr x10, #0x0
|
||||
str x10, [x9, #0x30]
|
||||
exit_sequence x9
|
||||
L11:
|
||||
msr fpsr, xzr
|
||||
fcvtzu x4, d0
|
||||
fcvtzu x4, d10
|
||||
mrs x9 fpsr
|
||||
subs xzr, x9, #0x1
|
||||
b.ne #0x6c, (L9)
|
||||
fcmp d0, d0
|
||||
mov x9, x8
|
||||
b.ne #0x70, (L9)
|
||||
fcmp x10, x10
|
||||
mov x10, x9
|
||||
b.vc #0x34, (L8)
|
||||
movz x9, #0xc, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x11, #0xc, lsl 0
|
||||
str w11, [x10]
|
||||
mov x11, sp
|
||||
str x11, [x10, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x10, #0x30]
|
||||
exit_sequence x10
|
||||
L8:
|
||||
movz x9, #0xb, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x10, #0xb, lsl 0
|
||||
str w10, [x9]
|
||||
mov x10, sp
|
||||
str x10, [x9, #0x38]
|
||||
adr x10, #0x0
|
||||
str x10, [x9, #0x30]
|
||||
exit_sequence x9
|
||||
L9:
|
||||
msr fpsr, xzr
|
||||
fcvtzu x5, s1
|
||||
mrs x9 fpsr
|
||||
subs xzr, x9, #0x1
|
||||
b.ne #0x6c, (L7)
|
||||
fcmp s1, s1
|
||||
mov x9, x8
|
||||
mov x10, d1
|
||||
b.ne #0x70, (L7)
|
||||
fcmp w10, w10
|
||||
mov x10, x9
|
||||
b.vc #0x34, (L6)
|
||||
movz x9, #0xc, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x11, #0xc, lsl 0
|
||||
str w11, [x10]
|
||||
mov x11, sp
|
||||
str x11, [x10, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x10, #0x30]
|
||||
exit_sequence x10
|
||||
L6:
|
||||
movz x9, #0xb, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x10, #0xb, lsl 0
|
||||
str w10, [x9]
|
||||
mov x10, sp
|
||||
str x10, [x9, #0x38]
|
||||
adr x10, #0x0
|
||||
str x10, [x9, #0x30]
|
||||
exit_sequence x9
|
||||
L7:
|
||||
msr fpsr, xzr
|
||||
fcvtzu w6, d0
|
||||
fcvtzu w6, d10
|
||||
mrs x9 fpsr
|
||||
subs xzr, x9, #0x1
|
||||
b.ne #0x6c, (L5)
|
||||
fcmp d0, d0
|
||||
mov x9, x8
|
||||
b.ne #0x70, (L5)
|
||||
fcmp x10, x10
|
||||
mov x10, x9
|
||||
b.vc #0x34, (L4)
|
||||
movz x9, #0xc, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x11, #0xc, lsl 0
|
||||
str w11, [x10]
|
||||
mov x11, sp
|
||||
str x11, [x10, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x10, #0x30]
|
||||
exit_sequence x10
|
||||
L4:
|
||||
movz x9, #0xb, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x10, #0xb, lsl 0
|
||||
str w10, [x9]
|
||||
mov x10, sp
|
||||
str x10, [x9, #0x38]
|
||||
adr x10, #0x0
|
||||
str x10, [x9, #0x30]
|
||||
exit_sequence x9
|
||||
L5:
|
||||
msr fpsr, xzr
|
||||
fcvtzu w7, s1
|
||||
mrs x9 fpsr
|
||||
subs xzr, x9, #0x1
|
||||
b.ne #0x6c, (L3)
|
||||
fcmp s1, s1
|
||||
mov x9, d1
|
||||
b.ne #0x70, (L3)
|
||||
fcmp w9, w9
|
||||
mov x9, x8
|
||||
b.vc #0x34, (L2)
|
||||
movz x9, #0xc, lsl 0
|
||||
str w9, [x8]
|
||||
mov x9, sp
|
||||
str x9, [x8, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
movz x10, #0xc, lsl 0
|
||||
str w10, [x9]
|
||||
mov x10, sp
|
||||
str x10, [x9, #0x38]
|
||||
adr x10, #0x0
|
||||
str x10, [x9, #0x30]
|
||||
exit_sequence x9
|
||||
L2:
|
||||
movz x9, #0xb, lsl 0
|
||||
str w9, [x8]
|
||||
@@ -1279,7 +1299,7 @@ L2:
|
||||
str x9, [x8, #0x30]
|
||||
exit_sequence x8
|
||||
L3:
|
||||
fcvt s0, d0
|
||||
fcvt s0, x10
|
||||
fcvt d1, s1
|
||||
add sp, sp, #0x10
|
||||
ldr x30, [sp], #0x10
|
||||
@@ -1645,14 +1665,15 @@ L1 (SSA Block: blk0):
|
||||
ldr w133?, [x129?, #0x10]
|
||||
add x134?, x132?, #0x4
|
||||
subs xzr, x133?, x134?
|
||||
mov x140?, x128?
|
||||
b.hs L2
|
||||
movz x140?, #0x4, lsl 0
|
||||
str w140?, [x128?]
|
||||
mov x141?, sp
|
||||
str x141?, [x128?, #0x38]
|
||||
adr x142?, #0x0
|
||||
str x142?, [x128?, #0x30]
|
||||
exit_sequence x128?
|
||||
movz x141?, #0x4, lsl 0
|
||||
str w141?, [x140?]
|
||||
mov x142?, sp
|
||||
str x142?, [x140?, #0x38]
|
||||
adr x143?, #0x0
|
||||
str x143?, [x140?, #0x30]
|
||||
exit_sequence x140?
|
||||
L2:
|
||||
ldr x136?, [x129?, #0x8]
|
||||
add x139?, x136?, x132?
|
||||
@@ -1696,14 +1717,15 @@ L1 (SSA Block: blk0):
|
||||
ldr w8, [x1, #0x10]
|
||||
add x9, x10, #0x4
|
||||
subs xzr, x8, x9
|
||||
mov x9, x0
|
||||
b.hs #0x34, (L10)
|
||||
movz x9, #0x4, lsl 0
|
||||
str w9, [x0]
|
||||
mov x9, sp
|
||||
str x9, [x0, #0x38]
|
||||
adr x9, #0x0
|
||||
str x9, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
movz x11, #0x4, lsl 0
|
||||
str w11, [x9]
|
||||
mov x11, sp
|
||||
str x11, [x9, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x9, #0x30]
|
||||
exit_sequence x9
|
||||
L10:
|
||||
ldr x9, [x1, #0x8]
|
||||
add x10, x9, x10
|
||||
@@ -1712,14 +1734,15 @@ L10:
|
||||
uxtw x10, w10
|
||||
add x11, x10, #0x8
|
||||
subs xzr, x8, x11
|
||||
mov x11, x0
|
||||
b.hs #0x34, (L9)
|
||||
movz x11, #0x4, lsl 0
|
||||
str w11, [x0]
|
||||
mov x11, sp
|
||||
str x11, [x0, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
movz x12, #0x4, lsl 0
|
||||
str w12, [x11]
|
||||
mov x12, sp
|
||||
str x12, [x11, #0x38]
|
||||
adr x12, #0x0
|
||||
str x12, [x11, #0x30]
|
||||
exit_sequence x11
|
||||
L9:
|
||||
add x10, x9, x10
|
||||
str x3, [x10]
|
||||
@@ -1727,14 +1750,15 @@ L9:
|
||||
uxtw x10, w10
|
||||
add x11, x10, #0x4
|
||||
subs xzr, x8, x11
|
||||
mov x11, x0
|
||||
b.hs #0x34, (L8)
|
||||
movz x11, #0x4, lsl 0
|
||||
str w11, [x0]
|
||||
mov x11, sp
|
||||
str x11, [x0, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
movz x12, #0x4, lsl 0
|
||||
str w12, [x11]
|
||||
mov x12, sp
|
||||
str x12, [x11, #0x38]
|
||||
adr x12, #0x0
|
||||
str x12, [x11, #0x30]
|
||||
exit_sequence x11
|
||||
L8:
|
||||
add x10, x9, x10
|
||||
str s0, [x10]
|
||||
@@ -1742,14 +1766,15 @@ L8:
|
||||
uxtw x10, w10
|
||||
add x11, x10, #0x8
|
||||
subs xzr, x8, x11
|
||||
mov x11, x0
|
||||
b.hs #0x34, (L7)
|
||||
movz x11, #0x4, lsl 0
|
||||
str w11, [x0]
|
||||
mov x11, sp
|
||||
str x11, [x0, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
movz x12, #0x4, lsl 0
|
||||
str w12, [x11]
|
||||
mov x12, sp
|
||||
str x12, [x11, #0x38]
|
||||
adr x12, #0x0
|
||||
str x12, [x11, #0x30]
|
||||
exit_sequence x11
|
||||
L7:
|
||||
add x10, x9, x10
|
||||
str d1, [x10]
|
||||
@@ -1757,14 +1782,15 @@ L7:
|
||||
uxtw x10, w10
|
||||
add x11, x10, #0x1
|
||||
subs xzr, x8, x11
|
||||
mov x11, x0
|
||||
b.hs #0x34, (L6)
|
||||
movz x11, #0x4, lsl 0
|
||||
str w11, [x0]
|
||||
mov x11, sp
|
||||
str x11, [x0, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
movz x12, #0x4, lsl 0
|
||||
str w12, [x11]
|
||||
mov x12, sp
|
||||
str x12, [x11, #0x38]
|
||||
adr x12, #0x0
|
||||
str x12, [x11, #0x30]
|
||||
exit_sequence x11
|
||||
L6:
|
||||
add x10, x9, x10
|
||||
strb w2, [x10]
|
||||
@@ -1772,14 +1798,15 @@ L6:
|
||||
uxtw x10, w10
|
||||
add x11, x10, #0x2
|
||||
subs xzr, x8, x11
|
||||
mov x11, x0
|
||||
b.hs #0x34, (L5)
|
||||
movz x11, #0x4, lsl 0
|
||||
str w11, [x0]
|
||||
mov x11, sp
|
||||
str x11, [x0, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
movz x12, #0x4, lsl 0
|
||||
str w12, [x11]
|
||||
mov x12, sp
|
||||
str x12, [x11, #0x38]
|
||||
adr x12, #0x0
|
||||
str x12, [x11, #0x30]
|
||||
exit_sequence x11
|
||||
L5:
|
||||
add x10, x9, x10
|
||||
strh w2, [x10]
|
||||
@@ -1787,14 +1814,15 @@ L5:
|
||||
uxtw x10, w10
|
||||
add x11, x10, #0x1
|
||||
subs xzr, x8, x11
|
||||
mov x11, x0
|
||||
b.hs #0x34, (L4)
|
||||
movz x11, #0x4, lsl 0
|
||||
str w11, [x0]
|
||||
mov x11, sp
|
||||
str x11, [x0, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
movz x12, #0x4, lsl 0
|
||||
str w12, [x11]
|
||||
mov x12, sp
|
||||
str x12, [x11, #0x38]
|
||||
adr x12, #0x0
|
||||
str x12, [x11, #0x30]
|
||||
exit_sequence x11
|
||||
L4:
|
||||
add x10, x9, x10
|
||||
strb w3, [x10]
|
||||
@@ -1802,14 +1830,15 @@ L4:
|
||||
uxtw x10, w10
|
||||
add x11, x10, #0x2
|
||||
subs xzr, x8, x11
|
||||
mov x11, x0
|
||||
b.hs #0x34, (L3)
|
||||
movz x11, #0x4, lsl 0
|
||||
str w11, [x0]
|
||||
mov x11, sp
|
||||
str x11, [x0, #0x38]
|
||||
adr x11, #0x0
|
||||
str x11, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
movz x12, #0x4, lsl 0
|
||||
str w12, [x11]
|
||||
mov x12, sp
|
||||
str x12, [x11, #0x38]
|
||||
adr x12, #0x0
|
||||
str x12, [x11, #0x30]
|
||||
exit_sequence x11
|
||||
L3:
|
||||
add x10, x9, x10
|
||||
strh w3, [x10]
|
||||
|
||||
@@ -1189,9 +1189,11 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
|
||||
// If `c` (cond type) is a register, `cond64bit` must be chosen to indicate whether the register is 32-bit or 64-bit.
|
||||
// Otherwise, `cond64bit` is ignored.
|
||||
func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, code wazevoapi.ExitCode) {
|
||||
execCtxTmp := m.copyToTmp(execCtxVReg, ssa.TypeI64)
|
||||
|
||||
cbr := m.allocateInstr()
|
||||
m.insert(cbr)
|
||||
m.lowerExitWithCode(execCtxVReg, code)
|
||||
m.lowerExitWithCode(execCtxTmp, code)
|
||||
// Conditional branch target is after exit.
|
||||
l := m.insertBrTargetLabel()
|
||||
cbr.asCondBr(c, l, cond64bit)
|
||||
@@ -1315,6 +1317,9 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
|
||||
alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true)
|
||||
m.insert(alu)
|
||||
|
||||
execCtx := m.copyToTmp(ctx, ssa.TypeI64)
|
||||
_rn := operandNR(m.copyToTmp(rn.nr(), ssa.TypeI64))
|
||||
|
||||
// If it is not undefined, we can return the result.
|
||||
ok := m.allocateInstr()
|
||||
m.insert(ok)
|
||||
@@ -1323,12 +1328,12 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
|
||||
|
||||
// Compare the operand with itself to check whether it is a NaN.
|
||||
fpuCmp := m.allocateInstr()
|
||||
fpuCmp.asFpuCmp(rn, rn, src64bit)
|
||||
fpuCmp.asFpuCmp(_rn, _rn, src64bit)
|
||||
m.insert(fpuCmp)
|
||||
// If the VC flag is not set (== VS flag is set), it is a NaN.
|
||||
m.exitIfNot(ctx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger)
|
||||
m.exitIfNot(execCtx, vc.asCond(), false, wazevoapi.ExitCodeInvalidConversionToInteger)
|
||||
// Otherwise, it is an overflow.
|
||||
m.lowerExitWithCode(ctx, wazevoapi.ExitCodeIntegerOverflow)
|
||||
m.lowerExitWithCode(execCtx, wazevoapi.ExitCodeIntegerOverflow)
|
||||
|
||||
// Conditional branch target is after exit.
|
||||
l := m.insertBrTargetLabel()
|
||||
@@ -1838,10 +1843,12 @@ func (m *machine) lowerExitIfTrueWithCode(execCtxVReg regalloc.VReg, cond ssa.Va
|
||||
signed := c.Signed()
|
||||
m.lowerIcmpToFlag(x, y, signed)
|
||||
|
||||
execCtxTmp := m.copyToTmp(execCtxVReg, ssa.TypeI64)
|
||||
|
||||
// We have to skip the entire exit sequence if the condition is false.
|
||||
cbr := m.allocateInstr()
|
||||
m.insert(cbr)
|
||||
m.lowerExitWithCode(execCtxVReg, code)
|
||||
m.lowerExitWithCode(execCtxTmp, code)
|
||||
// conditional branch target is after exit.
|
||||
l := m.insertBrTargetLabel()
|
||||
cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), l, false /* ignored */)
|
||||
@@ -1904,31 +1911,38 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
|
||||
}
|
||||
|
||||
func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
|
||||
// Declare and insert the conditional branch here jump to label `ifNonZero` below:
|
||||
// but we cannot forward reference the label.
|
||||
cbr := m.allocateInstr()
|
||||
m.insert(cbr)
|
||||
// First we clear the unnecessary bits of rc by ANDing it with 1.
|
||||
one := m.compiler.AllocateVReg(ssa.TypeI32)
|
||||
m.lowerConstantI32(one, 1)
|
||||
and := m.allocateInstr()
|
||||
oneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
|
||||
and.asALU(aluOpAnd, oneOrZero, rc, operandNR(one), false)
|
||||
m.insert(and)
|
||||
|
||||
// If rc is zero, mov rd, rm then jump to end.
|
||||
mov0 := m.allocateInstr()
|
||||
mov0.asFpuMov128(rd.nr(), rm.nr())
|
||||
m.insert(mov0)
|
||||
// Sets all bits to 1 if rc is not zero.
|
||||
allOneOrZero := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
|
||||
alu := m.allocateInstr()
|
||||
alu.asALU(aluOpSub, allOneOrZero, operandNR(xzrVReg), oneOrZero, true)
|
||||
m.insert(alu)
|
||||
|
||||
// Declared and insert the non-conditional jump to label `end` below:
|
||||
// again, we cannot forward reference the label.
|
||||
br := m.allocateInstr()
|
||||
m.insert(br)
|
||||
// Then move the bits to the result vector register.
|
||||
dup := m.allocateInstr()
|
||||
dup.asVecDup(rd, allOneOrZero, vecArrangement2D)
|
||||
m.insert(dup)
|
||||
|
||||
// Create and insert the label, and update `cbr` to the real instruction.
|
||||
ifNonZero := m.insertBrTargetLabel()
|
||||
cbr.asCondBr(registerAsRegNotZeroCond(rc.nr()), ifNonZero, true)
|
||||
|
||||
// If rc is non-zero, set mov rd, rn.
|
||||
mov := m.allocateInstr()
|
||||
mov.asFpuMov128(rd.nr(), rn.nr())
|
||||
m.insert(mov)
|
||||
|
||||
// Create and insert the label, and update `br` to the real instruction.
|
||||
end := m.insertBrTargetLabel()
|
||||
br.asBr(end)
|
||||
// Now that `rd` has either all bits one or zero depending on `rc`,
|
||||
// we can use bsl to select between `rn` and `rm`.
|
||||
ins := m.allocateInstr()
|
||||
ins.asVecRRR(vecOpBsl, rd, rn, rm, vecArrangement16B)
|
||||
m.insert(ins)
|
||||
}
|
||||
|
||||
// copyToTmp copies the given regalloc.VReg to a temporary register. This is called before cbr to avoid the regalloc issue
|
||||
// e.g. reload happening in the middle of the exit sequence which is not the path the normal path executes
|
||||
func (m *machine) copyToTmp(v regalloc.VReg, typ ssa.Type) regalloc.VReg {
|
||||
mov := m.allocateInstr()
|
||||
tmp := m.compiler.AllocateVReg(typ)
|
||||
mov.asMove64(tmp, v)
|
||||
m.insert(mov)
|
||||
return tmp
|
||||
}
|
||||
|
||||
@@ -297,73 +297,79 @@ func TestMachine_lowerIDiv(t *testing.T) {
|
||||
name: "32bit unsigned", _64bit: false, signed: false,
|
||||
exp: `
|
||||
udiv w1?, w2?, w3?
|
||||
mov x1?, x65535?
|
||||
cbnz w3?, L1
|
||||
movz x1?, #0xa, lsl 0
|
||||
str w1?, [x65535?]
|
||||
mov x2?, sp
|
||||
str x2?, [x65535?, #0x38]
|
||||
adr x3?, #0x0
|
||||
str x3?, [x65535?, #0x30]
|
||||
exit_sequence x65535?
|
||||
movz x2?, #0xa, lsl 0
|
||||
str w2?, [x1?]
|
||||
mov x3?, sp
|
||||
str x3?, [x1?, #0x38]
|
||||
adr x4?, #0x0
|
||||
str x4?, [x1?, #0x30]
|
||||
exit_sequence x1?
|
||||
L1:
|
||||
`,
|
||||
},
|
||||
{name: "32bit signed", _64bit: false, signed: true, exp: `
|
||||
sdiv w1?, w2?, w3?
|
||||
mov x1?, x65535?
|
||||
cbnz w3?, L1
|
||||
movz x1?, #0xa, lsl 0
|
||||
str w1?, [x65535?]
|
||||
mov x2?, sp
|
||||
str x2?, [x65535?, #0x38]
|
||||
adr x3?, #0x0
|
||||
str x3?, [x65535?, #0x30]
|
||||
exit_sequence x65535?
|
||||
movz x2?, #0xa, lsl 0
|
||||
str w2?, [x1?]
|
||||
mov x3?, sp
|
||||
str x3?, [x1?, #0x38]
|
||||
adr x4?, #0x0
|
||||
str x4?, [x1?, #0x30]
|
||||
exit_sequence x1?
|
||||
L1:
|
||||
adds wzr, w3?, #0x1
|
||||
ccmp w2?, #0x1, #0x0, eq
|
||||
mov x5?, x65535?
|
||||
b.vc L2
|
||||
movz x4?, #0xb, lsl 0
|
||||
str w4?, [x65535?]
|
||||
mov x5?, sp
|
||||
str x5?, [x65535?, #0x38]
|
||||
adr x6?, #0x0
|
||||
str x6?, [x65535?, #0x30]
|
||||
exit_sequence x65535?
|
||||
movz x6?, #0xb, lsl 0
|
||||
str w6?, [x5?]
|
||||
mov x7?, sp
|
||||
str x7?, [x5?, #0x38]
|
||||
adr x8?, #0x0
|
||||
str x8?, [x5?, #0x30]
|
||||
exit_sequence x5?
|
||||
L2:
|
||||
`},
|
||||
{name: "64bit unsigned", _64bit: true, signed: false, exp: `
|
||||
udiv x1?, x2?, x3?
|
||||
mov x1?, x65535?
|
||||
cbnz x3?, L1
|
||||
movz x1?, #0xa, lsl 0
|
||||
str w1?, [x65535?]
|
||||
mov x2?, sp
|
||||
str x2?, [x65535?, #0x38]
|
||||
adr x3?, #0x0
|
||||
str x3?, [x65535?, #0x30]
|
||||
exit_sequence x65535?
|
||||
movz x2?, #0xa, lsl 0
|
||||
str w2?, [x1?]
|
||||
mov x3?, sp
|
||||
str x3?, [x1?, #0x38]
|
||||
adr x4?, #0x0
|
||||
str x4?, [x1?, #0x30]
|
||||
exit_sequence x1?
|
||||
L1:
|
||||
`},
|
||||
{name: "64bit signed", _64bit: true, signed: true, exp: `
|
||||
sdiv x1?, x2?, x3?
|
||||
mov x1?, x65535?
|
||||
cbnz x3?, L1
|
||||
movz x1?, #0xa, lsl 0
|
||||
str w1?, [x65535?]
|
||||
mov x2?, sp
|
||||
str x2?, [x65535?, #0x38]
|
||||
adr x3?, #0x0
|
||||
str x3?, [x65535?, #0x30]
|
||||
exit_sequence x65535?
|
||||
movz x2?, #0xa, lsl 0
|
||||
str w2?, [x1?]
|
||||
mov x3?, sp
|
||||
str x3?, [x1?, #0x38]
|
||||
adr x4?, #0x0
|
||||
str x4?, [x1?, #0x30]
|
||||
exit_sequence x1?
|
||||
L1:
|
||||
adds xzr, x3?, #0x1
|
||||
ccmp x2?, #0x1, #0x0, eq
|
||||
mov x5?, x65535?
|
||||
b.vc L2
|
||||
movz x4?, #0xb, lsl 0
|
||||
str w4?, [x65535?]
|
||||
mov x5?, sp
|
||||
str x5?, [x65535?, #0x38]
|
||||
adr x6?, #0x0
|
||||
str x6?, [x65535?, #0x30]
|
||||
exit_sequence x65535?
|
||||
movz x6?, #0xb, lsl 0
|
||||
str w6?, [x5?]
|
||||
mov x7?, sp
|
||||
str x7?, [x5?, #0x38]
|
||||
adr x8?, #0x0
|
||||
str x8?, [x5?, #0x30]
|
||||
exit_sequence x5?
|
||||
L2:
|
||||
`},
|
||||
} {
|
||||
@@ -409,24 +415,27 @@ msr fpsr, xzr
|
||||
fcvtzu w1, s2
|
||||
mrs x1? fpsr
|
||||
subs xzr, x1?, #0x1
|
||||
mov x2?, x15
|
||||
mov x3?, x2
|
||||
b.ne L2
|
||||
fcmp w2, w2
|
||||
fcmp w3?, w3?
|
||||
mov x4?, x2?
|
||||
b.vc L1
|
||||
movz x2?, #0xc, lsl 0
|
||||
str w2?, [x15]
|
||||
mov x3?, sp
|
||||
str x3?, [x15, #0x38]
|
||||
adr x4?, #0x0
|
||||
str x4?, [x15, #0x30]
|
||||
exit_sequence x15
|
||||
L1:
|
||||
movz x5?, #0xb, lsl 0
|
||||
str w5?, [x15]
|
||||
movz x5?, #0xc, lsl 0
|
||||
str w5?, [x4?]
|
||||
mov x6?, sp
|
||||
str x6?, [x15, #0x38]
|
||||
str x6?, [x4?, #0x38]
|
||||
adr x7?, #0x0
|
||||
str x7?, [x15, #0x30]
|
||||
exit_sequence x15
|
||||
str x7?, [x4?, #0x30]
|
||||
exit_sequence x4?
|
||||
L1:
|
||||
movz x8?, #0xb, lsl 0
|
||||
str w8?, [x2?]
|
||||
mov x9?, sp
|
||||
str x9?, [x2?, #0x38]
|
||||
adr x10?, #0x0
|
||||
str x10?, [x2?, #0x30]
|
||||
exit_sequence x2?
|
||||
L2:
|
||||
`,
|
||||
},
|
||||
@@ -842,12 +851,11 @@ func TestMachine_lowerSelectVec(t *testing.T) {
|
||||
|
||||
m.lowerSelectVec(c, rn, rm, rd)
|
||||
require.Equal(t, `
|
||||
cbnz x1?, L1
|
||||
mov v4?.16b, v3?.16b
|
||||
b L2
|
||||
L1:
|
||||
mov v4?.16b, v2?.16b
|
||||
L2:
|
||||
orr w5?, wzr, #0x1
|
||||
and w6?, w1?, w5?
|
||||
sub x7?, xzr, x6?
|
||||
dup v4?.2d, x7?
|
||||
bsl v4?.16b, v2?.16b, v3?.16b
|
||||
`, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user