wazevo: adds tests for high register pressure: part2 (#1687)
Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
@@ -1618,106 +1618,122 @@ L1 (SSA Block: blk0):
|
||||
fcvtzs x0, d0
|
||||
mrs x27 fpsr
|
||||
subs xzr, x27, #0x1
|
||||
b.ne #0x44
|
||||
b.ne #0x4c, (L17)
|
||||
fcmp d0, d0
|
||||
b.vc #0x20
|
||||
b.vc #0x24, (L16)
|
||||
movz x27, #0xc, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L16:
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L17:
|
||||
msr fpsr, xzr
|
||||
fcvtzs x1, s1
|
||||
mrs x27 fpsr
|
||||
subs xzr, x27, #0x1
|
||||
b.ne #0x44
|
||||
b.ne #0x4c, (L15)
|
||||
fcmp s1, s1
|
||||
b.vc #0x20
|
||||
b.vc #0x24, (L14)
|
||||
movz x27, #0xc, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L14:
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L15:
|
||||
msr fpsr, xzr
|
||||
fcvtzs w2, d0
|
||||
mrs x27 fpsr
|
||||
subs xzr, x27, #0x1
|
||||
b.ne #0x44
|
||||
b.ne #0x4c, (L13)
|
||||
fcmp d0, d0
|
||||
b.vc #0x20
|
||||
b.vc #0x24, (L12)
|
||||
movz x27, #0xc, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L12:
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L13:
|
||||
msr fpsr, xzr
|
||||
fcvtzs w3, s1
|
||||
mrs x27 fpsr
|
||||
subs xzr, x27, #0x1
|
||||
b.ne #0x44
|
||||
b.ne #0x4c, (L11)
|
||||
fcmp s1, s1
|
||||
b.vc #0x20
|
||||
b.vc #0x24, (L10)
|
||||
movz x27, #0xc, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L10:
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L11:
|
||||
msr fpsr, xzr
|
||||
fcvtzu x4, d0
|
||||
mrs x27 fpsr
|
||||
subs xzr, x27, #0x1
|
||||
b.ne #0x44
|
||||
b.ne #0x4c, (L9)
|
||||
fcmp d0, d0
|
||||
b.vc #0x20
|
||||
b.vc #0x24, (L8)
|
||||
movz x27, #0xc, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L8:
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L9:
|
||||
msr fpsr, xzr
|
||||
fcvtzu x5, s1
|
||||
mrs x27 fpsr
|
||||
subs xzr, x27, #0x1
|
||||
b.ne #0x44
|
||||
b.ne #0x4c, (L7)
|
||||
fcmp s1, s1
|
||||
b.vc #0x20
|
||||
b.vc #0x24, (L6)
|
||||
movz x27, #0xc, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L6:
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L7:
|
||||
msr fpsr, xzr
|
||||
fcvtzu w6, d0
|
||||
mrs x27 fpsr
|
||||
subs xzr, x27, #0x1
|
||||
b.ne #0x44
|
||||
b.ne #0x4c, (L5)
|
||||
fcmp d0, d0
|
||||
b.vc #0x20
|
||||
b.vc #0x24, (L4)
|
||||
movz x27, #0xc, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L4:
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L5:
|
||||
msr fpsr, xzr
|
||||
fcvtzu w7, s1
|
||||
mrs x27 fpsr
|
||||
subs xzr, x27, #0x1
|
||||
b.ne #0x44
|
||||
b.ne #0x4c, (L3)
|
||||
fcmp s1, s1
|
||||
b.vc #0x20
|
||||
b.vc #0x24, (L2)
|
||||
movz x27, #0xc, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L2:
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L3:
|
||||
fcvt s0, d0
|
||||
fcvt d1, s1
|
||||
ldr x30, [sp], #0x10
|
||||
@@ -1882,7 +1898,6 @@ L1 (SSA Block: blk0):
|
||||
str x24, [sp, #-0x10]!
|
||||
str x25, [sp, #-0x10]!
|
||||
str x26, [sp, #-0x10]!
|
||||
str x28, [sp, #-0x10]!
|
||||
str q18, [sp, #-0x10]!
|
||||
str q19, [sp, #-0x10]!
|
||||
str q20, [sp, #-0x10]!
|
||||
@@ -1931,9 +1946,9 @@ L1 (SSA Block: blk0):
|
||||
madd w25, w2, w25, wzr
|
||||
movz w26, #0x13, lsl 0
|
||||
madd w26, w2, w26, wzr
|
||||
movz w28, #0x14, lsl 0
|
||||
madd w28, w2, w28, wzr
|
||||
add w26, w26, w28
|
||||
movz w29, #0x14, lsl 0
|
||||
madd w29, w2, w29, wzr
|
||||
add w26, w26, w29
|
||||
add w25, w25, w26
|
||||
add w24, w24, w25
|
||||
add w23, w23, w24
|
||||
@@ -2021,7 +2036,6 @@ L1 (SSA Block: blk0):
|
||||
ldr q20, [sp], #0x10
|
||||
ldr q19, [sp], #0x10
|
||||
ldr q18, [sp], #0x10
|
||||
ldr x28, [sp], #0x10
|
||||
ldr x26, [sp], #0x10
|
||||
ldr x25, [sp], #0x10
|
||||
ldr x24, [sp], #0x10
|
||||
@@ -2150,10 +2164,11 @@ L1 (SSA Block: blk0):
|
||||
ldr w5?, [x1?, #0x10]
|
||||
add x6?, x4?, #0x4
|
||||
subs xzr, x5?, x6?
|
||||
b.hs #0x20
|
||||
b.hs L2
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
L2:
|
||||
ldr x8?, [x1?, #0x8]
|
||||
add x11?, x8?, x4?
|
||||
ldr w10?, [x11?]
|
||||
@@ -2167,10 +2182,11 @@ L1 (SSA Block: blk0):
|
||||
ldr w10, [x1, #0x10]
|
||||
add x9, x8, #0x4
|
||||
subs xzr, x10, x9
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L2)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L2:
|
||||
ldr x9, [x1, #0x8]
|
||||
add x8, x9, x8
|
||||
ldr w0, [x8]
|
||||
@@ -2188,10 +2204,11 @@ L1 (SSA Block: blk0):
|
||||
ldr w8, [x1, #0x10]
|
||||
add x9, x10, #0x4
|
||||
subs xzr, x8, x9
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L10)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L10:
|
||||
ldr x9, [x1, #0x8]
|
||||
add x10, x9, x10
|
||||
str w2, [x10]
|
||||
@@ -2199,80 +2216,88 @@ L1 (SSA Block: blk0):
|
||||
uxtw x11, w10
|
||||
add x10, x11, #0x8
|
||||
subs xzr, x8, x10
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L9)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L9:
|
||||
add x10, x9, x11
|
||||
str x3, [x10]
|
||||
orr w10, wzr, #0x10
|
||||
uxtw x11, w10
|
||||
add x10, x11, #0x4
|
||||
subs xzr, x8, x10
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L8)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L8:
|
||||
add x10, x9, x11
|
||||
str s0, [x10]
|
||||
orr w10, wzr, #0x18
|
||||
uxtw x11, w10
|
||||
add x10, x11, #0x8
|
||||
subs xzr, x8, x10
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L7)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L7:
|
||||
add x10, x9, x11
|
||||
str d1, [x10]
|
||||
orr w10, wzr, #0x20
|
||||
uxtw x11, w10
|
||||
add x10, x11, #0x1
|
||||
subs xzr, x8, x10
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L6)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L6:
|
||||
add x10, x9, x11
|
||||
strb w2, [x10]
|
||||
movz w10, #0x28, lsl 0
|
||||
uxtw x11, w10
|
||||
add x10, x11, #0x2
|
||||
subs xzr, x8, x10
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L5)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L5:
|
||||
add x10, x9, x11
|
||||
strh w2, [x10]
|
||||
orr w10, wzr, #0x30
|
||||
uxtw x11, w10
|
||||
add x10, x11, #0x1
|
||||
subs xzr, x8, x10
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L4)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L4:
|
||||
add x10, x9, x11
|
||||
strb w3, [x10]
|
||||
orr w10, wzr, #0x38
|
||||
uxtw x11, w10
|
||||
add x10, x11, #0x2
|
||||
subs xzr, x8, x10
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L3)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L3:
|
||||
add x10, x9, x11
|
||||
strh w3, [x10]
|
||||
orr w10, wzr, #0x40
|
||||
uxtw x11, w10
|
||||
add x10, x11, #0x4
|
||||
subs xzr, x8, x10
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L2)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0]
|
||||
exit_sequence x0
|
||||
L2:
|
||||
add x8, x9, x11
|
||||
str w3, [x8]
|
||||
ldr x30, [sp], #0x10
|
||||
@@ -2281,296 +2306,6 @@ L1 (SSA Block: blk0):
|
||||
},
|
||||
{
|
||||
name: "memory_loads", m: testcases.MemoryLoads.Module,
|
||||
afterLoweringARM64: `
|
||||
L1 (SSA Block: blk0):
|
||||
mov x0?, x0
|
||||
mov x1?, x1
|
||||
mov x2?, x2
|
||||
uxtw x4?, w2?
|
||||
ldr w5?, [x1?, #0x10]
|
||||
add x6?, x4?, #0x4
|
||||
subs xzr, x5?, x6?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
ldr x8?, [x1?, #0x8]
|
||||
add x200?, x8?, x4?
|
||||
ldr w10?, [x200?]
|
||||
uxtw x12?, w2?
|
||||
add x13?, x12?, #0x8
|
||||
subs xzr, x5?, x13?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x199?, x8?, x12?
|
||||
ldr x16?, [x199?]
|
||||
uxtw x18?, w2?
|
||||
add x19?, x18?, #0x4
|
||||
subs xzr, x5?, x19?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x198?, x8?, x18?
|
||||
ldr s22?, [x198?]
|
||||
uxtw x24?, w2?
|
||||
add x25?, x24?, #0x8
|
||||
subs xzr, x5?, x25?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x197?, x8?, x24?
|
||||
ldr d28?, [x197?]
|
||||
uxtw x30?, w2?
|
||||
add x31?, x30?, #0x13
|
||||
subs xzr, x5?, x31?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x196?, x8?, x30?
|
||||
ldr w34?, [x196?, #0xf]
|
||||
uxtw x36?, w2?
|
||||
add x37?, x36?, #0x17
|
||||
subs xzr, x5?, x37?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x195?, x8?, x36?
|
||||
ldr x40?, [x195?, #0xf]
|
||||
uxtw x42?, w2?
|
||||
add x43?, x42?, #0x13
|
||||
subs xzr, x5?, x43?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x194?, x8?, x42?
|
||||
ldr s46?, [x194?, #0xf]
|
||||
uxtw x48?, w2?
|
||||
add x49?, x48?, #0x17
|
||||
subs xzr, x5?, x49?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x193?, x8?, x48?
|
||||
ldr d52?, [x193?, #0xf]
|
||||
uxtw x54?, w2?
|
||||
add x55?, x54?, #0x1
|
||||
subs xzr, x5?, x55?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x192?, x8?, x54?
|
||||
ldrsb w58?, [x192?]
|
||||
uxtw x60?, w2?
|
||||
add x61?, x60?, #0x10
|
||||
subs xzr, x5?, x61?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x191?, x8?, x60?
|
||||
ldrsb w64?, [x191?, #0xf]
|
||||
uxtw x66?, w2?
|
||||
add x67?, x66?, #0x1
|
||||
subs xzr, x5?, x67?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x190?, x8?, x66?
|
||||
ldrb w70?, [x190?]
|
||||
uxtw x72?, w2?
|
||||
add x73?, x72?, #0x10
|
||||
subs xzr, x5?, x73?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x189?, x8?, x72?
|
||||
ldrb w76?, [x189?, #0xf]
|
||||
uxtw x78?, w2?
|
||||
add x79?, x78?, #0x2
|
||||
subs xzr, x5?, x79?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x188?, x8?, x78?
|
||||
ldrsh w82?, [x188?]
|
||||
uxtw x84?, w2?
|
||||
add x85?, x84?, #0x11
|
||||
subs xzr, x5?, x85?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x187?, x8?, x84?
|
||||
ldrsh w88?, [x187?, #0xf]
|
||||
uxtw x90?, w2?
|
||||
add x91?, x90?, #0x2
|
||||
subs xzr, x5?, x91?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x186?, x8?, x90?
|
||||
ldrh w94?, [x186?]
|
||||
uxtw x96?, w2?
|
||||
add x97?, x96?, #0x11
|
||||
subs xzr, x5?, x97?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x185?, x8?, x96?
|
||||
ldrh w100?, [x185?, #0xf]
|
||||
uxtw x102?, w2?
|
||||
add x103?, x102?, #0x1
|
||||
subs xzr, x5?, x103?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x184?, x8?, x102?
|
||||
ldrsb w106?, [x184?]
|
||||
uxtw x108?, w2?
|
||||
add x109?, x108?, #0x10
|
||||
subs xzr, x5?, x109?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x183?, x8?, x108?
|
||||
ldrsb w112?, [x183?, #0xf]
|
||||
uxtw x114?, w2?
|
||||
add x115?, x114?, #0x1
|
||||
subs xzr, x5?, x115?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x182?, x8?, x114?
|
||||
ldrb w118?, [x182?]
|
||||
uxtw x120?, w2?
|
||||
add x121?, x120?, #0x10
|
||||
subs xzr, x5?, x121?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x181?, x8?, x120?
|
||||
ldrb w124?, [x181?, #0xf]
|
||||
uxtw x126?, w2?
|
||||
add x127?, x126?, #0x2
|
||||
subs xzr, x5?, x127?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x180?, x8?, x126?
|
||||
ldrsh w130?, [x180?]
|
||||
uxtw x132?, w2?
|
||||
add x133?, x132?, #0x11
|
||||
subs xzr, x5?, x133?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x179?, x8?, x132?
|
||||
ldrsh w136?, [x179?, #0xf]
|
||||
uxtw x138?, w2?
|
||||
add x139?, x138?, #0x2
|
||||
subs xzr, x5?, x139?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x178?, x8?, x138?
|
||||
ldrh w142?, [x178?]
|
||||
uxtw x144?, w2?
|
||||
add x145?, x144?, #0x11
|
||||
subs xzr, x5?, x145?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x177?, x8?, x144?
|
||||
ldrh w148?, [x177?, #0xf]
|
||||
uxtw x150?, w2?
|
||||
add x151?, x150?, #0x4
|
||||
subs xzr, x5?, x151?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x176?, x8?, x150?
|
||||
ldrs w154?, [x176?]
|
||||
uxtw x156?, w2?
|
||||
add x157?, x156?, #0x13
|
||||
subs xzr, x5?, x157?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x175?, x8?, x156?
|
||||
ldrs w160?, [x175?, #0xf]
|
||||
uxtw x162?, w2?
|
||||
add x163?, x162?, #0x4
|
||||
subs xzr, x5?, x163?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x174?, x8?, x162?
|
||||
ldr w166?, [x174?]
|
||||
uxtw x168?, w2?
|
||||
add x169?, x168?, #0x13
|
||||
subs xzr, x5?, x169?
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x0?]
|
||||
exit_sequence x0?
|
||||
add x173?, x8?, x168?
|
||||
ldr w172?, [x173?, #0xf]
|
||||
str x172?, [#ret_space, #0x78]
|
||||
str x166?, [#ret_space, #0x70]
|
||||
str x160?, [#ret_space, #0x68]
|
||||
str x154?, [#ret_space, #0x60]
|
||||
str x148?, [#ret_space, #0x58]
|
||||
str x142?, [#ret_space, #0x50]
|
||||
str x136?, [#ret_space, #0x48]
|
||||
str x130?, [#ret_space, #0x40]
|
||||
str x124?, [#ret_space, #0x38]
|
||||
str x118?, [#ret_space, #0x30]
|
||||
str x112?, [#ret_space, #0x28]
|
||||
str x106?, [#ret_space, #0x20]
|
||||
str w100?, [#ret_space, #0x18]
|
||||
str w94?, [#ret_space, #0x10]
|
||||
str w88?, [#ret_space, #0x8]
|
||||
str w82?, [#ret_space, #0x0]
|
||||
mov x7, x76?
|
||||
mov x6, x70?
|
||||
mov x5, x64?
|
||||
mov x4, x58?
|
||||
mov v3.8b, v52?.8b
|
||||
mov v2.8b, v46?.8b
|
||||
mov x3, x40?
|
||||
mov x2, x34?
|
||||
mov v1.8b, v28?.8b
|
||||
mov v0.8b, v22?.8b
|
||||
mov x1, x16?
|
||||
mov x0, x10?
|
||||
ret
|
||||
`,
|
||||
|
||||
afterFinalizeARM64: `
|
||||
L1 (SSA Block: blk0):
|
||||
str x30, [sp, #-0x10]!
|
||||
@@ -2582,280 +2317,306 @@ L1 (SSA Block: blk0):
|
||||
str x24, [sp, #-0x10]!
|
||||
str x25, [sp, #-0x10]!
|
||||
str x26, [sp, #-0x10]!
|
||||
str x28, [sp, #-0x10]!
|
||||
mov x8, x0
|
||||
uxtw x11, w2
|
||||
ldr w9, [x1, #0x10]
|
||||
add x10, x11, #0x4
|
||||
subs xzr, x9, x10
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L29)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L29:
|
||||
ldr x10, [x1, #0x8]
|
||||
add x11, x10, x11
|
||||
ldr w0, [x11]
|
||||
uxtw x12, w2
|
||||
add x11, x12, #0x8
|
||||
subs xzr, x9, x11
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L28)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L28:
|
||||
add x11, x10, x12
|
||||
ldr x1, [x11]
|
||||
uxtw x12, w2
|
||||
add x11, x12, #0x4
|
||||
subs xzr, x9, x11
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L27)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L27:
|
||||
add x11, x10, x12
|
||||
ldr s0, [x11]
|
||||
uxtw x12, w2
|
||||
add x11, x12, #0x8
|
||||
subs xzr, x9, x11
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L26)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L26:
|
||||
add x11, x10, x12
|
||||
ldr d1, [x11]
|
||||
uxtw x12, w2
|
||||
add x11, x12, #0x13
|
||||
subs xzr, x9, x11
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L25)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L25:
|
||||
add x11, x10, x12
|
||||
ldr w11, [x11, #0xf]
|
||||
uxtw x13, w2
|
||||
add x12, x13, #0x17
|
||||
subs xzr, x9, x12
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L24)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L24:
|
||||
add x12, x10, x13
|
||||
ldr x3, [x12, #0xf]
|
||||
uxtw x13, w2
|
||||
add x12, x13, #0x13
|
||||
subs xzr, x9, x12
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L23)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L23:
|
||||
add x12, x10, x13
|
||||
ldr s2, [x12, #0xf]
|
||||
uxtw x13, w2
|
||||
add x12, x13, #0x17
|
||||
subs xzr, x9, x12
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L22)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L22:
|
||||
add x12, x10, x13
|
||||
ldr d3, [x12, #0xf]
|
||||
uxtw x13, w2
|
||||
add x12, x13, #0x1
|
||||
subs xzr, x9, x12
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L21)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L21:
|
||||
add x12, x10, x13
|
||||
ldrsb w4, [x12]
|
||||
uxtw x13, w2
|
||||
add x12, x13, #0x10
|
||||
subs xzr, x9, x12
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L20)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L20:
|
||||
add x12, x10, x13
|
||||
ldrsb w5, [x12, #0xf]
|
||||
uxtw x13, w2
|
||||
add x12, x13, #0x1
|
||||
subs xzr, x9, x12
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L19)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L19:
|
||||
add x12, x10, x13
|
||||
ldrb w6, [x12]
|
||||
uxtw x13, w2
|
||||
add x12, x13, #0x10
|
||||
subs xzr, x9, x12
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L18)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L18:
|
||||
add x12, x10, x13
|
||||
ldrb w7, [x12, #0xf]
|
||||
uxtw x13, w2
|
||||
add x12, x13, #0x2
|
||||
subs xzr, x9, x12
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L17)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L17:
|
||||
add x12, x10, x13
|
||||
ldrsh w12, [x12]
|
||||
uxtw x14, w2
|
||||
add x13, x14, #0x11
|
||||
subs xzr, x9, x13
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L16)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L16:
|
||||
add x13, x10, x14
|
||||
ldrsh w13, [x13, #0xf]
|
||||
uxtw x15, w2
|
||||
add x14, x15, #0x2
|
||||
subs xzr, x9, x14
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L15)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L15:
|
||||
add x14, x10, x15
|
||||
ldrh w14, [x14]
|
||||
uxtw x16, w2
|
||||
add x15, x16, #0x11
|
||||
subs xzr, x9, x15
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L14)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L14:
|
||||
add x15, x10, x16
|
||||
ldrh w15, [x15, #0xf]
|
||||
uxtw x17, w2
|
||||
add x16, x17, #0x1
|
||||
subs xzr, x9, x16
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L13)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L13:
|
||||
add x16, x10, x17
|
||||
ldrsb w16, [x16]
|
||||
uxtw x18, w2
|
||||
add x17, x18, #0x10
|
||||
subs xzr, x9, x17
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L12)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L12:
|
||||
add x17, x10, x18
|
||||
ldrsb w17, [x17, #0xf]
|
||||
uxtw x19, w2
|
||||
add x18, x19, #0x1
|
||||
subs xzr, x9, x18
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L11)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L11:
|
||||
add x18, x10, x19
|
||||
ldrb w18, [x18]
|
||||
uxtw x20, w2
|
||||
add x19, x20, #0x10
|
||||
subs xzr, x9, x19
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L10)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L10:
|
||||
add x19, x10, x20
|
||||
ldrb w19, [x19, #0xf]
|
||||
uxtw x21, w2
|
||||
add x20, x21, #0x2
|
||||
subs xzr, x9, x20
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L9)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L9:
|
||||
add x20, x10, x21
|
||||
ldrsh w20, [x20]
|
||||
uxtw x22, w2
|
||||
add x21, x22, #0x11
|
||||
subs xzr, x9, x21
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L8)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L8:
|
||||
add x21, x10, x22
|
||||
ldrsh w21, [x21, #0xf]
|
||||
uxtw x23, w2
|
||||
add x22, x23, #0x2
|
||||
subs xzr, x9, x22
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L7)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L7:
|
||||
add x22, x10, x23
|
||||
ldrh w22, [x22]
|
||||
uxtw x24, w2
|
||||
add x23, x24, #0x11
|
||||
subs xzr, x9, x23
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L6)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L6:
|
||||
add x23, x10, x24
|
||||
ldrh w23, [x23, #0xf]
|
||||
uxtw x25, w2
|
||||
add x24, x25, #0x4
|
||||
subs xzr, x9, x24
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L5)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L5:
|
||||
add x24, x10, x25
|
||||
ldrs w24, [x24]
|
||||
uxtw x26, w2
|
||||
add x25, x26, #0x13
|
||||
subs xzr, x9, x25
|
||||
b.hs #0x20
|
||||
b.hs #0x24, (L4)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L4:
|
||||
add x25, x10, x26
|
||||
ldrs w25, [x25, #0xf]
|
||||
uxtw x28, w2
|
||||
add x26, x28, #0x4
|
||||
subs xzr, x9, x26
|
||||
b.hs #0x20
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
add x26, x10, x28
|
||||
ldr w26, [x26]
|
||||
uxtw x29, w2
|
||||
add x28, x29, #0x13
|
||||
subs xzr, x9, x28
|
||||
b.hs #0x20
|
||||
add x26, x29, #0x4
|
||||
subs xzr, x9, x26
|
||||
b.hs #0x24, (L3)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
add x8, x10, x29
|
||||
L3:
|
||||
add x26, x10, x29
|
||||
ldr w26, [x26]
|
||||
uxtw x30, w2
|
||||
add x29, x30, #0x13
|
||||
subs xzr, x9, x29
|
||||
b.hs #0x24, (L2)
|
||||
movz x27, #0x4, lsl 0
|
||||
str w27, [x8]
|
||||
exit_sequence x8
|
||||
L2:
|
||||
add x8, x10, x30
|
||||
ldr w8, [x8, #0xf]
|
||||
str x8, [sp, #0x118]
|
||||
str x26, [sp, #0x110]
|
||||
str x25, [sp, #0x108]
|
||||
str x24, [sp, #0x100]
|
||||
str x23, [sp, #0xf8]
|
||||
str x22, [sp, #0xf0]
|
||||
str x21, [sp, #0xe8]
|
||||
str x20, [sp, #0xe0]
|
||||
str x19, [sp, #0xd8]
|
||||
str x18, [sp, #0xd0]
|
||||
str x17, [sp, #0xc8]
|
||||
str x16, [sp, #0xc0]
|
||||
str w15, [sp, #0xb8]
|
||||
str w14, [sp, #0xb0]
|
||||
str w13, [sp, #0xa8]
|
||||
str w12, [sp, #0xa0]
|
||||
str x8, [sp, #0x108]
|
||||
str x26, [sp, #0x100]
|
||||
str x25, [sp, #0xf8]
|
||||
str x24, [sp, #0xf0]
|
||||
str x23, [sp, #0xe8]
|
||||
str x22, [sp, #0xe0]
|
||||
str x21, [sp, #0xd8]
|
||||
str x20, [sp, #0xd0]
|
||||
str x19, [sp, #0xc8]
|
||||
str x18, [sp, #0xc0]
|
||||
str x17, [sp, #0xb8]
|
||||
str x16, [sp, #0xb0]
|
||||
str w15, [sp, #0xa8]
|
||||
str w14, [sp, #0xa0]
|
||||
str w13, [sp, #0x98]
|
||||
str w12, [sp, #0x90]
|
||||
mov x2, x11
|
||||
ldr x28, [sp], #0x10
|
||||
ldr x26, [sp], #0x10
|
||||
ldr x25, [sp], #0x10
|
||||
ldr x24, [sp], #0x10
|
||||
|
||||
@@ -15,13 +15,13 @@ const xArgRetRegMax, vArgRetRegMax = x7, v7 // x0-x7 & v0-v7.
|
||||
var regInfo = &regalloc.RegisterInfo{
|
||||
AllocatableRegisters: [regalloc.RegTypeNum][]regalloc.RealReg{
|
||||
// We don't allocate:
|
||||
// - x27(=tmpReg) because of the reason described on tmpReg.
|
||||
// - x18 which is reserved by the macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
|
||||
// - x18: Reserved by the macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
|
||||
// - x28: Reserved by Go runtime.
|
||||
// - x27(=tmpReg): because of the reason described on tmpReg.
|
||||
regalloc.RegTypeInt: {
|
||||
x8, x9, x10, x11, x12, x13, x14, x15,
|
||||
x16, x17, x18, x19, x20, x21, x22, x23, x24, x25,
|
||||
// No x27 here.
|
||||
x26, x28, x29, x30,
|
||||
x26, x29, x30,
|
||||
// These are the argument/return registers. Less preferred in the allocation.
|
||||
x7, x6, x5, x4, x3, x2, x1, x0,
|
||||
},
|
||||
@@ -37,7 +37,7 @@ var regInfo = &regalloc.RegisterInfo{
|
||||
v18: {}, v19: {}, v20: {}, v21: {}, v22: {}, v23: {}, v24: {}, v25: {}, v26: {}, v27: {}, v28: {}, v29: {}, v30: {}, v31: {},
|
||||
},
|
||||
CallerSavedRegisters: map[regalloc.RealReg]struct{}{
|
||||
x0: {}, x1: {}, x2: {}, x3: {}, x4: {}, x5: {}, x6: {}, x7: {}, x8: {}, x9: {}, x10: {}, x11: {}, x12: {}, x13: {}, x14: {}, x15: {}, x16: {}, x17: {},
|
||||
x0: {}, x1: {}, x2: {}, x3: {}, x4: {}, x5: {}, x6: {}, x7: {}, x8: {}, x9: {}, x10: {}, x11: {}, x12: {}, x13: {}, x14: {}, x15: {}, x16: {}, x17: {}, x29: {}, x30: {},
|
||||
v0: {}, v1: {}, v2: {}, v3: {}, v4: {}, v5: {}, v6: {}, v7: {}, v8: {}, v9: {}, v10: {}, v11: {}, v12: {}, v13: {}, v14: {}, v15: {}, v16: {}, v17: {},
|
||||
},
|
||||
RealRegToVReg: []regalloc.VReg{
|
||||
|
||||
@@ -73,7 +73,7 @@ func TestMachine_CompileGoFunctionTrampoline(t *testing.T) {
|
||||
str w17, [x0]
|
||||
mov x27, sp
|
||||
str x27, [x0, #0x38]
|
||||
adr x27, #0x1c
|
||||
adr x27, #0x20
|
||||
str x27, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
ldr x19, [x0, #0x60]
|
||||
@@ -155,7 +155,7 @@ func TestMachine_CompileGoFunctionTrampoline(t *testing.T) {
|
||||
str w17, [x0]
|
||||
mov x27, sp
|
||||
str x27, [x0, #0x38]
|
||||
adr x27, #0x1c
|
||||
adr x27, #0x20
|
||||
str x27, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
ldr x19, [x0, #0x60]
|
||||
@@ -224,7 +224,7 @@ func TestMachine_CompileGoFunctionTrampoline(t *testing.T) {
|
||||
str w17, [x0]
|
||||
mov x27, sp
|
||||
str x27, [x0, #0x38]
|
||||
adr x27, #0x1c
|
||||
adr x27, #0x20
|
||||
str x27, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
ldr x19, [x0, #0x60]
|
||||
|
||||
@@ -401,6 +401,15 @@ func (i *instruction) asNop0() {
|
||||
i.kind = nop0
|
||||
}
|
||||
|
||||
func (i *instruction) asNop0WithLabel(l label) {
|
||||
i.kind = nop0
|
||||
i.u1 = uint64(l)
|
||||
}
|
||||
|
||||
func (i *instruction) nop0Label() label {
|
||||
return label(i.u1)
|
||||
}
|
||||
|
||||
func (i *instruction) asRet(abi *abiImpl) {
|
||||
i.kind = ret
|
||||
i.abi = abi
|
||||
@@ -792,7 +801,12 @@ func (i *instruction) String() (str string) {
|
||||
|
||||
switch i.kind {
|
||||
case nop0:
|
||||
str = "nop0"
|
||||
if i.u1 != 0 {
|
||||
l := label(i.u1)
|
||||
str = fmt.Sprintf("%s:", l)
|
||||
} else {
|
||||
str = "nop0"
|
||||
}
|
||||
case aluRRR:
|
||||
size := is64SizeBitToSize(i.u3)
|
||||
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
|
||||
@@ -1741,17 +1755,7 @@ func (s shiftOp) String() string {
|
||||
panic(int(s))
|
||||
}
|
||||
|
||||
func binarySize(begin, end *instruction) (size int64) {
|
||||
for cur := begin; ; cur = cur.next {
|
||||
size += cur.size()
|
||||
if cur == end {
|
||||
break
|
||||
}
|
||||
}
|
||||
return size
|
||||
}
|
||||
|
||||
const exitSequenceSize = 5 * 4 // 5 instructions as in encodeExitSequence.
|
||||
const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence.
|
||||
|
||||
// size returns the size of the instruction in encoded bytes.
|
||||
func (i *instruction) size() int64 {
|
||||
|
||||
@@ -81,17 +81,7 @@ func (i *instruction) encode(c backend.Compiler) {
|
||||
to, from := i.rd.realReg(), i.rn.realReg()
|
||||
toIsSp := to == sp
|
||||
fromIsSp := from == sp
|
||||
if toIsSp || fromIsSp {
|
||||
// This is an alias of ADD (immediate):
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate--
|
||||
c.Emit4Bytes(encodeAddSubtractImmediate(0b100, 0, 0,
|
||||
regNumberInEncoding[from], regNumberInEncoding[to]),
|
||||
)
|
||||
} else {
|
||||
// This is an alias of ORR (shifted register):
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--
|
||||
c.Emit4Bytes(encodeLogicalShiftedRegister(0b101, 0, regNumberInEncoding[from], 0, regNumberInEncoding[xzr], regNumberInEncoding[to]))
|
||||
}
|
||||
c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp))
|
||||
case loadP64, storeP64:
|
||||
rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
|
||||
amode := i.amode
|
||||
@@ -216,7 +206,11 @@ func (i *instruction) encode(c backend.Compiler) {
|
||||
c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5)
|
||||
case udf:
|
||||
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined-?lang=en
|
||||
c.Emit4Bytes(0)
|
||||
if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
|
||||
c.Emit4Bytes(dummyInstruction)
|
||||
} else {
|
||||
c.Emit4Bytes(0)
|
||||
}
|
||||
case adr:
|
||||
c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1)))
|
||||
case cSel:
|
||||
@@ -305,6 +299,18 @@ func (i *instruction) encode(c backend.Compiler) {
|
||||
}
|
||||
}
|
||||
|
||||
func encodeMov64(rd, rn uint32, toIsSp, fromIsSp bool) uint32 {
|
||||
if toIsSp || fromIsSp {
|
||||
// This is an alias of ADD (immediate):
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate--
|
||||
return encodeAddSubtractImmediate(0b100, 0, 0, rn, rd)
|
||||
} else {
|
||||
// This is an alias of ORR (shifted register):
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--
|
||||
return encodeLogicalShiftedRegister(0b101, 0, rn, 0, regNumberInEncoding[xzr], rd)
|
||||
}
|
||||
}
|
||||
|
||||
// encodeSystemRegisterMove encodes as "System register move" in
|
||||
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
|
||||
//
|
||||
@@ -1290,12 +1296,30 @@ func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []ui
|
||||
// encodeExitSequence matches the implementation detail of abiImpl.emitGoEntryPreamble.
|
||||
func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) {
|
||||
// Restore the FP, SP and LR, and return to the Go code:
|
||||
// ldr fp, [savedExecutionContextPtr, #OriginalFramePointer]
|
||||
// ldr tmp, [savedExecutionContextPtr, #OriginalStackPointer]
|
||||
// ldr lr, [ctxReg, #GoReturnAddress]
|
||||
// ldr fp, [ctxReg, #OriginalFramePointer]
|
||||
// ldr tmp, [ctxReg, #OriginalStackPointer]
|
||||
// mov sp, tmp ;; sp cannot be ldr'ed directly.
|
||||
// ldr lr, [savedExecutionContextPtr, #GoReturnAddress]
|
||||
// ret ;; --> return to the Go code
|
||||
|
||||
var ctxEvicted bool
|
||||
if ctx := ctxReg.RealReg(); ctx == fp || ctx == lr {
|
||||
// In order to avoid overwriting the context register, we move ctxReg to tmp.
|
||||
c.Emit4Bytes(encodeMov64(regNumberInEncoding[tmp], regNumberInEncoding[ctx], false, false))
|
||||
ctxReg = tmpRegVReg
|
||||
ctxEvicted = true
|
||||
}
|
||||
|
||||
restoreLr := encodeLoadOrStore(
|
||||
uLoad64,
|
||||
regNumberInEncoding[lr],
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: ctxReg,
|
||||
imm: wazevoapi.ExecutionContextOffsets.GoReturnAddress.I64(),
|
||||
},
|
||||
)
|
||||
|
||||
restoreFp := encodeLoadOrStore(
|
||||
uLoad64,
|
||||
regNumberInEncoding[fp],
|
||||
@@ -1319,21 +1343,16 @@ func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) {
|
||||
movTmpToSp := encodeAddSubtractImmediate(0b100, 0, 0,
|
||||
regNumberInEncoding[tmp], regNumberInEncoding[sp])
|
||||
|
||||
restoreLr := encodeLoadOrStore(
|
||||
uLoad64,
|
||||
regNumberInEncoding[lr],
|
||||
addressMode{
|
||||
kind: addressModeKindRegUnsignedImm12,
|
||||
rn: ctxReg,
|
||||
imm: wazevoapi.ExecutionContextOffsets.GoReturnAddress.I64(),
|
||||
},
|
||||
)
|
||||
|
||||
c.Emit4Bytes(restoreFp)
|
||||
c.Emit4Bytes(restoreLr)
|
||||
c.Emit4Bytes(restoreSpToTmp)
|
||||
c.Emit4Bytes(movTmpToSp)
|
||||
c.Emit4Bytes(restoreLr)
|
||||
c.Emit4Bytes(encodeRet())
|
||||
if !ctxEvicted {
|
||||
// In order to have the fixed-length exit sequence, we need to pad the binary.
|
||||
// Since this will never be reached, we insert a dummy instruction.
|
||||
c.Emit4Bytes(dummyInstruction)
|
||||
}
|
||||
}
|
||||
|
||||
func encodeRet() uint32 {
|
||||
|
||||
@@ -1009,15 +1009,42 @@ func TestInstruction_encoding_store(t *testing.T) {
|
||||
}
|
||||
|
||||
func Test_encodeExitSequence(t *testing.T) {
|
||||
m := &mockCompiler{}
|
||||
encodeExitSequence(m, x22VReg)
|
||||
// ldr x29, [x22, #0x10]
|
||||
// ldr x27, [x22, #0x18]
|
||||
// mov sp, x27
|
||||
// ldr x30, [x22, #0x20]
|
||||
// ret
|
||||
require.Equal(t, "dd0a40f9db0e40f97f030091de1240f9c0035fd6", hex.EncodeToString(m.buf))
|
||||
require.Equal(t, len(m.buf), exitSequenceSize)
|
||||
t.Run("no overlap", func(t *testing.T) {
|
||||
m := &mockCompiler{}
|
||||
encodeExitSequence(m, x22VReg)
|
||||
// ldr x29, [x22, #0x10]
|
||||
// ldr x30, [x22, #0x20]
|
||||
// ldr x27, [x22, #0x18]
|
||||
// mov sp, x27
|
||||
// ret
|
||||
// b #0x14 ;; dummy
|
||||
require.Equal(t, "dd0a40f9de1240f9db0e40f97f030091c0035fd600000014", hex.EncodeToString(m.buf))
|
||||
require.Equal(t, len(m.buf), exitSequenceSize)
|
||||
})
|
||||
t.Run("fp", func(t *testing.T) {
|
||||
m := &mockCompiler{}
|
||||
encodeExitSequence(m, fpVReg)
|
||||
// mov x27, x29
|
||||
// ldr x29, [x27, #0x10]
|
||||
// ldr x30, [x27, #0x20]
|
||||
// ldr x27, [x27, #0x18]
|
||||
// mov sp, x27
|
||||
// ret
|
||||
require.Equal(t, "fb031daa7d0b40f97e1340f97b0f40f97f030091c0035fd6", hex.EncodeToString(m.buf))
|
||||
require.Equal(t, len(m.buf), exitSequenceSize)
|
||||
})
|
||||
t.Run("lr", func(t *testing.T) {
|
||||
m := &mockCompiler{}
|
||||
encodeExitSequence(m, lrVReg)
|
||||
// mov x27, x30
|
||||
// ldr x29, [x27, #0x10]
|
||||
// ldr x30, [x27, #0x20]
|
||||
// ldr x27, [x27, #0x18]
|
||||
// mov sp, x27
|
||||
// ret
|
||||
require.Equal(t, "fb031eaa7d0b40f97e1340f97b0f40f97f030091c0035fd6", hex.EncodeToString(m.buf))
|
||||
require.Equal(t, len(m.buf), exitSequenceSize)
|
||||
})
|
||||
}
|
||||
|
||||
func Test_lowerExitWithCodeEncodingSize(t *testing.T) {
|
||||
|
||||
@@ -346,14 +346,13 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
|
||||
}
|
||||
}
|
||||
|
||||
const exitIfNotSequenceEncodingSize = 4 + exitWithCodeEncodingSize
|
||||
|
||||
func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, code wazevoapi.ExitCode) {
|
||||
cbr := m.allocateInstr()
|
||||
cbr.asCondBr(c, invalidLabel, false /* ignored */)
|
||||
cbr.condBrOffsetResolve(exitWithCodeEncodingSize + 4 /* br offset is from the beginning of this instruction */)
|
||||
m.insert(cbr)
|
||||
m.lowerExitWithCode(execCtxVReg, code)
|
||||
// Conditional branch target is after exit.
|
||||
l := m.insertBrTargetLabel()
|
||||
cbr.asCondBr(c, l, false /* ignored */)
|
||||
}
|
||||
|
||||
func (m *machine) lowerFcopysign(x, y, ret ssa.Value) {
|
||||
@@ -456,8 +455,6 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
|
||||
|
||||
// If it is not undefined, we can return the result.
|
||||
ok := m.allocateInstr()
|
||||
ok.asCondBr(ne.asCond(), invalidLabel, false /* ignored */)
|
||||
ok.condBrOffsetResolve(4 /* fpuCmp */ + exitIfNotSequenceEncodingSize + exitWithCodeEncodingSize + 4)
|
||||
m.insert(ok)
|
||||
|
||||
// Otherwise, we have to choose the status depending on whether it is an overflow or a NaN conversion.
|
||||
@@ -470,6 +467,10 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
|
||||
m.exitIfNot(ctx, vc.asCond(), wazevoapi.ExitCodeInvalidConversionToInteger)
|
||||
// Otherwise, it is an overflow.
|
||||
m.lowerExitWithCode(ctx, wazevoapi.ExitCodeIntegerOverflow)
|
||||
|
||||
// Conditional branch target is after exit.
|
||||
l := m.insertBrTargetLabel()
|
||||
ok.asCondBr(ne.asCond(), l, false /* ignored */)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -807,10 +808,11 @@ func (m *machine) lowerExitIfTrueWithCode(execCtxVReg regalloc.VReg, cond ssa.Va
|
||||
|
||||
// We have to skip the entire exit sequence if the condition is false.
|
||||
cbr := m.allocateInstr()
|
||||
cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), invalidLabel, false /* ignored */)
|
||||
cbr.condBrOffsetResolve(exitWithCodeEncodingSize + 4 /* br offset is from the beginning of this instruction */)
|
||||
m.insert(cbr)
|
||||
m.lowerExitWithCode(execCtxVReg, code)
|
||||
// conditional branch target is after exit.
|
||||
l := m.insertBrTargetLabel()
|
||||
cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), l, false /* ignored */)
|
||||
}
|
||||
|
||||
func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
|
||||
|
||||
@@ -290,44 +290,50 @@ func TestMachine_lowerIDiv(t *testing.T) {
|
||||
name: "32bit unsigned", _64bit: false, signed: false,
|
||||
exp: `
|
||||
udiv w1?, w2?, w3?
|
||||
cbnz w3?, #0x20 (L0)
|
||||
cbnz w3?, L1
|
||||
movz x27, #0xa, lsl 0
|
||||
str w27, [x65535?]
|
||||
exit_sequence x65535?
|
||||
L1:
|
||||
`,
|
||||
},
|
||||
{name: "32bit signed", _64bit: false, signed: true, exp: `
|
||||
sdiv w1?, w2?, w3?
|
||||
cbnz w3?, #0x20 (L0)
|
||||
cbnz w3?, L1
|
||||
movz x27, #0xa, lsl 0
|
||||
str w27, [x65535?]
|
||||
exit_sequence x65535?
|
||||
L1:
|
||||
adds wzr, w3?, #0x1
|
||||
ccmp w2?, #0x1, #0x0, eq
|
||||
b.vc #0x20
|
||||
b.vc L2
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x65535?]
|
||||
exit_sequence x65535?
|
||||
L2:
|
||||
`},
|
||||
{name: "64bit unsigned", _64bit: true, signed: false, exp: `
|
||||
udiv x1?, x2?, x3?
|
||||
cbnz w3?, #0x20 (L0)
|
||||
cbnz w3?, L1
|
||||
movz x27, #0xa, lsl 0
|
||||
str w27, [x65535?]
|
||||
exit_sequence x65535?
|
||||
L1:
|
||||
`},
|
||||
{name: "64bit signed", _64bit: true, signed: true, exp: `
|
||||
sdiv x1?, x2?, x3?
|
||||
cbnz w3?, #0x20 (L0)
|
||||
cbnz w3?, L1
|
||||
movz x27, #0xa, lsl 0
|
||||
str w27, [x65535?]
|
||||
exit_sequence x65535?
|
||||
L1:
|
||||
adds xzr, x3?, #0x1
|
||||
ccmp x2?, #0x1, #0x0, eq
|
||||
b.vc #0x20
|
||||
b.vc L2
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x65535?]
|
||||
exit_sequence x65535?
|
||||
L2:
|
||||
`},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
@@ -349,20 +355,10 @@ func Test_exitWithCodeEncodingSize(t *testing.T) {
|
||||
m.FlushPendingInstructions()
|
||||
m.encode(m.perBlockHead)
|
||||
buf := m.compiler.Buf()
|
||||
require.Equal(t, "3b0080d23b0000b93d0840f93b0c40f97f0300913e1040f9c0035fd6", hex.EncodeToString(buf))
|
||||
require.Equal(t, "3b0080d23b0000b93d0840f93e1040f93b0c40f97f030091c0035fd600000014", hex.EncodeToString(buf))
|
||||
require.Equal(t, exitWithCodeEncodingSize, len(buf))
|
||||
}
|
||||
|
||||
func Test_exitIfNotSequenceEncodingSize(t *testing.T) {
|
||||
_, _, m := newSetupWithMockContext()
|
||||
m.exitIfNot(x1VReg, ne.asCond(), wazevoapi.ExitCodeGrowStack)
|
||||
m.FlushPendingInstructions()
|
||||
m.encode(m.perBlockHead)
|
||||
buf := m.compiler.Buf()
|
||||
require.Equal(t, "010100543b0080d23b0000b93d0840f93b0c40f97f0300913e1040f9c0035fd6", hex.EncodeToString(buf))
|
||||
require.Equal(t, exitIfNotSequenceEncodingSize, len(buf))
|
||||
}
|
||||
|
||||
func TestMachine_lowerFpuToInt(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
name string
|
||||
@@ -378,17 +374,19 @@ msr fpsr, xzr
|
||||
fcvtzu w1, s2
|
||||
mrs x27 fpsr
|
||||
subs xzr, x27, #0x1
|
||||
b.ne #0x44
|
||||
b.ne L2
|
||||
fcmp w2, w2
|
||||
b.vc #0x20
|
||||
b.vc L1
|
||||
movz x27, #0xc, lsl 0
|
||||
str w27, [x15]
|
||||
exit_sequence x15
|
||||
L1:
|
||||
movz x27, #0xb, lsl 0
|
||||
str w27, [x15]
|
||||
exit_sequence x15
|
||||
L2:
|
||||
`,
|
||||
expectedBytes: "3f441bd54100391e3b443bd57f0700f1210200544020221e070100549b0180d2fb0100b9fd0940f9fb0d40f97f030091fe1140f9c0035fd67b0180d2fb0100b9fd0940f9fb0d40f97f030091fe1140f9c0035fd6",
|
||||
expectedBytes: "3f441bd54100391e3b443bd57f0700f1010000544020221e070000549b0180d2fb0100b9fd0940f9fe1140f9fb0d40f97f030091c0035fd6000000147b0180d2fb0100b9fd0940f9fe1140f9fb0d40f97f030091c0035fd600000014",
|
||||
},
|
||||
{
|
||||
name: "nontrapping",
|
||||
|
||||
@@ -32,8 +32,9 @@ type (
|
||||
// ssaBlockIDToLabels maps an SSA block ID to the label.
|
||||
ssaBlockIDToLabels []label
|
||||
// labelPositions maps a label to the instructions of the region which the label represents.
|
||||
labelPositions map[label]*labelPosition
|
||||
orderedLabels []*labelPosition
|
||||
labelPositions map[label]*labelPosition
|
||||
orderedBlockLabels []*labelPosition
|
||||
labelPositionPool wazevoapi.Pool[labelPosition]
|
||||
|
||||
// addendsWorkQueue is used during address lowering, defined here for reuse.
|
||||
addendsWorkQueue []ssa.Value
|
||||
@@ -109,10 +110,11 @@ const (
|
||||
// NewBackend returns a new backend for arm64.
|
||||
func NewBackend() backend.Machine {
|
||||
m := &machine{
|
||||
instrPool: wazevoapi.NewPool[instruction](),
|
||||
labelPositions: make(map[label]*labelPosition),
|
||||
spillSlots: make(map[regalloc.VRegID]int64),
|
||||
nextLabel: invalidLabel,
|
||||
instrPool: wazevoapi.NewPool[instruction](),
|
||||
labelPositionPool: wazevoapi.NewPool[labelPosition](),
|
||||
labelPositions: make(map[label]*labelPosition),
|
||||
spillSlots: make(map[regalloc.VRegID]int64),
|
||||
nextLabel: invalidLabel,
|
||||
}
|
||||
m.regAllocFn.m = m
|
||||
m.regAllocFn.labelToRegAllocBlockIndex = make(map[label]int)
|
||||
@@ -122,12 +124,12 @@ func NewBackend() backend.Machine {
|
||||
// Reset implements backend.Machine.
|
||||
func (m *machine) Reset() {
|
||||
m.instrPool.Reset()
|
||||
m.labelPositionPool.Reset()
|
||||
m.currentSSABlk = nil
|
||||
m.nextLabel = invalidLabel
|
||||
m.pendingInstructions = m.pendingInstructions[:0]
|
||||
for _, v := range m.labelPositions {
|
||||
v.begin, v.end = nil, nil
|
||||
for l := label(0); l <= m.nextLabel; l++ {
|
||||
delete(m.labelPositions, l)
|
||||
}
|
||||
m.pendingInstructions = m.pendingInstructions[:0]
|
||||
m.clobberedRegs = m.clobberedRegs[:0]
|
||||
for key := range m.spillSlots {
|
||||
m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key))
|
||||
@@ -136,13 +138,14 @@ func (m *machine) Reset() {
|
||||
delete(m.spillSlots, regalloc.VRegID(key))
|
||||
}
|
||||
m.clobberedRegs = m.clobberedRegs[:0]
|
||||
m.orderedLabels = m.orderedLabels[:0]
|
||||
m.orderedBlockLabels = m.orderedBlockLabels[:0]
|
||||
m.regAllocFn.reset()
|
||||
m.spillSlotSize = 0
|
||||
m.unresolvedAddressModes = m.unresolvedAddressModes[:0]
|
||||
m.rootInstr = nil
|
||||
m.ssaBlockIDToLabels = m.ssaBlockIDToLabels[:0]
|
||||
m.perBlockHead, m.perBlockEnd = nil, nil
|
||||
m.nextLabel = invalidLabel
|
||||
}
|
||||
|
||||
// InitializeABI implements backend.Machine InitializeABI.
|
||||
@@ -198,10 +201,10 @@ func (m *machine) StartBlock(blk ssa.BasicBlock) {
|
||||
|
||||
labelPos, ok := m.labelPositions[l]
|
||||
if !ok {
|
||||
labelPos = &labelPosition{}
|
||||
labelPos = m.allocateLabelPosition()
|
||||
m.labelPositions[l] = labelPos
|
||||
}
|
||||
m.orderedLabels = append(m.orderedLabels, labelPos)
|
||||
m.orderedBlockLabels = append(m.orderedBlockLabels, labelPos)
|
||||
labelPos.begin, labelPos.end = end, end
|
||||
m.regAllocFn.addBlock(blk, l, labelPos)
|
||||
}
|
||||
@@ -223,6 +226,23 @@ func (m *machine) insert(i *instruction) {
|
||||
m.pendingInstructions = append(m.pendingInstructions, i)
|
||||
}
|
||||
|
||||
// insertBrTargetLabel allocates a new label, inserts a nop0 instruction carrying that label
// via m.insert, and records a labelPosition for the label whose begin and end are both that nop.
// It returns the label so that a previously allocated branch instruction can be pointed at it.
func (m *machine) insertBrTargetLabel() label {
|
||||
l := m.allocateLabel()
|
||||
nop := m.allocateInstr()
|
||||
nop.asNop0WithLabel(l)
|
||||
m.insert(nop)
|
||||
pos := m.allocateLabelPosition()
|
||||
pos.begin, pos.end = nop, nop
|
||||
m.labelPositions[l] = pos
|
||||
return l
|
||||
}
|
||||
|
||||
// allocateLabelPosition takes a labelPosition from m.labelPositionPool and resets it to the
// zero value before returning it.
func (m *machine) allocateLabelPosition() *labelPosition {
|
||||
l := m.labelPositionPool.Allocate()
|
||||
*l = labelPosition{}
|
||||
return l
|
||||
}
|
||||
|
||||
func (m *machine) FlushPendingInstructions() {
|
||||
l := len(m.pendingInstructions)
|
||||
if l == 0 {
|
||||
@@ -253,6 +273,7 @@ func (l label) String() string {
|
||||
// allocateInstr allocates an instruction from m.instrPool and resets it to the zero value
// before returning it.
|
||||
func (m *machine) allocateInstr() *instruction {
|
||||
instr := m.instrPool.Allocate()
|
||||
*instr = instruction{}
|
||||
return instr
|
||||
}
|
||||
|
||||
@@ -328,9 +349,21 @@ func (m *machine) ResolveRelativeAddresses() {
|
||||
|
||||
// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
|
||||
var offset int64
|
||||
for _, pos := range m.orderedLabels {
|
||||
for _, pos := range m.orderedBlockLabels {
|
||||
pos.binaryOffset = offset
|
||||
size := binarySize(pos.begin, pos.end)
|
||||
var size int64
|
||||
for cur := pos.begin; ; cur = cur.next {
|
||||
if cur.kind == nop0 {
|
||||
l := cur.nop0Label()
|
||||
if pos, ok := m.labelPositions[l]; ok {
|
||||
pos.binaryOffset = offset + size
|
||||
}
|
||||
}
|
||||
size += cur.size()
|
||||
if cur == pos.end {
|
||||
break
|
||||
}
|
||||
}
|
||||
pos.binarySize = size
|
||||
offset += size
|
||||
}
|
||||
|
||||
@@ -258,7 +258,7 @@ func TestMachine_CompileStackGrowCallSequence(t *testing.T) {
|
||||
str x27, [x0, #0x38]
|
||||
orr w17, wzr, #0x1
|
||||
str w17, [x0]
|
||||
adr x27, #0x1c
|
||||
adr x27, #0x20
|
||||
str x27, [x0, #0x30]
|
||||
exit_sequence x0
|
||||
ldr x1, [x0, #0x60]
|
||||
|
||||
@@ -228,10 +228,14 @@ func (m *machine) RegisterInfo(debug bool) *regalloc.RegisterInfo {
|
||||
regInfoDebug.RealRegToVReg = regInfo.RealRegToVReg
|
||||
regInfoDebug.RealRegName = regInfo.RealRegName
|
||||
regInfoDebug.AllocatableRegisters[regalloc.RegTypeFloat] = []regalloc.RealReg{
|
||||
v18, // One callee saved.
|
||||
v7, v6, v5, v4, v3, v2, v1, v0, // Allocatable sets == Argument registers.
|
||||
}
|
||||
// TODO: tests for high pressured int registers.
|
||||
regInfoDebug.AllocatableRegisters[regalloc.RegTypeInt] = regInfo.AllocatableRegisters[regalloc.RegTypeInt]
|
||||
regInfoDebug.AllocatableRegisters[regalloc.RegTypeInt] = []regalloc.RealReg{
|
||||
x29, x30, // Caller saved, and special ones. But they should be able to get allocated.
|
||||
x19, // One callee saved.
|
||||
x7, x6, x5, x4, x3, x2, x1, x0, // Argument registers (all caller saved).
|
||||
}
|
||||
return regInfoDebug
|
||||
}
|
||||
return regInfo
|
||||
|
||||
Reference in New Issue
Block a user