wazevo: adds tests for high register pressure: part2 (#1687)

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
Takeshi Yoneda
2023-09-06 13:29:10 +09:00
committed by GitHub
parent ffd75aefd0
commit 2c1dfc2a4b
11 changed files with 328 additions and 480 deletions

View File

@@ -1618,106 +1618,122 @@ L1 (SSA Block: blk0):
fcvtzs x0, d0
mrs x27 fpsr
subs xzr, x27, #0x1
b.ne #0x44
b.ne #0x4c, (L17)
fcmp d0, d0
b.vc #0x20
b.vc #0x24, (L16)
movz x27, #0xc, lsl 0
str w27, [x8]
exit_sequence x8
L16:
movz x27, #0xb, lsl 0
str w27, [x8]
exit_sequence x8
L17:
msr fpsr, xzr
fcvtzs x1, s1
mrs x27 fpsr
subs xzr, x27, #0x1
b.ne #0x44
b.ne #0x4c, (L15)
fcmp s1, s1
b.vc #0x20
b.vc #0x24, (L14)
movz x27, #0xc, lsl 0
str w27, [x8]
exit_sequence x8
L14:
movz x27, #0xb, lsl 0
str w27, [x8]
exit_sequence x8
L15:
msr fpsr, xzr
fcvtzs w2, d0
mrs x27 fpsr
subs xzr, x27, #0x1
b.ne #0x44
b.ne #0x4c, (L13)
fcmp d0, d0
b.vc #0x20
b.vc #0x24, (L12)
movz x27, #0xc, lsl 0
str w27, [x8]
exit_sequence x8
L12:
movz x27, #0xb, lsl 0
str w27, [x8]
exit_sequence x8
L13:
msr fpsr, xzr
fcvtzs w3, s1
mrs x27 fpsr
subs xzr, x27, #0x1
b.ne #0x44
b.ne #0x4c, (L11)
fcmp s1, s1
b.vc #0x20
b.vc #0x24, (L10)
movz x27, #0xc, lsl 0
str w27, [x8]
exit_sequence x8
L10:
movz x27, #0xb, lsl 0
str w27, [x8]
exit_sequence x8
L11:
msr fpsr, xzr
fcvtzu x4, d0
mrs x27 fpsr
subs xzr, x27, #0x1
b.ne #0x44
b.ne #0x4c, (L9)
fcmp d0, d0
b.vc #0x20
b.vc #0x24, (L8)
movz x27, #0xc, lsl 0
str w27, [x8]
exit_sequence x8
L8:
movz x27, #0xb, lsl 0
str w27, [x8]
exit_sequence x8
L9:
msr fpsr, xzr
fcvtzu x5, s1
mrs x27 fpsr
subs xzr, x27, #0x1
b.ne #0x44
b.ne #0x4c, (L7)
fcmp s1, s1
b.vc #0x20
b.vc #0x24, (L6)
movz x27, #0xc, lsl 0
str w27, [x8]
exit_sequence x8
L6:
movz x27, #0xb, lsl 0
str w27, [x8]
exit_sequence x8
L7:
msr fpsr, xzr
fcvtzu w6, d0
mrs x27 fpsr
subs xzr, x27, #0x1
b.ne #0x44
b.ne #0x4c, (L5)
fcmp d0, d0
b.vc #0x20
b.vc #0x24, (L4)
movz x27, #0xc, lsl 0
str w27, [x8]
exit_sequence x8
L4:
movz x27, #0xb, lsl 0
str w27, [x8]
exit_sequence x8
L5:
msr fpsr, xzr
fcvtzu w7, s1
mrs x27 fpsr
subs xzr, x27, #0x1
b.ne #0x44
b.ne #0x4c, (L3)
fcmp s1, s1
b.vc #0x20
b.vc #0x24, (L2)
movz x27, #0xc, lsl 0
str w27, [x8]
exit_sequence x8
L2:
movz x27, #0xb, lsl 0
str w27, [x8]
exit_sequence x8
L3:
fcvt s0, d0
fcvt d1, s1
ldr x30, [sp], #0x10
@@ -1882,7 +1898,6 @@ L1 (SSA Block: blk0):
str x24, [sp, #-0x10]!
str x25, [sp, #-0x10]!
str x26, [sp, #-0x10]!
str x28, [sp, #-0x10]!
str q18, [sp, #-0x10]!
str q19, [sp, #-0x10]!
str q20, [sp, #-0x10]!
@@ -1931,9 +1946,9 @@ L1 (SSA Block: blk0):
madd w25, w2, w25, wzr
movz w26, #0x13, lsl 0
madd w26, w2, w26, wzr
movz w28, #0x14, lsl 0
madd w28, w2, w28, wzr
add w26, w26, w28
movz w29, #0x14, lsl 0
madd w29, w2, w29, wzr
add w26, w26, w29
add w25, w25, w26
add w24, w24, w25
add w23, w23, w24
@@ -2021,7 +2036,6 @@ L1 (SSA Block: blk0):
ldr q20, [sp], #0x10
ldr q19, [sp], #0x10
ldr q18, [sp], #0x10
ldr x28, [sp], #0x10
ldr x26, [sp], #0x10
ldr x25, [sp], #0x10
ldr x24, [sp], #0x10
@@ -2150,10 +2164,11 @@ L1 (SSA Block: blk0):
ldr w5?, [x1?, #0x10]
add x6?, x4?, #0x4
subs xzr, x5?, x6?
b.hs #0x20
b.hs L2
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
L2:
ldr x8?, [x1?, #0x8]
add x11?, x8?, x4?
ldr w10?, [x11?]
@@ -2167,10 +2182,11 @@ L1 (SSA Block: blk0):
ldr w10, [x1, #0x10]
add x9, x8, #0x4
subs xzr, x10, x9
b.hs #0x20
b.hs #0x24, (L2)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L2:
ldr x9, [x1, #0x8]
add x8, x9, x8
ldr w0, [x8]
@@ -2188,10 +2204,11 @@ L1 (SSA Block: blk0):
ldr w8, [x1, #0x10]
add x9, x10, #0x4
subs xzr, x8, x9
b.hs #0x20
b.hs #0x24, (L10)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L10:
ldr x9, [x1, #0x8]
add x10, x9, x10
str w2, [x10]
@@ -2199,80 +2216,88 @@ L1 (SSA Block: blk0):
uxtw x11, w10
add x10, x11, #0x8
subs xzr, x8, x10
b.hs #0x20
b.hs #0x24, (L9)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L9:
add x10, x9, x11
str x3, [x10]
orr w10, wzr, #0x10
uxtw x11, w10
add x10, x11, #0x4
subs xzr, x8, x10
b.hs #0x20
b.hs #0x24, (L8)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L8:
add x10, x9, x11
str s0, [x10]
orr w10, wzr, #0x18
uxtw x11, w10
add x10, x11, #0x8
subs xzr, x8, x10
b.hs #0x20
b.hs #0x24, (L7)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L7:
add x10, x9, x11
str d1, [x10]
orr w10, wzr, #0x20
uxtw x11, w10
add x10, x11, #0x1
subs xzr, x8, x10
b.hs #0x20
b.hs #0x24, (L6)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L6:
add x10, x9, x11
strb w2, [x10]
movz w10, #0x28, lsl 0
uxtw x11, w10
add x10, x11, #0x2
subs xzr, x8, x10
b.hs #0x20
b.hs #0x24, (L5)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L5:
add x10, x9, x11
strh w2, [x10]
orr w10, wzr, #0x30
uxtw x11, w10
add x10, x11, #0x1
subs xzr, x8, x10
b.hs #0x20
b.hs #0x24, (L4)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L4:
add x10, x9, x11
strb w3, [x10]
orr w10, wzr, #0x38
uxtw x11, w10
add x10, x11, #0x2
subs xzr, x8, x10
b.hs #0x20
b.hs #0x24, (L3)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L3:
add x10, x9, x11
strh w3, [x10]
orr w10, wzr, #0x40
uxtw x11, w10
add x10, x11, #0x4
subs xzr, x8, x10
b.hs #0x20
b.hs #0x24, (L2)
movz x27, #0x4, lsl 0
str w27, [x0]
exit_sequence x0
L2:
add x8, x9, x11
str w3, [x8]
ldr x30, [sp], #0x10
@@ -2281,296 +2306,6 @@ L1 (SSA Block: blk0):
},
{
name: "memory_loads", m: testcases.MemoryLoads.Module,
afterLoweringARM64: `
L1 (SSA Block: blk0):
mov x0?, x0
mov x1?, x1
mov x2?, x2
uxtw x4?, w2?
ldr w5?, [x1?, #0x10]
add x6?, x4?, #0x4
subs xzr, x5?, x6?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
ldr x8?, [x1?, #0x8]
add x200?, x8?, x4?
ldr w10?, [x200?]
uxtw x12?, w2?
add x13?, x12?, #0x8
subs xzr, x5?, x13?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x199?, x8?, x12?
ldr x16?, [x199?]
uxtw x18?, w2?
add x19?, x18?, #0x4
subs xzr, x5?, x19?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x198?, x8?, x18?
ldr s22?, [x198?]
uxtw x24?, w2?
add x25?, x24?, #0x8
subs xzr, x5?, x25?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x197?, x8?, x24?
ldr d28?, [x197?]
uxtw x30?, w2?
add x31?, x30?, #0x13
subs xzr, x5?, x31?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x196?, x8?, x30?
ldr w34?, [x196?, #0xf]
uxtw x36?, w2?
add x37?, x36?, #0x17
subs xzr, x5?, x37?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x195?, x8?, x36?
ldr x40?, [x195?, #0xf]
uxtw x42?, w2?
add x43?, x42?, #0x13
subs xzr, x5?, x43?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x194?, x8?, x42?
ldr s46?, [x194?, #0xf]
uxtw x48?, w2?
add x49?, x48?, #0x17
subs xzr, x5?, x49?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x193?, x8?, x48?
ldr d52?, [x193?, #0xf]
uxtw x54?, w2?
add x55?, x54?, #0x1
subs xzr, x5?, x55?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x192?, x8?, x54?
ldrsb w58?, [x192?]
uxtw x60?, w2?
add x61?, x60?, #0x10
subs xzr, x5?, x61?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x191?, x8?, x60?
ldrsb w64?, [x191?, #0xf]
uxtw x66?, w2?
add x67?, x66?, #0x1
subs xzr, x5?, x67?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x190?, x8?, x66?
ldrb w70?, [x190?]
uxtw x72?, w2?
add x73?, x72?, #0x10
subs xzr, x5?, x73?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x189?, x8?, x72?
ldrb w76?, [x189?, #0xf]
uxtw x78?, w2?
add x79?, x78?, #0x2
subs xzr, x5?, x79?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x188?, x8?, x78?
ldrsh w82?, [x188?]
uxtw x84?, w2?
add x85?, x84?, #0x11
subs xzr, x5?, x85?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x187?, x8?, x84?
ldrsh w88?, [x187?, #0xf]
uxtw x90?, w2?
add x91?, x90?, #0x2
subs xzr, x5?, x91?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x186?, x8?, x90?
ldrh w94?, [x186?]
uxtw x96?, w2?
add x97?, x96?, #0x11
subs xzr, x5?, x97?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x185?, x8?, x96?
ldrh w100?, [x185?, #0xf]
uxtw x102?, w2?
add x103?, x102?, #0x1
subs xzr, x5?, x103?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x184?, x8?, x102?
ldrsb w106?, [x184?]
uxtw x108?, w2?
add x109?, x108?, #0x10
subs xzr, x5?, x109?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x183?, x8?, x108?
ldrsb w112?, [x183?, #0xf]
uxtw x114?, w2?
add x115?, x114?, #0x1
subs xzr, x5?, x115?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x182?, x8?, x114?
ldrb w118?, [x182?]
uxtw x120?, w2?
add x121?, x120?, #0x10
subs xzr, x5?, x121?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x181?, x8?, x120?
ldrb w124?, [x181?, #0xf]
uxtw x126?, w2?
add x127?, x126?, #0x2
subs xzr, x5?, x127?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x180?, x8?, x126?
ldrsh w130?, [x180?]
uxtw x132?, w2?
add x133?, x132?, #0x11
subs xzr, x5?, x133?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x179?, x8?, x132?
ldrsh w136?, [x179?, #0xf]
uxtw x138?, w2?
add x139?, x138?, #0x2
subs xzr, x5?, x139?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x178?, x8?, x138?
ldrh w142?, [x178?]
uxtw x144?, w2?
add x145?, x144?, #0x11
subs xzr, x5?, x145?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x177?, x8?, x144?
ldrh w148?, [x177?, #0xf]
uxtw x150?, w2?
add x151?, x150?, #0x4
subs xzr, x5?, x151?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x176?, x8?, x150?
ldrs w154?, [x176?]
uxtw x156?, w2?
add x157?, x156?, #0x13
subs xzr, x5?, x157?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x175?, x8?, x156?
ldrs w160?, [x175?, #0xf]
uxtw x162?, w2?
add x163?, x162?, #0x4
subs xzr, x5?, x163?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x174?, x8?, x162?
ldr w166?, [x174?]
uxtw x168?, w2?
add x169?, x168?, #0x13
subs xzr, x5?, x169?
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x0?]
exit_sequence x0?
add x173?, x8?, x168?
ldr w172?, [x173?, #0xf]
str x172?, [#ret_space, #0x78]
str x166?, [#ret_space, #0x70]
str x160?, [#ret_space, #0x68]
str x154?, [#ret_space, #0x60]
str x148?, [#ret_space, #0x58]
str x142?, [#ret_space, #0x50]
str x136?, [#ret_space, #0x48]
str x130?, [#ret_space, #0x40]
str x124?, [#ret_space, #0x38]
str x118?, [#ret_space, #0x30]
str x112?, [#ret_space, #0x28]
str x106?, [#ret_space, #0x20]
str w100?, [#ret_space, #0x18]
str w94?, [#ret_space, #0x10]
str w88?, [#ret_space, #0x8]
str w82?, [#ret_space, #0x0]
mov x7, x76?
mov x6, x70?
mov x5, x64?
mov x4, x58?
mov v3.8b, v52?.8b
mov v2.8b, v46?.8b
mov x3, x40?
mov x2, x34?
mov v1.8b, v28?.8b
mov v0.8b, v22?.8b
mov x1, x16?
mov x0, x10?
ret
`,
afterFinalizeARM64: `
L1 (SSA Block: blk0):
str x30, [sp, #-0x10]!
@@ -2582,280 +2317,306 @@ L1 (SSA Block: blk0):
str x24, [sp, #-0x10]!
str x25, [sp, #-0x10]!
str x26, [sp, #-0x10]!
str x28, [sp, #-0x10]!
mov x8, x0
uxtw x11, w2
ldr w9, [x1, #0x10]
add x10, x11, #0x4
subs xzr, x9, x10
b.hs #0x20
b.hs #0x24, (L29)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L29:
ldr x10, [x1, #0x8]
add x11, x10, x11
ldr w0, [x11]
uxtw x12, w2
add x11, x12, #0x8
subs xzr, x9, x11
b.hs #0x20
b.hs #0x24, (L28)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L28:
add x11, x10, x12
ldr x1, [x11]
uxtw x12, w2
add x11, x12, #0x4
subs xzr, x9, x11
b.hs #0x20
b.hs #0x24, (L27)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L27:
add x11, x10, x12
ldr s0, [x11]
uxtw x12, w2
add x11, x12, #0x8
subs xzr, x9, x11
b.hs #0x20
b.hs #0x24, (L26)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L26:
add x11, x10, x12
ldr d1, [x11]
uxtw x12, w2
add x11, x12, #0x13
subs xzr, x9, x11
b.hs #0x20
b.hs #0x24, (L25)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L25:
add x11, x10, x12
ldr w11, [x11, #0xf]
uxtw x13, w2
add x12, x13, #0x17
subs xzr, x9, x12
b.hs #0x20
b.hs #0x24, (L24)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L24:
add x12, x10, x13
ldr x3, [x12, #0xf]
uxtw x13, w2
add x12, x13, #0x13
subs xzr, x9, x12
b.hs #0x20
b.hs #0x24, (L23)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L23:
add x12, x10, x13
ldr s2, [x12, #0xf]
uxtw x13, w2
add x12, x13, #0x17
subs xzr, x9, x12
b.hs #0x20
b.hs #0x24, (L22)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L22:
add x12, x10, x13
ldr d3, [x12, #0xf]
uxtw x13, w2
add x12, x13, #0x1
subs xzr, x9, x12
b.hs #0x20
b.hs #0x24, (L21)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L21:
add x12, x10, x13
ldrsb w4, [x12]
uxtw x13, w2
add x12, x13, #0x10
subs xzr, x9, x12
b.hs #0x20
b.hs #0x24, (L20)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L20:
add x12, x10, x13
ldrsb w5, [x12, #0xf]
uxtw x13, w2
add x12, x13, #0x1
subs xzr, x9, x12
b.hs #0x20
b.hs #0x24, (L19)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L19:
add x12, x10, x13
ldrb w6, [x12]
uxtw x13, w2
add x12, x13, #0x10
subs xzr, x9, x12
b.hs #0x20
b.hs #0x24, (L18)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L18:
add x12, x10, x13
ldrb w7, [x12, #0xf]
uxtw x13, w2
add x12, x13, #0x2
subs xzr, x9, x12
b.hs #0x20
b.hs #0x24, (L17)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L17:
add x12, x10, x13
ldrsh w12, [x12]
uxtw x14, w2
add x13, x14, #0x11
subs xzr, x9, x13
b.hs #0x20
b.hs #0x24, (L16)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L16:
add x13, x10, x14
ldrsh w13, [x13, #0xf]
uxtw x15, w2
add x14, x15, #0x2
subs xzr, x9, x14
b.hs #0x20
b.hs #0x24, (L15)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L15:
add x14, x10, x15
ldrh w14, [x14]
uxtw x16, w2
add x15, x16, #0x11
subs xzr, x9, x15
b.hs #0x20
b.hs #0x24, (L14)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L14:
add x15, x10, x16
ldrh w15, [x15, #0xf]
uxtw x17, w2
add x16, x17, #0x1
subs xzr, x9, x16
b.hs #0x20
b.hs #0x24, (L13)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L13:
add x16, x10, x17
ldrsb w16, [x16]
uxtw x18, w2
add x17, x18, #0x10
subs xzr, x9, x17
b.hs #0x20
b.hs #0x24, (L12)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L12:
add x17, x10, x18
ldrsb w17, [x17, #0xf]
uxtw x19, w2
add x18, x19, #0x1
subs xzr, x9, x18
b.hs #0x20
b.hs #0x24, (L11)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L11:
add x18, x10, x19
ldrb w18, [x18]
uxtw x20, w2
add x19, x20, #0x10
subs xzr, x9, x19
b.hs #0x20
b.hs #0x24, (L10)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L10:
add x19, x10, x20
ldrb w19, [x19, #0xf]
uxtw x21, w2
add x20, x21, #0x2
subs xzr, x9, x20
b.hs #0x20
b.hs #0x24, (L9)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L9:
add x20, x10, x21
ldrsh w20, [x20]
uxtw x22, w2
add x21, x22, #0x11
subs xzr, x9, x21
b.hs #0x20
b.hs #0x24, (L8)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L8:
add x21, x10, x22
ldrsh w21, [x21, #0xf]
uxtw x23, w2
add x22, x23, #0x2
subs xzr, x9, x22
b.hs #0x20
b.hs #0x24, (L7)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L7:
add x22, x10, x23
ldrh w22, [x22]
uxtw x24, w2
add x23, x24, #0x11
subs xzr, x9, x23
b.hs #0x20
b.hs #0x24, (L6)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L6:
add x23, x10, x24
ldrh w23, [x23, #0xf]
uxtw x25, w2
add x24, x25, #0x4
subs xzr, x9, x24
b.hs #0x20
b.hs #0x24, (L5)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L5:
add x24, x10, x25
ldrs w24, [x24]
uxtw x26, w2
add x25, x26, #0x13
subs xzr, x9, x25
b.hs #0x20
b.hs #0x24, (L4)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L4:
add x25, x10, x26
ldrs w25, [x25, #0xf]
uxtw x28, w2
add x26, x28, #0x4
subs xzr, x9, x26
b.hs #0x20
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
add x26, x10, x28
ldr w26, [x26]
uxtw x29, w2
add x28, x29, #0x13
subs xzr, x9, x28
b.hs #0x20
add x26, x29, #0x4
subs xzr, x9, x26
b.hs #0x24, (L3)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
add x8, x10, x29
L3:
add x26, x10, x29
ldr w26, [x26]
uxtw x30, w2
add x29, x30, #0x13
subs xzr, x9, x29
b.hs #0x24, (L2)
movz x27, #0x4, lsl 0
str w27, [x8]
exit_sequence x8
L2:
add x8, x10, x30
ldr w8, [x8, #0xf]
str x8, [sp, #0x118]
str x26, [sp, #0x110]
str x25, [sp, #0x108]
str x24, [sp, #0x100]
str x23, [sp, #0xf8]
str x22, [sp, #0xf0]
str x21, [sp, #0xe8]
str x20, [sp, #0xe0]
str x19, [sp, #0xd8]
str x18, [sp, #0xd0]
str x17, [sp, #0xc8]
str x16, [sp, #0xc0]
str w15, [sp, #0xb8]
str w14, [sp, #0xb0]
str w13, [sp, #0xa8]
str w12, [sp, #0xa0]
str x8, [sp, #0x108]
str x26, [sp, #0x100]
str x25, [sp, #0xf8]
str x24, [sp, #0xf0]
str x23, [sp, #0xe8]
str x22, [sp, #0xe0]
str x21, [sp, #0xd8]
str x20, [sp, #0xd0]
str x19, [sp, #0xc8]
str x18, [sp, #0xc0]
str x17, [sp, #0xb8]
str x16, [sp, #0xb0]
str w15, [sp, #0xa8]
str w14, [sp, #0xa0]
str w13, [sp, #0x98]
str w12, [sp, #0x90]
mov x2, x11
ldr x28, [sp], #0x10
ldr x26, [sp], #0x10
ldr x25, [sp], #0x10
ldr x24, [sp], #0x10

View File

@@ -15,13 +15,13 @@ const xArgRetRegMax, vArgRetRegMax = x7, v7 // x0-x7 & v0-v7.
var regInfo = &regalloc.RegisterInfo{
AllocatableRegisters: [regalloc.RegTypeNum][]regalloc.RealReg{
// We don't allocate:
// - x27(=tmpReg) because of the reason described on tmpReg.
// - x18 which is reserved by the macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
// - x18: Reserved by the macOS: https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms#Respect-the-purpose-of-specific-CPU-registers
// - x28: Reserved by Go runtime.
// - x27(=tmpReg): because of the reason described on tmpReg.
regalloc.RegTypeInt: {
x8, x9, x10, x11, x12, x13, x14, x15,
x16, x17, x18, x19, x20, x21, x22, x23, x24, x25,
// No x27 here.
x26, x28, x29, x30,
x26, x29, x30,
// These are the argument/return registers. Less preferred in the allocation.
x7, x6, x5, x4, x3, x2, x1, x0,
},
@@ -37,7 +37,7 @@ var regInfo = &regalloc.RegisterInfo{
v18: {}, v19: {}, v20: {}, v21: {}, v22: {}, v23: {}, v24: {}, v25: {}, v26: {}, v27: {}, v28: {}, v29: {}, v30: {}, v31: {},
},
CallerSavedRegisters: map[regalloc.RealReg]struct{}{
x0: {}, x1: {}, x2: {}, x3: {}, x4: {}, x5: {}, x6: {}, x7: {}, x8: {}, x9: {}, x10: {}, x11: {}, x12: {}, x13: {}, x14: {}, x15: {}, x16: {}, x17: {},
x0: {}, x1: {}, x2: {}, x3: {}, x4: {}, x5: {}, x6: {}, x7: {}, x8: {}, x9: {}, x10: {}, x11: {}, x12: {}, x13: {}, x14: {}, x15: {}, x16: {}, x17: {}, x29: {}, x30: {},
v0: {}, v1: {}, v2: {}, v3: {}, v4: {}, v5: {}, v6: {}, v7: {}, v8: {}, v9: {}, v10: {}, v11: {}, v12: {}, v13: {}, v14: {}, v15: {}, v16: {}, v17: {},
},
RealRegToVReg: []regalloc.VReg{

View File

@@ -73,7 +73,7 @@ func TestMachine_CompileGoFunctionTrampoline(t *testing.T) {
str w17, [x0]
mov x27, sp
str x27, [x0, #0x38]
adr x27, #0x1c
adr x27, #0x20
str x27, [x0, #0x30]
exit_sequence x0
ldr x19, [x0, #0x60]
@@ -155,7 +155,7 @@ func TestMachine_CompileGoFunctionTrampoline(t *testing.T) {
str w17, [x0]
mov x27, sp
str x27, [x0, #0x38]
adr x27, #0x1c
adr x27, #0x20
str x27, [x0, #0x30]
exit_sequence x0
ldr x19, [x0, #0x60]
@@ -224,7 +224,7 @@ func TestMachine_CompileGoFunctionTrampoline(t *testing.T) {
str w17, [x0]
mov x27, sp
str x27, [x0, #0x38]
adr x27, #0x1c
adr x27, #0x20
str x27, [x0, #0x30]
exit_sequence x0
ldr x19, [x0, #0x60]

View File

@@ -401,6 +401,15 @@ func (i *instruction) asNop0() {
i.kind = nop0
}
// asNop0WithLabel turns this instruction into a nop0 that carries the given label.
//
// The label is stored in the u1 field as a uint64 and can be read back with
// nop0Label.
func (i *instruction) asNop0WithLabel(l label) {
	i.u1 = uint64(l)
	i.kind = nop0
}
// nop0Label returns the content of the u1 field interpreted as a label.
//
// It is the read-side counterpart of asNop0WithLabel, which stores the label in u1.
func (i *instruction) nop0Label() label {
	raw := i.u1
	return label(raw)
}
func (i *instruction) asRet(abi *abiImpl) {
i.kind = ret
i.abi = abi
@@ -792,7 +801,12 @@ func (i *instruction) String() (str string) {
switch i.kind {
case nop0:
str = "nop0"
if i.u1 != 0 {
l := label(i.u1)
str = fmt.Sprintf("%s:", l)
} else {
str = "nop0"
}
case aluRRR:
size := is64SizeBitToSize(i.u3)
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
@@ -1741,17 +1755,7 @@ func (s shiftOp) String() string {
panic(int(s))
}
func binarySize(begin, end *instruction) (size int64) {
for cur := begin; ; cur = cur.next {
size += cur.size()
if cur == end {
break
}
}
return size
}
const exitSequenceSize = 5 * 4 // 5 instructions as in encodeExitSequence.
const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence.
// size returns the size of the instruction in encoded bytes.
func (i *instruction) size() int64 {

View File

@@ -81,17 +81,7 @@ func (i *instruction) encode(c backend.Compiler) {
to, from := i.rd.realReg(), i.rn.realReg()
toIsSp := to == sp
fromIsSp := from == sp
if toIsSp || fromIsSp {
// This is an alias of ADD (immediate):
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate--
c.Emit4Bytes(encodeAddSubtractImmediate(0b100, 0, 0,
regNumberInEncoding[from], regNumberInEncoding[to]),
)
} else {
// This is an alias of ORR (shifted register):
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--
c.Emit4Bytes(encodeLogicalShiftedRegister(0b101, 0, regNumberInEncoding[from], 0, regNumberInEncoding[xzr], regNumberInEncoding[to]))
}
c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp))
case loadP64, storeP64:
rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
amode := i.amode
@@ -216,7 +206,11 @@ func (i *instruction) encode(c backend.Compiler) {
c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5)
case udf:
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined-?lang=en
c.Emit4Bytes(0)
if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
c.Emit4Bytes(dummyInstruction)
} else {
c.Emit4Bytes(0)
}
case adr:
c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1)))
case cSel:
@@ -305,6 +299,18 @@ func (i *instruction) encode(c backend.Compiler) {
}
}
func encodeMov64(rd, rn uint32, toIsSp, fromIsSp bool) uint32 {
if toIsSp || fromIsSp {
// This is an alias of ADD (immediate):
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate--
return encodeAddSubtractImmediate(0b100, 0, 0, rn, rd)
} else {
// This is an alias of ORR (shifted register):
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--
return encodeLogicalShiftedRegister(0b101, 0, rn, 0, regNumberInEncoding[xzr], rd)
}
}
// encodeSystemRegisterMove encodes as "System register move" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
//
@@ -1290,12 +1296,30 @@ func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []ui
// encodeExitSequence matches the implementation detail of abiImpl.emitGoEntryPreamble.
func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) {
// Restore the FP, SP and LR, and return to the Go code:
// ldr fp, [savedExecutionContextPtr, #OriginalFramePointer]
// ldr tmp, [savedExecutionContextPtr, #OriginalStackPointer]
// ldr lr, [ctxReg, #GoReturnAddress]
// ldr fp, [ctxReg, #OriginalFramePointer]
// ldr tmp, [ctxReg, #OriginalStackPointer]
// mov sp, tmp ;; sp cannot be str'ed directly.
// ldr lr, [savedExecutionContextPtr, #GoReturnAddress]
// ret ;; --> return to the Go code
var ctxEvicted bool
if ctx := ctxReg.RealReg(); ctx == fp || ctx == lr {
// In order to avoid overwriting the context register, we move ctxReg to tmp.
c.Emit4Bytes(encodeMov64(regNumberInEncoding[tmp], regNumberInEncoding[ctx], false, false))
ctxReg = tmpRegVReg
ctxEvicted = true
}
restoreLr := encodeLoadOrStore(
uLoad64,
regNumberInEncoding[lr],
addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: ctxReg,
imm: wazevoapi.ExecutionContextOffsets.GoReturnAddress.I64(),
},
)
restoreFp := encodeLoadOrStore(
uLoad64,
regNumberInEncoding[fp],
@@ -1319,21 +1343,16 @@ func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) {
movTmpToSp := encodeAddSubtractImmediate(0b100, 0, 0,
regNumberInEncoding[tmp], regNumberInEncoding[sp])
restoreLr := encodeLoadOrStore(
uLoad64,
regNumberInEncoding[lr],
addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: ctxReg,
imm: wazevoapi.ExecutionContextOffsets.GoReturnAddress.I64(),
},
)
c.Emit4Bytes(restoreFp)
c.Emit4Bytes(restoreLr)
c.Emit4Bytes(restoreSpToTmp)
c.Emit4Bytes(movTmpToSp)
c.Emit4Bytes(restoreLr)
c.Emit4Bytes(encodeRet())
if !ctxEvicted {
// In order to have the fixed-length exit sequence, we need to pad the binary.
// Since this will never be reached, we insert a dummy instruction.
c.Emit4Bytes(dummyInstruction)
}
}
func encodeRet() uint32 {

View File

@@ -1009,15 +1009,42 @@ func TestInstruction_encoding_store(t *testing.T) {
}
func Test_encodeExitSequence(t *testing.T) {
m := &mockCompiler{}
encodeExitSequence(m, x22VReg)
// ldr x29, [x22, #0x10]
// ldr x27, [x22, #0x18]
// mov sp, x27
// ldr x30, [x22, #0x20]
// ret
require.Equal(t, "dd0a40f9db0e40f97f030091de1240f9c0035fd6", hex.EncodeToString(m.buf))
require.Equal(t, len(m.buf), exitSequenceSize)
t.Run("no overlap", func(t *testing.T) {
m := &mockCompiler{}
encodeExitSequence(m, x22VReg)
// ldr x29, [x22, #0x10]
// ldr x30, [x22, #0x20]
// ldr x27, [x22, #0x18]
// mov sp, x27
// ret
// b #0x14 ;; dummy
require.Equal(t, "dd0a40f9de1240f9db0e40f97f030091c0035fd600000014", hex.EncodeToString(m.buf))
require.Equal(t, len(m.buf), exitSequenceSize)
})
t.Run("fp", func(t *testing.T) {
m := &mockCompiler{}
encodeExitSequence(m, fpVReg)
// mov x27, x29
// ldr x29, [x27, #0x10]
// ldr x30, [x27, #0x20]
// ldr x27, [x27, #0x18]
// mov sp, x27
// ret
require.Equal(t, "fb031daa7d0b40f97e1340f97b0f40f97f030091c0035fd6", hex.EncodeToString(m.buf))
require.Equal(t, len(m.buf), exitSequenceSize)
})
t.Run("lr", func(t *testing.T) {
m := &mockCompiler{}
encodeExitSequence(m, lrVReg)
// mov x27, x30
// ldr x29, [x27, #0x10]
// ldr x30, [x27, #0x20]
// ldr x27, [x27, #0x18]
// mov sp, x27
// ret
require.Equal(t, "fb031eaa7d0b40f97e1340f97b0f40f97f030091c0035fd6", hex.EncodeToString(m.buf))
require.Equal(t, len(m.buf), exitSequenceSize)
})
}
func Test_lowerExitWithCodeEncodingSize(t *testing.T) {

View File

@@ -346,14 +346,13 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
}
}
const exitIfNotSequenceEncodingSize = 4 + exitWithCodeEncodingSize
func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, code wazevoapi.ExitCode) {
cbr := m.allocateInstr()
cbr.asCondBr(c, invalidLabel, false /* ignored */)
cbr.condBrOffsetResolve(exitWithCodeEncodingSize + 4 /* br offset is from the beginning of this instruction */)
m.insert(cbr)
m.lowerExitWithCode(execCtxVReg, code)
// Conditional branch target is after exit.
l := m.insertBrTargetLabel()
cbr.asCondBr(c, l, false /* ignored */)
}
func (m *machine) lowerFcopysign(x, y, ret ssa.Value) {
@@ -456,8 +455,6 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
// If it is not undefined, we can return the result.
ok := m.allocateInstr()
ok.asCondBr(ne.asCond(), invalidLabel, false /* ignored */)
ok.condBrOffsetResolve(4 /* fpuCmp */ + exitIfNotSequenceEncodingSize + exitWithCodeEncodingSize + 4)
m.insert(ok)
// Otherwise, we have to choose the status depending on it is overflow or NaN conversion.
@@ -470,6 +467,10 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
m.exitIfNot(ctx, vc.asCond(), wazevoapi.ExitCodeInvalidConversionToInteger)
// Otherwise, it is an overflow.
m.lowerExitWithCode(ctx, wazevoapi.ExitCodeIntegerOverflow)
// Conditional branch target is after exit.
l := m.insertBrTargetLabel()
ok.asCondBr(ne.asCond(), l, false /* ignored */)
}
}
@@ -807,10 +808,11 @@ func (m *machine) lowerExitIfTrueWithCode(execCtxVReg regalloc.VReg, cond ssa.Va
// We have to skip the entire exit sequence if the condition is false.
cbr := m.allocateInstr()
cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), invalidLabel, false /* ignored */)
cbr.condBrOffsetResolve(exitWithCodeEncodingSize + 4 /* br offset is from the beginning of this instruction */)
m.insert(cbr)
m.lowerExitWithCode(execCtxVReg, code)
// conditional branch target is after exit.
l := m.insertBrTargetLabel()
cbr.asCondBr(condFlagFromSSAIntegerCmpCond(c).invert().asCond(), l, false /* ignored */)
}
func (m *machine) lowerSelect(c, x, y, result ssa.Value) {

View File

@@ -290,44 +290,50 @@ func TestMachine_lowerIDiv(t *testing.T) {
name: "32bit unsigned", _64bit: false, signed: false,
exp: `
udiv w1?, w2?, w3?
cbnz w3?, #0x20 (L0)
cbnz w3?, L1
movz x27, #0xa, lsl 0
str w27, [x65535?]
exit_sequence x65535?
L1:
`,
},
{name: "32bit signed", _64bit: false, signed: true, exp: `
sdiv w1?, w2?, w3?
cbnz w3?, #0x20 (L0)
cbnz w3?, L1
movz x27, #0xa, lsl 0
str w27, [x65535?]
exit_sequence x65535?
L1:
adds wzr, w3?, #0x1
ccmp w2?, #0x1, #0x0, eq
b.vc #0x20
b.vc L2
movz x27, #0xb, lsl 0
str w27, [x65535?]
exit_sequence x65535?
L2:
`},
{name: "64bit unsigned", _64bit: true, signed: false, exp: `
udiv x1?, x2?, x3?
cbnz w3?, #0x20 (L0)
cbnz w3?, L1
movz x27, #0xa, lsl 0
str w27, [x65535?]
exit_sequence x65535?
L1:
`},
{name: "64bit signed", _64bit: true, signed: true, exp: `
sdiv x1?, x2?, x3?
cbnz w3?, #0x20 (L0)
cbnz w3?, L1
movz x27, #0xa, lsl 0
str w27, [x65535?]
exit_sequence x65535?
L1:
adds xzr, x3?, #0x1
ccmp x2?, #0x1, #0x0, eq
b.vc #0x20
b.vc L2
movz x27, #0xb, lsl 0
str w27, [x65535?]
exit_sequence x65535?
L2:
`},
} {
t.Run(tc.name, func(t *testing.T) {
@@ -349,20 +355,10 @@ func Test_exitWithCodeEncodingSize(t *testing.T) {
m.FlushPendingInstructions()
m.encode(m.perBlockHead)
buf := m.compiler.Buf()
require.Equal(t, "3b0080d23b0000b93d0840f93b0c40f97f0300913e1040f9c0035fd6", hex.EncodeToString(buf))
require.Equal(t, "3b0080d23b0000b93d0840f93e1040f93b0c40f97f030091c0035fd600000014", hex.EncodeToString(buf))
require.Equal(t, exitWithCodeEncodingSize, len(buf))
}
func Test_exitIfNotSequenceEncodingSize(t *testing.T) {
_, _, m := newSetupWithMockContext()
m.exitIfNot(x1VReg, ne.asCond(), wazevoapi.ExitCodeGrowStack)
m.FlushPendingInstructions()
m.encode(m.perBlockHead)
buf := m.compiler.Buf()
require.Equal(t, "010100543b0080d23b0000b93d0840f93b0c40f97f0300913e1040f9c0035fd6", hex.EncodeToString(buf))
require.Equal(t, exitIfNotSequenceEncodingSize, len(buf))
}
func TestMachine_lowerFpuToInt(t *testing.T) {
for _, tc := range []struct {
name string
@@ -378,17 +374,19 @@ msr fpsr, xzr
fcvtzu w1, s2
mrs x27 fpsr
subs xzr, x27, #0x1
b.ne #0x44
b.ne L2
fcmp w2, w2
b.vc #0x20
b.vc L1
movz x27, #0xc, lsl 0
str w27, [x15]
exit_sequence x15
L1:
movz x27, #0xb, lsl 0
str w27, [x15]
exit_sequence x15
L2:
`,
expectedBytes: "3f441bd54100391e3b443bd57f0700f1210200544020221e070100549b0180d2fb0100b9fd0940f9fb0d40f97f030091fe1140f9c0035fd67b0180d2fb0100b9fd0940f9fb0d40f97f030091fe1140f9c0035fd6",
expectedBytes: "3f441bd54100391e3b443bd57f0700f1010000544020221e070000549b0180d2fb0100b9fd0940f9fe1140f9fb0d40f97f030091c0035fd6000000147b0180d2fb0100b9fd0940f9fe1140f9fb0d40f97f030091c0035fd600000014",
},
{
name: "nontrapping",

View File

@@ -32,8 +32,9 @@ type (
// ssaBlockIDToLabels maps an SSA block ID to the label.
ssaBlockIDToLabels []label
// labelPositions maps a label to the position (begin and end instructions) of the region which the label represents.
labelPositions map[label]*labelPosition
orderedLabels []*labelPosition
labelPositions map[label]*labelPosition
orderedBlockLabels []*labelPosition
labelPositionPool wazevoapi.Pool[labelPosition]
// addendsWorkQueue is used during address lowering, defined here for reuse.
addendsWorkQueue []ssa.Value
@@ -109,10 +110,11 @@ const (
// NewBackend returns a new backend for arm64.
func NewBackend() backend.Machine {
m := &machine{
instrPool: wazevoapi.NewPool[instruction](),
labelPositions: make(map[label]*labelPosition),
spillSlots: make(map[regalloc.VRegID]int64),
nextLabel: invalidLabel,
instrPool: wazevoapi.NewPool[instruction](),
labelPositionPool: wazevoapi.NewPool[labelPosition](),
labelPositions: make(map[label]*labelPosition),
spillSlots: make(map[regalloc.VRegID]int64),
nextLabel: invalidLabel,
}
m.regAllocFn.m = m
m.regAllocFn.labelToRegAllocBlockIndex = make(map[label]int)
@@ -122,12 +124,12 @@ func NewBackend() backend.Machine {
// Reset implements backend.Machine.
func (m *machine) Reset() {
m.instrPool.Reset()
m.labelPositionPool.Reset()
m.currentSSABlk = nil
m.nextLabel = invalidLabel
m.pendingInstructions = m.pendingInstructions[:0]
for _, v := range m.labelPositions {
v.begin, v.end = nil, nil
for l := label(0); l <= m.nextLabel; l++ {
delete(m.labelPositions, l)
}
m.pendingInstructions = m.pendingInstructions[:0]
m.clobberedRegs = m.clobberedRegs[:0]
for key := range m.spillSlots {
m.clobberedRegs = append(m.clobberedRegs, regalloc.VReg(key))
@@ -136,13 +138,14 @@ func (m *machine) Reset() {
delete(m.spillSlots, regalloc.VRegID(key))
}
m.clobberedRegs = m.clobberedRegs[:0]
m.orderedLabels = m.orderedLabels[:0]
m.orderedBlockLabels = m.orderedBlockLabels[:0]
m.regAllocFn.reset()
m.spillSlotSize = 0
m.unresolvedAddressModes = m.unresolvedAddressModes[:0]
m.rootInstr = nil
m.ssaBlockIDToLabels = m.ssaBlockIDToLabels[:0]
m.perBlockHead, m.perBlockEnd = nil, nil
m.nextLabel = invalidLabel
}
// InitializeABI implements backend.Machine InitializeABI.
@@ -198,10 +201,10 @@ func (m *machine) StartBlock(blk ssa.BasicBlock) {
labelPos, ok := m.labelPositions[l]
if !ok {
labelPos = &labelPosition{}
labelPos = m.allocateLabelPosition()
m.labelPositions[l] = labelPos
}
m.orderedLabels = append(m.orderedLabels, labelPos)
m.orderedBlockLabels = append(m.orderedBlockLabels, labelPos)
labelPos.begin, labelPos.end = end, end
m.regAllocFn.addBlock(blk, l, labelPos)
}
@@ -223,6 +226,23 @@ func (m *machine) insert(i *instruction) {
m.pendingInstructions = append(m.pendingInstructions, i)
}
// insertBrTargetLabel allocates a new label, inserts a nop0 instruction carrying that
// label into the pending instructions, and registers a labelPosition for the label
// whose begin and end both point at that nop. It returns the newly allocated label.
func (m *machine) insertBrTargetLabel() label {
	target := m.allocateLabel()

	// The nop acts as the marker instruction for the label in the instruction stream.
	marker := m.allocateInstr()
	marker.asNop0WithLabel(target)
	m.insert(marker)

	// The label's region consists solely of the marker instruction.
	labelPos := m.allocateLabelPosition()
	labelPos.begin = marker
	labelPos.end = marker
	m.labelPositions[target] = labelPos

	return target
}
// allocateLabelPosition obtains a labelPosition from the machine's labelPositionPool
// and resets it to the zero value before handing it out.
func (m *machine) allocateLabelPosition() *labelPosition {
	pos := m.labelPositionPool.Allocate()
	// Overwrite whatever the pool returned with a clean zero value.
	*pos = labelPosition{}
	return pos
}
func (m *machine) FlushPendingInstructions() {
l := len(m.pendingInstructions)
if l == 0 {
@@ -253,6 +273,7 @@ func (l label) String() string {
// allocateInstr obtains an instruction from the machine's instruction pool and
// resets it to the zero value before returning it.
func (m *machine) allocateInstr() *instruction {
	allocated := m.instrPool.Allocate()
	// Overwrite whatever the pool returned with a clean zero value.
	*allocated = instruction{}
	return allocated
}
@@ -328,9 +349,21 @@ func (m *machine) ResolveRelativeAddresses() {
// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
var offset int64
for _, pos := range m.orderedLabels {
for _, pos := range m.orderedBlockLabels {
pos.binaryOffset = offset
size := binarySize(pos.begin, pos.end)
var size int64
for cur := pos.begin; ; cur = cur.next {
if cur.kind == nop0 {
l := cur.nop0Label()
if pos, ok := m.labelPositions[l]; ok {
pos.binaryOffset = offset + size
}
}
size += cur.size()
if cur == pos.end {
break
}
}
pos.binarySize = size
offset += size
}

View File

@@ -258,7 +258,7 @@ func TestMachine_CompileStackGrowCallSequence(t *testing.T) {
str x27, [x0, #0x38]
orr w17, wzr, #0x1
str w17, [x0]
adr x27, #0x1c
adr x27, #0x20
str x27, [x0, #0x30]
exit_sequence x0
ldr x1, [x0, #0x60]

View File

@@ -228,10 +228,14 @@ func (m *machine) RegisterInfo(debug bool) *regalloc.RegisterInfo {
regInfoDebug.RealRegToVReg = regInfo.RealRegToVReg
regInfoDebug.RealRegName = regInfo.RealRegName
regInfoDebug.AllocatableRegisters[regalloc.RegTypeFloat] = []regalloc.RealReg{
v18, // One callee saved.
v7, v6, v5, v4, v3, v2, v1, v0, // Allocatable sets == Argument registers.
}
// TODO: tests for high pressured int registers.
regInfoDebug.AllocatableRegisters[regalloc.RegTypeInt] = regInfo.AllocatableRegisters[regalloc.RegTypeInt]
regInfoDebug.AllocatableRegisters[regalloc.RegTypeInt] = []regalloc.RealReg{
x29, x30, // Caller saved, and special ones. But they should be able to get allocated.
x19, // One callee saved.
x7, x6, x5, x4, x3, x2, x1, x0, // Argument registers (all caller saved).
}
return regInfoDebug
}
return regInfo