wazevo(arm64): reuses queue in addr lowering (#1835)
Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
@@ -307,37 +307,35 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte
|
|||||||
// During the construction, this might emit additional instructions.
|
// During the construction, this might emit additional instructions.
|
||||||
//
|
//
|
||||||
// Extracted as a separate function for easy testing.
|
// Extracted as a separate function for easy testing.
|
||||||
func (m *machine) lowerToAddressModeFromAddends(a32s []addend32, a64s []regalloc.VReg, size byte, offset int64) (amode addressMode) {
|
func (m *machine) lowerToAddressModeFromAddends(a32s *queue[addend32], a64s *queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
|
||||||
switch a64sExist, a32sExist := len(a64s) > 0, len(a32s) > 0; {
|
switch a64sExist, a32sExist := !a64s.empty(), !a32s.empty(); {
|
||||||
case a64sExist && a32sExist:
|
case a64sExist && a32sExist:
|
||||||
var base regalloc.VReg
|
var base regalloc.VReg
|
||||||
base, a64s = dequeue(a64s)
|
base = a64s.dequeue()
|
||||||
var a32 addend32
|
var a32 addend32
|
||||||
a32, a32s = dequeue(a32s)
|
a32 = a32s.dequeue()
|
||||||
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
|
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
|
||||||
case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
|
case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
|
||||||
var base regalloc.VReg
|
var base regalloc.VReg
|
||||||
base, a64s = dequeue(a64s)
|
base = a64s.dequeue()
|
||||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
|
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
|
||||||
offset = 0
|
offset = 0
|
||||||
case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
|
case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
|
||||||
var base regalloc.VReg
|
var base regalloc.VReg
|
||||||
base, a64s = dequeue(a64s)
|
base = a64s.dequeue()
|
||||||
amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
|
amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
|
||||||
offset = 0
|
offset = 0
|
||||||
case a64sExist:
|
case a64sExist:
|
||||||
var base regalloc.VReg
|
var base regalloc.VReg
|
||||||
base, a64s = dequeue(a64s)
|
base = a64s.dequeue()
|
||||||
if len(a64s) > 0 {
|
if !a64s.empty() {
|
||||||
var index regalloc.VReg
|
index := a64s.dequeue()
|
||||||
index, a64s = dequeue(a64s)
|
|
||||||
amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
|
amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
|
||||||
} else {
|
} else {
|
||||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
|
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
|
||||||
}
|
}
|
||||||
case a32sExist:
|
case a32sExist:
|
||||||
var base32 addend32
|
base32 := a32s.dequeue()
|
||||||
base32, a32s = dequeue(a32s)
|
|
||||||
|
|
||||||
// First we need 64-bit base.
|
// First we need 64-bit base.
|
||||||
base := m.compiler.AllocateVReg(ssa.TypeI64)
|
base := m.compiler.AllocateVReg(ssa.TypeI64)
|
||||||
@@ -349,9 +347,8 @@ func (m *machine) lowerToAddressModeFromAddends(a32s []addend32, a64s []regalloc
|
|||||||
baseExt.asExtend(base, base32.r, 32, 64, signed)
|
baseExt.asExtend(base, base32.r, 32, 64, signed)
|
||||||
m.insert(baseExt)
|
m.insert(baseExt)
|
||||||
|
|
||||||
if len(a32s) > 0 {
|
if !a32s.empty() {
|
||||||
var index addend32
|
index := a32s.dequeue()
|
||||||
index, a32s = dequeue(a32s)
|
|
||||||
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
|
amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
|
||||||
} else {
|
} else {
|
||||||
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
|
amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
|
||||||
@@ -368,11 +365,13 @@ func (m *machine) lowerToAddressModeFromAddends(a32s []addend32, a64s []regalloc
|
|||||||
baseReg = m.addConstToReg64(baseReg, offset) // baseReg += offset
|
baseReg = m.addConstToReg64(baseReg, offset) // baseReg += offset
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, a64 := range a64s {
|
for !a64s.empty() {
|
||||||
|
a64 := a64s.dequeue()
|
||||||
baseReg = m.addReg64ToReg64(baseReg, a64) // baseReg += a64
|
baseReg = m.addReg64ToReg64(baseReg, a64) // baseReg += a64
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, a32 := range a32s {
|
for !a32s.empty() {
|
||||||
|
a32 := a32s.dequeue()
|
||||||
baseReg = m.addRegToReg64Ext(baseReg, a32.r, a32.ext) // baseReg += (a32 extended to 64-bit)
|
baseReg = m.addRegToReg64Ext(baseReg, a32.r, a32.ext) // baseReg += (a32 extended to 64-bit)
|
||||||
}
|
}
|
||||||
amode.rn = baseReg
|
amode.rn = baseReg
|
||||||
@@ -381,21 +380,22 @@ func (m *machine) lowerToAddressModeFromAddends(a32s []addend32, a64s []regalloc
|
|||||||
|
|
||||||
var addendsMatchOpcodes = [4]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst}
|
var addendsMatchOpcodes = [4]ssa.Opcode{ssa.OpcodeUExtend, ssa.OpcodeSExtend, ssa.OpcodeIadd, ssa.OpcodeIconst}
|
||||||
|
|
||||||
func (m *machine) collectAddends(ptr ssa.Value) (addends32 []addend32, addends64 []regalloc.VReg, offset int64) {
|
func (m *machine) collectAddends(ptr ssa.Value) (addends32 *queue[addend32], addends64 *queue[regalloc.VReg], offset int64) {
|
||||||
m.addends64 = m.addends64[:0]
|
m.addendsWorkQueue.reset()
|
||||||
m.addends32 = m.addends32[:0]
|
m.addends32.reset()
|
||||||
m.addendsWorkQueue = append(m.addendsWorkQueue[:0], ptr)
|
m.addends64.reset()
|
||||||
|
m.addendsWorkQueue.enqueue(ptr)
|
||||||
|
|
||||||
for len(m.addendsWorkQueue) > 0 {
|
for !m.addendsWorkQueue.empty() {
|
||||||
var v ssa.Value
|
v := m.addendsWorkQueue.dequeue()
|
||||||
v, m.addendsWorkQueue = dequeue(m.addendsWorkQueue)
|
|
||||||
|
|
||||||
def := m.compiler.ValueDefinition(v)
|
def := m.compiler.ValueDefinition(v)
|
||||||
switch op := m.compiler.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op {
|
switch op := m.compiler.MatchInstrOneOf(def, addendsMatchOpcodes[:]); op {
|
||||||
case ssa.OpcodeIadd:
|
case ssa.OpcodeIadd:
|
||||||
// If the addend is an add, we recursively collect its operands.
|
// If the addend is an add, we recursively collect its operands.
|
||||||
x, y := def.Instr.Arg2()
|
x, y := def.Instr.Arg2()
|
||||||
m.addendsWorkQueue = append(m.addendsWorkQueue, x, y)
|
m.addendsWorkQueue.enqueue(x)
|
||||||
|
m.addendsWorkQueue.enqueue(y)
|
||||||
def.Instr.MarkLowered()
|
def.Instr.MarkLowered()
|
||||||
case ssa.OpcodeIconst:
|
case ssa.OpcodeIconst:
|
||||||
// If the addend is constant, we just statically merge it into the offset.
|
// If the addend is constant, we just statically merge it into the offset.
|
||||||
@@ -411,7 +411,7 @@ func (m *machine) collectAddends(ptr ssa.Value) (addends32 []addend32, addends64
|
|||||||
switch input := def.Instr.Arg(); input.Type().Bits() {
|
switch input := def.Instr.Arg(); input.Type().Bits() {
|
||||||
case 64:
|
case 64:
|
||||||
// If the input is already 64-bit, this extend is a no-op. TODO: shouldn't this be optimized out at much earlier stage? no?
|
// If the input is already 64-bit, this extend is a no-op. TODO: shouldn't this be optimized out at much earlier stage? no?
|
||||||
m.addends64 = append(m.addends64, m.getOperand_NR(m.compiler.ValueDefinition(input), extModeNone).nr())
|
m.addends64.enqueue(m.getOperand_NR(m.compiler.ValueDefinition(input), extModeNone).nr())
|
||||||
def.Instr.MarkLowered()
|
def.Instr.MarkLowered()
|
||||||
continue
|
continue
|
||||||
case 32:
|
case 32:
|
||||||
@@ -432,7 +432,7 @@ func (m *machine) collectAddends(ptr ssa.Value) (addends32 []addend32, addends64
|
|||||||
// Sign-extension of a 32-bit constant can be merged into the offset.
|
// Sign-extension of a 32-bit constant can be merged into the offset.
|
||||||
offset += int64(int32(inputDef.Instr.ConstantVal())) // sign-extend!
|
offset += int64(int32(inputDef.Instr.ConstantVal())) // sign-extend!
|
||||||
default:
|
default:
|
||||||
m.addends32 = append(m.addends32, addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext})
|
m.addends32.enqueue(addend32{r: m.getOperand_NR(inputDef, extModeNone).nr(), ext: ext})
|
||||||
}
|
}
|
||||||
def.Instr.MarkLowered()
|
def.Instr.MarkLowered()
|
||||||
continue
|
continue
|
||||||
@@ -443,10 +443,10 @@ func (m *machine) collectAddends(ptr ssa.Value) (addends32 []addend32, addends64
|
|||||||
panic("TODO: add tests")
|
panic("TODO: add tests")
|
||||||
default:
|
default:
|
||||||
// If the addend is not one of them, we simply use it as-is (without merging!), optionally zero-extending it.
|
// If the addend is not one of them, we simply use it as-is (without merging!), optionally zero-extending it.
|
||||||
m.addends64 = append(m.addends64, m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr())
|
m.addends64.enqueue(m.getOperand_NR(def, extModeZeroExtend64 /* optional zero ext */).nr())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return m.addends32, m.addends64, offset
|
return &m.addends32, &m.addends64, offset
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
|
func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
|
||||||
@@ -481,7 +481,27 @@ func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regal
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func dequeue[T any](q []T) (ret T, popped []T) {
|
// queue is a resettable FIFO queue: reset rewinds the read index and truncates data to zero length, so the underlying slice is reused.
|
||||||
ret, popped = (q)[0], (q)[1:]
|
type queue[T any] struct {
|
||||||
|
index int
|
||||||
|
data []T
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *queue[T]) enqueue(v T) {
|
||||||
|
q.data = append(q.data, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *queue[T]) dequeue() (ret T) {
|
||||||
|
ret = q.data[q.index]
|
||||||
|
q.index++
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (q *queue[T]) empty() bool {
|
||||||
|
return q.index >= len(q.data)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *queue[T]) reset() {
|
||||||
|
q.index = 0
|
||||||
|
q.data = q.data[:0]
|
||||||
|
}
|
||||||
|
|||||||
@@ -472,9 +472,9 @@ func TestMachine_collectAddends(t *testing.T) {
|
|||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
ctx, b, m := newSetupWithMockContext()
|
ctx, b, m := newSetupWithMockContext()
|
||||||
ptr, verify := tc.setup(ctx, b, m)
|
ptr, verify := tc.setup(ctx, b, m)
|
||||||
actual32s, actual64s, actualOffset := m.collectAddends(ptr)
|
actual32sQ, actual64sQ, actualOffset := m.collectAddends(ptr)
|
||||||
require.Equal(t, tc.exp32s, actual32s)
|
require.Equal(t, tc.exp32s, actual32sQ.data)
|
||||||
require.Equal(t, tc.exp64s, actual64s)
|
require.Equal(t, tc.exp64s, actual64sQ.data)
|
||||||
require.Equal(t, tc.offset, actualOffset)
|
require.Equal(t, tc.offset, actualOffset)
|
||||||
verify(t)
|
verify(t)
|
||||||
})
|
})
|
||||||
@@ -567,18 +567,6 @@ func TestMachine_addRegToReg64Ext(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_dequeue(t *testing.T) {
|
|
||||||
ints := []int{1, 2, 3}
|
|
||||||
one, intPopped := dequeue(ints)
|
|
||||||
require.Equal(t, 1, one)
|
|
||||||
require.Equal(t, []int{2, 3}, intPopped)
|
|
||||||
|
|
||||||
strs := []string{"a", "b", "c"}
|
|
||||||
a, strPopped := dequeue(strs)
|
|
||||||
require.Equal(t, "a", a)
|
|
||||||
require.Equal(t, []string{"b", "c"}, strPopped)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMachine_lowerToAddressModeFromAddends(t *testing.T) {
|
func TestMachine_lowerToAddressModeFromAddends(t *testing.T) {
|
||||||
x1, x2, x3 := regalloc.FromRealReg(x1, regalloc.RegTypeInt), regalloc.FromRealReg(x2, regalloc.RegTypeInt), regalloc.FromRealReg(x3, regalloc.RegTypeInt)
|
x1, x2, x3 := regalloc.FromRealReg(x1, regalloc.RegTypeInt), regalloc.FromRealReg(x2, regalloc.RegTypeInt), regalloc.FromRealReg(x3, regalloc.RegTypeInt)
|
||||||
x4, x5, x6 := regalloc.FromRealReg(x4, regalloc.RegTypeInt), regalloc.FromRealReg(x5, regalloc.RegTypeInt), regalloc.FromRealReg(x6, regalloc.RegTypeInt)
|
x4, x5, x6 := regalloc.FromRealReg(x4, regalloc.RegTypeInt), regalloc.FromRealReg(x5, regalloc.RegTypeInt), regalloc.FromRealReg(x6, regalloc.RegTypeInt)
|
||||||
@@ -823,7 +811,16 @@ func TestMachine_lowerToAddressModeFromAddends(t *testing.T) {
|
|||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
ctx, _, m := newSetupWithMockContext()
|
ctx, _, m := newSetupWithMockContext()
|
||||||
ctx.vRegCounter = int(nextVReg.ID()) - 1
|
ctx.vRegCounter = int(nextVReg.ID()) - 1
|
||||||
actual := m.lowerToAddressModeFromAddends(tc.a32s, tc.a64s, tc.dstSizeInBits, tc.offset)
|
|
||||||
|
var a32s queue[addend32]
|
||||||
|
var a64s queue[regalloc.VReg]
|
||||||
|
for _, a32 := range tc.a32s {
|
||||||
|
a32s.enqueue(a32)
|
||||||
|
}
|
||||||
|
for _, a64 := range tc.a64s {
|
||||||
|
a64s.enqueue(a64)
|
||||||
|
}
|
||||||
|
actual := m.lowerToAddressModeFromAddends(&a32s, &a64s, tc.dstSizeInBits, tc.offset)
|
||||||
require.Equal(t, strings.Join(tc.insts, "\n"), formatEmittedInstructionsInCurrentBlock(m))
|
require.Equal(t, strings.Join(tc.insts, "\n"), formatEmittedInstructionsInCurrentBlock(m))
|
||||||
require.Equal(t, tc.exp, actual, actual.format(tc.dstSizeInBits))
|
require.Equal(t, tc.exp, actual, actual.format(tc.dstSizeInBits))
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -37,10 +37,10 @@ type (
|
|||||||
labelPositionPool wazevoapi.Pool[labelPosition]
|
labelPositionPool wazevoapi.Pool[labelPosition]
|
||||||
|
|
||||||
// addendsWorkQueue is used during address lowering, defined here for reuse.
|
// addendsWorkQueue is used during address lowering, defined here for reuse.
|
||||||
addendsWorkQueue []ssa.Value
|
addendsWorkQueue queue[ssa.Value]
|
||||||
addends32 []addend32
|
addends32 queue[addend32]
|
||||||
// addends64 is used during address lowering, defined here for reuse.
|
// addends64 is used during address lowering, defined here for reuse.
|
||||||
addends64 []regalloc.VReg
|
addends64 queue[regalloc.VReg]
|
||||||
unresolvedAddressModes []*instruction
|
unresolvedAddressModes []*instruction
|
||||||
|
|
||||||
// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
|
// spillSlotSize is the size of the stack slot in bytes used for spilling registers.
|
||||||
|
|||||||
Reference in New Issue
Block a user