wazevo: refactors liveness analysis (#1821)

This refactors the liveness analysis and switches to a different algorithm,
which reduces the compilation time of the Python binary from 30s to 12s.

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
Takeshi Yoneda
2023-10-31 08:12:17 +09:00
committed by GitHub
parent 695b49e94d
commit 6f16354ec7
17 changed files with 833 additions and 341 deletions

View File

@@ -266,10 +266,8 @@ L2 (SSA Block: blk1):
L1 (SSA Block: blk0):
mov x128?, x0
mov x131?, xzr
cbz w131?, (L2)
L3 (SSA Block: blk1):
ret
L2 (SSA Block: blk2):
cbnz w131?, L2
L3 (SSA Block: blk2):
movz x132?, #0x3, lsl 0
str w132?, [x128?]
mov x133?, sp
@@ -277,18 +275,16 @@ L2 (SSA Block: blk2):
adr x134?, #0x0
str x134?, [x128?, #0x30]
exit_sequence x128?
L2 (SSA Block: blk1):
ret
`,
afterFinalizeARM64: `
L1 (SSA Block: blk0):
stp x30, xzr, [sp, #-0x10]!
str xzr, [sp, #-0x10]!
mov x8, xzr
cbz w8, #0x10 L2
L3 (SSA Block: blk1):
add sp, sp, #0x10
ldr x30, [sp], #0x10
ret
L2 (SSA Block: blk2):
cbnz w8, #0x34 (L2)
L3 (SSA Block: blk2):
movz x8, #0x3, lsl 0
str w8, [x0]
mov x8, sp
@@ -296,6 +292,10 @@ L2 (SSA Block: blk2):
adr x8, #0x0
str x8, [x0, #0x30]
exit_sequence x0
L2 (SSA Block: blk1):
add sp, sp, #0x10
ldr x30, [sp], #0x10
ret
`,
},
{
@@ -397,10 +397,10 @@ L2 (SSA Block: blk1):
afterLoweringARM64: `
L1 (SSA Block: blk0):
mov x131?, xzr
cbnz w131?, L2
L3 (SSA Block: blk2):
cbz w131?, (L2)
L3 (SSA Block: blk1):
b L4
L2 (SSA Block: blk1):
L2 (SSA Block: blk2):
L4 (SSA Block: blk3):
ret
`,
@@ -409,10 +409,10 @@ L1 (SSA Block: blk0):
stp x30, xzr, [sp, #-0x10]!
str xzr, [sp, #-0x10]!
mov x8, xzr
cbnz w8, #0x8 (L2)
L3 (SSA Block: blk2):
cbz w8, #0x8 L2
L3 (SSA Block: blk1):
b #0x4 (L4)
L2 (SSA Block: blk1):
L2 (SSA Block: blk2):
L4 (SSA Block: blk3):
add sp, sp, #0x10
ldr x30, [sp], #0x10
@@ -424,25 +424,25 @@ L4 (SSA Block: blk3):
afterLoweringARM64: `
L1 (SSA Block: blk0):
mov x131?, xzr
cbnz w131?, L2
L3 (SSA Block: blk2):
ret
L2 (SSA Block: blk1):
cbz w131?, (L2)
L3 (SSA Block: blk1):
L4 (SSA Block: blk3):
ret
L2 (SSA Block: blk2):
ret
`,
afterFinalizeARM64: `
L1 (SSA Block: blk0):
stp x30, xzr, [sp, #-0x10]!
str xzr, [sp, #-0x10]!
mov x8, xzr
cbnz w8, #0x10 (L2)
L3 (SSA Block: blk2):
cbz w8, #0x10 L2
L3 (SSA Block: blk1):
L4 (SSA Block: blk3):
add sp, sp, #0x10
ldr x30, [sp], #0x10
ret
L2 (SSA Block: blk1):
L4 (SSA Block: blk3):
L2 (SSA Block: blk2):
add sp, sp, #0x10
ldr x30, [sp], #0x10
ret
@@ -453,30 +453,30 @@ L4 (SSA Block: blk3):
afterLoweringARM64: `
L1 (SSA Block: blk0):
mov x132?, xzr
cbnz w132?, L2
L3 (SSA Block: blk2):
cbz w132?, (L2)
L3 (SSA Block: blk1):
mov x131?, xzr
mov x0, x131?
ret
L2 (SSA Block: blk2):
L4 (SSA Block: blk3):
mov x130?, xzr
mov x0, x130?
ret
L2 (SSA Block: blk1):
mov x131?, xzr
mov x0, x131?
ret
`,
afterFinalizeARM64: `
L1 (SSA Block: blk0):
stp x30, xzr, [sp, #-0x10]!
str xzr, [sp, #-0x10]!
mov x8, xzr
cbnz w8, #0x14 (L2)
L3 (SSA Block: blk2):
L4 (SSA Block: blk3):
cbz w8, #0x14 L2
L3 (SSA Block: blk1):
mov x0, xzr
add sp, sp, #0x10
ldr x30, [sp], #0x10
ret
L2 (SSA Block: blk1):
L2 (SSA Block: blk2):
L4 (SSA Block: blk3):
mov x0, xzr
add sp, sp, #0x10
ldr x30, [sp], #0x10
@@ -489,12 +489,12 @@ L2 (SSA Block: blk1):
L1 (SSA Block: blk0):
mov x130?, x2
mov x131?, x3
cbnz w130?, L2
L3 (SSA Block: blk2):
mov x132?, x131?
b L4
L2 (SSA Block: blk1):
cbz w130?, (L2)
L3 (SSA Block: blk1):
mov x132?, x130?
b L4
L2 (SSA Block: blk2):
mov x132?, x131?
L4 (SSA Block: blk3):
mov x0, x132?
ret
@@ -503,13 +503,13 @@ L4 (SSA Block: blk3):
L1 (SSA Block: blk0):
stp x30, xzr, [sp, #-0x10]!
str xzr, [sp, #-0x10]!
cbnz w2, #0x8 (L2)
L3 (SSA Block: blk2):
cbz w2, #0x8 L2
L3 (SSA Block: blk1):
b #0x8 (L4)
L2 (SSA Block: blk1):
mov x3, x2
L2 (SSA Block: blk2):
mov x2, x3
L4 (SSA Block: blk3):
mov x0, x3
mov x0, x2
add sp, sp, #0x10
ldr x30, [sp], #0x10
ret
@@ -589,9 +589,8 @@ L5 (SSA Block: blk3):
L1 (SSA Block: blk0):
stp x30, xzr, [sp, #-0x10]!
str xzr, [sp, #-0x10]!
mov x8, x2
L2 (SSA Block: blk1):
cbnz w8, #0x8 (L4)
cbnz w2, #0x8 (L4)
b #0x10 (L3)
L4 (SSA Block: blk5):
add sp, sp, #0x10
@@ -599,7 +598,7 @@ L4 (SSA Block: blk5):
ret
L3 (SSA Block: blk4):
L5 (SSA Block: blk3):
orr w8, wzr, #0x1
orr w2, wzr, #0x1
b #-0x18 (L2)
`,
},
@@ -639,30 +638,30 @@ L1 (SSA Block: blk0):
sub sp, sp, #0x10
orr x27, xzr, #0x10
str x27, [sp, #-0x10]!
mov x9, x0
mov x8, x1
str x8, [x9, #0x8]
mov x0, x9
mov x1, x8
str x9, [sp, #0x10]
str x8, [sp, #0x18]
mov x8, x0
mov x9, x1
str x9, [x8, #0x8]
mov x0, x8
mov x1, x9
str x8, [sp, #0x10]
str x9, [sp, #0x18]
bl f1
ldr x8, [sp, #0x18]
ldr x9, [sp, #0x10]
ldr x9, [sp, #0x18]
ldr x8, [sp, #0x10]
mov x2, x0
str x8, [x9, #0x8]
mov x0, x9
mov x1, x8
str x9, [x8, #0x8]
mov x0, x8
mov x1, x9
movz w3, #0x5, lsl 0
str x9, [sp, #0x10]
str x8, [sp, #0x18]
str x8, [sp, #0x10]
str x9, [sp, #0x18]
bl f2
ldr x8, [sp, #0x18]
ldr x9, [sp, #0x10]
ldr x9, [sp, #0x18]
ldr x8, [sp, #0x10]
mov x2, x0
str x8, [x9, #0x8]
mov x0, x9
mov x1, x8
str x9, [x8, #0x8]
mov x0, x8
mov x1, x9
bl f3
add sp, sp, #0x10
add sp, sp, #0x10
@@ -1623,11 +1622,12 @@ L1 (SSA Block: blk0):
L1 (SSA Block: blk0):
stp x30, xzr, [sp, #-0x10]!
str xzr, [sp, #-0x10]!
mov x3, x2
mov x9, x2
str x1, [x0, #0x8]
ldr x8, [x1, #0x8]
ldr x1, [x1, #0x10]
mov x2, x3
mov x2, x9
mov x3, x9
bl x8
add sp, sp, #0x10
ldr x30, [sp], #0x10

View File

@@ -102,6 +102,9 @@ type Compiler interface {
// Emit4Bytes appends 4 bytes to the buffer. Used during the code emission.
Emit4Bytes(b uint32)
// LoopNestingForestRoots returns the roots of the loop nesting forest.
LoopNestingForestRoots() []ssa.BasicBlock
}
// RelocationInfo represents the relocation information for a call instruction.
@@ -389,3 +392,8 @@ func (c *compiler) Emit4Bytes(b uint32) {
func (c *compiler) Buf() []byte {
return c.buf
}
// LoopNestingForestRoots implements Compiler.LoopNestingForestRoots.
// It forwards the call to the compiler's SSA builder and returns its result unchanged.
func (c *compiler) LoopNestingForestRoots() []ssa.BasicBlock {
return c.ssaBuilder.LoopNestingForestRoots()
}

View File

@@ -17,7 +17,8 @@ type (
// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks.
labelToRegAllocBlockIndex map[label]int
// vs is used for regalloc.Instr Defs() and Uses() methods, defined here for reuse.
vs []regalloc.VReg
vs []regalloc.VReg
loopNestingForestRoots []ssa.BasicBlock
}
// regAllocBlockImpl implements regalloc.Block.
@@ -28,7 +29,8 @@ type (
l label
pos *labelPosition
// instrImpl is re-used for all instructions in this block.
instrImpl regAllocInstrImpl
instrImpl regAllocInstrImpl
loopNestingForestChildren []ssa.BasicBlock
}
// regAllocInstrImpl implements regalloc.Instr.
@@ -151,6 +153,56 @@ func (r *regAllocBlockImpl) Pred(i int) regalloc.Block {
return &r.f.reversePostOrderBlocks[index]
}
// Succs implements regalloc.Block Succs.
// The count comes straight from the underlying SSA block, so it also counts
// successors for which Succ returns nil (i.e. the return block).
func (r *regAllocBlockImpl) Succs() int {
return r.sb.Succs()
}
// Succ implements regalloc.Block Succ.
//
// The i-th SSA successor is translated into the corresponding regalloc block by
// going through its label and the label-to-index table. When the successor is
// the return block, nil is returned.
func (r *regAllocBlockImpl) Succ(i int) regalloc.Block {
	next := r.sb.Succ(i)
	if next.ReturnBlock() {
		return nil
	}
	fn := r.f
	pos := fn.labelToRegAllocBlockIndex[fn.m.ssaBlockIDToLabels[next.ID()]]
	return &fn.reversePostOrderBlocks[pos]
}
// LoopHeader implements regalloc.Block LoopHeader.
// The answer is delegated to the underlying SSA block.
func (r *regAllocBlockImpl) LoopHeader() bool {
return r.sb.LoopHeader()
}
// LoopNestingForestRoots implements regalloc.Function LoopNestingForestRoots.
//
// The roots are fetched from the compiler and stored on the receiver, which is
// the slice LoopNestingForestRoot indexes into. The number of roots is returned.
func (f *regAllocFunctionImpl) LoopNestingForestRoots() int {
	roots := f.m.compiler.LoopNestingForestRoots()
	f.loopNestingForestRoots = roots
	return len(roots)
}
// LoopNestingForestRoot implements regalloc.Function LoopNestingForestRoot.
//
// It reads the i-th entry of the slice stored by LoopNestingForestRoots and maps
// that SSA block to its regalloc block via its label.
func (f *regAllocFunctionImpl) LoopNestingForestRoot(i int) regalloc.Block {
	rootID := f.loopNestingForestRoots[i].ID()
	pos := f.labelToRegAllocBlockIndex[f.m.ssaBlockIDToLabels[rootID]]
	return &f.reversePostOrderBlocks[pos]
}
// LoopNestingForestChildren implements regalloc.Block LoopNestingForestChildren.
//
// The children are fetched from the SSA block and kept on the receiver so that
// LoopNestingForestChild can index into them afterwards. The number of children
// is returned.
func (r *regAllocBlockImpl) LoopNestingForestChildren() int {
	children := r.sb.LoopNestingForestChildren()
	r.loopNestingForestChildren = children
	return len(children)
}
// LoopNestingForestChild implements regalloc.Block LoopNestingForestChild.
//
// It reads the i-th entry of the slice stored by LoopNestingForestChildren and
// maps that SSA block to its regalloc block via its label.
func (r *regAllocBlockImpl) LoopNestingForestChild(i int) regalloc.Block {
	fn := r.f
	childID := r.loopNestingForestChildren[i].ID()
	pos := fn.labelToRegAllocBlockIndex[fn.m.ssaBlockIDToLabels[childID]]
	return &fn.reversePostOrderBlocks[pos]
}
// InstrIteratorBegin implements regalloc.Block InstrIteratorBegin.
func (r *regAllocBlockImpl) InstrIteratorBegin() regalloc.Instr {
r.instrImpl.i = r.pos.begin
@@ -170,6 +222,25 @@ func (r *regAllocBlockImpl) InstrIteratorNext() regalloc.Instr {
}
}
// InstrRevIteratorBegin implements regalloc.Block InstrRevIteratorBegin.
// It points the block's shared instruction cursor at the block's end position
// and returns that cursor.
// NOTE(review): unlike InstrRevIteratorNext, the returned instruction is not
// checked against addedBeforeRegAlloc — presumably the end instruction always
// exists before regalloc; confirm.
func (r *regAllocBlockImpl) InstrRevIteratorBegin() regalloc.Instr {
r.instrImpl.i = r.pos.end
return &r.instrImpl
}
// InstrRevIteratorNext implements regalloc.Block InstrRevIteratorNext.
//
// It walks backwards from the current cursor position and returns the first
// instruction that was added before register allocation, skipping the others.
// nil is returned once the beginning of the block has been reached.
func (r *regAllocBlockImpl) InstrRevIteratorNext() regalloc.Instr {
	for cur := r.instrIteratorRevNext(); cur != nil; cur = r.instrIteratorRevNext() {
		// Only concerned about the instruction added before regalloc.
		if cur.i.addedBeforeRegAlloc {
			return cur
		}
	}
	return nil
}
// BlockParams implements regalloc.Block BlockParams.
func (r *regAllocBlockImpl) BlockParams() []regalloc.VReg {
c := r.f.m.compiler
@@ -190,6 +261,15 @@ func (r *regAllocBlockImpl) instrIteratorNext() *regAllocInstrImpl {
return &r.instrImpl
}
// instrIteratorRevNext moves the shared instruction cursor one instruction
// backwards and returns it. It returns nil, leaving the cursor untouched, when
// the cursor already sits on the first instruction of the block.
func (r *regAllocBlockImpl) instrIteratorRevNext() *regAllocInstrImpl {
	impl := &r.instrImpl
	if impl.i == r.pos.begin {
		return nil
	}
	impl.i = impl.i.prev
	return impl
}
// Entry implements regalloc.Block Entry.
// It reports whether the underlying SSA block is the entry block.
func (r *regAllocBlockImpl) Entry() bool { return r.sb.EntryBlock() }

View File

@@ -61,6 +61,8 @@ type mockCompiler struct {
buf []byte
}
// LoopNestingForestRoots is not provided by this mock yet; calling it panics.
func (m *mockCompiler) LoopNestingForestRoots() []ssa.BasicBlock { panic("TODO") }
// SourceOffsetInfo is a stub that always returns nil.
func (m *mockCompiler) SourceOffsetInfo() []backend.SourceOffsetInfo { return nil }
// AddSourceOffsetInfo is a stub that ignores its arguments and does nothing.
func (m *mockCompiler) AddSourceOffsetInfo(int64, ssa.SourceOffset) {}

View File

@@ -35,12 +35,17 @@ type (
ReloadRegisterAfter(v VReg, instr Instr)
// Done tells the implementation that register allocation is done, and it can finalize the stack
Done()
// LoopNestingForestRoots returns the number of roots of the loop nesting forest in a function.
LoopNestingForestRoots() int
// LoopNestingForestRoot returns the i-th root of the loop nesting forest in a function.
LoopNestingForestRoot(i int) Block
}
// Block is a basic block in the CFG of a function, and it consists of multiple instructions, and predecessor Block(s).
Block interface {
// ID returns the unique identifier of this block.
ID() int
// BlockParams returns the virtual registers used as the parameters of this block.
BlockParams() []VReg
// InstrIteratorBegin returns the first instruction in this block. Instructions added after lowering must be skipped.
// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
@@ -48,12 +53,26 @@ type (
// InstrIteratorNext returns the next instruction in this block. Instructions added after lowering must be skipped.
// Note: multiple Instr(s) will not be held at the same time, so it's safe to use the same impl for the return Instr.
InstrIteratorNext() Instr
// InstrRevIteratorBegin is the same as InstrIteratorBegin, but in the reverse order.
InstrRevIteratorBegin() Instr
// InstrRevIteratorNext is the same as InstrIteratorNext, but in the reverse order.
InstrRevIteratorNext() Instr
// Preds returns the number of predecessors of this block in the CFG.
Preds() int
// Pred returns the i-th predecessor of this block in the CFG.
Pred(i int) Block
// Entry returns true if the block is for the entry block.
Entry() bool
// Succs returns the number of successors of this block in the CFG.
Succs() int
// Succ returns the i-th successor of this block in the CFG.
Succ(i int) Block
// LoopHeader returns true if this block is a loop header.
LoopHeader() bool
// LoopNestingForestChildren returns the number of children of this block in the loop nesting forest.
LoopNestingForestChildren() int
// LoopNestingForestChild returns the i-th child of this block in the loop nesting forest.
LoopNestingForestChild(i int) Block
}
// Instr is an instruction in a block, abstracting away the underlying ISA.

View File

@@ -12,6 +12,7 @@ type (
iter int
blocks []*mockBlock
befores, afters []storeOrReloadInfo
lnfRoots []*mockBlock
}
storeOrReloadInfo struct {
@@ -22,12 +23,15 @@ type (
// mockBlock implements Block.
mockBlock struct {
id int
instructions []*mockInstr
preds []*mockBlock
_preds []Block
iter int
_entry bool
id int
instructions []*mockInstr
preds, succs []*mockBlock
_preds, _succs []Block
iter int
_entry bool
_loop bool
lnfChildren []*mockBlock
blockParams []VReg
}
// mockInstr implements Instr.
@@ -41,6 +45,10 @@ func newMockFunction(blocks ...*mockBlock) *mockFunction {
return &mockFunction{blocks: blocks}
}
// loopNestingForestRoots sets the blocks that this mock function reports as the
// roots of its loop nesting forest, replacing any previously set roots.
func (m *mockFunction) loopNestingForestRoots(blocks ...*mockBlock) {
m.lnfRoots = blocks
}
// newMockBlock returns a mockBlock with the given id and instructions.
// All other fields are left at their zero values.
func newMockBlock(id int, instructions ...*mockInstr) *mockBlock {
return &mockBlock{id: id, instructions: instructions}
}
@@ -75,6 +83,8 @@ func (m *mockBlock) String() string {
// addPred registers b as a predecessor of m and, symmetrically, m as a
// successor of b. Both the concrete (*mockBlock) and the interface (Block)
// slices are kept in sync on each side.
func (m *mockBlock) addPred(b *mockBlock) {
	// Successor side of the edge b -> m.
	b.succs, b._succs = append(b.succs, m), append(b._succs, m)
	// Predecessor side of the edge b -> m.
	m.preds, m._preds = append(m.preds, b), append(m._preds, b)
}
func (m *mockInstr) use(uses ...VReg) *mockInstr {
@@ -87,6 +97,12 @@ func (m *mockInstr) def(defs ...VReg) *mockInstr {
return m
}
// loop marks m as a loop header and records children as its children in the
// loop nesting forest, replacing any previously set children.
// It returns m so that calls can be chained.
func (m *mockBlock) loop(children ...*mockBlock) *mockBlock {
m._loop = true
m.lnfChildren = children
return m
}
func (m *mockBlock) entry() *mockBlock {
m._entry = true
return m
@@ -194,13 +210,35 @@ func (m *mockBlock) InstrIteratorNext() Instr {
return ret
}
// InstrRevIteratorBegin implements Block.
//
// It starts a backwards iteration and returns the last instruction of the
// block. For a block without instructions it returns nil and leaves the
// iteration cursor untouched.
func (m *mockBlock) InstrRevIteratorBegin() Instr {
	count := len(m.instructions)
	if count == 0 {
		return nil
	}
	// Place the cursor one past the end; InstrRevIteratorNext steps back first.
	m.iter = count
	return m.InstrRevIteratorNext()
}
// InstrRevIteratorNext implements Block.
//
// It steps the iteration cursor back by one and returns the instruction at the
// new position, or nil once the cursor has moved before the first instruction.
func (m *mockBlock) InstrRevIteratorNext() Instr {
	if m.iter--; m.iter < 0 {
		return nil
	}
	return m.instructions[m.iter]
}
// Preds implements Block.
// It returns the number of entries in _preds, the slice that addPred appends to.
func (m *mockBlock) Preds() int {
return len(m._preds)
}
// BlockParams implements Block.
func (m *mockBlock) BlockParams() []VReg { return nil }
func (m *mockBlock) BlockParams() []VReg { return m.blockParams }
// blockParam appends v to m.blockParams.
func (m *mockBlock) blockParam(v VReg) {
m.blockParams = append(m.blockParams, v)
}
// Pred implements Block.
// It returns the i-th entry of _preds, the slice that addPred appends to.
func (m *mockBlock) Pred(i int) Block { return m._preds[i] }
@@ -248,3 +286,31 @@ var (
_ Block = (*mockBlock)(nil)
_ Instr = (*mockInstr)(nil)
)
// LoopNestingForestRoots implements Function.
// It returns the number of roots set via loopNestingForestRoots.
func (m *mockFunction) LoopNestingForestRoots() int {
return len(m.lnfRoots)
}
// LoopNestingForestRoot implements Function.
// It returns the i-th root set via loopNestingForestRoots.
func (m *mockFunction) LoopNestingForestRoot(i int) Block {
return m.lnfRoots[i]
}
// LoopHeader implements Block.
// It reports the _loop flag, which is set by the loop builder method.
func (m *mockBlock) LoopHeader() bool {
return m._loop
}
// Succs implements Block.
// It returns the number of entries in succs, the slice that addPred appends to
// on the predecessor block.
func (m *mockBlock) Succs() int {
return len(m.succs)
}
// Succ implements Block.
// It returns the i-th entry of succs.
func (m *mockBlock) Succ(i int) Block {
return m.succs[i]
}
// LoopNestingForestChildren implements Block.
// It returns the number of children set via the loop builder method.
func (m *mockBlock) LoopNestingForestChildren() int {
return len(m.lnfChildren)
}
// LoopNestingForestChild implements Block.
// It returns the i-th child set via the loop builder method.
func (m *mockBlock) LoopNestingForestChild(i int) Block {
return m.lnfChildren[i]
}

View File

@@ -73,6 +73,7 @@ type (
nodes2 []*node
nodes3 []*node
dedup []bool
blks []Block
}
// blockInfo is a per-block information used during the register allocation.
@@ -145,17 +146,21 @@ const (
pcStride = pcDefOffset + 1
)
// phiBlk returns the block recorded in phiBlocks for the phi value identified
// by id. It returns nil when id lies beyond the end of the phiBlocks table or
// when no block has been recorded in that slot.
func (a *Allocator) phiBlk(id VRegID) Block {
	if int(id) < len(a.phiBlocks) {
		return a.phiBlocks[id]
	}
	return nil
}
// livenessAnalysis constructs Allocator.blockInfos.
// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.4.
//
// TODO: this might not be efficient. We should be able to leverage dominance tree, etc.
// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2.
func (a *Allocator) livenessAnalysis(f Function) {
// First, we need to allocate blockInfos.
var maxBlockID int
for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() { // Order doesn't matter.
info := a.allocateBlockInfo(blk.ID())
if blk.Entry() {
continue
}
// If this is not the entry block, we should define phi nodes, which are not defined by instructions.
for _, p := range blk.BlockParams() {
info.defs[p] = 0 // Earliest definition is at the beginning of the block.
@@ -166,137 +171,145 @@ func (a *Allocator) livenessAnalysis(f Function) {
}
a.phiBlocks[pid] = blk
}
if blk.ID() > maxBlockID {
maxBlockID = blk.ID()
}
}
// Gathers all defs, lastUses, and VRegs in use (into a.vs).
a.vs = a.vs[:0]
for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() {
info := a.blockInfoAt(blk.ID())
if maxBlockID >= len(a.dedup) {
a.dedup = append(a.dedup, make([]bool, maxBlockID+1)...)
}
// We have to do a first pass to find the lowest VRegID in the block;
// this is used to reduce memory utilization in the VRegTable, which
// can avoid allocating memory for registers zero to minVRegID-1.
minVRegID := VRegIDMinSet{}
// Run the Algorithm 9.2 in the book.
for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() {
blkID := blk.ID()
info := a.allocateBlockInfo(blkID)
ns := blk.Succs()
for i := 0; i < ns; i++ {
succ := blk.Succ(i)
if succ == nil {
continue
}
succID := succ.ID()
if !a.dedup[succID] { // This means the back edge.
continue
}
succInfo := a.blockInfoAt(succID)
for v := range succInfo.liveIns {
if a.phiBlk(v.ID()) != succ {
info.liveOuts[v] = struct{}{}
info.liveIns[v] = struct{}{}
}
}
}
var pc programCounter
var minVRegID VRegIDMinSet
for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
for _, use := range instr.Uses() {
uses := instr.Uses()
for _, use := range uses {
if !use.IsRealReg() {
minVRegID.Observe(use)
}
}
pc += pcStride
}
info.lastUses.Reset(minVRegID)
var pc programCounter
for instr := blk.InstrIteratorBegin(); instr != nil; instr = blk.InstrIteratorNext() {
var srcVR, dstVR VReg
for _, use := range instr.Uses() {
srcVR = use
pos := pc + pcUseOffset
if use.IsRealReg() {
info.addRealRegUsage(use, pos)
} else {
info.lastUses.Insert(use, pos)
}
}
for _, def := range instr.Defs() {
dstVR = def
for instr := blk.InstrRevIteratorBegin(); instr != nil; instr = blk.InstrRevIteratorNext() {
pc -= pcStride
var use, def VReg
for _, def = range instr.Defs() {
defID := def.ID()
pos := pc + pcDefOffset
if def.IsRealReg() {
info.realRegDefs[defID] = append(info.realRegDefs[defID], pos)
} else {
if _, ok := info.defs[def]; !ok {
// This means that this VReg is defined multiple times in a series of instructions
// e.g. loading arbitrary constant in arm64, and we only need the earliest
// definition to construct live range.
info.defs[def] = pos
info.defs[def] = pos
delete(info.liveIns, def)
}
}
for _, use = range instr.Uses() {
pos := pc + pcUseOffset
if use.IsRealReg() {
id := use.ID()
info.realRegUses[id] = append(info.realRegUses[id], pos)
} else {
if info.lastUses.Lookup(use) < 0 {
info.lastUses.Insert(use, pos)
}
a.vs = append(a.vs, def)
info.liveIns[use] = struct{}{}
}
}
if instr.IsCopy() {
id := int(dstVR.ID())
if id < len(a.phiBlocks) && a.phiBlocks[id] != nil {
info.liveOuts[dstVR] = struct{}{}
}
a.recordCopyRelation(dstVR, srcVR)
a.recordCopyRelation(def, use)
}
// If the destination is a phi value, and ...
if def.Valid() && a.phiBlk(def.ID()) != nil {
if use.Valid() && use.IsRealReg() {
// If the source is a real register, this is the beginning of the function, and
// therefore we need to add the definition of the real register.
r := use.ID()
info.realRegDefs[r] = append(info.realRegDefs[r], 0)
} else {
// Otherwise, this is the definition of the phi value for the successor block.
// So we need to make it outlive the block.
info.liveOuts[def] = struct{}{}
}
}
pc += pcStride
}
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("prepared block info for block[%d]:\n%s\n\n", blk.ID(), info.Format(a.regInfo))
}
a.dedup[blkID] = true
}
// Run the Algorithm 9.9. in the book. This will construct blockInfo.liveIns and blockInfo.liveOuts.
for _, phi := range a.phis {
blk := a.phiBlocks[phi.ID()]
a.beginUpAndMarkStack(f, phi, true, blk)
nrs := f.LoopNestingForestRoots()
for i := 0; i < nrs; i++ {
root := f.LoopNestingForestRoot(i)
a.loopTreeDFS(root)
}
for _, v := range a.vs {
if v.IsRealReg() {
// Real registers do not need to be tracked in liveOuts and liveIns because they are not allocation targets.
panic("BUG")
}
a.beginUpAndMarkStack(f, v, false, nil)
// Clears the dedup array for the next function.
for i := 0; i <= maxBlockID; i++ {
a.dedup[i] = false
}
}
func (a *Allocator) beginUpAndMarkStack(f Function, v VReg, isPhi bool, phiDefinedAt Block) {
for blk := f.PostOrderBlockIteratorBegin(); blk != nil; blk = f.PostOrderBlockIteratorNext() {
if blk.Preds() == 0 && !blk.Entry() {
panic(fmt.Sprintf("block without predecessor must be optimized out by the compiler: %d", blk.ID()))
// loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way.
func (a *Allocator) loopTreeDFS(entry Block) {
a.blks = a.blks[:0]
a.blks = append(a.blks, entry)
for len(a.blks) > 0 {
tail := len(a.blks) - 1
loop := a.blks[tail]
a.blks = a.blks[:tail]
a.vs = a.vs[:0]
info := a.blockInfoAt(loop.ID())
for v := range info.liveIns {
if a.phiBlk(v.ID()) != loop {
a.vs = append(a.vs, v)
info.liveOuts[v] = struct{}{}
}
}
info := a.blockInfoAt(blk.ID())
if !info.lastUses.Contains(v) {
continue
cn := loop.LoopNestingForestChildren()
for i := 0; i < cn; i++ {
child := loop.LoopNestingForestChild(i)
childID := child.ID()
childInfo := a.blockInfoAt(childID)
for _, v := range a.vs {
childInfo.liveIns[v] = struct{}{}
childInfo.liveOuts[v] = struct{}{}
}
if child.LoopHeader() {
a.blks = append(a.blks, child)
}
}
// TODO: we might want to avoid recursion here.
a.upAndMarkStack(blk, v, isPhi, phiDefinedAt, 0)
}
}
// upAndMarkStack is the Algorithm 9.10. in the book named Up_and_Mark_Stack(B, v).
//
// We recursively call this, so passing `depth` for debugging.
func (a *Allocator) upAndMarkStack(b Block, v VReg, isPhi bool, phiDefinedAt Block, depth int) {
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("%supAndMarkStack for %v at %v\n", strings.Repeat("\t", depth), v, b.ID())
}
info := a.blockInfoAt(b.ID())
if _, ok := info.defs[v]; ok && !isPhi {
return // Defined in this block, so no need to go further climbing up.
}
// v must be in liveIns.
if _, ok := info.liveIns[v]; ok {
return // But this case, it is already visited. (maybe by, for example, sibling blocks).
}
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("%sadding %v live-in at block[%d]\n", strings.Repeat("\t", depth), v, b.ID())
}
// Now we can safely mark v as a part of live-in
info.liveIns[v] = struct{}{}
// Plus if this is this block has the definition of this phi, we can stop climbing up.
if b == phiDefinedAt {
return
}
preds := b.Preds()
if preds == 0 {
panic(fmt.Sprintf("BUG: block has no predecessors while requiring live-in: blk%d", b.ID()))
}
// and climb up the CFG.
for i := 0; i < preds; i++ {
pred := b.Pred(i)
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("%sadding %v live-out at block[%d]\n", strings.Repeat("\t", depth+1), v, pred.ID())
}
a.blockInfoAt(pred.ID()).liveOuts[v] = struct{}{}
a.upAndMarkStack(pred, v, isPhi, phiDefinedAt, depth+1)
}
}
@@ -362,10 +375,11 @@ func (a *Allocator) buildLiveRangesForNonReals(info *blockInfo) {
// v is defined here and live-out, so it is live-through.
end = math.MaxInt32
} else {
if end = info.lastUses.Lookup(v); end == -1 {
end = info.lastUses.Lookup(v)
if end == -1 {
// This case the defined value is not used at all.
end = defPos
} // Otherwise v is killed at defPos.
}
}
n := a.getOrAllocateNode(v)
intervalNode := info.intervalMng.insert(n, defPos, end)
@@ -398,8 +412,17 @@ func (a *Allocator) buildLiveRangesForReals(info *blockInfo) {
a.regInfo.RealRegName(r), len(defs), len(uses),
),
)
} else if len(uses) == 0 {
continue
}
sort.Slice(uses, func(i, j int) bool {
return uses[i] < uses[j]
})
sort.Slice(defs, func(i, j int) bool {
return defs[i] < defs[j]
})
for i := range uses {
n := a.allocateNode()
n.r = r
@@ -514,17 +537,6 @@ func (a *Allocator) allocateNode() (n *node) {
return
}
// addRealRegUsage records a use of the real register v at position pc.
// If no definition of that register has been recorded in this block yet, a
// definition at position 0 is recorded first.
func (i *blockInfo) addRealRegUsage(v VReg, pc programCounter) {
id := v.ID()
defs := i.realRegDefs[id]
if len(defs) == 0 {
// The register is used but no definition has been found yet, so this must be a
// function preamble; assume it is defined at the beginning.
i.realRegDefs[id] = append(i.realRegDefs[id], 0)
}
i.realRegUses[id] = append(i.realRegUses[id], pc)
}
// Format is for debugging.
func (i *blockInfo) Format(ri *RegisterInfo) string {
var buf strings.Builder

View File

@@ -1,6 +1,8 @@
package regalloc
import (
"fmt"
"sort"
"testing"
"github.com/tetratelabs/wazero/internal/testing/require"
@@ -24,7 +26,7 @@ func makeVRegTable(vregs map[VReg]programCounter) (table VRegTable) {
func TestAllocator_livenessAnalysis(t *testing.T) {
const realRegID, realRegID2 = 50, 100
realReg, realReg2 := FromRealReg(realRegID, RegTypeInt), FromRealReg(realRegID2, RegTypeInt)
const phiVReg = 12345
phiVReg := VReg(12345).SetRegType(RegTypeInt)
for _, tc := range []struct {
name string
setup func() Function
@@ -47,7 +49,32 @@ func TestAllocator_livenessAnalysis(t *testing.T) {
},
},
},
{
name: "single block with real reg",
setup: func() Function {
realVReg := FromRealReg(10, RegTypeInt)
param := VReg(1)
ret := VReg(2)
blk := newMockBlock(0,
newMockInstr().def(param).use(realVReg),
newMockInstr().def(ret).use(param, param),
newMockInstr().def(realVReg).use(ret),
).entry()
blk.blockParam(param)
return newMockFunction(blk)
},
exp: map[int]*blockInfo{
0: {
defs: map[VReg]programCounter{1: 1, 2: pcDefOffset + pcStride},
lastUses: makeVRegTable(map[VReg]programCounter{
1: pcStride + pcUseOffset,
2: pcStride*2 + pcUseOffset,
}),
realRegUses: [vRegIDReservedForRealNum][]programCounter{10: {0}},
realRegDefs: [vRegIDReservedForRealNum][]programCounter{10: {pcDefOffset + pcStride*2}},
},
},
},
{
name: "straight",
// b0 -> b1 -> b2
@@ -168,7 +195,7 @@ func TestAllocator_livenessAnalysis(t *testing.T) {
liveOuts: map[VReg]struct{}{1000: {}},
lastUses: makeVRegTable(map[VReg]programCounter{2: pcUseOffset}),
realRegUses: [vRegIDReservedForRealNum][]programCounter{realRegID2: {pcUseOffset}},
realRegDefs: [vRegIDReservedForRealNum][]programCounter{realRegID2: {0}},
realRegDefs: [vRegIDReservedForRealNum][]programCounter{},
},
3: {
liveIns: map[VReg]struct{}{1000: {}},
@@ -251,6 +278,7 @@ func TestAllocator_livenessAnalysis(t *testing.T) {
b1 := newMockBlock(1,
newMockInstr().def(9999),
)
b1.blockParam(phiVReg)
b2 := newMockBlock(2,
newMockInstr().def(100).use(phiVReg, 9999),
)
@@ -259,7 +287,9 @@ func TestAllocator_livenessAnalysis(t *testing.T) {
newMockInstr().use(100),
)
b4 := newMockBlock(4,
newMockInstr().def(phiVReg).use(54321),
newMockInstr().def(phiVReg).use(54321).
// Make sure this is the PHI defining instruction.
asCopy(),
)
b5 := newMockBlock(
4, newMockInstr().use(54321),
@@ -270,7 +300,10 @@ func TestAllocator_livenessAnalysis(t *testing.T) {
b3.addPred(b2)
b4.addPred(b3)
b5.addPred(b3)
return newMockFunction(b0, b1, b2, b3, b4, b5)
b1.loop(b2, b3, b4, b5)
f := newMockFunction(b0, b1, b2, b3, b4, b5)
f.loopNestingForestRoots(b1)
return f
},
exp: map[int]*blockInfo{
0: {
@@ -289,7 +322,7 @@ func TestAllocator_livenessAnalysis(t *testing.T) {
1: {
liveIns: map[VReg]struct{}{phiVReg: {}},
liveOuts: map[VReg]struct{}{phiVReg: {}, 9999: {}},
defs: map[VReg]programCounter{9999: pcDefOffset},
defs: map[VReg]programCounter{phiVReg: 0, 9999: pcDefOffset},
lastUses: makeVRegTable(map[VReg]programCounter{}),
},
2: {
@@ -312,7 +345,61 @@ func TestAllocator_livenessAnalysis(t *testing.T) {
},
},
},
{
name: "multiple pass alive",
setup: func() Function {
v := VReg(9999)
b0 := newMockBlock(0, newMockInstr().def(v)).entry()
b1, b2, b3, b4, b5, b6 := newMockBlock(1), newMockBlock(2),
newMockBlock(3, newMockInstr().use(v)),
newMockBlock(4), newMockBlock(5), newMockBlock(6)
b1.addPred(b0)
b4.addPred(b0)
b2.addPred(b1)
b5.addPred(b2)
b2.addPred(b5)
b6.addPred(b2)
b3.addPred(b6)
b3.addPred(b4)
f := newMockFunction(b0, b1, b2, b4, b5, b6, b3)
f.loopNestingForestRoots(b2)
return f
},
exp: map[int]*blockInfo{
0: {
liveOuts: map[VReg]struct{}{9999: {}},
defs: map[VReg]programCounter{9999: pcDefOffset},
lastUses: makeVRegTable(nil),
},
1: {
liveIns: map[VReg]struct{}{9999: {}},
liveOuts: map[VReg]struct{}{9999: {}},
lastUses: makeVRegTable(nil),
},
2: {
liveIns: map[VReg]struct{}{9999: {}},
liveOuts: map[VReg]struct{}{9999: {}},
lastUses: makeVRegTable(nil),
},
3: {
liveIns: map[VReg]struct{}{9999: {}},
lastUses: makeVRegTable(map[VReg]programCounter{9999: pcUseOffset}),
},
4: {
liveIns: map[VReg]struct{}{9999: {}},
liveOuts: map[VReg]struct{}{9999: {}},
lastUses: makeVRegTable(nil),
},
5: {lastUses: makeVRegTable(nil)},
6: {
liveIns: map[VReg]struct{}{9999: {}},
liveOuts: map[VReg]struct{}{9999: {}},
lastUses: makeVRegTable(nil),
},
},
},
{
// -----+
// v |
@@ -321,10 +408,14 @@ func TestAllocator_livenessAnalysis(t *testing.T) {
// +----+
name: "Fig. 9.2 in paper",
setup: func() Function {
b0 := newMockBlock(0, newMockInstr().def(99999)).entry()
b0 := newMockBlock(0,
newMockInstr().def(99999),
newMockInstr().def(phiVReg).use(111).asCopy(),
).entry()
b1 := newMockBlock(1, newMockInstr().use(99999))
b2 := newMockBlock(2)
b3 := newMockBlock(3)
b1.blockParam(phiVReg)
b2 := newMockBlock(2, newMockInstr().def(88888).use(phiVReg, phiVReg))
b3 := newMockBlock(3, newMockInstr().def(phiVReg).use(88888).asCopy())
b4 := newMockBlock(4)
b1.addPred(b0)
b1.addPred(b2)
@@ -332,145 +423,86 @@ func TestAllocator_livenessAnalysis(t *testing.T) {
b2.addPred(b3)
b3.addPred(b2)
b4.addPred(b3)
return newMockFunction(b0, b1, b2, b3, b4)
b1.loop(b2)
b2.loop(b3)
f := newMockFunction(b0, b1, b2, b3, b4)
f.loopNestingForestRoots(b1)
return f
},
exp: map[int]*blockInfo{
0: {
defs: map[VReg]programCounter{99999: pcDefOffset},
liveOuts: map[VReg]struct{}{99999: {}},
lastUses: makeVRegTable(nil),
defs: map[VReg]programCounter{99999: pcDefOffset, phiVReg: pcStride + pcDefOffset},
liveOuts: map[VReg]struct{}{99999: {}, phiVReg: {}},
liveIns: map[VReg]struct{}{111: {}},
lastUses: makeVRegTable(map[VReg]programCounter{111: pcStride + pcUseOffset}),
},
1: {
liveIns: map[VReg]struct{}{99999: {}},
liveOuts: map[VReg]struct{}{99999: {}},
defs: map[VReg]programCounter{phiVReg: 0},
liveIns: map[VReg]struct{}{99999: {}, phiVReg: {}},
liveOuts: map[VReg]struct{}{99999: {}, phiVReg: {}},
lastUses: makeVRegTable(map[VReg]programCounter{99999: pcUseOffset}),
},
2: {
liveIns: map[VReg]struct{}{99999: {}},
liveOuts: map[VReg]struct{}{99999: {}},
lastUses: makeVRegTable(nil),
liveIns: map[VReg]struct{}{99999: {}, phiVReg: {}},
liveOuts: map[VReg]struct{}{99999: {}, 88888: {}, phiVReg: {}},
defs: map[VReg]programCounter{88888: pcDefOffset},
lastUses: makeVRegTable(map[VReg]programCounter{phiVReg: pcUseOffset}),
},
3: {
liveIns: map[VReg]struct{}{99999: {}},
liveOuts: map[VReg]struct{}{99999: {}},
lastUses: makeVRegTable(nil),
liveIns: map[VReg]struct{}{99999: {}, phiVReg: {}, 88888: {}},
liveOuts: map[VReg]struct{}{99999: {}, phiVReg: {}},
defs: map[VReg]programCounter{phiVReg: pcDefOffset},
lastUses: makeVRegTable(map[VReg]programCounter{88888: pcUseOffset}),
},
4: {
lastUses: makeVRegTable(nil),
},
},
},
// 2
// ^ +----+
// | v |
// 0 -> 1 -> 3 -> 4 -> 5 -> 6 -> 9
// ^ | ^ |
// | v | |
// | 7 -> 8 ---+ |
// | ^ | |
// | +----+ |
// +------------------------+
{
name: "Fig. 9.1 in paper",
setup: func() Function {
b0 := newMockBlock(0).entry()
b1 := newMockBlock(1)
b2 := newMockBlock(2)
b3 := newMockBlock(3,
newMockInstr().def(100),
)
b4 := newMockBlock(4)
b5 := newMockBlock(5,
newMockInstr().use(100),
)
b6 := newMockBlock(6)
b7 := newMockBlock(7)
b8 := newMockBlock(8)
b9 := newMockBlock(9)
b1.addPred(b0)
b1.addPred(b9)
b2.addPred(b1)
b3.addPred(b1)
b4.addPred(b3)
b5.addPred(b4)
b5.addPred(b6)
b5.addPred(b8)
b6.addPred(b5)
b7.addPred(b3)
b7.addPred(b8)
b8.addPred(b7)
b9.addPred(b6)
return newMockFunction(b0, b1, b2, b3, b4, b7, b8, b5, b6, b9)
},
exp: map[int]*blockInfo{
0: {
lastUses: makeVRegTable(nil),
},
1: {
lastUses: makeVRegTable(nil),
},
2: {
lastUses: makeVRegTable(nil),
},
3: {
defs: map[VReg]programCounter{100: pcDefOffset},
liveOuts: map[VReg]struct{}{100: {}},
lastUses: makeVRegTable(nil),
},
4: {
liveIns: map[VReg]struct{}{100: {}},
liveOuts: map[VReg]struct{}{100: {}},
lastUses: makeVRegTable(nil),
},
5: {
liveIns: map[VReg]struct{}{100: {}},
liveOuts: map[VReg]struct{}{100: {}},
lastUses: makeVRegTable(map[VReg]programCounter{100: pcUseOffset}),
},
6: {
liveIns: map[VReg]struct{}{100: {}},
liveOuts: map[VReg]struct{}{100: {}},
lastUses: makeVRegTable(nil),
},
7: {
liveIns: map[VReg]struct{}{100: {}},
liveOuts: map[VReg]struct{}{100: {}},
lastUses: makeVRegTable(nil),
},
8: {
liveIns: map[VReg]struct{}{100: {}},
liveOuts: map[VReg]struct{}{100: {}},
lastUses: makeVRegTable(nil),
},
9: {
lastUses: makeVRegTable(nil),
},
},
},
} {
tc := tc
t.Run(tc.name, func(t *testing.T) {
f := tc.setup()
a := NewAllocator(&RegisterInfo{})
a := NewAllocator(&RegisterInfo{
RealRegName: func(r RealReg) string {
return fmt.Sprintf("r%d", r)
},
})
a.livenessAnalysis(f)
for blockID := range a.blockInfos {
actual := a.blockInfos[blockID]
exp := tc.exp[blockID]
initMapInInfo(exp)
saved := actual.intervalMng
actual.intervalMng = nil // Don't compare intervalManager.
require.Equal(t, exp, actual, "\n[exp for block[%d]]\n%v\n[actual for block[%d]]\n%v", blockID, exp, blockID, actual)
actual.intervalMng = saved
t.Run(fmt.Sprintf("block_id=%d", blockID), func(t *testing.T) {
actual := a.blockInfos[blockID]
exp := tc.exp[blockID]
initMapInInfo(exp)
fmt.Printf("\n[exp for block[%d]]\n%v\n[actual for block[%d]]\n%v\n",
blockID, exp.Format(a.regInfo), blockID, actual.Format(a.regInfo))
require.Equal(t, exp.liveOuts, actual.liveOuts, "live outs")
require.Equal(t, exp.liveIns, actual.liveIns, "live ins")
require.Equal(t, exp.defs, actual.defs, "defs")
for i := range exp.realRegUses {
_exp, _actual := exp.realRegUses[i], actual.realRegUses[i]
sort.Slice(_exp, func(i, j int) bool {
return _exp[i] < _exp[j]
})
sort.Slice(_actual, func(i, j int) bool {
return _actual[i] < _actual[j]
})
require.Equal(t, _exp, _actual, "real reg use[%d]", i)
}
for i := range exp.realRegDefs {
_exp, _actual := exp.realRegDefs[i], actual.realRegDefs[i]
sort.Slice(_exp, func(i, j int) bool {
return _exp[i] < _exp[j]
})
sort.Slice(_actual, func(i, j int) bool {
return _actual[i] < _actual[j]
})
require.Equal(t, _exp, _actual, "real defs[%d]", i)
}
require.Equal(t, exp.lastUses, actual.lastUses, "last uses")
})
}
// Sanity check: buildLiveRanges should not panic.

View File

@@ -52,14 +52,30 @@ type BasicBlock interface {
// Valid is true if this block is still valid even after optimizations.
Valid() bool
// BeginPredIterator returns the first predecessor of this block.
BeginPredIterator() BasicBlock
// NextPredIterator returns the next predecessor of this block.
NextPredIterator() BasicBlock
// Preds returns the number of predecessors of this block.
Preds() int
// Pred returns the i-th predecessor of this block.
Pred(i int) BasicBlock
// Succs returns the number of successors of this block.
Succs() int
// Succ returns the i-th successor of this block.
Succ(i int) BasicBlock
// LoopHeader returns true if this block is a loop header.
LoopHeader() bool
// LoopNestingForestChildren returns the children of this block in the loop nesting forest.
LoopNestingForestChildren() []BasicBlock
}
type (
@@ -93,6 +109,10 @@ type (
// This is modified during the subPassLoopDetection pass.
loopHeader bool
// loopNestingForestChildren holds the children of this block in the loop nesting forest.
// Non-empty if and only if this block is a loop header (i.e. loopHeader=true)
loopNestingForestChildren []BasicBlock
// reversePostOrder is used to sort all the blocks in the function in reverse post order.
// This is used in builder.LayoutBlocks.
reversePostOrder int
@@ -234,6 +254,16 @@ func (bb *basicBlock) Pred(i int) BasicBlock {
return bb.preds[i].blk
}
// Succs implements BasicBlock.Succs.
// It reports the number of successors, i.e. the length of bb.success.
func (bb *basicBlock) Succs() int {
return len(bb.success)
}
// Succ implements BasicBlock.Succ.
// It returns the i-th entry of bb.success. The index is not checked here, so i must
// satisfy 0 <= i < Succs(); otherwise the slice access panics.
func (bb *basicBlock) Succ(i int) BasicBlock {
return bb.success[i]
}
// Root implements BasicBlock.Root.
func (bb *basicBlock) Root() *Instruction {
return bb.rootInstr
@@ -256,6 +286,7 @@ func resetBasicBlock(bb *basicBlock) {
bb.unknownValues = make(map[Variable]Value)
bb.lastDefinitions = make(map[Variable]Value)
bb.reversePostOrder = -1
bb.loopNestingForestChildren = bb.loopNestingForestChildren[:0]
}
// addPred adds a predecessor to this block specified by the branch instruction.
@@ -342,3 +373,13 @@ func (bb *basicBlock) validate(b *builder) {
func (bb *basicBlock) String() string {
// The textual form of a block is just its numeric ID in decimal.
return strconv.Itoa(int(bb.id))
}
// LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren.
// The returned slice is bb's own loopNestingForestChildren slice, not a copy.
func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock {
return bb.loopNestingForestChildren
}
// LoopHeader implements BasicBlock.LoopHeader.
// It reports the current value of the loopHeader flag stored on this block.
func (bb *basicBlock) LoopHeader() bool {
return bb.loopHeader
}

View File

@@ -121,6 +121,9 @@ type Builder interface {
// SetCurrentSourceOffset sets the current source offset. The incoming instruction will be annotated with this offset.
SetCurrentSourceOffset(line SourceOffset)
// LoopNestingForestRoots returns the roots of the loop nesting forest.
LoopNestingForestRoots() []BasicBlock
}
// NewBuilder returns a new Builder implementation.
@@ -167,6 +170,9 @@ type builder struct {
// The index is blockID of the BasicBlock.
dominators []*basicBlock
// loopNestingForestRoots are the roots of the loop nesting forest.
loopNestingForestRoots []BasicBlock
// The following are used for optimization passes/deterministic compilation.
instStack []*Instruction
blkVisited map[*basicBlock]int
@@ -208,6 +214,7 @@ func (b *builder) Init(s *Signature) {
b.blkStack = b.blkStack[:0]
b.blkStack2 = b.blkStack2[:0]
b.dominators = b.dominators[:0]
b.loopNestingForestRoots = b.loopNestingForestRoots[:0]
for i := 0; i < b.basicBlocksPool.Allocated(); i++ {
blk := b.basicBlocksPool.View(i)
@@ -249,6 +256,7 @@ func (b *builder) AnnotateValue(value Value, a string) {
// AllocateInstruction implements Builder.AllocateInstruction.
// The instruction is taken from instructionsPool and gets its id assigned before it is returned.
func (b *builder) AllocateInstruction() *Instruction {
instr := b.instructionsPool.Allocate()
// The id is read from the pool's Allocated() counter after this allocation has happened.
// NOTE(review): presumably that count already includes instr, which would make ids start
// at 1 rather than 0 — confirm against the pool implementation.
instr.id = b.instructionsPool.Allocated()
return instr
}
@@ -827,12 +835,6 @@ func (b *builder) LayoutBlocks() {
bs = append(bs, blk.Name())
}
fmt.Println("ordered blocks: ", strings.Join(bs, ", "))
bs = bs[:0]
for visited := range b.blkVisited {
bs = append(bs, visited.Name())
}
sort.Slice(bs, func(i, j int) bool { return bs[i] < bs[j] })
fmt.Println("visited blocks: ", strings.Join(bs, ", "))
}
if wazevoapi.SSAValidationEnabled {
@@ -844,6 +846,9 @@ func (b *builder) LayoutBlocks() {
}
}
// Critical edges are split, so we fix the loop nesting forest.
buildLoopNestingForest(b)
// Reuse the stack for the next iteration.
b.blkStack2 = uninsertedTrampolines[:0]
@@ -966,6 +971,11 @@ func (b *builder) splitCriticalEdge(pred, succ *basicBlock, predInfo *basicBlock
// where trampoline is a new basic block which is created to split the critical edge.
trampoline := b.allocateBasicBlock()
if int(trampoline.id) >= len(b.dominators) {
b.dominators = append(b.dominators, make([]*basicBlock, trampoline.id+1)...)
}
b.dominators[trampoline.id] = pred
originalBranch := predInfo.branch
// Replace originalBranch with the newBranch.
@@ -1034,3 +1044,8 @@ func (b *builder) InsertUndefined() {
instr.opcode = OpcodeUndefined
b.InsertInstruction(instr)
}
// LoopNestingForestRoots implements Builder.LoopNestingForestRoots.
// The returned slice is the builder's own loopNestingForestRoots slice, not a copy.
func (b *builder) LoopNestingForestRoots() []BasicBlock {
return b.loopNestingForestRoots
}

View File

@@ -341,7 +341,7 @@ func TestBuilder_LayoutBlocks(t *testing.T) {
b.Seal(b2)
b.Seal(b3)
},
exp: []BasicBlockID{0, 2, 1, 3},
exp: []BasicBlockID{0, 1, 2, 3},
},
{
name: "loop towards loop header in fallthrough",
@@ -462,7 +462,7 @@ func TestBuilder_LayoutBlocks(t *testing.T) {
b.Seal(b5)
},
// The trampoline 6 is placed right after 4, which is the hot path of the loop.
exp: []BasicBlockID{0, 1, 2, 3, 4, 6, 5},
exp: []BasicBlockID{0, 1, 3, 2, 4, 6, 5},
},
{
name: "multiple critical edges",

View File

@@ -16,6 +16,8 @@ type Opcode uint32
// for all instructions, and therefore each field has different meaning
// depending on Opcode.
type Instruction struct {
// id is the unique ID of this instruction which ascends from 0 following the order of program.
id int
opcode Opcode
u1, u2 uint64
v Value

View File

@@ -2,6 +2,7 @@ package ssa
import (
"fmt"
"sort"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
@@ -13,6 +14,7 @@ import (
// Note that passes suffixed with "Opt" are the optimization passes, meaning that they edit the instructions and blocks
// while the other passes are not, like passEstimateBranchProbabilities does not edit them, but only calculates the additional information.
func (b *builder) RunPasses() {
passSortSuccessors(b)
passDeadBlockEliminationOpt(b)
passRedundantPhiEliminationOpt(b)
// The result of passCalculateImmediateDominators will be used by various passes below.
@@ -350,3 +352,24 @@ func passNopInstElimination(b *builder) {
}
}
}
// passSortSuccessors stably reorders the successor list of every block allocated in the builder's pool.
//
// Ordering rules applied by the comparator, in priority order:
//   - a return block is placed after the successor it is compared with;
//   - if either successor has no root instruction (only expected in tests), the comparator reports "less";
//   - otherwise successors are ordered by the id of their root instruction, i.e. the natural program order.
//
// NOTE(review): the comparator answers true for both (i, j) and (j, i) when both blocks are return blocks
// or when a root instruction is nil, so it is not a strict weak ordering in those cases.
func passSortSuccessors(b *builder) {
	for idx := 0; idx < b.basicBlocksPool.Allocated(); idx++ {
		blk := b.basicBlocksPool.View(idx)
		sort.SliceStable(blk.success, func(x, y int) bool {
			lhs, rhs := blk.success[x], blk.success[y]
			switch {
			case rhs.ReturnBlock():
				return true
			case lhs.ReturnBlock():
				return false
			case lhs.rootInstr == nil || rhs.rootInstr == nil: // For testing.
				return true
			default:
				return lhs.rootInstr.id < rhs.rootInstr.id
			}
		})
	}
}

View File

@@ -1,5 +1,12 @@
package ssa
import (
"fmt"
"strings"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
// passCalculateImmediateDominators calculates immediate dominators for each basic block.
// The result is stored in b.dominators. This makes it possible for the following passes to
// use builder.isDominatedBy to check if a block is dominated by another block.
@@ -156,3 +163,39 @@ func subPassLoopDetection(b *builder) {
}
}
}
// buildLoopNestingForest computes the loop nesting forest of the function and records it on the builder
// (b.loopNestingForestRoots) and on the loop header blocks (loopNestingForestChildren).
// It reads b.dominators and the loopHeader flags, and since it depends on the final shape of the CFG,
// it must run after branch (critical edge) splitting.
func buildLoopNestingForest(b *builder) {
	entry, idoms := b.entryBlk(), b.dominators
	for _, cur := range b.reversePostOrderedBasicBlocks {
		// Walk up the dominator chain, starting at cur's dominator, until a loop header
		// or the entry block is reached.
		anc := idoms[cur.id]
		for !anc.loopHeader && anc != entry {
			anc = idoms[anc.id]
		}

		switch {
		case anc == entry:
			// The walk ended at the entry block: cur is a root if it is a loop header itself,
			// and is not part of the forest otherwise.
			if cur.loopHeader {
				b.loopNestingForestRoots = append(b.loopNestingForestRoots, cur)
			}
		case anc.loopHeader:
			// The walk stopped at a loop header, which becomes the parent of cur.
			anc.loopNestingForestChildren = append(anc.loopNestingForestChildren, cur)
		}
	}

	if !wazevoapi.SSALoggingEnabled {
		return
	}
	for _, r := range b.loopNestingForestRoots {
		printLoopNestingForest(r.(*basicBlock), 0)
	}
}
// printLoopNestingForest writes the subtree of the loop nesting forest rooted at root to stdout.
// The root line is prefixed with depth tabs and each child line with depth+1 tabs; children that
// are loop headers are printed recursively at depth+2.
func printLoopNestingForest(root *basicBlock, depth int) {
	rootIndent := strings.Repeat("\t", depth)
	fmt.Println(rootIndent, "loop nesting forest root:", root.ID())

	for _, c := range root.loopNestingForestChildren {
		childIndent := strings.Repeat("\t", depth+1)
		fmt.Println(childIndent, "child:", c.ID())
		if !c.LoopHeader() {
			continue
		}
		printLoopNestingForest(c.(*basicBlock), depth+2)
	}
}

View File

@@ -1,6 +1,7 @@
package ssa
import (
"sort"
"testing"
"github.com/tetratelabs/wazero/internal/testing/require"
@@ -505,6 +506,22 @@ func TestBuilder_passCalculateImmediateDominators(t *testing.T) {
},
expLoops: map[BasicBlockID]struct{}{1: {}, 6: {}},
},
{
name: "merge after loop",
edges: edgesCase{
0: {3, 1},
1: {2},
2: {1, 3},
3: {4},
},
expDoms: map[BasicBlockID]BasicBlockID{
1: 0,
2: 1,
3: 0,
4: 3,
},
expLoops: map[BasicBlockID]struct{}{1: {}},
},
} {
tc := tc
t.Run(tc.name, func(t *testing.T) {
@@ -524,3 +541,142 @@ func TestBuilder_passCalculateImmediateDominators(t *testing.T) {
})
}
}
// TestBuildLoopNestingForest builds small CFGs from edge lists, runs passCalculateImmediateDominators
// followed by buildLoopNestingForest, and compares the resulting forest roots and per-block children
// (both sorted by block ID) against the expectation of each case.
func TestBuildLoopNestingForest(t *testing.T) {
	type expLoopNestingForest struct {
		roots    []BasicBlockID
		children map[BasicBlockID][]BasicBlockID
	}

	// sortedIDs collects the IDs of the given blocks in ascending order.
	// The result stays nil when blks is empty so that it compares equal to an absent expectation.
	sortedIDs := func(blks []BasicBlock) []BasicBlockID {
		var ids []BasicBlockID
		for _, blk := range blks {
			ids = append(ids, blk.(*basicBlock).id)
		}
		sort.Slice(ids, func(i, j int) bool {
			return ids[i] < ids[j]
		})
		return ids
	}

	testCases := []struct {
		name   string
		edges  edgesCase
		expLNF expLoopNestingForest
	}{
		{
			name: "linear",
			// 0 -> 1 -> 2 -> 3 -> 4
			edges: edgesCase{
				0: {1},
				1: {2},
				2: {3},
				3: {4},
			},
		},
		{
			name: "loop",
			// 0 -> 1 -> 2
			//      ^    |
			//      |    v
			//      |--- 3
			edges: edgesCase{
				0: {1},
				1: {2},
				2: {3},
				3: {1},
			},
			expLNF: expLoopNestingForest{
				roots: []BasicBlockID{1},
				children: map[BasicBlockID][]BasicBlockID{
					1: {2, 3},
				},
			},
		},
		{
			name: "two independent loops",
			//        0
			//        |
			//        v
			//        1 --> 2 --> 3
			//        ^           |
			//        v           v
			//        4 <---------5
			//        |
			//        v
			//        6 --> 7 --> 8
			//        ^           |
			//        v           v
			//        9 <---------10
			edges: map[BasicBlockID][]BasicBlockID{
				0:  {1},
				1:  {2, 4},
				2:  {3},
				3:  {5},
				4:  {1, 6},
				5:  {4},
				6:  {7, 9},
				7:  {8},
				8:  {10},
				9:  {6},
				10: {9},
			},
			expLNF: expLoopNestingForest{
				roots: []BasicBlockID{1},
				children: map[BasicBlockID][]BasicBlockID{
					1: {2, 3, 4, 5, 6},
					6: {7, 8, 9, 10},
				},
			},
		},
		{
			//
			//         +-----+
			//         |     |
			//         v     |
			//   0 --> 1 --> 2 --> 3 --> 4
			//               ^     |
			//               |     |
			//               +-----+
			//
			name: "Fig. 9.2", // in "SSA-based Compiler Design".
			edges: map[BasicBlockID][]BasicBlockID{
				0: {1},
				1: {2},
				2: {1, 3},
				3: {2, 4},
			},
			expLNF: expLoopNestingForest{
				roots: []BasicBlockID{1},
				children: map[BasicBlockID][]BasicBlockID{
					1: {2},
					2: {3, 4},
				},
			},
		},
	}

	for _, tc := range testCases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			b := constructGraphFromEdges(tc.edges)
			// The forest is derived from the dominators, so they have to be computed first.
			passCalculateImmediateDominators(b)
			buildLoopNestingForest(b)

			// Index every block by its ID.
			blocks := map[BasicBlockID]*basicBlock{}
			for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
				blocks[blk.id] = blk
			}

			// First the roots of the forest ...
			require.Equal(t, tc.expLNF.roots, sortedIDs(b.loopNestingForestRoots))

			// ... then the children recorded on each block.
			for expBlkID, blk := range blocks {
				expChildren := tc.expLNF.children[expBlkID]
				actualChildren := sortedIDs(blk.loopNestingForestChildren)
				require.Equal(t, expChildren, actualChildren, "block %d", expBlkID)
			}
		})
	}
}

View File

@@ -1,8 +1,6 @@
package ssa
import (
"sort"
)
import "sort"
// edgesCase is a map from BasicBlockID to its successors.
type edgesCase map[BasicBlockID][]BasicBlockID
@@ -34,14 +32,10 @@ func constructGraphFromEdges(edges edgesCase) (b *builder) {
blocks[blk.id] = blk
}
// To have a consistent behavior in test, we sort the pairs.
// To have a consistent behavior in test, we sort the pairs by fromID.
sort.Slice(pairs, func(i, j int) bool {
xf, yf := pairs[i][0], pairs[j][0]
xt, yt := pairs[i][1], pairs[j][1]
if xf < yf {
return true
}
return xt < yt
return xf < yf
})
// Add edges.

View File

@@ -327,7 +327,6 @@ var (
Module: SingleFunctionModule(i32_v, []byte{
wasm.OpcodeLoop, blockSignature_vv,
wasm.OpcodeBlock, blockSignature_vv,
wasm.OpcodeLocalGet, 0,
wasm.OpcodeBrIf, 2,
wasm.OpcodeEnd,