This removes unnecessary code paths in various places;
the benchmark results are below:
```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
Compilation/wazero-10 1.634 ± 0% 1.626 ± 0% -0.51% (p=0.002 n=6)
Compilation/zig-10 3.588 ± 0% 3.538 ± 2% ~ (p=0.065 n=6)
Compilation/zz-10 15.25 ± 0% 14.87 ± 1% -2.46% (p=0.002 n=6)
geomean 4.472 4.406 -1.46%
│ old.txt │ new.txt │
│ B/op │ B/op vs base │
Compilation/wazero-10 271.2Mi ± 0% 271.2Mi ± 0% ~ (p=1.000 n=6)
Compilation/zig-10 596.3Mi ± 0% 596.3Mi ± 0% ~ (p=0.699 n=6)
Compilation/zz-10 528.9Mi ± 0% 528.9Mi ± 0% ~ (p=0.818 n=6)
geomean 440.6Mi 440.6Mi +0.00%
│ old.txt │ new.txt │
│ allocs/op │ allocs/op vs base │
Compilation/wazero-10 448.5k ± 0% 448.5k ± 0% ~ (p=0.937 n=6)
Compilation/zig-10 274.8k ± 0% 274.7k ± 0% ~ (p=1.000 n=6)
Compilation/zz-10 618.3k ± 0% 618.4k ± 0% ~ (p=0.818 n=6)
geomean 423.9k 423.9k -0.00%
```
Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
1196 lines
37 KiB
Go
1196 lines
37 KiB
Go
// Package regalloc performs register allocation. The algorithm can work on any ISA by implementing the interfaces in
|
|
// api.go.
|
|
//
|
|
// References:
|
|
// - https://web.stanford.edu/class/archive/cs/cs143/cs143.1128/lectures/17/Slides17.pdf
|
|
// - https://en.wikipedia.org/wiki/Chaitin%27s_algorithm
|
|
// - https://llvm.org/ProjectsWithLLVM/2004-Fall-CS426-LS.pdf
|
|
// - https://pfalcon.github.io/ssabook/latest/book-full.pdf: Chapter 9. for liveness analysis.
|
|
// - https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
|
|
package regalloc
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
"strings"
|
|
|
|
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
|
|
)
|
|
|
|
// NewAllocator returns a new Allocator.
|
|
func NewAllocator[I Instr, B Block[I]](allocatableRegs *RegisterInfo) Allocator[I, B] {
|
|
a := Allocator[I, B]{
|
|
regInfo: allocatableRegs,
|
|
phiDefInstListPool: wazevoapi.NewPool[phiDefInstList[I]](resetPhiDefInstList[I]),
|
|
blockStates: wazevoapi.NewIDedPool[blockState[I, B]](resetBlockState[I, B]),
|
|
}
|
|
a.state.vrStates = wazevoapi.NewIDedPool[vrState[I, B]](resetVrState[I, B])
|
|
a.state.reset()
|
|
for _, regs := range allocatableRegs.AllocatableRegisters {
|
|
for _, r := range regs {
|
|
a.allocatableSet = a.allocatableSet.add(r)
|
|
}
|
|
}
|
|
return a
|
|
}
|
|
|
|
type (
	// RegisterInfo holds the statically-known ISA-specific register information.
	RegisterInfo struct {
		// AllocatableRegisters is a 2D array of allocatable RealReg, indexed by regTypeNum and regNum.
		// The order matters: the first element is the most preferred one when allocating.
		AllocatableRegisters [NumRegType][]RealReg
		// CalleeSavedRegisters is the set of registers preserved across calls by the callee.
		CalleeSavedRegisters RegSet
		// CallerSavedRegisters is the set of registers clobbered by calls (saved by the caller).
		CallerSavedRegisters RegSet
		// RealRegToVReg maps a RealReg to the pre-colored VReg backing it.
		RealRegToVReg []VReg
		// RealRegName returns the name of the given RealReg for debugging.
		RealRegName func(r RealReg) string
		// RealRegType returns the RegType (e.g. int vs float) of the given RealReg.
		RealRegType func(r RealReg) RegType
	}

	// Allocator is a register allocator.
	Allocator[I Instr, B Block[I]] struct {
		// regInfo is static per ABI/ISA, and is initialized by the machine during Machine.PrepareRegisterAllocator.
		regInfo *RegisterInfo
		// allocatableSet is a set of allocatable RealReg derived from regInfo. Static per ABI/ISA.
		allocatableSet RegSet
		// allocatedCalleeSavedRegs collects the callee-saved registers actually used; reset per function.
		allocatedCalleeSavedRegs []VReg
		// vs is a scratch buffer handed to Instr.Uses/Defs and Function.BlockParams to avoid allocations.
		vs []VReg
		// ss is a scratch buffer of vrState pointers reused by liveness analysis and allocBlock.
		ss []*vrState[I, B]
		// copies records the copy instructions seen in the current block (see allocBlock).
		copies []_copy[I, B]
		// phiDefInstListPool allocates linked-list nodes tracking phi-defining instructions.
		phiDefInstListPool wazevoapi.Pool[phiDefInstList[I]]

		// Followings are re-used during various places.
		blks  []B       // worklist of loop headers in loopTreeDFS.
		reals []RealReg // reused as the per-instruction kill set in allocBlock.

		// Following two fields are updated while iterating the blocks in the reverse postorder.
		state       state[I, B]
		blockStates wazevoapi.IDedPool[blockState[I, B]]
	}

	// _copy represents a source and destination pair of a copy instruction.
	_copy[I Instr, B Block[I]] struct {
		src   *vrState[I, B]
		dstID VRegID
	}

	// programCounter represents an opaque index into the program which is used to represents a LiveInterval of a VReg.
	programCounter int32

	// state is the allocation state shared across blocks; reset per function.
	state[I Instr, B Block[I]] struct {
		// argRealRegs holds the real-register VRegs that carry the function arguments.
		argRealRegs []VReg
		// regsInUse maps each occupied RealReg to the vrState currently holding it.
		regsInUse regInUseSet[I, B]
		// vrStates is the per-VReg allocation state, indexed by VRegID.
		vrStates wazevoapi.IDedPool[vrState[I, B]]

		// currentBlockID is the ID of the block being processed; -1 when none.
		currentBlockID int32

		// allocatedRegSet is a set of RealReg that are allocated during the allocation phase. This is reset per function.
		allocatedRegSet RegSet
	}

	// blockState is the per-block allocation state.
	blockState[I Instr, B Block[I]] struct {
		// liveIns is a list of VReg that are live at the beginning of the block.
		liveIns []*vrState[I, B]
		// seen is true if the block is visited during the liveness analysis.
		seen bool
		// visited is true if the block is visited during the allocation phase.
		visited bool
		// startFromPredIndex is the index of the predecessor whose end state this
		// block inherited its start state from; -1 until finalizeStartReg runs.
		startFromPredIndex int
		// startRegs is a list of RealReg that are used at the beginning of the block. This is used to fix the merge edges.
		startRegs regInUseSet[I, B]
		// endRegs is a list of RealReg that are used at the end of the block. This is used to fix the merge edges.
		endRegs regInUseSet[I, B]
	}

	// vrState tracks the allocation state of a single virtual register.
	vrState[I Instr, B Block[I]] struct {
		// v is the VReg this state belongs to.
		v VReg
		// r is the RealReg currently assigned to v, or RealRegInvalid if none.
		r RealReg
		// defInstr is the instruction that defines this value. If this is the phi value and not the entry block, this is nil.
		defInstr I
		// defBlk is the block that defines this value. If this is the phi value, this is the block whose arguments contain this value.
		defBlk B
		// lca = lowest common ancestor. This is the block that is the lowest common ancestor of all the blocks that
		// reloads this value. This is used to determine the spill location. Only valid if spilled=true.
		lca B
		// lastUse is the program counter of the last use of this value. This changes while iterating the block, and
		// should not be used across the blocks as it becomes invalid. To check the validity, use lastUseUpdatedAtBlockID.
		lastUse                 programCounter
		lastUseUpdatedAtBlockID int32
		// spilled is true if this value is spilled i.e. the value is reload from the stack somewhere in the program.
		//
		// Note that this field is used during liveness analysis for different purpose. This is used to determine the
		// value is live-in or not.
		spilled bool
		// isPhi is true if this is a phi value.
		isPhi bool
		// desiredLoc is the location (register or stack) preferred for this value, if any.
		desiredLoc desiredLoc
		// phiDefInstList is a list of instructions that defines this phi value.
		// This is used to determine the spill location, and only valid if isPhi=true.
		*phiDefInstList[I]
	}

	// phiDefInstList is a linked list of instructions that defines a phi value.
	phiDefInstList[I Instr] struct {
		instr I
		v     VReg
		next  *phiDefInstList[I]
	}

	// desiredLoc represents a desired location for a VReg.
	desiredLoc uint16
	// desiredLocKind is a kind of desired location for a VReg.
	desiredLocKind uint16
)
|
|
|
|
const (
	// desiredLocKindUnspecified is a kind of desired location for a VReg that is not specified.
	desiredLocKindUnspecified desiredLocKind = iota
	// desiredLocKindStack is a kind of desired location for a VReg that is on the stack, only used for the phi values.
	desiredLocKindStack
	// desiredLocKindReg is a kind of desired location for a VReg that is in a register.
	desiredLocKindReg

	// desiredLocUnspecified is the packed desiredLoc carrying desiredLocKindUnspecified and no register payload.
	desiredLocUnspecified = desiredLoc(desiredLocKindUnspecified)
	// desiredLocStack is the packed desiredLoc carrying desiredLocKindStack.
	desiredLocStack = desiredLoc(desiredLocKindStack)
)
|
|
|
|
// newDesiredLocReg packs r into a desiredLoc: the kind (desiredLocKindReg)
// occupies the low two bits and the register number is stored in the bits above.
func newDesiredLocReg(r RealReg) desiredLoc {
	return desiredLoc(desiredLocKindReg) | desiredLoc(r<<2)
}
|
|
|
|
// realReg extracts the register payload stored in the bits above the 2-bit kind.
// Only meaningful when the kind is desiredLocKindReg.
func (d desiredLoc) realReg() RealReg {
	return RealReg(d >> 2)
}
|
|
|
|
// stack reports whether d requests the value to live on the stack,
// by checking the kind stored in the low two bits.
func (d desiredLoc) stack() bool {
	return d&3 == desiredLoc(desiredLocKindStack)
}
|
|
|
|
func resetPhiDefInstList[I Instr](l *phiDefInstList[I]) {
|
|
var nilInstr I
|
|
l.instr = nilInstr
|
|
l.next = nil
|
|
l.v = VRegInvalid
|
|
}
|
|
|
|
func (s *state[I, B]) dump(info *RegisterInfo) { //nolint:unused
|
|
fmt.Println("\t\tstate:")
|
|
fmt.Println("\t\t\targRealRegs:", s.argRealRegs)
|
|
fmt.Println("\t\t\tregsInUse", s.regsInUse.format(info))
|
|
fmt.Println("\t\t\tallocatedRegSet:", s.allocatedRegSet.format(info))
|
|
fmt.Println("\t\t\tused:", s.regsInUse.format(info))
|
|
var strs []string
|
|
for i := 0; i <= s.vrStates.MaxIDEncountered(); i++ {
|
|
vs := s.vrStates.Get(i)
|
|
if vs == nil {
|
|
continue
|
|
}
|
|
if vs.r != RealRegInvalid {
|
|
strs = append(strs, fmt.Sprintf("(v%d: %s)", vs.v.ID(), info.RealRegName(vs.r)))
|
|
}
|
|
}
|
|
fmt.Println("\t\t\tvrStates:", strings.Join(strs, ", "))
|
|
}
|
|
|
|
func (s *state[I, B]) reset() {
|
|
s.argRealRegs = s.argRealRegs[:0]
|
|
s.vrStates.Reset()
|
|
s.allocatedRegSet = RegSet(0)
|
|
s.regsInUse.reset()
|
|
s.currentBlockID = -1
|
|
}
|
|
|
|
func resetVrState[I Instr, B Block[I]](vs *vrState[I, B]) {
|
|
vs.v = VRegInvalid
|
|
vs.r = RealRegInvalid
|
|
var nilInstr I
|
|
vs.defInstr = nilInstr
|
|
var nilBlk B
|
|
vs.defBlk = nilBlk
|
|
vs.spilled = false
|
|
vs.lastUse = -1
|
|
vs.lastUseUpdatedAtBlockID = -1
|
|
vs.lca = nilBlk
|
|
vs.isPhi = false
|
|
vs.phiDefInstList = nil
|
|
vs.desiredLoc = desiredLocUnspecified
|
|
}
|
|
|
|
func (s *state[I, B]) getOrAllocateVRegState(v VReg) *vrState[I, B] {
|
|
st := s.vrStates.GetOrAllocate(int(v.ID()))
|
|
if st.v == VRegInvalid {
|
|
st.v = v
|
|
}
|
|
return st
|
|
}
|
|
|
|
func (s *state[I, B]) getVRegState(v VRegID) *vrState[I, B] {
|
|
return s.vrStates.Get(int(v))
|
|
}
|
|
|
|
func (s *state[I, B]) useRealReg(r RealReg, vr *vrState[I, B]) {
|
|
s.regsInUse.add(r, vr)
|
|
vr.r = r
|
|
s.allocatedRegSet = s.allocatedRegSet.add(r)
|
|
}
|
|
|
|
func (s *state[I, B]) releaseRealReg(r RealReg) {
|
|
current := s.regsInUse.get(r)
|
|
if current != nil {
|
|
s.regsInUse.remove(r)
|
|
current.r = RealRegInvalid
|
|
}
|
|
}
|
|
|
|
// recordReload records that the given VReg is reloaded in the given block.
// This is used to determine the spill location by tracking the lowest common ancestor of all the blocks that reloads the value.
func (vs *vrState[I, B]) recordReload(f Function[I, B], blk B) {
	vs.spilled = true
	var nilBlk B
	if lca := vs.lca; lca == nilBlk {
		// First reload seen for this value: the spill-location candidate is simply blk.
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("\t\tv%d is reloaded in blk%d,\n", vs.v.ID(), blk.ID())
		}
		vs.lca = blk
	} else if lca != blk {
		// Reloaded again in a different block: widen the candidate to the lowest
		// common ancestor of the previous candidate and blk.
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("\t\tv%d is reloaded in blk%d, lca=%d\n", vs.v.ID(), blk.ID(), vs.lca.ID())
		}
		vs.lca = f.LowestCommonAncestor(lca, blk)
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("updated lca=%d\n", vs.lca.ID())
		}
	}
}
|
|
|
|
// findOrSpillAllocatable returns a register from `allocatable` that is not in forbiddenMask,
// spilling the value currently held by the chosen register when no register is free.
// `preferred` is returned immediately when it is valid, not forbidden, and currently free.
// Panics when every candidate is forbidden or pinned by a real-register VReg.
func (a *Allocator[I, B]) findOrSpillAllocatable(s *state[I, B], allocatable []RealReg, forbiddenMask RegSet, preferred RealReg) (r RealReg) {
	r = RealRegInvalid
	// First, check if the preferredMask has any allocatable register.
	if preferred != RealRegInvalid && !forbiddenMask.has(preferred) && !s.regsInUse.has(preferred) {
		for _, candidateReal := range allocatable {
			// TODO: we should ensure the preferred register is in the allocatable set in the first place,
			// but right now, just in case, we check it here.
			if candidateReal == preferred {
				return preferred
			}
		}
	}

	// No free preferred register: scan for a free register, or pick a spill victim —
	// favoring the value whose last use is furthest away (or already past).
	var lastUseAt programCounter
	var spillVReg VReg
	for _, candidateReal := range allocatable {
		if forbiddenMask.has(candidateReal) {
			continue
		}

		using := s.regsInUse.get(candidateReal)
		if using == nil {
			// This is not used at this point.
			return candidateReal
		}

		// Real registers in use should not be spilled, so we skip them.
		// For example, if the register is used as an argument register, and it might be
		// spilled and not reloaded when it ends up being used as a temporary to pass
		// stack based argument.
		if using.v.IsRealReg() {
			continue
		}

		isPreferred := candidateReal == preferred

		// last == -1 means the value won't be used anymore.
		if last := using.lastUse; r == RealRegInvalid || isPreferred || last == -1 || (lastUseAt != -1 && last > lastUseAt) {
			lastUseAt = last
			r = candidateReal
			spillVReg = using.v
			if isPreferred {
				break
			}
		}
	}

	if r == RealRegInvalid {
		panic("not found any allocatable register")
	}

	if wazevoapi.RegAllocLoggingEnabled {
		fmt.Printf("\tspilling v%d when lastUseAt=%d and regsInUse=%s\n", spillVReg.ID(), lastUseAt, s.regsInUse.format(a.regInfo))
	}
	// Evict the victim; the caller is responsible for inserting the actual reload/spill code.
	s.releaseRealReg(r)
	return r
}
|
|
|
|
func (s *state[I, B]) findAllocatable(allocatable []RealReg, forbiddenMask RegSet) RealReg {
|
|
for _, r := range allocatable {
|
|
if !s.regsInUse.has(r) && !forbiddenMask.has(r) {
|
|
return r
|
|
}
|
|
}
|
|
return RealRegInvalid
|
|
}
|
|
|
|
// resetAt restores the register-in-use state to the end state of the given block
// (bs.endRegs), keeping only the assignments whose values are live-in to the
// current block.
func (s *state[I, B]) resetAt(bs *blockState[I, B]) {
	// Drop every currently-held register assignment.
	s.regsInUse.range_(func(_ RealReg, vs *vrState[I, B]) {
		vs.r = RealRegInvalid
	})
	s.regsInUse.reset()
	bs.endRegs.range_(func(r RealReg, vs *vrState[I, B]) {
		// Re-adopt the assignment only if the value is live-in to the current block
		// (lastUse was set to programCounterLiveIn for this block by updateLiveInVRState).
		if vs.lastUseUpdatedAtBlockID == s.currentBlockID && vs.lastUse == programCounterLiveIn {
			s.regsInUse.add(r, vs)
			vs.r = r
		}
	})
}
|
|
|
|
func resetBlockState[I Instr, B Block[I]](b *blockState[I, B]) {
|
|
b.seen = false
|
|
b.visited = false
|
|
b.endRegs.reset()
|
|
b.startRegs.reset()
|
|
b.startFromPredIndex = -1
|
|
b.liveIns = b.liveIns[:0]
|
|
}
|
|
|
|
func (b *blockState[I, B]) dump(a *RegisterInfo) {
|
|
fmt.Println("\t\tblockState:")
|
|
fmt.Println("\t\t\tstartRegs:", b.startRegs.format(a))
|
|
fmt.Println("\t\t\tendRegs:", b.endRegs.format(a))
|
|
fmt.Println("\t\t\tstartFromPredIndex:", b.startFromPredIndex)
|
|
fmt.Println("\t\t\tvisited:", b.visited)
|
|
}
|
|
|
|
// DoAllocation performs register allocation on the given Function.
|
|
func (a *Allocator[I, B]) DoAllocation(f Function[I, B]) {
|
|
a.livenessAnalysis(f)
|
|
a.alloc(f)
|
|
a.determineCalleeSavedRealRegs(f)
|
|
}
|
|
|
|
func (a *Allocator[I, B]) determineCalleeSavedRealRegs(f Function[I, B]) {
|
|
a.allocatedCalleeSavedRegs = a.allocatedCalleeSavedRegs[:0]
|
|
a.state.allocatedRegSet.Range(func(allocatedRealReg RealReg) {
|
|
if a.regInfo.CalleeSavedRegisters.has(allocatedRealReg) {
|
|
a.allocatedCalleeSavedRegs = append(a.allocatedCalleeSavedRegs, a.regInfo.RealRegToVReg[allocatedRealReg])
|
|
}
|
|
})
|
|
f.ClobberedRegisters(a.allocatedCalleeSavedRegs)
|
|
}
|
|
|
|
func (a *Allocator[I, B]) getOrAllocateBlockState(blockID int32) *blockState[I, B] {
|
|
return a.blockStates.GetOrAllocate(int(blockID))
|
|
}
|
|
|
|
// phiBlk returns the block that defines the given phi value, nil otherwise.
|
|
func (vs *vrState[I, B]) phiBlk() B {
|
|
if vs.isPhi {
|
|
return vs.defBlk
|
|
}
|
|
var nilBlk B
|
|
return nilBlk
|
|
}
|
|
|
|
const (
	// programCounterLiveIn is a sentinel lastUse value marking a VReg as live-in to the current block.
	programCounterLiveIn = math.MinInt32
	// programCounterLiveOut is a sentinel lastUse value marking a VReg as live-out of the current block.
	programCounterLiveOut = math.MaxInt32
)
|
|
|
|
// livenessAnalysis computes the live-in sets of each block (blockState.liveIns).
// The algorithm here is described in https://pfalcon.github.io/ssabook/latest/book-full.pdf Chapter 9.2.
//
// Blocks are walked in postorder; within each block instructions are walked in
// reverse, so a value is live-in when it is used before (re)definition. The
// vrState.spilled field is temporarily borrowed as the live/deleted flag.
func (a *Allocator[I, B]) livenessAnalysis(f Function[I, B]) {
	s := &a.state

	// Pre-allocate states for the pre-colored real-register VRegs.
	for i := VRegID(0); i < vRegIDReservedForRealNum; i++ {
		s.getOrAllocateVRegState(VReg(i).SetRealReg(RealReg(i)))
	}

	var nilBlk B
	var nilInstr I
	for blk := f.PostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.PostOrderBlockIteratorNext() {
		// We should gather phi value data.
		for _, p := range f.BlockParams(blk, &a.vs) {
			vs := s.getOrAllocateVRegState(p)
			vs.isPhi = true
			vs.defBlk = blk
		}

		blkID := blk.ID()
		info := a.getOrAllocateBlockState(blkID)

		a.ss = a.ss[:0]
		const (
			flagDeleted = false
			flagLive    = true
		)
		// Seed the working set with the live-ins of the already-seen successors
		// (excluding values that are phis of the successor itself).
		ns := blk.Succs()
		for i := 0; i < ns; i++ {
			succ := f.Succ(blk, i)
			if succ == nilBlk {
				continue
			}

			succID := succ.ID()
			succInfo := a.getOrAllocateBlockState(succID)
			if !succInfo.seen { // This means the back edge.
				continue
			}

			for _, st := range succInfo.liveIns {
				if st.phiBlk() != succ && st.spilled != flagLive { //nolint:gosimple
					// We use .spilled field to store the flag.
					st.spilled = flagLive
					a.ss = append(a.ss, st)
				}
			}
		}

		// Walk instructions backwards: a def kills liveness, a use revives it.
		for instr := blk.InstrRevIteratorBegin(); instr != nilInstr; instr = blk.InstrRevIteratorNext() {

			var use, def VReg
			var defIsPhi bool
			for _, def = range instr.Defs(&a.vs) {
				if !def.IsRealReg() {
					st := s.getOrAllocateVRegState(def)
					defIsPhi = st.isPhi
					// Note: We use .spilled field to store the flag.
					st.spilled = flagDeleted
				}
			}
			for _, use = range instr.Uses(&a.vs) {
				if !use.IsRealReg() {
					st := s.getOrAllocateVRegState(use)
					// Note: We use .spilled field to store the flag.
					if st.spilled != flagLive { //nolint:gosimple
						st.spilled = flagLive
						a.ss = append(a.ss, st)
					}
				}
			}

			if defIsPhi {
				if use.Valid() && use.IsRealReg() {
					// If the destination is a phi value, and the source is a real register, this is the beginning of the function.
					a.state.argRealRegs = append(a.state.argRealRegs, use)
				}
			}
		}

		// Everything still flagged live at the top of the block is live-in.
		for _, st := range a.ss {
			// We use .spilled field to store the flag.
			if st.spilled == flagLive { //nolint:gosimple
				info.liveIns = append(info.liveIns, st)
				st.spilled = false
			}
		}

		info.seen = true
	}

	// Propagate loop-header live-ins through the loop nesting forest.
	nrs := f.LoopNestingForestRoots()
	for i := 0; i < nrs; i++ {
		root := f.LoopNestingForestRoot(i)
		a.loopTreeDFS(f, root)
	}
}
|
|
|
|
// loopTreeDFS implements the Algorithm 9.3 in the book in an iterative way.
//
// It pushes each loop header's live-in values down to every child in the loop
// nesting forest, so values live across a loop are treated as live throughout it.
// The vrState.spilled field is again borrowed as a pending/done flag.
func (a *Allocator[I, B]) loopTreeDFS(f Function[I, B], entry B) {
	a.blks = a.blks[:0]
	a.blks = append(a.blks, entry)

	for len(a.blks) > 0 {
		// Pop the next loop header off the worklist.
		tail := len(a.blks) - 1
		loop := a.blks[tail]
		a.blks = a.blks[:tail]
		a.ss = a.ss[:0]
		const (
			flagDone    = false
			flagPending = true
		)
		// Gather the header's live-ins that are not its own phis.
		info := a.getOrAllocateBlockState(loop.ID())
		for _, st := range info.liveIns {
			if st.phiBlk() != loop {
				a.ss = append(a.ss, st)
				// We use .spilled field to store the flag.
				st.spilled = flagPending
			}
		}

		var siblingAddedView []*vrState[I, B]
		cn := loop.LoopNestingForestChildren()
		for i := 0; i < cn; i++ {
			child := f.LoopNestingForestChild(loop, i)
			childID := child.ID()
			childInfo := a.getOrAllocateBlockState(childID)

			if i == 0 {
				// First child: append the pending values (deduplicated against the
				// child's existing live-ins via the pending flag) and remember the
				// appended slice view so the siblings can copy it cheaply.
				begin := len(childInfo.liveIns)
				for _, st := range a.ss {
					// We use .spilled field to store the flag.
					if st.spilled == flagPending { //nolint:gosimple
						st.spilled = flagDone
						// TODO: deduplicate, though I don't think it has much impact.
						childInfo.liveIns = append(childInfo.liveIns, st)
					}
				}
				siblingAddedView = childInfo.liveIns[begin:]
			} else {
				// TODO: deduplicate, though I don't think it has much impact.
				childInfo.liveIns = append(childInfo.liveIns, siblingAddedView...)
			}

			if child.LoopHeader() {
				a.blks = append(a.blks, child)
			}
		}

		if cn == 0 {
			// If there's no forest child, we haven't cleared the .spilled field at this point.
			for _, st := range a.ss {
				st.spilled = false
			}
		}
	}
}
|
|
|
|
// alloc allocates registers for the given function by iterating the blocks in the reverse postorder.
// The algorithm here is derived from the Go compiler's allocator https://github.com/golang/go/blob/release-branch.go1.21/src/cmd/compile/internal/ssa/regalloc.go
// In short, this is a simply linear scan register allocation where each block inherits the register allocation state from
// one of its predecessors. Each block inherits the selected state and starts allocation from there.
// If there's a discrepancy in the end states between predecessors, the adjustments are made to ensure consistency after allocation is done (which we call "fixing merge state").
// The spill instructions (store into the dedicated slots) are inserted after all the allocations and fixing merge states. That is because
// at the point, we all know where the reloads happen, and therefore we can know the best place to spill the values. More precisely,
// the spill happens in the block that is the lowest common ancestor of all the blocks that reloads the value.
//
// All of these logics are almost the same as Go's compiler which has a dedicated description in the source file ^^.
func (a *Allocator[I, B]) alloc(f Function[I, B]) {
	// First we allocate each block in the reverse postorder (at least one predecessor should be allocated for each block).
	var nilBlk B
	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("========== allocating blk%d ========\n", blk.ID())
		}
		if blk.Entry() {
			// The entry block has no predecessor, so its start state must be finalized here.
			a.finalizeStartReg(f, blk)
		}
		a.allocBlock(f, blk)
	}
	// After the allocation, we all know the start and end state of each block. So we can fix the merge states.
	for blk := f.ReversePostOrderBlockIteratorBegin(); blk != nilBlk; blk = f.ReversePostOrderBlockIteratorNext() {
		a.fixMergeState(f, blk)
	}
	// Finally, we insert the spill instructions as we know all the places where the reloads happen.
	a.scheduleSpills(f)
}
|
|
|
|
func (a *Allocator[I, B]) updateLiveInVRState(liveness *blockState[I, B]) {
|
|
currentBlockID := a.state.currentBlockID
|
|
for _, vs := range liveness.liveIns {
|
|
vs.lastUse = programCounterLiveIn
|
|
vs.lastUseUpdatedAtBlockID = currentBlockID
|
|
}
|
|
}
|
|
|
|
func (a *Allocator[I, B]) finalizeStartReg(f Function[I, B], blk B) {
|
|
bID := blk.ID()
|
|
s := &a.state
|
|
currentBlkState := a.getOrAllocateBlockState(bID)
|
|
if currentBlkState.startFromPredIndex > -1 {
|
|
return
|
|
}
|
|
|
|
s.currentBlockID = bID
|
|
a.updateLiveInVRState(currentBlkState)
|
|
|
|
preds := blk.Preds()
|
|
var predState *blockState[I, B]
|
|
switch preds {
|
|
case 0: // This is the entry block.
|
|
case 1:
|
|
predID := f.Pred(blk, 0).ID()
|
|
predState = a.getOrAllocateBlockState(predID)
|
|
currentBlkState.startFromPredIndex = 0
|
|
default:
|
|
// TODO: there should be some better heuristic to choose the predecessor.
|
|
for i := 0; i < preds; i++ {
|
|
predID := f.Pred(blk, i).ID()
|
|
if _predState := a.getOrAllocateBlockState(predID); _predState.visited {
|
|
predState = _predState
|
|
currentBlkState.startFromPredIndex = i
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if predState == nil {
|
|
if !blk.Entry() {
|
|
panic(fmt.Sprintf("BUG: at lease one predecessor should be visited for blk%d", blk.ID()))
|
|
}
|
|
for _, u := range s.argRealRegs {
|
|
s.useRealReg(u.RealReg(), s.getVRegState(u.ID()))
|
|
}
|
|
currentBlkState.startFromPredIndex = 0
|
|
} else {
|
|
if wazevoapi.RegAllocLoggingEnabled {
|
|
fmt.Printf("allocating blk%d starting from blk%d (on index=%d) \n",
|
|
bID, f.Pred(blk, currentBlkState.startFromPredIndex).ID(), currentBlkState.startFromPredIndex)
|
|
}
|
|
s.resetAt(predState)
|
|
}
|
|
|
|
s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B]) {
|
|
currentBlkState.startRegs.add(allocated, v)
|
|
})
|
|
if wazevoapi.RegAllocLoggingEnabled {
|
|
fmt.Printf("finalized start reg for blk%d: %s\n", blk.ID(), currentBlkState.startRegs.format(a.regInfo))
|
|
}
|
|
}
|
|
|
|
// allocBlock performs register allocation for the instructions of blk, assuming
// its start state has already been finalized by finalizeStartReg.
//
// It proceeds in three passes over the block:
//  1. record the last use of each VReg and collect the copy instructions,
//  2. derive the desired end locations from the successors' start states and
//     propagate them backwards through the recorded copies,
//  3. assign registers instruction by instruction, spilling/reloading as needed.
func (a *Allocator[I, B]) allocBlock(f Function[I, B], blk B) {
	bID := blk.ID()
	s := &a.state
	currentBlkState := a.getOrAllocateBlockState(bID)
	s.currentBlockID = bID

	if currentBlkState.startFromPredIndex < 0 {
		panic("BUG: startFromPredIndex should be set in finalizeStartReg prior to allocBlock")
	}

	// Clears the previous state.
	s.regsInUse.range_(func(allocatedRealReg RealReg, vr *vrState[I, B]) { vr.r = RealRegInvalid })
	s.regsInUse.reset()
	// Then set the start state.
	currentBlkState.startRegs.range_(func(allocatedRealReg RealReg, vr *vrState[I, B]) { s.useRealReg(allocatedRealReg, vr) })

	// desiredUpdated tracks every vrState whose desiredLoc is set below, so it can
	// be cleared at the end of this function.
	desiredUpdated := a.ss[:0]

	// Update the last use of each VReg.
	a.copies = a.copies[:0] // Stores the copy instructions.
	var pc programCounter
	var nilInstr I
	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() {
		var useState *vrState[I, B]
		for _, use := range instr.Uses(&a.vs) {
			useState = s.getVRegState(use.ID())
			if !use.IsRealReg() {
				useState.lastUse = pc
			}
		}

		if instr.IsCopy() {
			def := instr.Defs(&a.vs)[0]
			a.copies = append(a.copies, _copy[I, B]{src: useState, dstID: def.ID()})
			r := def.RealReg()
			if r != RealRegInvalid {
				if !useState.isPhi { // TODO: no idea why do we need this.
					useState.desiredLoc = newDesiredLocReg(r)
					desiredUpdated = append(desiredUpdated, useState)
				}
			}
		}
		pc++
	}

	// Mark all live-out values by checking live-in of the successors.
	// While doing so, we also update the desired register values.
	var succ B
	var nilBlk B
	for i, ns := 0, blk.Succs(); i < ns; i++ {
		succ = f.Succ(blk, i)
		if succ == nilBlk {
			continue
		}

		succID := succ.ID()
		succState := a.getOrAllocateBlockState(succID)
		for _, st := range succState.liveIns {
			if st.phiBlk() != succ {
				st.lastUse = programCounterLiveOut
			}
		}

		if succState.startFromPredIndex > -1 {
			if wazevoapi.RegAllocLoggingEnabled {
				fmt.Printf("blk%d -> blk%d: start_regs: %s\n", bID, succID, succState.startRegs.format(a.regInfo))
			}
			// The successor's start state is already fixed: each value should end up
			// in the register the successor expects it in.
			succState.startRegs.range_(func(allocatedRealReg RealReg, vs *vrState[I, B]) {
				vs.desiredLoc = newDesiredLocReg(allocatedRealReg)
				desiredUpdated = append(desiredUpdated, vs)
			})
			// Successor block params without a desired register go to the stack.
			for _, p := range f.BlockParams(succ, &a.vs) {
				vs := s.getVRegState(p.ID())
				if vs.desiredLoc.realReg() == RealRegInvalid {
					vs.desiredLoc = desiredLocStack
					desiredUpdated = append(desiredUpdated, vs)
				}
			}
		}
	}

	// Propagate the desired register values from the end of the block to the beginning.
	for _, instr := range a.copies {
		defState := s.getVRegState(instr.dstID)
		desired := defState.desiredLoc.realReg()
		useState := instr.src
		if useState.phiBlk() != succ && useState.desiredLoc == desiredLocUnspecified {
			useState.desiredLoc = newDesiredLocReg(desired)
			desiredUpdated = append(desiredUpdated, useState)
		}
	}

	// Pass 3: the actual allocation, walking instructions forward.
	pc = 0
	for instr := blk.InstrIteratorBegin(); instr != nilInstr; instr = blk.InstrIteratorNext() {
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Println(instr)
		}

		var currentUsedSet RegSet
		killSet := a.reals[:0]

		// Gather the set of registers that will be used in the current instruction.
		uses := instr.Uses(&a.vs)
		for _, use := range uses {
			if use.IsRealReg() {
				r := use.RealReg()
				currentUsedSet = currentUsedSet.add(r)
				if a.allocatableSet.has(r) {
					killSet = append(killSet, r)
				}
			} else {
				vs := s.getVRegState(use.ID())
				if r := vs.r; r != RealRegInvalid {
					currentUsedSet = currentUsedSet.add(r)
				}
			}
		}

		// Assign (reloading if necessary) a register to every non-real use.
		for i, use := range uses {
			if !use.IsRealReg() {
				vs := s.getVRegState(use.ID())
				killed := vs.lastUse == pc
				r := vs.r

				if r == RealRegInvalid {
					r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[use.RegType()], currentUsedSet,
						// Prefer the desired register if it's available.
						vs.desiredLoc.realReg())
					vs.recordReload(f, blk)
					f.ReloadRegisterBefore(use.SetRealReg(r), instr)
					s.useRealReg(r, vs)
				}
				if wazevoapi.RegAllocLoggingEnabled {
					fmt.Printf("\ttrying to use v%v on %s\n", use.ID(), a.regInfo.RealRegName(r))
				}
				instr.AssignUse(i, use.SetRealReg(r))
				currentUsedSet = currentUsedSet.add(r)
				if killed {
					if wazevoapi.RegAllocLoggingEnabled {
						fmt.Printf("\tkill v%d with %s\n", use.ID(), a.regInfo.RealRegName(r))
					}
					killSet = append(killSet, r)
				}
			}
		}

		// Calls clobber the caller-saved registers (except the indirect-call address register).
		isIndirect := instr.IsIndirectCall()
		if instr.IsCall() || isIndirect {
			addr := RealRegInvalid
			if isIndirect {
				addr = a.vs[0].RealReg()
			}
			a.releaseCallerSavedRegs(addr)
		}

		for _, r := range killSet {
			s.releaseRealReg(r)
		}
		a.reals = killSet

		defs := instr.Defs(&a.vs)
		switch len(defs) {
		default:
			// Some instructions define multiple values on real registers.
			// E.g. call instructions (following calling convention) / div instruction on x64 that defines both rax and rdx.
			//
			// Note that currently I assume that such instructions define only the pre colored real registers, not the VRegs
			// that require allocations. If we need to support such case, we need to add the logic to handle it here,
			// though is there any such instruction?
			for _, def := range defs {
				if !def.IsRealReg() {
					panic("BUG: multiple defs should be on real registers")
				}
				r := def.RealReg()
				if s.regsInUse.has(r) {
					s.releaseRealReg(r)
				}
				s.useRealReg(r, s.getVRegState(def.ID()))
			}
		case 0:
		case 1:
			def := defs[0]
			vState := s.getVRegState(def.ID())
			if def.IsRealReg() {
				r := def.RealReg()
				if a.allocatableSet.has(r) {
					if s.regsInUse.has(r) {
						s.releaseRealReg(r)
					}
					s.useRealReg(r, vState)
				}
			} else {
				r := vState.r

				if desired := vState.desiredLoc.realReg(); desired != RealRegInvalid {
					if r != desired {
						if (vState.isPhi && vState.defBlk == succ) ||
							// If this is not a phi and it's already assigned a real reg,
							// this value has multiple definitions, hence we cannot assign the desired register.
							(!s.regsInUse.has(desired) && r == RealRegInvalid) {
							// If the phi value is passed via a real register, we force the value to be in the desired register.
							if wazevoapi.RegAllocLoggingEnabled {
								fmt.Printf("\t\tv%d is phi and desiredReg=%s\n", def.ID(), a.regInfo.RealRegName(desired))
							}
							if r != RealRegInvalid {
								// If the value is already in a different real register, we release it to change the state.
								// Otherwise, multiple registers might have the same values at the end, which results in
								// messing up the merge state reconciliation.
								s.releaseRealReg(r)
							}
							r = desired
							s.releaseRealReg(r)
							s.useRealReg(r, vState)
						}
					}
				}

				// Allocate a new real register if `def` is not currently assigned one.
				// It can happen when multiple instructions define the same VReg (e.g. const loads).
				if r == RealRegInvalid {
					if instr.IsCopy() {
						copySrc := instr.Uses(&a.vs)[0].RealReg()
						if a.allocatableSet.has(copySrc) && !s.regsInUse.has(copySrc) {
							r = copySrc
						}
					}
					if r == RealRegInvalid {
						typ := def.RegType()
						r = a.findOrSpillAllocatable(s, a.regInfo.AllocatableRegisters[typ], RegSet(0), RealRegInvalid)
					}
					s.useRealReg(r, vState)
				}
				dr := def.SetRealReg(r)
				instr.AssignDef(dr)
				if wazevoapi.RegAllocLoggingEnabled {
					fmt.Printf("\tdefining v%d with %s\n", def.ID(), a.regInfo.RealRegName(r))
				}
				if vState.isPhi {
					if vState.desiredLoc.stack() { // Stack based phi value.
						f.StoreRegisterAfter(dr, instr)
						// Release the real register as it's not used anymore.
						s.releaseRealReg(r)
					} else {
						// Only the register based phis are necessary to track the defining instructions
						// since the stack-based phis are already having stores inserted ^.
						n := a.phiDefInstListPool.Allocate()
						n.instr = instr
						n.next = vState.phiDefInstList
						n.v = dr
						vState.phiDefInstList = n
					}
				} else {
					vState.defInstr = instr
					vState.defBlk = blk
				}
			}
		}
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Println(instr)
		}
		pc++
	}

	// Snapshot the in-use registers as this block's end state.
	s.regsInUse.range_(func(allocated RealReg, v *vrState[I, B]) { currentBlkState.endRegs.add(allocated, v) })

	currentBlkState.visited = true
	if wazevoapi.RegAllocLoggingEnabled {
		currentBlkState.dump(a.regInfo)
	}

	// Reset the desired end location.
	for _, vs := range desiredUpdated {
		vs.desiredLoc = desiredLocUnspecified
	}
	a.ss = desiredUpdated[:0]

	for i := 0; i < blk.Succs(); i++ {
		succ := f.Succ(blk, i)
		if succ == nilBlk {
			continue
		}
		// If the successor is not visited yet, finalize the start state.
		a.finalizeStartReg(f, succ)
	}
}
|
|
|
|
func (a *Allocator[I, B]) releaseCallerSavedRegs(addrReg RealReg) {
|
|
s := &a.state
|
|
|
|
for allocated := RealReg(0); allocated < 64; allocated++ {
|
|
if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
|
|
continue
|
|
}
|
|
if vs := s.regsInUse.get(allocated); vs != nil {
|
|
if vs.v.IsRealReg() {
|
|
continue // This is the argument register as it's already used by VReg backed by the corresponding RealReg.
|
|
}
|
|
if !a.regInfo.CallerSavedRegisters.has(allocated) {
|
|
// If this is not a caller-saved register, it is safe to keep it across the call.
|
|
continue
|
|
}
|
|
s.releaseRealReg(allocated)
|
|
}
|
|
}
|
|
}
|
|
|
|
// fixMergeState reconciles the register state at each predecessor edge of a
// multi-predecessor block `blk` with the state expected at the beginning of
// `blk` (blkSt.startRegs), inserting moves/swaps/reloads at the end of each
// predecessor via reconcileEdge.
func (a *Allocator[I, B]) fixMergeState(f Function[I, B], blk B) {
	preds := blk.Preds()
	if preds <= 1 {
		// With at most one predecessor there is nothing to merge.
		return
	}

	s := &a.state

	// Restores the state at the beginning of the block.
	bID := blk.ID()
	blkSt := a.getOrAllocateBlockState(bID)
	desiredOccupants := &blkSt.startRegs
	// Collect the set of real registers that must be occupied at block entry,
	// so temporary-register selection below avoids them.
	var desiredOccupantsSet RegSet
	for i, v := range desiredOccupants {
		if v != nil {
			desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))
		}
	}

	if wazevoapi.RegAllocLoggingEnabled {
		fmt.Println("fixMergeState", blk.ID(), ":", desiredOccupants.format(a.regInfo))
	}

	s.currentBlockID = bID
	a.updateLiveInVRState(blkSt)

	for i := 0; i < preds; i++ {
		// The start state was derived from this predecessor, so its end state
		// already matches; skip it.
		if i == blkSt.startFromPredIndex {
			continue
		}

		pred := f.Pred(blk, i)
		predSt := a.getOrAllocateBlockState(pred.ID())

		// Rewind the allocator state to this predecessor's end state.
		s.resetAt(predSt)

		// Finds the free registers if any. These serve as scratch registers
		// for swaps; VRegInvalid means no free register of that type exists.
		intTmp, floatTmp := VRegInvalid, VRegInvalid
		if intFree := s.findAllocatable(
			a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet,
		); intFree != RealRegInvalid {
			intTmp = FromRealReg(intFree, RegTypeInt)
		}
		if floatFree := s.findAllocatable(
			a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet,
		); floatFree != RealRegInvalid {
			floatTmp = FromRealReg(floatFree, RegTypeFloat)
		}

		// For every register that must hold a value at block entry, make the
		// predecessor's end state agree.
		for r := RealReg(0); r < 64; r++ {
			desiredVReg := desiredOccupants.get(r)
			if desiredVReg == nil {
				continue
			}

			currentVReg := s.regsInUse.get(r)
			if currentVReg != nil && desiredVReg.v.ID() == currentVReg.v.ID() {
				// The register already holds the desired value; nothing to do.
				continue
			}

			typ := desiredVReg.v.RegType()
			var tmpRealReg VReg
			if typ == RegTypeInt {
				tmpRealReg = intTmp
			} else {
				tmpRealReg = floatTmp
			}
			a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ)
		}
	}
}
|
|
|
|
// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`.
//
//   - currentState is the state of the VReg value that sits on the register `r` at the end of the predecessor.
//     This can be nil if the register is not used at the end of the predecessor.
//   - desiredState is the state of the VReg value that should be on the register `r` at the beginning of the block.
//   - freeReg is the temporary register that can be used to swap the values, which may or may not be used.
//   - typ is the register type of the `r`.
func (a *Allocator[I, B]) reconcileEdge(f Function[I, B],
	r RealReg,
	pred B,
	currentState, desiredState *vrState[I, B],
	freeReg VReg,
	typ RegType,
) {
	desiredVReg := desiredState.v
	currentVReg := VRegInvalid
	if currentState != nil {
		currentVReg = currentState.v
	}
	// There are four cases to consider:
	// 1. currentVReg is valid, but desiredVReg is on the stack.
	// 2. Both currentVReg and desiredVReg are valid.
	// 3. Desired is on a different register than `r` and currentReg is not valid.
	// 4. Desired is on the stack and currentReg is not valid.

	s := &a.state
	if currentVReg.Valid() {
		er := desiredState.r
		if er == RealRegInvalid {
			// Case 1: currentVReg is valid, but desiredVReg is on the stack.
			if wazevoapi.RegAllocLoggingEnabled {
				fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",
					desiredVReg.ID(), a.regInfo.RealRegName(r),
				)
			}
			// We need to move the current value to the stack, and reload the desired value into the register.
			// TODO: we can do better here.
			f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())
			s.releaseRealReg(r)

			desiredState.recordReload(f, pred)
			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
			s.useRealReg(r, desiredState)
			return
		} else {
			// Case 2: Both currentVReg and desiredVReg are valid.
			if wazevoapi.RegAllocLoggingEnabled {
				fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
					desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
				)
			}
			// This case, we need to swap the values between the current and desired values.
			f.SwapBefore(
				currentVReg.SetRealReg(r),
				desiredVReg.SetRealReg(er),
				freeReg,
				pred.LastInstrForInsertion(),
			)
			// freeReg may have been clobbered by the swap sequence, so mark it allocated.
			s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
			s.releaseRealReg(r)
			s.releaseRealReg(er)
			s.useRealReg(r, desiredState)
			s.useRealReg(er, currentState)
			if wazevoapi.RegAllocLoggingEnabled {
				fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
			}
		}
	} else {
		if wazevoapi.RegAllocLoggingEnabled {
			fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",
				desiredVReg.ID(), a.regInfo.RealRegName(r),
			)
		}
		if currentReg := desiredState.r; currentReg != RealRegInvalid {
			// Case 3: Desired is on a different register than `r` and currentReg is not valid.
			// We simply need to move the desired value to the register.
			f.InsertMoveBefore(
				FromRealReg(r, typ),
				desiredVReg.SetRealReg(currentReg),
				pred.LastInstrForInsertion(),
			)
			s.releaseRealReg(currentReg)
		} else {
			// Case 4: Desired is on the stack and currentReg is not valid.
			// We simply need to reload the desired value into the register.
			desiredState.recordReload(f, pred)
			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
		}
		s.useRealReg(r, desiredState)
	}
}
|
|
|
|
func (a *Allocator[I, B]) scheduleSpills(f Function[I, B]) {
|
|
states := a.state.vrStates
|
|
for i := 0; i <= states.MaxIDEncountered(); i++ {
|
|
vs := states.Get(i)
|
|
if vs == nil {
|
|
continue
|
|
}
|
|
if vs.spilled {
|
|
a.scheduleSpill(f, vs)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (a *Allocator[I, B]) scheduleSpill(f Function[I, B], vs *vrState[I, B]) {
|
|
v := vs.v
|
|
// If the value is the phi value, we need to insert a spill after each phi definition.
|
|
if vs.isPhi {
|
|
for defInstr := vs.phiDefInstList; defInstr != nil; defInstr = defInstr.next {
|
|
f.StoreRegisterAfter(defInstr.v, defInstr.instr)
|
|
}
|
|
return
|
|
}
|
|
|
|
pos := vs.lca
|
|
definingBlk := vs.defBlk
|
|
r := RealRegInvalid
|
|
var nilBlk B
|
|
if definingBlk == nilBlk {
|
|
panic(fmt.Sprintf("BUG: definingBlk should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
|
|
}
|
|
if pos == nilBlk {
|
|
panic(fmt.Sprintf("BUG: pos should not be nil for %s. This is likley a bug in backend lowering logic", vs.v.String()))
|
|
}
|
|
|
|
if wazevoapi.RegAllocLoggingEnabled {
|
|
fmt.Printf("v%d is spilled in blk%d, lca=blk%d\n", v.ID(), definingBlk.ID(), pos.ID())
|
|
}
|
|
for pos != definingBlk {
|
|
st := a.getOrAllocateBlockState(pos.ID())
|
|
for rr := RealReg(0); rr < 64; rr++ {
|
|
if vs := st.startRegs.get(rr); vs != nil && vs.v == v {
|
|
r = rr
|
|
// Already in the register, so we can place the spill at the beginning of the block.
|
|
break
|
|
}
|
|
}
|
|
|
|
if r != RealRegInvalid {
|
|
break
|
|
}
|
|
|
|
pos = f.Idom(pos)
|
|
}
|
|
|
|
if pos == definingBlk {
|
|
defInstr := vs.defInstr
|
|
defInstr.Defs(&a.vs)
|
|
if wazevoapi.RegAllocLoggingEnabled {
|
|
fmt.Printf("schedule spill v%d after %v\n", v.ID(), defInstr)
|
|
}
|
|
f.StoreRegisterAfter(a.vs[0], defInstr)
|
|
} else {
|
|
// Found an ancestor block that holds the value in the register at the beginning of the block.
|
|
// We need to insert a spill before the last use.
|
|
first := pos.FirstInstr()
|
|
if wazevoapi.RegAllocLoggingEnabled {
|
|
fmt.Printf("schedule spill v%d before %v\n", v.ID(), first)
|
|
}
|
|
f.StoreRegisterAfter(v.SetRealReg(r), first)
|
|
}
|
|
}
|
|
|
|
// Reset resets the allocator's internal state so that it can be reused.
|
|
func (a *Allocator[I, B]) Reset() {
|
|
a.state.reset()
|
|
a.blockStates.Reset()
|
|
a.phiDefInstListPool.Reset()
|
|
a.vs = a.vs[:0]
|
|
}
|