wazevo: adds perfmap build tag to write perf-map (#1880)

Signed-off-by: Takeshi Yoneda <t.y.mathetake@gmail.com>
This commit is contained in:
Takeshi Yoneda
2023-12-19 08:26:47 -08:00
committed by GitHub
parent 10bb61bee8
commit fe5aebc764
11 changed files with 232 additions and 25 deletions

View File

@@ -2231,7 +2231,7 @@ L1 (SSA Block: blk0):
fmt.Println(be.Format())
}
be.Finalize()
be.Finalize(context.Background())
if verbose {
fmt.Println("============ finalization result ============")
fmt.Println(be.Format())

View File

@@ -50,7 +50,7 @@ type Compiler interface {
RegAlloc()
// Finalize performs the finalization of the compilation. This must be called after RegAlloc.
Finalize()
Finalize(ctx context.Context)
// Encode encodes the machine code to the buffer.
Encode()
@@ -150,21 +150,21 @@ type SourceOffsetInfo struct {
// Compile implements Compiler.Compile.
func (c *compiler) Compile(ctx context.Context) ([]byte, []RelocationInfo, error) {
c.Lower()
if wazevoapi.PrintSSAToBackendIRLowering {
if wazevoapi.PrintSSAToBackendIRLowering && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[after lowering for %s ]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After lowering to ISA specific IR", c.Format())
}
c.RegAlloc()
if wazevoapi.PrintRegisterAllocated {
if wazevoapi.PrintRegisterAllocated && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[after regalloc for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
wazevoapi.VerifyOrSetDeterministicCompilationContextValue(ctx, "After Register Allocation", c.Format())
}
c.Finalize()
if wazevoapi.PrintFinalizedMachineCode {
c.Finalize(ctx)
if wazevoapi.PrintFinalizedMachineCode && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[after finalize for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), c.Format())
}
if wazevoapi.DeterministicCompilationVerifierEnabled {
@@ -184,10 +184,10 @@ func (c *compiler) RegAlloc() {
}
// Finalize implements Compiler.Finalize.
func (c *compiler) Finalize() {
func (c *compiler) Finalize(ctx context.Context) {
c.mach.SetupPrologue()
c.mach.SetupEpilogue()
c.mach.ResolveRelativeAddresses()
c.mach.ResolveRelativeAddresses(ctx)
}
// Encode implements Compiler.Encode.

View File

@@ -1,6 +1,7 @@
package arm64
import (
"context"
"fmt"
"math"
"strings"
@@ -355,7 +356,7 @@ func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruc
}
// ResolveRelativeAddresses implements backend.Machine.
func (m *machine) ResolveRelativeAddresses() {
func (m *machine) ResolveRelativeAddresses(ctx context.Context) {
if len(m.unresolvedAddressModes) > 0 {
arg0offset, ret0offset := m.arg0OffsetFromSP(), m.ret0OffsetFromSP()
for _, i := range m.unresolvedAddressModes {
@@ -366,6 +367,18 @@ func (m *machine) ResolveRelativeAddresses() {
// Reuse the slice to gather the unresolved conditional branches.
cbrs := m.condBrRelocs[:0]
var fn string
var fnIndex int
var labelToSSABlockID map[label]ssa.BasicBlockID
if wazevoapi.PerfMapEnabled {
fn = wazevoapi.GetCurrentFunctionName(ctx)
labelToSSABlockID = make(map[label]ssa.BasicBlockID)
for i, l := range m.ssaBlockIDToLabels {
labelToSSABlockID[l] = ssa.BasicBlockID(i)
}
fnIndex = wazevoapi.GetCurrentFunctionIndex(ctx)
}
// Next, in order to determine the offsets of relative jumps, we have to calculate the size of each label.
var offset int64
for i, pos := range m.orderedBlockLabels {
@@ -397,6 +410,20 @@ func (m *machine) ResolveRelativeAddresses() {
break
}
}
if wazevoapi.PerfMapEnabled {
if size > 0 {
l := pos.l
var labelStr string
if blkID, ok := labelToSSABlockID[l]; ok {
labelStr = fmt.Sprintf("%s::SSA_Block[%s]", l, blkID)
} else {
labelStr = l.String()
}
wazevoapi.PerfMap.AddModuleEntry(fnIndex, offset, uint64(size), fmt.Sprintf("%s:::::%s", fn, labelStr))
}
}
pos.binarySize = size
offset += size
}
@@ -421,7 +448,8 @@ func (m *machine) ResolveRelativeAddresses() {
}
}
if needRerun {
m.ResolveRelativeAddresses()
m.ResolveRelativeAddresses(ctx)
wazevoapi.PerfMap.Clear()
return
}

View File

@@ -82,11 +82,11 @@ func (m *mockCompiler) Buf() []byte { return m.buf }
func (m *mockCompiler) TypeOf(v regalloc.VReg) (ret ssa.Type) {
return m.typeOf[v.ID()]
}
func (m *mockCompiler) Finalize() {}
func (m *mockCompiler) RegAlloc() {}
func (m *mockCompiler) Lower() {}
func (m *mockCompiler) Format() string { return "" }
func (m *mockCompiler) Init() {}
func (m *mockCompiler) Finalize(context.Context) {}
func (m *mockCompiler) RegAlloc() {}
func (m *mockCompiler) Lower() {}
func (m *mockCompiler) Format() string { return "" }
func (m *mockCompiler) Init() {}
func newMockCompilationContext() *mockCompiler {
return &mockCompiler{

View File

@@ -1,6 +1,8 @@
package backend
import (
"context"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
@@ -89,7 +91,7 @@ type (
// ResolveRelativeAddresses resolves the relative addresses after register allocations and prologue/epilogue setup.
// After this, the compiler is finally ready to emit machine code.
ResolveRelativeAddresses()
ResolveRelativeAddresses(ctx context.Context)
// ResolveRelocations resolves the relocations after emitting machine code.
ResolveRelocations(refToBinaryOffset map[ssa.FuncRef]int, binary []byte, relocations []RelocationInfo)

View File

@@ -1,6 +1,8 @@
package backend
import (
"context"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
@@ -50,7 +52,7 @@ func (m mockMachine) SetupPrologue() {}
func (m mockMachine) SetupEpilogue() {}
// ResolveRelativeAddresses implements Machine.ResolveRelativeAddresses.
func (m mockMachine) ResolveRelativeAddresses() {}
func (m mockMachine) ResolveRelativeAddresses(ctx context.Context) {}
// Function implements Machine.Function.
func (m mockMachine) Function() (f regalloc.Function) { return }

View File

@@ -116,6 +116,11 @@ func NewEngine(ctx context.Context, _ api.CoreFeatures, fc filecache.Cache) wasm
// CompileModule implements wasm.Engine.
func (e *engine) CompileModule(ctx context.Context, module *wasm.Module, listeners []experimental.FunctionListener, ensureTermination bool) (err error) {
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Lock()
defer wazevoapi.PerfMap.Unlock()
}
if _, ok, err := e.getCompiledModule(module, listeners, ensureTermination); ok { // cache hit!
return nil
} else if err != nil {
@@ -165,6 +170,11 @@ func (exec *executables) compileEntryPreambles(m *wasm.Module, machine backend.M
buf := machine.CompileEntryPreamble(&sig)
executable := mmapExecutable(buf)
exec.entryPreambles[i] = executable
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&executable[0])),
uint64(len(executable)), fmt.Sprintf("entry_preamble::type=%s", typ.String()))
}
}
}
@@ -217,7 +227,7 @@ func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listene
if len(def.ExportNames()) > 0 {
name = def.ExportNames()[0]
}
ctx = wazevoapi.SetCurrentFunctionName(ctx, fmt.Sprintf("[%d/%d] \"%s\"", i, len(module.CodeSection)-1, name))
ctx = wazevoapi.SetCurrentFunctionName(ctx, i, fmt.Sprintf("[%d/%d]%s", i, len(module.CodeSection)-1, name))
}
needListener := len(listeners) > 0 && listeners[i] != nil
@@ -271,6 +281,10 @@ func (e *engine) compileModule(ctx context.Context, module *wasm.Module, listene
copy(executable[offset:], b)
}
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
}
if needSourceInfo {
for i := range cm.sourceMap.executableOffsets {
cm.sourceMap.executableOffsets[i] += uintptr(unsafe.Pointer(&cm.executable[0]))
@@ -310,7 +324,7 @@ func (e *engine) compileLocalWasmFunction(
// Lower Wasm to SSA.
fe.LowerToSSA()
if wazevoapi.PrintSSA {
if wazevoapi.PrintSSA && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format())
}
@@ -321,7 +335,7 @@ func (e *engine) compileLocalWasmFunction(
// Run SSA-level optimization passes.
ssaBuilder.RunPasses()
if wazevoapi.PrintOptimizedSSA {
if wazevoapi.PrintOptimizedSSA && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[Optimized SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format())
}
@@ -332,7 +346,7 @@ func (e *engine) compileLocalWasmFunction(
// Finalize the layout of SSA blocks which might use the optimization results.
ssaBuilder.LayoutBlocks()
if wazevoapi.PrintBlockLaidOutSSA {
if wazevoapi.PrintBlockLaidOutSSA && wazevoapi.PrintEnabledIndex(ctx) {
fmt.Printf("[[[Laidout SSA for %s]]]%s\n", wazevoapi.GetCurrentFunctionName(ctx), ssaBuilder.Format())
}
@@ -411,6 +425,14 @@ func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, lis
be.Encode()
body := be.Buf()
if wazevoapi.PerfMapEnabled {
name := module.FunctionDefinition(wasm.Index(i)).DebugName()
wazevoapi.PerfMap.AddModuleEntry(i,
int64(totalSize),
uint64(len(body)),
fmt.Sprintf("trampoline:%s", name))
}
// TODO: optimize as zero copy.
copied := make([]byte, len(body))
copy(copied, body)
@@ -430,6 +452,10 @@ func (e *engine) compileHostModule(ctx context.Context, module *wasm.Module, lis
copy(executable[offset:], b)
}
if wazevoapi.PerfMapEnabled {
wazevoapi.PerfMap.Flush(uintptr(unsafe.Pointer(&executable[0])), cm.functionOffsets)
}
if runtime.GOARCH == "arm64" {
// On arm64, we cannot give all of rwx at the same time, so we change it to exec.
if err = platform.MprotectRX(executable); err != nil {
@@ -556,6 +582,10 @@ func (e *engine) compileSharedFunctions() {
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.memoryGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.memoryGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "memory_grow_trampoline")
}
}
e.be.Init()
@@ -565,6 +595,10 @@ func (e *engine) compileSharedFunctions() {
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.tableGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.tableGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "table_grow_trampoline")
}
}
e.be.Init()
@@ -574,6 +608,10 @@ func (e *engine) compileSharedFunctions() {
Results: []ssa.Type{ssa.TypeI32},
}, false)
e.sharedFunctions.checkModuleExitCode = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.checkModuleExitCode
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "check_module_exit_code_trampoline")
}
}
e.be.Init()
@@ -583,12 +621,20 @@ func (e *engine) compileSharedFunctions() {
Results: []ssa.Type{ssa.TypeI64}, // returns the function reference.
}, false)
e.sharedFunctions.refFuncExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.refFuncExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "ref_func_trampoline")
}
}
e.be.Init()
{
src := e.machine.CompileStackGrowCallSequence()
e.sharedFunctions.stackGrowExecutable = mmapExecutable(src)
if wazevoapi.PerfMapEnabled {
exe := e.sharedFunctions.stackGrowExecutable
wazevoapi.PerfMap.AddEntry(uintptr(unsafe.Pointer(&exe[0])), uint64(len(exe)), "stack_grow_trampoline")
}
}
e.setFinalizer(e.sharedFunctions, sharedFunctionsFinalizer)

View File

@@ -40,6 +40,18 @@ const (
PrintMachineCodeHexPerFunctionDisassemblable = false
)
// printTarget is the index of the only function whose debug output is printed by the Print* options above.
// The default of -1 selects every function; set it to a specific function index when debugging that function.
const printTarget = -1
// PrintEnabledIndex reports whether debug printing applies to the function currently being compiled under ctx.
// Every function qualifies while printTarget is -1; otherwise only the function whose index, as read from ctx
// via GetCurrentFunctionIndex, equals printTarget.
func PrintEnabledIndex(ctx context.Context) bool {
	return printTarget == -1 || GetCurrentFunctionIndex(ctx) == printTarget
}
// ----- Validations -----
const (
// SSAValidationEnabled enables the SSA validation. This is disabled by default since the operation is expensive.
@@ -84,6 +96,7 @@ type (
}
verifierStateContextKey struct{}
currentFunctionNameKey struct{}
currentFunctionIndexKey struct{}
)
// NewDeterministicCompilationVerifierContext creates a new context with the deterministic compilation verifier used per wasm.Module.
@@ -162,16 +175,26 @@ const NeedFunctionNameInContext = PrintSSA ||
PrintRegisterAllocated ||
PrintFinalizedMachineCode ||
PrintMachineCodeHexPerFunction ||
DeterministicCompilationVerifierEnabled
DeterministicCompilationVerifierEnabled ||
PerfMapEnabled
// SetCurrentFunctionName sets the current function name to the given `functionName`.
func SetCurrentFunctionName(ctx context.Context, functionName string) context.Context {
return context.WithValue(ctx, currentFunctionNameKey{}, functionName)
func SetCurrentFunctionName(ctx context.Context, index int, functionName string) context.Context {
ctx = context.WithValue(ctx, currentFunctionNameKey{}, functionName)
ctx = context.WithValue(ctx, currentFunctionIndexKey{}, index)
return ctx
}
// GetCurrentFunctionName returns the current function name.
func GetCurrentFunctionName(ctx context.Context) string {
return ctx.Value(currentFunctionNameKey{}).(string)
ret, _ := ctx.Value(currentFunctionNameKey{}).(string)
return ret
}
// GetCurrentFunctionIndex returns the function index stored in ctx under currentFunctionIndexKey.
// When ctx carries no such value (or it is not an int), 0 is returned.
func GetCurrentFunctionIndex(ctx context.Context) int {
	index, ok := ctx.Value(currentFunctionIndexKey{}).(int)
	if !ok {
		return 0
	}
	return index
}
// ----- High Register Pressure -----

View File

@@ -0,0 +1,96 @@
package wazevoapi
import (
"fmt"
"os"
"strconv"
"sync"
)
// PerfMap is the process-wide perf-map writer. It is only assigned by init when PerfMapEnabled is true,
// and is nil otherwise.
var PerfMap *Perfmap

// init opens /tmp/perf-<pid>.map for appending (creating it when missing) and installs the resulting
// writer as PerfMap. It does nothing when PerfMapEnabled is false, and panics if the file cannot be opened.
func init() {
	if !PerfMapEnabled {
		return
	}

	const openFlags = os.O_APPEND | os.O_RDWR | os.O_CREATE
	path := "/tmp/perf-" + strconv.Itoa(os.Getpid()) + ".map"
	file, err := os.OpenFile(path, openFlags, 0o644)
	if err != nil {
		panic(err)
	}
	PerfMap = &Perfmap{fh: file}
}
// Perfmap holds perfmap entries to be flushed into a perfmap file.
//
// Each line written to the file has the form "START SIZE NAME\n", where START and SIZE are
// lowercase hexadecimal without a prefix.
type Perfmap struct {
	// entries are the module-scoped entries recorded by AddModuleEntry and not yet flushed.
	entries []entry
	// mux is the lock exposed through Lock and Unlock.
	mux sync.Mutex
	// fh is the perfmap file that Flush and AddEntry write to.
	fh *os.File
}

// entry is a pending, module-relative perfmap record.
type entry struct {
	// index selects the element of the functionOffsets slice passed to Flush.
	index int
	// offset is added to the base address and the selected function offset on Flush.
	offset int64
	// size is the SIZE column of the emitted line.
	size uint64
	// name is the NAME column of the emitted line.
	name string
}

// Lock acquires the Perfmap mutex.
func (f *Perfmap) Lock() {
	f.mux.Lock()
}

// Unlock releases the Perfmap mutex.
func (f *Perfmap) Unlock() {
	f.mux.Unlock()
}

// AddModuleEntry adds a perfmap entry into the perfmap file.
// index is the index of the function in the module, offset is the offset of the function in the module,
// size is the size of the function, and name is the name of the function.
//
// Note that the entries are not flushed into the perfmap file until Flush is called,
// and the entries are module-scoped; Perfmap must be locked until Flush is called.
func (f *Perfmap) AddModuleEntry(index int, offset int64, size uint64, name string) {
	// append allocates on a nil slice, so no special case is needed for the first entry.
	f.entries = append(f.entries, entry{index: index, offset: offset, size: size, name: name})
}

// Flush writes the perfmap entries into the perfmap file where the entries are adjusted by the given `addr` and `functionOffsets`.
//
// The start address of each line is entry.offset + addr + functionOffsets[entry.index]. All pending
// entries are written with a single write call and are then dropped. Flush panics if the write fails
// or if an entry's index is out of range for functionOffsets.
func (f *Perfmap) Flush(addr uintptr, functionOffsets []int) {
	defer func() {
		// Sync is best-effort; its error is intentionally ignored.
		_ = f.fh.Sync()
	}()

	// Batch all lines so that a module with many entries costs one write instead of one per entry.
	var buf []byte
	for i := range f.entries {
		e := &f.entries[i]
		start := uintptr(e.offset) + addr + uintptr(functionOffsets[e.index])
		buf = append(buf, perfMapLine(start, e.size, e.name)...)
	}
	if len(buf) > 0 {
		if _, err := f.fh.Write(buf); err != nil {
			panic(err)
		}
	}
	f.entries = f.entries[:0]
}

// Clear clears the perfmap entries not yet flushed.
//
// Clear is a no-op on a nil *Perfmap, so that a call site which is not guarded by PerfMapEnabled
// does not crash when the package-level PerfMap was never initialized.
func (f *Perfmap) Clear() {
	if f == nil {
		return
	}
	f.entries = f.entries[:0]
}

// AddEntry writes a perfmap entry directly into the perfmap file, not using the entries.
// It panics if the write fails.
func (f *Perfmap) AddEntry(addr uintptr, size uint64, name string) {
	if _, err := f.fh.WriteString(perfMapLine(addr, size, name)); err != nil {
		panic(err)
	}
}

// perfMapLine formats one perfmap line: hexadecimal address, hexadecimal size, name, newline.
func perfMapLine(addr uintptr, size uint64, name string) string {
	return fmt.Sprintf("%x %s %s\n", addr, strconv.FormatUint(size, 16), name)
}

View File

@@ -0,0 +1,5 @@
//go:build !perfmap
package wazevoapi
// PerfMapEnabled is false in this file, which is selected when the `perfmap` build tag is not set.
const PerfMapEnabled = false

View File

@@ -0,0 +1,5 @@
//go:build perfmap
package wazevoapi
// PerfMapEnabled is true in this file, which is selected when building with the `perfmap` build tag.
const PerfMapEnabled = true