dwarf: include inlined function calls in stack traces (#912)

Signed-off-by: Takeshi Yoneda <takeshi@tetrate.io>
2022-12-12 11:20:14 +09:00
parent 97cefe0e2d
commit 45aa798a55
9 changed files with 212 additions and 74 deletions
--- a/experimental/dwarf_test.go
+++ b/experimental/dwarf_test.go
@@ -1,8 +1,10 @@
 package experimental_test

 import (
+	"bufio"
 	"context"
 	_ "embed"
+	"strings"
 	"testing"

 	"github.com/tetratelabs/wazero"
@@ -37,37 +39,50 @@ func TestWithDWARFBasedStackTrace(t *testing.T) {
 		t.Run(tc.name, func(t *testing.T) {
 			r := tc.r
 			defer r.Close(ctx) // This closes everything this Runtime created.
-
 			wasi_snapshot_preview1.MustInstantiate(ctx, r)

 			for _, lang := range []struct {
 				name string
 				bin  []byte
-				exps []string
+				exp  string
 			}{
 				{
 					name: "tinygo",
 					bin:  dwarftestdata.TinyGoWasm,
-					exps: []string{
-						"src/runtime/runtime_tinygowasm.go:73:6",
-						"wazero/internal/testing/dwarftestdata/testdata/main.go:19:7",
-						"wazero/internal/testing/dwarftestdata/testdata/main.go:14:3",
-						"wazero/internal/testing/dwarftestdata/testdata/main.go:9:3",
-						"wazero/internal/testing/dwarftestdata/testdata/main.go:4:3",
-						"wazero/internal/testing/dwarftestdata/testdata/main.go:4:3",
-						"src/runtime/scheduler_none.go:26:10",
-						"src/runtime/runtime_wasm_wasi.go:22:5",
-					},
+					exp: `module[] function[_start] failed: wasm error: unreachable
+wasm stack trace:
+	.runtime._panic(i32)
+		0x16e2: /runtime_tinygowasm.go:73:6 (inlined)
+		        /panic.go:52:7
+	.c()
+		0x1919: /main.go:19:7
+	.b()
+		0x1901: /main.go:14:3
+	.a()
+		0x18f7: /main.go:9:3
+	.main.main()
+		0x18ed: /main.go:4:3
+	.runtime.run()
+		0x18cc: /scheduler_none.go:26:10
+	._start()
+		0x18b6: /runtime_wasm_wasi.go:22:5`,
 				},
 				{
 					name: "zig",
 					bin:  dwarftestdata.ZigWasm,
-					exps: []string{
-						"lib/std/os.zig:552:9",
-						"lib/std/builtin.zig:787:25",
-						"main.zig:1:23",
-						"lib/std/start.zig:614:37",
-					},
+					exp: `module[] function[_start] failed: wasm error: unreachable
+wasm stack trace:
+	.os.abort()
+		0x1b3: /os.zig:552:9
+	.builtin.default_panic(i32,i32,i32,i32)
+		0x86: /builtin.zig:787:25
+	.main.main() i32
+		0x2d: main.zig:10:5 (inlined)
+		      main.zig:6:5 (inlined)
+		      main.zig:2:5
+	._start()
+		0x1c6: /start.zig:614:37 (inlined)
+		       /start.zig:240:42`,
 				},
 			} {
 				t.Run(lang.name, func(t *testing.T) {
@@ -79,9 +94,26 @@ func TestWithDWARFBasedStackTrace(t *testing.T) {
 					require.Error(t, err)

 					errStr := err.Error()
-					for _, exp := range lang.exps {
-						require.Contains(t, errStr, exp)
+
+					// Since stack traces change where the binary is compiled, we sanitize each line
+					// so that it doesn't contain any file system dependent info.
+					scanner := bufio.NewScanner(strings.NewReader(errStr))
+					scanner.Split(bufio.ScanLines)
+					var sanitizedLines []string
+					for scanner.Scan() {
+						line := scanner.Text()
+						start, last := strings.Index(line, "/"), strings.LastIndex(line, "/")
+						if start >= 0 {
+							l := len(line) - last
+							buf := []byte(line)
+							copy(buf[start:], buf[last:])
+							line = string(buf[:start+l])
+						}
+						sanitizedLines = append(sanitizedLines, line)
 					}
+
+					sanitizedTraces := strings.Join(sanitizedLines, "\n")
+					require.Equal(t, sanitizedTraces, lang.exp)
 				})
 			}
 		})
--- a/internal/engine/compiler/engine.go
+++ b/internal/engine/compiler/engine.go
@@ -735,14 +735,14 @@ func (ce *callEngine) deferredOnCall(recovered interface{}) (err error) {

 			// sourceInfo holds the source code information corresponding to the frame.
 			// It is not empty only when the DWARF is enabled.
-			var sourceInfo string
+			var sources []string
 			if p := fn.parent; p.codeSegment != nil {
 				if p.sourceOffsetMap != nil {
 					offset := fn.getSourceOffsetInWasmBinary(pc)
-					sourceInfo = p.sourceModule.DWARFLines.Line(offset)
+					sources = p.sourceModule.DWARFLines.Line(offset)
 				}
 			}
-			builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sourceInfo)
+			builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)

 			callFrameOffset := callFrameOffset(source.Type)
 			if stackBasePointer != 0 {
--- a/internal/engine/interpreter/interpreter.go
+++ b/internal/engine/interpreter/interpreter.go
@@ -840,11 +840,11 @@ func (ce *callEngine) recoverOnCall(v interface{}) (err error) {
 	for i := 0; i < frameCount; i++ {
 		frame := ce.popFrame()
 		def := frame.f.source.Definition
-		var sourceInfo string
+		var sources []string
 		if frame.f.body != nil {
-			sourceInfo = frame.f.parent.source.DWARFLines.Line(frame.f.body[frame.pc].sourcePC)
+			sources = frame.f.parent.source.DWARFLines.Line(frame.f.body[frame.pc].sourcePC)
 		}
-		builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sourceInfo)
+		builder.AddFrame(def.DebugName(), def.ParamTypes(), def.ResultTypes(), sources)
 	}
 	err = builder.FromRecovered(v)

--- a/internal/testing/dwarftestdata/testdata/zig/main.wasm
+++ b/internal/testing/dwarftestdata/testdata/zig/main.wasm
--- a/internal/testing/dwarftestdata/testdata/zig/main.zig
+++ b/internal/testing/dwarftestdata/testdata/zig/main.zig
@@ -1 +1,11 @@
-pub fn main() !void { unreachable; }
+pub fn main() !void {
+    @call(.{ .modifier = .always_inline }, inlined_a, .{});
+}
+
+fn inlined_a() void {
+    @call(.{ .modifier = .always_inline }, inlined_b, .{});
+}
+
+fn inlined_b() void {
+    unreachable;
+}
--- a/internal/wasmdebug/debug.go
+++ b/internal/wasmdebug/debug.go
@@ -95,10 +95,10 @@ type ErrorBuilder interface {
 	// * funcName should be from FuncName
 	// * paramTypes should be from wasm.FunctionType
 	// * resultTypes should be from wasm.FunctionType
-	// * sourceInfo is the source code information for this frame and can be empty.
+	// * sources is the source code information for this frame and can be empty.
 	//
 	// Note: paramTypes and resultTypes are present because signature misunderstanding, mismatch or overflow are common.
-	AddFrame(funcName string, paramTypes, resultTypes []api.ValueType, sourceInfo string)
+	AddFrame(funcName string, paramTypes, resultTypes []api.ValueType, sources []string)

 	// FromRecovered returns an error with the wasm stack trace appended to it.
 	FromRecovered(recovered interface{}) error
@@ -143,11 +143,11 @@ func (s *stackTrace) FromRecovered(recovered interface{}) error {
 	}
 }

-// AddFrame implements ErrorBuilder.Format
-func (s *stackTrace) AddFrame(funcName string, paramTypes, resultTypes []api.ValueType, sourceInfo string) {
+// AddFrame implements ErrorBuilder.AddFrame
+func (s *stackTrace) AddFrame(funcName string, paramTypes, resultTypes []api.ValueType, sources []string) {
 	sig := signature(funcName, paramTypes, resultTypes)
 	s.frames = append(s.frames, sig)
-	if sourceInfo != "" {
-		s.frames = append(s.frames, "\t"+sourceInfo)
+	for _, source := range sources {
+		s.frames = append(s.frames, "\t"+source)
 	}
 }
--- a/internal/wasmdebug/debug_test.go
+++ b/internal/wasmdebug/debug_test.go
@@ -79,7 +79,7 @@ func TestErrorBuilder(t *testing.T) {
 		{
 			name: "one",
 			build: func(builder ErrorBuilder) error {
-				builder.AddFrame("x.y", nil, nil, "")
+				builder.AddFrame("x.y", nil, nil, nil)
 				return builder.FromRecovered(argErr)
 			},
 			expectedErr: `invalid argument (recovered by wazero)
@@ -90,8 +90,8 @@ wasm stack trace:
 		{
 			name: "two",
 			build: func(builder ErrorBuilder) error {
-				builder.AddFrame("wasi_snapshot_preview1.fd_write", i32i32i32i32, []api.ValueType{i32}, "")
-				builder.AddFrame("x.y", nil, nil, "")
+				builder.AddFrame("wasi_snapshot_preview1.fd_write", i32i32i32i32, []api.ValueType{i32}, nil)
+				builder.AddFrame("x.y", nil, nil, nil)
 				return builder.FromRecovered(argErr)
 			},
 			expectedErr: `invalid argument (recovered by wazero)
@@ -103,8 +103,8 @@ wasm stack trace:
 		{
 			name: "runtime.Error",
 			build: func(builder ErrorBuilder) error {
-				builder.AddFrame("wasi_snapshot_preview1.fd_write", i32i32i32i32, []api.ValueType{i32}, "")
-				builder.AddFrame("x.y", nil, nil, "")
+				builder.AddFrame("wasi_snapshot_preview1.fd_write", i32i32i32i32, []api.ValueType{i32}, nil)
+				builder.AddFrame("x.y", nil, nil, nil)
 				return builder.FromRecovered(rteErr)
 			},
 			expectedErr: `index out of bounds (recovered by wazero)
@@ -116,8 +116,9 @@ wasm stack trace:
 		{
 			name: "wasmruntime.Error",
 			build: func(builder ErrorBuilder) error {
-				builder.AddFrame("wasi_snapshot_preview1.fd_write", i32i32i32i32, []api.ValueType{i32}, "/opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/runtime_tinygowasm.go:73:6")
-				builder.AddFrame("x.y", nil, nil, "")
+				builder.AddFrame("wasi_snapshot_preview1.fd_write", i32i32i32i32, []api.ValueType{i32},
+					[]string{"/opt/homebrew/Cellar/tinygo/0.26.0/src/runtime/runtime_tinygowasm.go:73:6"})
+				builder.AddFrame("x.y", nil, nil, nil)
 				return builder.FromRecovered(wasmruntime.ErrRuntimeStackOverflow)
 			},
 			expectedErr: `wasm error: stack overflow
--- a/internal/wasmdebug/dwarf.go
+++ b/internal/wasmdebug/dwarf.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"io"
 	"sort"
+	"strings"
 	"sync"
 )

@@ -34,9 +35,9 @@ func NewDWARFLines(d *dwarf.Data) *DWARFLines {

 // Line returns the line information for the given instructionOffset which is an offset in
 // the code section of the original Wasm binary. Returns empty string if the info is not found.
-func (d *DWARFLines) Line(instructionOffset uint64) string {
+func (d *DWARFLines) Line(instructionOffset uint64) (ret []string) {
 	if d == nil {
-		return ""
+		return
 	}

 	// DWARFLines is created per Wasm binary, so there's a possibility that multiple instances
@@ -47,22 +48,64 @@ func (d *DWARFLines) Line(instructionOffset uint64) string {

 	r := d.d.Reader()

-	// Get the dwarf.Entry containing the instruction.
-	entry, err := r.SeekPC(instructionOffset)
-	if err != nil {
-		return ""
+	var inlinedRoutines []*dwarf.Entry
+	var cu *dwarf.Entry
+	var inlinedDone bool
+entry:
+	for {
+		ent, err := r.Next()
+		if err != nil || ent == nil {
+			break
+		}
+
+		// If we already found the compilation unit and relevant inlined routines, we can stop searching entries.
+		if cu != nil && inlinedDone {
+			break
+		}
+
+		switch ent.Tag {
+		case dwarf.TagCompileUnit, dwarf.TagInlinedSubroutine:
+		default:
+			// Only CompileUnit and InlinedSubroutines are relevant.
+			continue
+		}
+
+		// Check if the entry spans the range which contains the target instruction.
+		ranges, err := d.d.Ranges(ent)
+		if err != nil {
+			continue
+		}
+		for _, pcs := range ranges {
+			if pcs[0] <= instructionOffset && instructionOffset < pcs[1] {
+				switch ent.Tag {
+				case dwarf.TagCompileUnit:
+					cu = ent
+				case dwarf.TagInlinedSubroutine:
+					inlinedRoutines = append(inlinedRoutines, ent)
+					// Search inlined subroutines until all the children.
+					inlinedDone = !ent.Children
+					// Not that "children" in the DWARF spec is defined as the next entry to this entry.
+					// See "2.3 Relationship of Debugging Information Entries" in https://dwarfstd.org/doc/DWARF4.pdf
+				}
+				continue entry
+			}
+		}
 	}

-	lineReader, err := d.d.LineReader(entry)
-	if err != nil {
-		return ""
+	// If the relevant compilation unit is not found, nothing we can do with this DWARF info.
+	if cu == nil {
+		return
 	}

+	lineReader, err := d.d.LineReader(cu)
+	if err != nil || lineReader == nil {
+		return
+	}
 	var lines []line
 	var ok bool
 	var le dwarf.LineEntry
 	// Get the lines inside the entry.
-	if lines, ok = d.linesPerEntry[entry.Offset]; !ok {
+	if lines, ok = d.linesPerEntry[cu.Offset]; !ok {
 		// If not found, we create the list of lines by reading all the LineEntries in the Entry.
 		//
 		// Note that the dwarf.LineEntry.SeekPC API shouldn't be used because the Go's dwarf package assumes that
@@ -77,12 +120,12 @@ func (d *DWARFLines) Line(instructionOffset uint64) string {
 			if errors.Is(err, io.EOF) {
 				break
 			} else if err != nil {
-				return ""
+				return
 			}
 			lines = append(lines, line{addr: le.Address, pos: pos})
 		}
 		sort.Slice(lines, func(i, j int) bool { return lines[i].addr < lines[j].addr })
-		d.linesPerEntry[entry.Offset] = lines // Caches for the future inquiries for the same Entry.
+		d.linesPerEntry[cu.Offset] = lines // Caches for the future inquiries for the same Entry.
 	}

 	// Now we have the lines for this entry. We can find the corresponding source line for instructionOffset
@@ -91,15 +134,47 @@ func (d *DWARFLines) Line(instructionOffset uint64) string {
 	index := sort.Search(n, func(i int) bool { return lines[i].addr >= instructionOffset })

 	if index == n { // This case the address is not found. See the doc sort.Search.
-		return ""
+		return
 	}

 	// Advance the line reader for the found position.
 	lineReader.Seek(lines[index].pos)
 	err = lineReader.Next(&le)
+
 	if err != nil {
 		// If we reach this block, that means there's a bug in the []line creation logic above.
 		panic("BUG: stored dwarf.LineReaderPos is invalid")
 	}
-	return fmt.Sprintf("%#x: %s:%d:%d", le.Address, le.File.Name, le.Line, le.Column)
+
+	if len(inlinedRoutines) == 0 {
+		// Do early return for non-inlined case.
+		ret = []string{fmt.Sprintf("%#x: %s:%d:%d", le.Address, le.File.Name, le.Line, le.Column)}
+		return
+	}
+
+	// In the inlined case, the line info is the innermost inlined function call.
+	addr := fmt.Sprintf("%#x: ", le.Address)
+	ret = append(ret, fmt.Sprintf("%s%s:%d:%d (inlined)", addr, le.File.Name, le.Line, le.Column))
+
+	files := lineReader.Files()
+	// inlinedRoutines contain the inlined call information in the reverse order (children is higher than parent),
+	// so we traverse the reverse order and emit the inlined calls.
+	for i := len(inlinedRoutines) - 1; i >= 0; i-- {
+		inlined := inlinedRoutines[i]
+		fileIndex, ok := inlined.Val(dwarf.AttrCallFile).(int64)
+		if !ok {
+			return
+		} else if fileIndex >= int64(len(files)) {
+			// This in theory shouldn't happen according to the spec, but guard against ill-formed DWARF info.
+			return
+		}
+		fileName, line, col := files[fileIndex], inlined.Val(dwarf.AttrCallLine), inlined.Val(dwarf.AttrCallColumn)
+		if i == 0 {
+			// Last one is the origin of the inlined function calls.
+			ret = append(ret, fmt.Sprintf("%s%s:%d:%d", strings.Repeat(" ", len(addr)), fileName.Name, line, col))
+		} else {
+			ret = append(ret, fmt.Sprintf("%s%s:%d:%d (inlined)", strings.Repeat(" ", len(addr)), fileName.Name, line, col))
+		}
+	}
+	return
 }
--- a/internal/wasmdebug/dwarf_test.go
+++ b/internal/wasmdebug/dwarf_test.go
@@ -13,12 +13,12 @@ import (
 	"github.com/tetratelabs/wazero/internal/wasm/binary"
 )

-func TestDWARFLines_Line_TinyGO(t *testing.T) {
+func TestDWARFLines_Line_TinyGo(t *testing.T) {
 	mod, err := binary.DecodeModule(dwarftestdata.TinyGoWasm, api.CoreFeaturesV2, wasm.MemoryLimitPages, false, true, false)
 	require.NoError(t, err)
 	require.NotNil(t, mod.DWARFLines)

-	// Get the offsets of functions named "a", "b" and "c" in dwarftestdata.DWARFWasm.
+	// Get the offsets of functions named "a", "b" and "c" in dwarftestdata.TinyGoWasm.
 	var a, b, c uint64
 	for _, exp := range mod.ExportSection {
 		switch exp.Name {
@@ -32,19 +32,20 @@ func TestDWARFLines_Line_TinyGO(t *testing.T) {
 	}

 	tests := []struct {
+		name   string
 		offset uint64
-		exp    string
+		exp    []string
 	}{
 		// Unknown offset returns empty string.
-		{offset: math.MaxUint64, exp: ""},
+		{offset: math.MaxUint64},
 		// The first instruction should point to the first line of each function in internal/testing/dwarftestdata/testdata/tinygo.go
-		{offset: a, exp: "wazero/internal/testing/dwarftestdata/testdata/main.go:9:3"},
-		{offset: b, exp: "wazero/internal/testing/dwarftestdata/testdata/main.go:14:3"},
-		{offset: c, exp: "wazero/internal/testing/dwarftestdata/testdata/main.go:19:7"},
+		{offset: a, exp: []string{"wazero/internal/testing/dwarftestdata/testdata/main.go:9:3"}},
+		{offset: b, exp: []string{"wazero/internal/testing/dwarftestdata/testdata/main.go:14:3"}},
+		{offset: c, exp: []string{"wazero/internal/testing/dwarftestdata/testdata/main.go:19:7"}},
 	}

 	for _, tc := range tests {
-		t.Run(tc.exp, func(t *testing.T) {
+		t.Run(tc.name, func(t *testing.T) {
 			// Ensures that DWARFLines.Line is goroutine-safe.
 			const concurrent = 100
 			var wg sync.WaitGroup
@@ -54,7 +55,11 @@ func TestDWARFLines_Line_TinyGO(t *testing.T) {
 				go func() {
 					defer wg.Done()
 					actual := mod.DWARFLines.Line(tc.offset)
-					require.Contains(t, actual, tc.exp)
+
+					require.Equal(t, len(tc.exp), len(actual))
+					for i := range tc.exp {
+						require.Contains(t, actual[i], tc.exp[i])
+					}
 				}()
 			}
 			wg.Wait()
@@ -69,7 +74,7 @@ func TestDWARFLines_Line_Zig(t *testing.T) {

 	// codeSecStart is the beginning of the first code entry in the Wasm binary.
 	// If dwarftestdata.ZigWasm has been changed, we need to inspect by `wasm-tools dump`.
-	const codeSecStart = 0x0109
+	const codeSecStart = 0x108

 	// These cases are crafted by matching the stack trace result from wasmtime. To verify, run:
 	//
@@ -84,25 +89,40 @@ func TestDWARFLines_Line_Zig(t *testing.T) {
 	//                           at /Users/mathetake/zig-macos-aarch64-0.11.0-dev.618+096d3efae/lib/std/os.zig:552:9
 	//           1:  0x18e - builtin.default_panic
 	//                           at /Users/mathetake/zig-macos-aarch64-0.11.0-dev.618+096d3efae/lib/std/builtin.zig:787:25
-	//           2:  0x12d - main.main
-	//                           at ././main.zig:1:23
+	//           2:  0x12d - main.inlined_b
+	//                           at ././main.zig:10:5              - main.inlined_a
+	//                           at ././main.zig:6:5              - main.main
+	//                           at ././main.zig:2:5
 	//           3:  0x2ce - start.callMain
-	//                           at /Users/mathetake/zig-macos-aarch64-0.11.0-dev.618+096d3efae/lib/std/start.zig:614:37
-	//                     - _start
+	//                           at /Users/mathetake/zig-macos-aarch64-0.11.0-dev.618+096d3efae/lib/std/start.zig:614:37              - _start
 	//                           at /Users/mathetake/zig-macos-aarch64-0.11.0-dev.618+096d3efae/lib/std/start.zig:240:42
+	//    2: wasm trap: wasm `unreachable` instruction executed
 	for _, tc := range []struct {
 		offset uint64
-		exp    string
+		exp    []string
 	}{
-		{offset: 0x2bb - codeSecStart, exp: "lib/std/os.zig:552:9"},
-		{offset: 0x18e - codeSecStart, exp: "lib/std/builtin.zig:787:25"},
-		{offset: 0x12d - codeSecStart, exp: "main.zig:1:23"},
-		{offset: 0x2ce - codeSecStart, exp: "lib/std/start.zig:614:37"},
+		{offset: 0x2bb - codeSecStart, exp: []string{"lib/std/os.zig:552:9"}},
+		{offset: 0x18e - codeSecStart, exp: []string{"lib/std/builtin.zig:787:25"}},
+		{offset: 0x12d - codeSecStart, exp: []string{
+			"main.zig:10:5 (inlined)",
+			"main.zig:6:5 (inlined)",
+			"main.zig:2:5",
+		}},
+		{offset: 0x2ce - codeSecStart, exp: []string{
+			"lib/std/start.zig:614:37 (inlined)",
+			"lib/std/start.zig:240:42",
+		}},
 	} {
 		tc := tc
 		t.Run(fmt.Sprintf("%#x/%s", tc.offset, tc.exp), func(t *testing.T) {
 			actual := mod.DWARFLines.Line(tc.offset)
-			require.Contains(t, actual, tc.exp)
+
+			t.Log(actual)
+
+			require.Equal(t, len(tc.exp), len(actual))
+			for i := range tc.exp {
+				require.Contains(t, actual[i], tc.exp[i])
+			}
 		})
 	}
 }