fuzz: adds validation target for maybe invalid module compilation (#879)

Signed-off-by: Takeshi Yoneda <takeshi@tetrate.io>
This commit is contained in:
Takeshi Yoneda
2022-12-02 10:56:13 +09:00
committed by GitHub
parent dbb13117c2
commit e9de52c5d6
13 changed files with 344 additions and 224 deletions

View File

@@ -229,3 +229,4 @@ fuzz_timeout_seconds ?= 10
# fuzz runs the `basic` and then the `validation` cargo-fuzz targets, passing
# -max_total_time=$(fuzz_timeout_seconds) to each run.
.PHONY: fuzz
fuzz:
@cd internal/integration_test/fuzz && cargo fuzz run basic -- -max_total_time=$(fuzz_timeout_seconds)
@cd internal/integration_test/fuzz && cargo fuzz run validation -- -max_total_time=$(fuzz_timeout_seconds)

View File

View File

View File

@@ -13,6 +13,12 @@ libfuzzer-sys = "0.4.3"
wasm-smith = "0.11.4"
wasmprinter = "0.2.39"
[[bin]]
name = "validation"
path = "fuzz_targets/validation.rs"
test = false
doc = false
[[bin]]
name = "basic"
path = "fuzz_targets/basic.rs"

View File

@@ -8,7 +8,7 @@ fn main() {
let wazero_fuzz_lib_dir = format!("{}/wazerolib", wazero_fuzz_dir.as_str());
let library_out_path = format!("{}/libwazero.a", wazero_fuzz_lib_dir);
let library_source_path = format!("{}/lib.go", wazero_fuzz_lib_dir);
let library_source_path = format!("{}/...", wazero_fuzz_lib_dir);
// Parse the GOARCH from the --target argument passed to cargo.
let goarch = var("TARGET")

View File

@@ -0,0 +1,29 @@
#![no_main]
use libfuzzer_sys::arbitrary::{Result, Unstructured};
use libfuzzer_sys::fuzz_target;
// Fuzzing entry point: every input is handed to `run`. The returned Result is discarded on purpose,
// because the only fallible step in `run` is building a module out of the raw bytes.
fuzz_target!(|data: &[u8]| {
    let _ = run(data);
});
// run turns the fuzzer input into a possibly-invalid Wasm module and passes its encoded bytes to
// `validate`. It returns Err only when the input cannot be turned into a module.
fn run(data: &[u8]) -> Result<()> {
    // The raw fuzzer bytes act as the source of randomness.
    let mut source = Unstructured::new(data);

    // Let wasm-smith generate a MaybeInvalidModule (not a guaranteed-valid one), then encode it.
    let generated: wasm_smith::MaybeInvalidModule = source.arbitrary()?;
    let encoded = generated.to_bytes();

    // `encoded` is still alive during the call, so the pointer/length pair stays valid.
    let (binary_ptr, binary_size) = (encoded.as_ptr(), encoded.len());
    unsafe { validate(binary_ptr, binary_size) };

    // Always Ok here: an interesting binary makes `validate` panic instead of returning.
    Ok(())
}
extern "C" {
// validate is implemented in Go and reached through the C ABI.
// `binary_ptr` points at the first byte of the Wasm binary and `binary_size` is its length in bytes.
// It returns nothing: a failure is signalled by a panic on the Go side.
fn validate(binary_ptr: *const u8, binary_size: usize);
}

View File

@@ -2,224 +2,19 @@ package main
import "C"
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"path"
"reflect"
"strings"
"unsafe"
"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api"
)
func main() {}
// require_no_diff ensures that the compiler and the interpreter behave identically for the given binary.
// When they do not (which surfaces as a panic), the offending binary and its text format are saved
// into the testdata directory before the panic propagates.
//
// binaryPtr/binarySize and watPtr/watSize give the address and byte length of the Wasm binary and of
// its text representation. Both are viewed in place; nothing is copied.
//
//export require_no_diff
func require_no_diff(binaryPtr uintptr, binarySize int, watPtr uintptr, watSize int) {
	wasmBin := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
		Data: binaryPtr,
		Len:  binarySize,
		Cap:  binarySize,
	}))
	// A string has no capacity word, so it is described by a StringHeader rather than a SliceHeader.
	wat := *(*string)(unsafe.Pointer(&reflect.StringHeader{
		Data: watPtr,
		Len:  watSize,
	}))

	// failed stays true unless requireNoDiff returns normally, so the deferred function also runs
	// (and saves the inputs) while a panic is unwinding.
	failed := true
	defer func() {
		if failed {
			saveFailedBinary(wasmBin, wat)
		}
	}()

	requireNoDiff(wasmBin, func(err error) {
		if err != nil {
			panic(err)
		}
	})
	failed = false
}
// requireNoDiff compiles and instantiates wasmBin with both the compiler and the interpreter and
// verifies that the two engines agree. Every error found along the way is passed to requireNoError,
// which decides how the failure is reported.
func requireNoDiff(wasmBin []byte, requireNoError func(err error)) {
	ctx := context.Background()

	// One runtime per engine; both are closed when this function returns.
	compiler := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigCompiler())
	interpreter := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigInterpreter())
	defer compiler.Close(ctx)
	defer interpreter.Close(ctx)

	// Compilation must succeed on both engines.
	compilerCompiled, compileErr := compiler.CompileModule(ctx, wasmBin)
	requireNoError(compileErr)
	interpreterCompiled, compileErr := interpreter.CompileModule(ctx, wasmBin)
	requireNoError(compileErr)

	// Instantiation may fail, but then it has to fail the same way on both engines.
	compilerMod, compilerInstErr := compiler.InstantiateModule(ctx, compilerCompiled, wazero.NewModuleConfig())
	interpreterMod, interpreterInstErr := interpreter.InstantiateModule(ctx, interpreterCompiled, wazero.NewModuleConfig())

	okToInvoke, instErr := ensureInstantiationError(compilerInstErr, interpreterInstErr)
	requireNoError(instErr)
	if !okToInvoke {
		return
	}

	// Both instances exist: compare the results of every exported function.
	invokeErr := ensureInvocationResultMatch(compilerMod, interpreterMod, interpreterCompiled.ExportedFunctions())
	requireNoError(invokeErr)
}
// valueTypeVector is the value type byte of a 128-bit vector (v128).
// A v128 value occupies two consecutive uint64 slots (lower and higher 64 bits) in results.
const valueTypeVector = 0x7b
// ensureInvocationResultMatch calls every exported function on both module instances with dummy
// arguments and compares what the compiler and the interpreter return.
//
// A function is skipped when any of its result types is not i32, i64, f32, f64 or v128. A
// disagreement between the returned errors panics; a disagreement between the returned values is
// reported through err (if several functions disagree, the last one visited is the one reported).
func ensureInvocationResultMatch(compiledMod, interpreterMod api.Module, exportedFunctions map[string]api.FunctionDefinition) (err error) {
	ctx := context.Background()

	for name, def := range exportedFunctions {
		resultTypes := def.ResultTypes()

		// For the sake of simplicity in the assertion, only functions returning basic types are invoked.
		supported := true
		for _, rt := range resultTypes {
			switch rt {
			case api.ValueTypeI32, api.ValueTypeI64, api.ValueTypeF32, api.ValueTypeF64, valueTypeVector:
			default:
				supported = false
			}
		}
		if !supported {
			continue
		}

		compilerFn := compiledMod.ExportedFunction(name)
		interpreterFn := interpreterMod.ExportedFunction(name)

		args := getDummyValues(def.ParamTypes())
		compilerResults, compilerErr := compilerFn.Call(ctx, args...)
		interpreterResults, interpreterErr := interpreterFn.Call(ctx, args...)
		if mismatch := ensureInvocationError(compilerErr, interpreterErr); mismatch != nil {
			panic(fmt.Sprintf("error mismatch on invoking %s: %v", name, mismatch))
		}

		// i walks the raw uint64 slots while typeIdx walks the declared result types; they drift
		// apart because a v128 result takes two slots.
		matched := true
		for i, typeIdx := 0, 0; i < len(compilerResults); i, typeIdx = i+1, typeIdx+1 {
			switch resultTypes[typeIdx] {
			case api.ValueTypeI32, api.ValueTypeF32:
				// Only the lower 32 bits are compared for 32-bit types.
				if matched {
					matched = uint32(compilerResults[i]) == uint32(interpreterResults[i])
				}
			case api.ValueTypeI64, api.ValueTypeF64:
				if matched {
					matched = compilerResults[i] == interpreterResults[i]
				}
			case valueTypeVector:
				if matched {
					matched = compilerResults[i] == interpreterResults[i] &&
						compilerResults[i+1] == interpreterResults[i+1]
				}
				i++ // Skip the second slot (lower and higher 64 bits were both compared).
			}
		}

		if !matched {
			err = fmt.Errorf("result mismatch on invoking '%s':\n\tinterpreter got: %v\n\tcompiler got: %v", name, interpreterResults, compilerResults)
		}
	}
	return
}
// getDummyValues builds the all-zero argument list used to invoke a function with the given
// parameter types: one zero per parameter, except for v128 (0x7b), which gets two.
func getDummyValues(valueTypes []api.ValueType) (ret []uint64) {
	for _, vt := range valueTypes {
		switch vt {
		case 0x7b: // v128
			ret = append(ret, 0, 0)
		default:
			ret = append(ret, 0)
		}
	}
	return ret
}
// ensureInvocationError checks that the errors returned by invoking a function on the compiler and
// on the interpreter agree.
//
// It returns nil when both are nil, or when both are non-nil and their messages are equal up to the
// first newline. Otherwise it returns an error describing the disagreement; when only one side
// failed, that side's error is wrapped.
func ensureInvocationError(compilerErr, interpErr error) error {
	switch {
	case compilerErr == nil && interpErr == nil:
		return nil
	case compilerErr == nil:
		return fmt.Errorf("compiler returned no error, but interpreter got: %w", interpErr)
	case interpErr == nil:
		return fmt.Errorf("interpreter returned no error, but compiler got: %w", compilerErr)
	}

	// Only the text before the first newline takes part in the comparison.
	firstLine := func(msg string) string {
		if end := strings.Index(msg, "\n"); end >= 0 {
			return msg[:end]
		}
		return msg
	}

	if firstLine(compilerErr.Error()) != firstLine(interpErr.Error()) {
		return fmt.Errorf("error mismatch:\n\tinterpreter: %v\n\tcompiler: %v", interpErr, compilerErr)
	}
	return nil
}
// ensureInstantiationError checks that the instantiation errors returned by the compiler and the
// interpreter agree.
//
// okToInvoke is true only when both instantiations succeeded. When both failed with the same allowed
// error, (false, nil) is returned: nothing is wrong, but there is no instance to invoke.
// err is non-nil when only one engine failed, when either error is not one accepted by
// allowedErrorDuringInstantiation, or when the two messages differ up to their first newline.
func ensureInstantiationError(compilerErr, interpErr error) (okToInvoke bool, err error) {
	switch {
	case compilerErr == nil && interpErr == nil:
		return true, nil
	case compilerErr == nil:
		return false, fmt.Errorf("compiler returned no error, but interpreter got: %w", interpErr)
	case interpErr == nil:
		return false, fmt.Errorf("interpreter returned no error, but compiler got: %w", compilerErr)
	}

	// Only the text before the first newline is checked and compared.
	firstLine := func(msg string) string {
		if end := strings.Index(msg, "\n"); end >= 0 {
			return msg[:end]
		}
		return msg
	}
	compilerErrMsg, interpErrMsg := firstLine(compilerErr.Error()), firstLine(interpErr.Error())

	if !allowedErrorDuringInstantiation(compilerErrMsg) {
		return false, fmt.Errorf("invalid error occur with compiler: %v", compilerErr)
	}
	if !allowedErrorDuringInstantiation(interpErrMsg) {
		// Report the complete error, as for the compiler, rather than only its first line.
		return false, fmt.Errorf("invalid error occur with interpreter: %v", interpErr)
	}

	if compilerErrMsg != interpErrMsg {
		return false, fmt.Errorf("error mismatch:\n\tinterpreter: %v\n\tcompiler: %v", interpErr, compilerErr)
	}
	return false, nil
}
// allowedErrorDuringInstantiation reports whether errMsg is an instantiation failure that is
// considered sane, i.e. one of the two runtime errors described below.
func allowedErrorDuringInstantiation(errMsg string) bool {
	// A data segment that causes an out-of-bounds access is a runtime error in WebAssembly 2.0,
	// which is fine.
	dataOutOfBounds := strings.HasPrefix(errMsg, "data[") &&
		strings.HasSuffix(errMsg, "]: out of bounds memory access")

	// A failing start function is neither an instantiation nor a compilation error but a runtime
	// error, so it is fine as well.
	startFunctionFailed := strings.HasPrefix(errMsg, "start function[") &&
		strings.Contains(errMsg, "failed: wasm error:")

	return dataOutOfBounds || startFunctionFailed
}
const failedCasesDir = "wazerolib/testdata"
// saveFailedBinary writes binary and wat into failedCasesDir so that it is easy to reproduce the error.
func saveFailedBinary(bin []byte, wat string) {
func saveFailedBinary(bin []byte, wat string, reproduceTestName string) {
checksum := sha256.Sum256(bin)
checkSumStr := hex.EncodeToString(checksum[:])
@@ -242,7 +37,9 @@ func saveFailedBinary(bin []byte, wat string) {
panic(err)
}
watPath := path.Join(testDataDir, fmt.Sprintf("%s.wat", checkSumStr))
var watPath string
if len(wat) != 0 {
watPath = path.Join(testDataDir, fmt.Sprintf("%s.wat", checkSumStr))
watF, err := os.Create(watPath)
if err != nil {
panic(err)
@@ -254,15 +51,21 @@ func saveFailedBinary(bin []byte, wat string) {
if err != nil {
panic(err)
}
fmt.Printf(`
Failed WebAssembly Text:
%s
Failed Wasm binary has been written to %s
Failed Wasm Text has been written to %s
To reproduce the failure, execute: WASM_BINARY_PATH=%s go test ./wazerolib/...
To reproduce the failure, execute: WASM_BINARY_PATH=%s go test -run=%s ./wazerolib/...
`, wat, binaryPath, watPath, binaryPath)
`, wat, binaryPath, watPath, binaryPath, reproduceTestName)
} else {
fmt.Printf(`
Failed WebAssembly Binary in hex: %s
Failed Wasm binary has been written to %s
To reproduce the failure, execute: WASM_BINARY_PATH=%s go test -run=%s ./wazerolib/...
`, hex.EncodeToString(bin), binaryPath, binaryPath, reproduceTestName)
}
}

View File

@@ -0,0 +1,209 @@
package main
import "C"
import (
"context"
"fmt"
"reflect"
"strings"
"unsafe"
"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api"
)
// require_no_diff ensures that the compiler and the interpreter behave identically for the given binary.
// When they do not (which surfaces as a panic), the offending binary and its text format are saved
// into the testdata directory before the panic propagates.
//
// binaryPtr/binarySize and watPtr/watSize give the address and byte length of the Wasm binary and of
// its text representation. Both are viewed in place; nothing is copied.
//
//export require_no_diff
func require_no_diff(binaryPtr uintptr, binarySize int, watPtr uintptr, watSize int) {
	wasmBin := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
		Data: binaryPtr,
		Len:  binarySize,
		Cap:  binarySize,
	}))
	// A string has no capacity word, so it is described by a StringHeader rather than a SliceHeader.
	wat := *(*string)(unsafe.Pointer(&reflect.StringHeader{
		Data: watPtr,
		Len:  watSize,
	}))

	// failed stays true unless requireNoDiff returns normally, so the deferred function also runs
	// (and saves the inputs) while a panic is unwinding.
	failed := true
	defer func() {
		if failed {
			saveFailedBinary(wasmBin, wat, "TestReRunFailedRequireNoDiffCase")
		}
	}()

	requireNoDiff(wasmBin, func(err error) {
		if err != nil {
			panic(err)
		}
	})
	failed = false
}
// requireNoDiff compiles and instantiates wasmBin with both the compiler and the interpreter and
// verifies that the two engines agree. Every error found along the way is passed to requireNoError,
// which decides how the failure is reported.
func requireNoDiff(wasmBin []byte, requireNoError func(err error)) {
	ctx := context.Background()

	// One runtime per engine; both are closed when this function returns.
	compiler := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigCompiler())
	interpreter := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigInterpreter())
	defer compiler.Close(ctx)
	defer interpreter.Close(ctx)

	// Compilation must succeed on both engines.
	compilerCompiled, compileErr := compiler.CompileModule(ctx, wasmBin)
	requireNoError(compileErr)
	interpreterCompiled, compileErr := interpreter.CompileModule(ctx, wasmBin)
	requireNoError(compileErr)

	// Instantiation may fail, but then it has to fail the same way on both engines.
	compilerMod, compilerInstErr := compiler.InstantiateModule(ctx, compilerCompiled, wazero.NewModuleConfig())
	interpreterMod, interpreterInstErr := interpreter.InstantiateModule(ctx, interpreterCompiled, wazero.NewModuleConfig())

	okToInvoke, instErr := ensureInstantiationError(compilerInstErr, interpreterInstErr)
	requireNoError(instErr)
	if !okToInvoke {
		return
	}

	// Both instances exist: compare the results of every exported function.
	invokeErr := ensureInvocationResultMatch(compilerMod, interpreterMod, interpreterCompiled.ExportedFunctions())
	requireNoError(invokeErr)
}
// valueTypeVector is the value type byte of a 128-bit vector (v128).
// A v128 value occupies two consecutive uint64 slots (lower and higher 64 bits) in results.
const valueTypeVector = 0x7b
// ensureInvocationResultMatch calls every exported function on both module instances with dummy
// arguments and compares what the compiler and the interpreter return.
//
// A function is skipped when any of its result types is not i32, i64, f32, f64 or v128. A
// disagreement between the returned errors panics; a disagreement between the returned values is
// reported through err (if several functions disagree, the last one visited is the one reported).
func ensureInvocationResultMatch(compiledMod, interpreterMod api.Module, exportedFunctions map[string]api.FunctionDefinition) (err error) {
	ctx := context.Background()

	for name, def := range exportedFunctions {
		resultTypes := def.ResultTypes()

		// For the sake of simplicity in the assertion, only functions returning basic types are invoked.
		supported := true
		for _, rt := range resultTypes {
			switch rt {
			case api.ValueTypeI32, api.ValueTypeI64, api.ValueTypeF32, api.ValueTypeF64, valueTypeVector:
			default:
				supported = false
			}
		}
		if !supported {
			continue
		}

		compilerFn := compiledMod.ExportedFunction(name)
		interpreterFn := interpreterMod.ExportedFunction(name)

		args := getDummyValues(def.ParamTypes())
		compilerResults, compilerErr := compilerFn.Call(ctx, args...)
		interpreterResults, interpreterErr := interpreterFn.Call(ctx, args...)
		if mismatch := ensureInvocationError(compilerErr, interpreterErr); mismatch != nil {
			panic(fmt.Sprintf("error mismatch on invoking %s: %v", name, mismatch))
		}

		// i walks the raw uint64 slots while typeIdx walks the declared result types; they drift
		// apart because a v128 result takes two slots.
		matched := true
		for i, typeIdx := 0, 0; i < len(compilerResults); i, typeIdx = i+1, typeIdx+1 {
			switch resultTypes[typeIdx] {
			case api.ValueTypeI32, api.ValueTypeF32:
				// Only the lower 32 bits are compared for 32-bit types.
				if matched {
					matched = uint32(compilerResults[i]) == uint32(interpreterResults[i])
				}
			case api.ValueTypeI64, api.ValueTypeF64:
				if matched {
					matched = compilerResults[i] == interpreterResults[i]
				}
			case valueTypeVector:
				if matched {
					matched = compilerResults[i] == interpreterResults[i] &&
						compilerResults[i+1] == interpreterResults[i+1]
				}
				i++ // Skip the second slot (lower and higher 64 bits were both compared).
			}
		}

		if !matched {
			err = fmt.Errorf("result mismatch on invoking '%s':\n\tinterpreter got: %v\n\tcompiler got: %v", name, interpreterResults, compilerResults)
		}
	}
	return
}
// getDummyValues builds the all-zero argument list used to invoke a function with the given
// parameter types: one zero per parameter, except for v128 (0x7b), which gets two.
func getDummyValues(valueTypes []api.ValueType) (ret []uint64) {
	for _, vt := range valueTypes {
		switch vt {
		case 0x7b: // v128
			ret = append(ret, 0, 0)
		default:
			ret = append(ret, 0)
		}
	}
	return ret
}
// ensureInvocationError checks that the errors returned by invoking a function on the compiler and
// on the interpreter agree.
//
// It returns nil when both are nil, or when both are non-nil and their messages are equal up to the
// first newline. Otherwise it returns an error describing the disagreement; when only one side
// failed, that side's error is wrapped.
func ensureInvocationError(compilerErr, interpErr error) error {
	switch {
	case compilerErr == nil && interpErr == nil:
		return nil
	case compilerErr == nil:
		return fmt.Errorf("compiler returned no error, but interpreter got: %w", interpErr)
	case interpErr == nil:
		return fmt.Errorf("interpreter returned no error, but compiler got: %w", compilerErr)
	}

	// Only the text before the first newline takes part in the comparison.
	firstLine := func(msg string) string {
		if end := strings.Index(msg, "\n"); end >= 0 {
			return msg[:end]
		}
		return msg
	}

	if firstLine(compilerErr.Error()) != firstLine(interpErr.Error()) {
		return fmt.Errorf("error mismatch:\n\tinterpreter: %v\n\tcompiler: %v", interpErr, compilerErr)
	}
	return nil
}
// ensureInstantiationError checks that the instantiation errors returned by the compiler and the
// interpreter agree.
//
// okToInvoke is true only when both instantiations succeeded. When both failed with the same allowed
// error, (false, nil) is returned: nothing is wrong, but there is no instance to invoke.
// err is non-nil when only one engine failed, when either error is not one accepted by
// allowedErrorDuringInstantiation, or when the two messages differ up to their first newline.
func ensureInstantiationError(compilerErr, interpErr error) (okToInvoke bool, err error) {
	switch {
	case compilerErr == nil && interpErr == nil:
		return true, nil
	case compilerErr == nil:
		return false, fmt.Errorf("compiler returned no error, but interpreter got: %w", interpErr)
	case interpErr == nil:
		return false, fmt.Errorf("interpreter returned no error, but compiler got: %w", compilerErr)
	}

	// Only the text before the first newline is checked and compared.
	firstLine := func(msg string) string {
		if end := strings.Index(msg, "\n"); end >= 0 {
			return msg[:end]
		}
		return msg
	}
	compilerErrMsg, interpErrMsg := firstLine(compilerErr.Error()), firstLine(interpErr.Error())

	if !allowedErrorDuringInstantiation(compilerErrMsg) {
		return false, fmt.Errorf("invalid error occur with compiler: %v", compilerErr)
	}
	if !allowedErrorDuringInstantiation(interpErrMsg) {
		// Report the complete error, as for the compiler, rather than only its first line.
		return false, fmt.Errorf("invalid error occur with interpreter: %v", interpErr)
	}

	if compilerErrMsg != interpErrMsg {
		return false, fmt.Errorf("error mismatch:\n\tinterpreter: %v\n\tcompiler: %v", interpErr, compilerErr)
	}
	return false, nil
}
// allowedErrorDuringInstantiation reports whether errMsg is an instantiation failure that is
// considered sane, i.e. one of the two runtime errors described below.
func allowedErrorDuringInstantiation(errMsg string) bool {
	// A data segment that causes an out-of-bounds access is a runtime error in WebAssembly 2.0,
	// which is fine.
	dataOutOfBounds := strings.HasPrefix(errMsg, "data[") &&
		strings.HasSuffix(errMsg, "]: out of bounds memory access")

	// A failing start function is neither an instantiation nor a compilation error but a runtime
	// error, so it is fine as well.
	startFunctionFailed := strings.HasPrefix(errMsg, "start function[") &&
		strings.Contains(errMsg, "failed: wasm error:")

	return dataOutOfBounds || startFunctionFailed
}

View File

@@ -7,13 +7,13 @@ import (
"github.com/tetratelabs/wazero/internal/testing/require"
)
// TestReRunFailedCase re-runs the failed case specified by WASM_BINARY_NAME in testdata directory.
func TestReRunFailedCase(t *testing.T) {
// TestReRunFailedRequireNoDiffCase re-runs the failed case whose binary is located at the path given by WASM_BINARY_PATH.
func TestReRunFailedRequireNoDiffCase(t *testing.T) {
binaryPath := os.Getenv("WASM_BINARY_PATH")
wasmBin, err := os.ReadFile(binaryPath)
if err != nil {
t.Fatal(err)
t.Skip(err)
}
requireNoDiff(wasmBin, func(err error) { require.NoError(t, err) })

View File

@@ -0,0 +1,40 @@
package main
import "C"
import (
"context"
"reflect"
"unsafe"
"github.com/tetratelabs/wazero"
)
// validate receives the bytes of a possibly-invalid Wasm module and checks that the validation phase
// and the compiler get through them without panicking.
//
// binaryPtr and binarySize give the address and byte length of the binary, which is viewed in place.
// If compilation panics, the binary is saved to the testdata directory before the panic propagates.
//
//export validate
func validate(binaryPtr uintptr, binarySize int) {
	header := reflect.SliceHeader{Data: binaryPtr, Len: binarySize, Cap: binarySize}
	wasmBin := *(*[]byte)(unsafe.Pointer(&header))

	// succeeded only flips once tryCompile has returned normally, so the deferred function below
	// also fires while a panic is unwinding.
	succeeded := false
	defer func() {
		if !succeeded {
			// No text format is passed for this target, hence the empty wat.
			saveFailedBinary(wasmBin, "", "TestReRunFailedValidateCase")
		}
	}()

	tryCompile(wasmBin)
	succeeded = true
}
// tryCompile runs wasmBin through CompileModule on a compiler-engine runtime to ensure that
// validation and compilation do not panic.
//
// The compiled module and the error are deliberately ignored: the input may be invalid, so an error
// is an acceptable outcome. Only a panic counts as a failure.
func tryCompile(wasmBin []byte) {
	ctx := context.Background()
	compiler := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigCompiler())
	// Close the runtime on the way out so repeated calls do not accumulate runtimes.
	defer compiler.Close(ctx)

	_, _ = compiler.CompileModule(ctx, wasmBin)
}

View File

@@ -0,0 +1,18 @@
package main
import (
"os"
"testing"
)
// TestReRunFailedValidateCase re-runs the failed case whose binary is located at the path given by
// the WASM_BINARY_PATH environment variable.
//
// The test is skipped when the variable is unset. When it is set but the file cannot be read, the
// test fails, so that a mistyped path is not mistaken for a passing reproduction.
func TestReRunFailedValidateCase(t *testing.T) {
	binaryPath := os.Getenv("WASM_BINARY_PATH")
	if binaryPath == "" {
		t.Skip("WASM_BINARY_PATH is not set")
	}

	wasmBin, err := os.ReadFile(binaryPath)
	if err != nil {
		t.Fatal(err)
	}

	// A panic inside tryCompile is what reproduces the failure.
	tryCompile(wasmBin)
}

View File

@@ -1464,6 +1464,9 @@ func (m *Module) validateFunctionWithMaxStackValues(
valueTypeStack.push(p)
}
} else if op == OpcodeEnd {
if len(controlBlockStack) == 0 {
return fmt.Errorf("redundant End instruction at %#x", pc)
}
bl := controlBlockStack[len(controlBlockStack)-1]
bl.endAt = pc
controlBlockStack = controlBlockStack[:len(controlBlockStack)-1]

View File

@@ -3542,3 +3542,14 @@ func TestModule_funcValidation_loopWithParams(t *testing.T) {
})
}
}
// TestFunctionValidation_redundantEnd is found in th validation fuzzing #879.
func TestFunctionValidation_redundantEnd(t *testing.T) {
m := &Module{
TypeSection: []*FunctionType{{}},
FunctionSection: []Index{0},
CodeSection: []*Code{{Body: []byte{OpcodeEnd, OpcodeEnd}}},
}
err := m.validateFunction(api.CoreFeaturesV2, 0, nil, nil, nil, nil, nil)
require.EqualError(t, err, "redundant End instruction at 0x1")
}