Files
wazero/internal/asm/amd64/impl.go
Crypt Keeper 2664b1eb62 Simplifies API per feedback (#427)
During #425, @neilalexander gave constructive feedback that the API is
both moving fast, and not good enough yet. This attempts to reduce the
incidental complexity at the cost of a little conflation.

### odd presence of `wasm` and `wasi` packages -> `api` package

We had public API packages in wasm and wasi, which helped us avoid
leaking too many internals as public. That these had names that look
like there should be implementations in them caused unnecessary
confusion. This squashes both into one package "api" which has no
package collision with anything.

We've long struggled with the poorly specified and non-uniformly
implemented WASI specification. Trying to bring visibility to its
constraints knowing they are routinely invalid taints our API for no
good reason. This removes all `WASI` commands for a default to invoke
the function `_start` if it exists. In doing so, there's only one path
to start a module.

Moreover, this puts all wasi code in a top-level package "wasi" as it
isn't re-imported by any internal types.

### Reuse of Module for pre and post instantiation to `Binary` -> `Module`

Module is defined by WebAssembly in many phases, from decoded to
instantiated. However, using the same noun in multiple packages is very
confusing. We at one point tried a name "DecodedModule" or
"InstantiatedModule", but this is a fool's errand. By deviating slightly
from the spec we can make it unambiguous what a module is.

This makes the result of compilation a `Binary`, retaining `Module` for an
instantiated one. In doing so, there's no longer any name conflicts
whatsoever.

### Confusion about config -> `ModuleConfig`

Also caused by splitting wasm into wasm+wasi is configuration. This
conflates both into the same type `ModuleConfig` as it is simpler than
trying to explain a "will never be finished" API of wasi snapshot-01 in
routine use of WebAssembly. In other words, this further moves WASI out
of the foreground as it has been nothing but burden.

```diff
--- a/README.md
+++ b/README.md
@@ -49,8 +49,8 @@ For example, here's how you can allow WebAssembly modules to read
-wm, err := r.InstantiateModule(wazero.WASISnapshotPreview1())
-defer wm.Close()
+wm, err := wasi.InstantiateSnapshotPreview1(r)
+defer wm.Close()

-sysConfig := wazero.NewSysConfig().WithFS(os.DirFS("/work/home"))
-module, err := wazero.StartWASICommandWithConfig(r, compiled, sysConfig)
+config := wazero.ModuleConfig().WithFS(os.DirFS("/work/home"))
+module, err := r.InstantiateModule(binary, config)
 defer module.Close()
 ...
```
2022-04-02 06:42:36 +08:00

2145 lines
72 KiB
Go

package asm_amd64
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"math"
"github.com/tetratelabs/wazero/internal/asm"
)
// NodeImpl implements asm.Node for amd64.
//
// Nodes form a singly-linked list (via Next) that AssemblerImpl encodes
// in order into machine code.
type NodeImpl struct {
	// NOTE: fields here are exported for testing with the amd64_debug package.

	// Instruction is the instruction this node encodes.
	Instruction asm.Instruction
	// OffsetInBinaryField is the offset of this instruction in the assembled binary,
	// assigned during Encode. Field suffix to dodge conflict with the OffsetInBinary method.
	OffsetInBinaryField asm.NodeOffsetInBinary
	// JumpTarget holds the target node in the linked list for the jump-kind instruction.
	JumpTarget *NodeImpl
	// Flag holds encoding-state bits; see NodeFlag.
	Flag NodeFlag
	// Next holds the next node from this node in the assembled linked list.
	Next *NodeImpl
	// Types describes where the source and destination operands live
	// (none, register, memory, const, or branch).
	Types OperandTypes
	// Source/destination operands: base registers, constants (also used as
	// memory displacements), and, for memory operands, the index register
	// and its scale.
	SrcReg, DstReg           asm.Register
	SrcConst, DstConst       asm.ConstantValue
	SrcMemIndex, DstMemIndex asm.Register
	SrcMemScale, DstMemScale byte
	// Mode is an instruction-specific modifier set via CompileRegisterToRegisterWithMode.
	Mode byte
	// readInstructionAddressBeforeTargetInstruction holds the instruction right before the target of
	// read instruction address instruction. See asm.assemblerBase.CompileReadInstructionAddress.
	readInstructionAddressBeforeTargetInstruction asm.Instruction
	// JumpOrigins hold all the nodes trying to jump into this node. In other words, all the nodes with .JumpTarget == this.
	JumpOrigins map[*NodeImpl]struct{}
}
// NodeFlag is a bitset describing the encoding state of a NodeImpl.
type NodeFlag byte

const (
	// NodeFlagInitializedForEncoding is always set to indicate that node is already initialized. Notably, this is used to judge
	// whether a jump is backward or forward before encoding.
	NodeFlagInitializedForEncoding NodeFlag = (1 << iota)
	// NodeFlagBackwardJump is set when this node's JumpTarget precedes it in the
	// linked list (see InitializeNodesForEncoding).
	NodeFlagBackwardJump
	// NodeFlagShortForwardJump is only used by forward branch jumps, which means .JumpTarget != nil and
	// the target node is encoded after this node. It is set by default in
	// InitializeNodesForEncoding, meaning we first Encode every forward jump as a
	// short jump (i.e. relative signed 8-bit integer offset jump) to keep the binary small,
	// and it is cleared in ResolveForwardRelativeJumps when the offset turns out not to fit.
	NodeFlagShortForwardJump
)
// isInitializedForEncoding reports whether InitializeNodesForEncoding has
// already visited this node.
func (n *NodeImpl) isInitializedForEncoding() bool {
	const flag = NodeFlagInitializedForEncoding
	return n.Flag&flag == flag
}
// isJumpNode reports whether this node branches to another node, i.e. whether
// a jump target has been assigned.
func (n *NodeImpl) isJumpNode() bool {
	hasTarget := n.JumpTarget != nil
	return hasTarget
}
// isBackwardJump reports whether this node jumps to a target that precedes it
// in the node list.
func (n *NodeImpl) isBackwardJump() bool {
	if !n.isJumpNode() {
		return false
	}
	return n.Flag&NodeFlagBackwardJump != 0
}
// isForwardJump reports whether this node jumps to a target that follows it
// in the node list.
func (n *NodeImpl) isForwardJump() bool {
	if !n.isJumpNode() {
		return false
	}
	return n.Flag&NodeFlagBackwardJump == 0
}
// isForwardShortJump reports whether this node is a forward jump currently
// encoded in the short (8-bit displacement) form.
func (n *NodeImpl) isForwardShortJump() bool {
	if !n.isForwardJump() {
		return false
	}
	return n.Flag&NodeFlagShortForwardJump != 0
}
// AssignJumpTarget implements asm.Node.AssignJumpTarget.
//
// The target must be a *NodeImpl created by the same assembler implementation;
// any other asm.Node implementation makes the type assertion panic.
func (n *NodeImpl) AssignJumpTarget(target asm.Node) {
	n.JumpTarget = target.(*NodeImpl)
}
// AssignDestinationConstant implements asm.Node.AssignDestinationConstant.
//
// The value becomes this node's destination (constant) operand.
func (n *NodeImpl) AssignDestinationConstant(value asm.ConstantValue) {
	n.DstConst = value
}
// AssignSourceConstant implements asm.Node.AssignSourceConstant.
//
// The value becomes this node's source (constant) operand.
func (n *NodeImpl) AssignSourceConstant(value asm.ConstantValue) {
	n.SrcConst = value
}
// OffsetInBinary implements asm.Node.OffsetInBinary.
//
// The returned offset is only meaningful after encoding: Encode assigns
// OffsetInBinaryField once any NOP padding has been decided.
func (n *NodeImpl) OffsetInBinary() asm.NodeOffsetInBinary {
	return n.OffsetInBinaryField
}
// String implements fmt.Stringer.
//
// This is for debugging purpose, and the format is almost same as the AT&T assembly syntax,
// meaning that this should look like "INSTRUCTION ${from}, ${to}" where each operand
// might be embraced by '[]' to represent the memory location. All constants and
// memory displacements are printed in hexadecimal for consistency.
func (n *NodeImpl) String() (ret string) {
	instName := InstructionName(n.Instruction)
	switch n.Types {
	case OperandTypesNoneToNone:
		ret = instName
	case OperandTypesNoneToRegister:
		ret = fmt.Sprintf("%s %s", instName, RegisterName(n.DstReg))
	case OperandTypesNoneToMemory:
		if n.DstMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s [%s + 0x%x + %s*0x%x]", instName,
				RegisterName(n.DstReg), n.DstConst, RegisterName(n.DstMemIndex), n.DstMemScale)
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x]", instName, RegisterName(n.DstReg), n.DstConst)
		}
	case OperandTypesNoneToBranch:
		ret = fmt.Sprintf("%s {%v}", instName, n.JumpTarget)
	case OperandTypesRegisterToNone:
		ret = fmt.Sprintf("%s %s", instName, RegisterName(n.SrcReg))
	case OperandTypesRegisterToRegister:
		ret = fmt.Sprintf("%s %s, %s", instName, RegisterName(n.SrcReg), RegisterName(n.DstReg))
	case OperandTypesRegisterToMemory:
		if n.DstMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s %s, [%s + 0x%x + %s*0x%x]", instName, RegisterName(n.SrcReg),
				RegisterName(n.DstReg), n.DstConst, RegisterName(n.DstMemIndex), n.DstMemScale)
		} else {
			ret = fmt.Sprintf("%s %s, [%s + 0x%x]", instName, RegisterName(n.SrcReg), RegisterName(n.DstReg), n.DstConst)
		}
	case OperandTypesRegisterToConst:
		ret = fmt.Sprintf("%s %s, 0x%x", instName, RegisterName(n.SrcReg), n.DstConst)
	case OperandTypesMemoryToRegister:
		if n.SrcMemIndex != asm.NilRegister {
			// The source offset is printed in hex (0x%x) like every other case
			// (it was previously decimal only in this branch).
			ret = fmt.Sprintf("%s [%s + 0x%x + %s*0x%x], %s", instName,
				RegisterName(n.SrcReg), n.SrcConst, RegisterName(n.SrcMemIndex), n.SrcMemScale, RegisterName(n.DstReg))
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x], %s", instName, RegisterName(n.SrcReg), n.SrcConst, RegisterName(n.DstReg))
		}
	case OperandTypesMemoryToConst:
		if n.SrcMemIndex != asm.NilRegister {
			// Hexadecimal offset here too, for the same consistency reason.
			ret = fmt.Sprintf("%s [%s + 0x%x + %s*0x%x], 0x%x", instName,
				RegisterName(n.SrcReg), n.SrcConst, RegisterName(n.SrcMemIndex), n.SrcMemScale, n.DstConst)
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x], 0x%x", instName, RegisterName(n.SrcReg), n.SrcConst, n.DstConst)
		}
	case OperandTypesConstToMemory:
		if n.DstMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s 0x%x, [%s + 0x%x + %s*0x%x]", instName, n.SrcConst,
				RegisterName(n.DstReg), n.DstConst, RegisterName(n.DstMemIndex), n.DstMemScale)
		} else {
			ret = fmt.Sprintf("%s 0x%x, [%s + 0x%x]", instName, n.SrcConst, RegisterName(n.DstReg), n.DstConst)
		}
	case OperandTypesConstToRegister:
		ret = fmt.Sprintf("%s 0x%x, %s", instName, n.SrcConst, RegisterName(n.DstReg))
	}
	return
}
// OperandType represents where an operand is placed for an instruction.
// Note: this is almost the same as obj.AddrType in GO assembler.
type OperandType byte

const (
	OperandTypeNone OperandType = iota
	OperandTypeRegister
	OperandTypeMemory
	OperandTypeConst
	OperandTypeBranch
)

// String implements fmt.Stringer.
//
// Unknown values render as the empty string.
func (o OperandType) String() (ret string) {
	names := [...]string{
		OperandTypeNone:     "none",
		OperandTypeRegister: "register",
		OperandTypeMemory:   "memory",
		OperandTypeConst:    "const",
		OperandTypeBranch:   "branch",
	}
	if int(o) < len(names) {
		ret = names[o]
	}
	return
}

// OperandTypes represents the only combinations of two OperandTypes used by wazero
type OperandTypes struct{ src, dst OperandType }

var (
	OperandTypesNoneToNone         = OperandTypes{OperandTypeNone, OperandTypeNone}
	OperandTypesNoneToRegister     = OperandTypes{OperandTypeNone, OperandTypeRegister}
	OperandTypesNoneToMemory       = OperandTypes{OperandTypeNone, OperandTypeMemory}
	OperandTypesNoneToBranch       = OperandTypes{OperandTypeNone, OperandTypeBranch}
	OperandTypesRegisterToNone     = OperandTypes{OperandTypeRegister, OperandTypeNone}
	OperandTypesRegisterToRegister = OperandTypes{OperandTypeRegister, OperandTypeRegister}
	OperandTypesRegisterToMemory   = OperandTypes{OperandTypeRegister, OperandTypeMemory}
	OperandTypesRegisterToConst    = OperandTypes{OperandTypeRegister, OperandTypeConst}
	OperandTypesMemoryToRegister   = OperandTypes{OperandTypeMemory, OperandTypeRegister}
	OperandTypesMemoryToConst      = OperandTypes{OperandTypeMemory, OperandTypeConst}
	OperandTypesConstToRegister    = OperandTypes{OperandTypeConst, OperandTypeRegister}
	OperandTypesConstToMemory      = OperandTypes{OperandTypeConst, OperandTypeMemory}
)

// String implements fmt.Stringer.
func (o OperandTypes) String() string {
	// %v invokes OperandType.String on each side.
	return fmt.Sprintf("from:%v,to:%v", o.src, o.dst)
}
// AssemblerImpl implements Assembler.
type AssemblerImpl struct {
	asm.BaseAssemblerImpl
	// EnablePadding, when true, makes Encode insert NOPs before jump-family
	// instructions to avoid Intel's jump-conditional-code erratum (see maybeNOPPadding).
	EnablePadding bool
	// Root and Current are the head and the tail of the node linked list.
	Root, Current *NodeImpl
	// Buf receives the encoded machine code.
	Buf *bytes.Buffer
	// ForceReAssemble is set when a short forward jump turns out to need a
	// 32-bit offset, making Assemble re-run the encoding.
	ForceReAssemble bool
}
// NewAssemblerImpl returns an empty assembler with NOP padding enabled.
func NewAssemblerImpl() *AssemblerImpl {
	assembler := &AssemblerImpl{
		EnablePadding: true,
		Buf:           bytes.NewBuffer(nil),
	}
	return assembler
}
// newNode creates a new Node and appends it into the linked list.
func (a *AssemblerImpl) newNode(instruction asm.Instruction, types OperandTypes) *NodeImpl {
	node := &NodeImpl{
		Instruction: instruction,
		Types:       types,
		Next:        nil,
		JumpOrigins: map[*NodeImpl]struct{}{},
	}
	a.addNode(node)
	return node
}
// addNode appends the new node into the linked list.
func (a *AssemblerImpl) addNode(node *NodeImpl) {
	if a.Root == nil {
		// First node: it is both the head and the tail.
		a.Root, a.Current = node, node
	} else {
		// Link after the current tail, then advance the tail.
		a.Current.Next = node
		a.Current = node
	}

	// Any node compiled while a branch-target request was pending jumps here.
	for _, origin := range a.SetBranchTargetOnNextNodes {
		origin.(*NodeImpl).JumpTarget = node
	}
	a.SetBranchTargetOnNextNodes = nil
}
// EncodeNode encodes the given node into writer.
//
// It dispatches on the node's source/destination operand-type combination to
// the matching encode method; unknown combinations produce an error.
func (a *AssemblerImpl) EncodeNode(n *NodeImpl) (err error) {
	switch n.Types {
	case OperandTypesNoneToNone:
		err = a.encodeNoneToNone(n)
	case OperandTypesNoneToRegister:
		err = a.EncodeNoneToRegister(n)
	case OperandTypesNoneToMemory:
		err = a.EncodeNoneToMemory(n)
	case OperandTypesNoneToBranch:
		// Branching operand can be encoded as relative jumps.
		err = a.EncodeRelativeJump(n)
	case OperandTypesRegisterToNone:
		err = a.EncodeRegisterToNone(n)
	case OperandTypesRegisterToRegister:
		err = a.EncodeRegisterToRegister(n)
	case OperandTypesRegisterToMemory:
		err = a.EncodeRegisterToMemory(n)
	case OperandTypesRegisterToConst:
		err = a.EncodeRegisterToConst(n)
	case OperandTypesMemoryToRegister:
		err = a.EncodeMemoryToRegister(n)
	case OperandTypesConstToRegister:
		err = a.EncodeConstToRegister(n)
	case OperandTypesConstToMemory:
		err = a.EncodeConstToMemory(n)
	case OperandTypesMemoryToConst:
		err = a.EncodeMemoryToConst(n)
	default:
		err = fmt.Errorf("encoder undefined for [%s] operand type", n.Types)
	}
	return
}
// Assemble implements asm.AssemblerBase
//
// It classifies jumps as forward/backward, then encodes the node list,
// repeating the encoding whenever a short forward jump turned out to need a
// 32-bit offset (see ResolveForwardRelativeJumps). Finally it runs any
// registered OnGenerateCallbacks over the produced code.
func (a *AssemblerImpl) Assemble() ([]byte, error) {
	a.InitializeNodesForEncoding()

	for done := false; !done; {
		if err := a.Encode(); err != nil {
			return nil, err
		}
		if a.ForceReAssemble {
			// Reset the buffer length but keep the underlying storage: the
			// binary will be roughly the same size after reassembling.
			a.Buf.Reset()
			// Clear the flag so we cannot loop forever.
			a.ForceReAssemble = false
		} else {
			done = true
		}
	}

	code := a.Buf.Bytes()
	for _, callback := range a.OnGenerateCallbacks {
		if err := callback(code); err != nil {
			return nil, err
		}
	}
	return code, nil
}
// InitializeNodesForEncoding initializes NodeImpl.Flag and determines whether
// each jump is a forward or a backward jump.
func (a *AssemblerImpl) InitializeNodesForEncoding() {
	var total int
	for node := a.Root; node != nil; node = node.Next {
		total++
		node.Flag |= NodeFlagInitializedForEncoding

		target := node.JumpTarget
		if target == nil {
			continue
		}
		if target.isInitializedForEncoding() {
			// The target was already visited, so it lives behind this node.
			node.Flag |= NodeFlagBackwardJump
		} else {
			// Forward jump: start by assuming it fits in a signed 8-bit
			// displacement so the encoding stays as small as possible.
			// ResolveForwardRelativeJumps clears this flag if it doesn't fit.
			node.Flag |= NodeFlagShortForwardJump
		}
	}

	// Roughly pre-allocate the buffer assuming instructions average 5 bytes.
	a.Buf.Grow(total * 5)
}
// Encode walks the node list from Root, encoding each node into a.Buf.
// Jump-family instructions are optionally NOP-padded first, and forward jumps
// targeting a just-encoded node are resolved as soon as its offset is known.
func (a *AssemblerImpl) Encode() error {
	for node := a.Root; node != nil; node = node.Next {
		// If an instruction needs NOP padding, we do so before encoding it.
		// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
		if a.EnablePadding {
			if err := a.maybeNOPPadding(node); err != nil {
				return err
			}
		}

		// After the padding, we can finalize the offset of this instruction in the binary.
		node.OffsetInBinaryField = uint64(a.Buf.Len())

		if err := a.EncodeNode(node); err != nil {
			return fmt.Errorf("%w: %v", err, node)
		}

		if err := a.ResolveForwardRelativeJumps(node); err != nil {
			return fmt.Errorf("invalid relative forward jumps: %w", err)
		}
	}
	return nil
}
// maybeNOPPadding maybe appends NOP instructions before the node `n`.
// This is necessary to avoid Intel's jump erratum:
// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
func (a *AssemblerImpl) maybeNOPPadding(n *NodeImpl) (err error) {
	var instructionLen int32

	// See Section 2.1 for when we have to pad NOP.
	// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
	switch n.Instruction {
	case RET, JMP, JCC, JCS, JEQ, JGE, JGT, JHI, JLE, JLS, JLT, JMI, JNE, JPC, JPS:
		// In order to know the instruction length before writing into the binary,
		// we try encoding it with the temporary buffer.
		saved := a.Buf
		a.Buf = bytes.NewBuffer(nil)

		// Assign the temporary offset which may or may not be correct depending on the padding decision.
		n.OffsetInBinaryField = uint64(saved.Len())

		// Encode the node and get the instruction length.
		if err = a.EncodeNode(n); err != nil {
			return
		}
		instructionLen = int32(a.Buf.Len())

		// Revert the temporary buffer.
		a.Buf = saved
	case // The possible fused jump instructions if the next node is a conditional jump instruction.
		CMPL, CMPQ, TESTL, TESTQ, ADDL, ADDQ, SUBL, SUBQ, ANDL, ANDQ, INCQ, DECQ:
		instructionLen, err = a.fusedInstructionLength(n)
		if err != nil {
			return err
		}
	}

	// Zero length means "no padding needed" (not a padded instruction, or not fused).
	if instructionLen == 0 {
		return
	}

	const boundaryInBytes int32 = 32
	const mask int32 = boundaryInBytes - 1

	var padNum int
	currentPos := int32(a.Buf.Len())
	if used := currentPos & mask; used+instructionLen >= boundaryInBytes {
		// The instruction would cross (or end on) a 32-byte boundary:
		// pad with NOPs so it starts on a fresh boundary instead.
		padNum = int(boundaryInBytes - used)
	}

	a.padNOP(padNum)
	return
}
// fusedInstructionLength returns the length of "macro fused instruction" if the
// instruction sequence starting from `n` can be fused by processor. Otherwise,
// returns zero.
func (a *AssemblerImpl) fusedInstructionLength(n *NodeImpl) (ret int32, err error) {
	// Find the next non-NOP instruction.
	next := n.Next
	for ; next != nil && next.Instruction == NOP; next = next.Next {
	}

	if next == nil {
		return
	}

	inst, jmpInst := n.Instruction, next.Instruction

	if !(jmpInst == JCC || jmpInst == JCS || jmpInst == JEQ || jmpInst == JGE || jmpInst == JGT ||
		jmpInst == JHI || jmpInst == JLE || jmpInst == JLS || jmpInst == JLT || jmpInst == JMI ||
		jmpInst == JNE || jmpInst == JPC || jmpInst == JPS) {
		// If the next instruction is not jump kind, the instruction will not be fused.
		return
	}

	// How to determine whether or not the instruction can be fused is described in
	// Section 3.4.2.2 of "Intel Optimization Manual":
	// https://www.intel.com/content/dam/doc/manual/64-ia-32-architectures-optimization-manual.pdf
	isTest := inst == TESTL || inst == TESTQ
	isCmp := inst == CMPQ || inst == CMPL
	isTestCmp := isTest || isCmp
	if isTestCmp && ((n.Types.src == OperandTypeMemory && n.Types.dst == OperandTypeConst) ||
		(n.Types.src == OperandTypeConst && n.Types.dst == OperandTypeMemory)) {
		// The manual says: "CMP and TEST can not be fused when comparing MEM-IMM".
		return
	}

	// Implement the decision according to Table 3-1 in the manual.
	isAnd := inst == ANDL || inst == ANDQ
	if !isTest && !isAnd {
		if jmpInst == JMI || jmpInst == JPL || jmpInst == JPS || jmpInst == JPC {
			// These jumps are only fused for TEST or AND.
			return
		}
		isAdd := inst == ADDL || inst == ADDQ
		isSub := inst == SUBL || inst == SUBQ
		if !isCmp && !isAdd && !isSub {
			if jmpInst == JCS || jmpInst == JCC || jmpInst == JHI || jmpInst == JLS {
				// These jumps are only fused for TEST, AND, CMP, ADD, or SUB.
				return
			}
		}
	}

	// Now the instruction is ensured to be fused by the processor.
	// In order to know the fused instruction length before writing into the binary,
	// we try encoding it with the temporary buffer.
	saved := a.Buf
	savedLen := uint64(saved.Len())
	a.Buf = bytes.NewBuffer(nil)

	for _, fused := range []*NodeImpl{n, next} {
		// Assign the temporary offset which may or may not be correct depending on the padding decision.
		fused.OffsetInBinaryField = savedLen + uint64(a.Buf.Len())

		// Encode the node into the temporary buffer.
		err = a.EncodeNode(fused)
		if err != nil {
			return
		}
	}
	ret = int32(a.Buf.Len())

	// Revert the temporary buffer.
	a.Buf = saved
	return
}
// nopOpcodes is the multi byte NOP instructions table derived from section 5.8 "Code Padding with Operand-Size Override and Multibyte NOP"
// in "AMD Software Optimization Guide for AMD Family 15h Processors" https://www.amd.com/system/files/TechDocs/47414_15h_sw_opt_guide.pdf
//
// nopOpcodes[i] (for i in [0, 8]) holds the encoding of an (i+1)-byte NOP.
//
// Note: We use up to 9 bytes NOP variant to line our implementation with Go's assembler.
// TODO: After golang-asm removal, add 10 and 11 bytes variants.
var nopOpcodes = [][9]byte{
	{0x90},
	{0x66, 0x90},
	{0x0f, 0x1f, 0x00},
	{0x0f, 0x1f, 0x40, 0x00},
	{0x0f, 0x1f, 0x44, 0x00, 0x00},
	{0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
	{0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
}
// padNOP appends exactly num bytes worth of NOP instructions to the buffer,
// preferring the longest available multi-byte NOP encoding each round.
func (a *AssemblerImpl) padNOP(num int) {
	for remaining := num; remaining > 0; {
		chunk := remaining
		if limit := len(nopOpcodes); chunk > limit {
			chunk = limit
		}
		a.Buf.Write(nopOpcodes[chunk-1][:chunk])
		remaining -= chunk
	}
}
// CompileStandAlone implements asm.AssemblerBase.CompileStandAlone
//
// The instruction takes neither a source nor a destination operand.
func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node {
	return a.newNode(instruction, OperandTypesNoneToNone)
}
// CompileConstToRegister implements asm.AssemblerBase.CompileConstToRegister
func (a *AssemblerImpl) CompileConstToRegister(instruction asm.Instruction, value asm.ConstantValue, destinationReg asm.Register) (inst asm.Node) {
	node := a.newNode(instruction, OperandTypesConstToRegister)
	node.DstReg = destinationReg
	node.SrcConst = value
	return node
}
// CompileRegisterToRegister implements asm.AssemblerBase.CompileRegisterToRegister
func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) {
	node := a.newNode(instruction, OperandTypesRegisterToRegister)
	node.DstReg = to
	node.SrcReg = from
}
// CompileMemoryToRegister implements asm.AssemblerBase.CompileMemoryToRegister
//
// The source memory location is [sourceBaseReg + sourceOffsetConst].
func (a *AssemblerImpl) CompileMemoryToRegister(instruction asm.Instruction, sourceBaseReg asm.Register, sourceOffsetConst asm.ConstantValue, destinationReg asm.Register) {
	node := a.newNode(instruction, OperandTypesMemoryToRegister)
	node.DstReg = destinationReg
	node.SrcReg = sourceBaseReg
	node.SrcConst = sourceOffsetConst
}
// CompileRegisterToMemory implements asm.AssemblerBase.CompileRegisterToMemory
//
// The destination memory location is [destinationBaseRegister + destinationOffsetConst].
func (a *AssemblerImpl) CompileRegisterToMemory(instruction asm.Instruction, sourceRegister asm.Register, destinationBaseRegister asm.Register, destinationOffsetConst asm.ConstantValue) {
	node := a.newNode(instruction, OperandTypesRegisterToMemory)
	node.DstConst = destinationOffsetConst
	node.DstReg = destinationBaseRegister
	node.SrcReg = sourceRegister
}
// CompileJump implements asm.AssemblerBase.CompileJump
//
// The jump target is attached later, via asm.Node.AssignJumpTarget or the
// SetBranchTargetOnNextNodes mechanism (see addNode).
func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node {
	return a.newNode(jmpInstruction, OperandTypesNoneToBranch)
}
// CompileJumpToMemory implements asm.AssemblerBase.CompileJumpToMemory
//
// The jump destination is the memory location [baseReg + offset].
func (a *AssemblerImpl) CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) {
	node := a.newNode(jmpInstruction, OperandTypesNoneToMemory)
	node.DstConst = offset
	node.DstReg = baseReg
}
// CompileJumpToRegister implements asm.AssemblerBase.CompileJumpToRegister
//
// The jump destination is held in reg.
func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) {
	node := a.newNode(jmpInstruction, OperandTypesNoneToRegister)
	node.DstReg = reg
}
// CompileReadInstructionAddress implements asm.AssemblerBase.CompileReadInstructionAddress
//
// Emits a LEAQ into destinationRegister and records
// beforeAcquisitionTargetInstruction so the actual target address can be
// resolved later (see the readInstructionAddressBeforeTargetInstruction field).
func (a *AssemblerImpl) CompileReadInstructionAddress(destinationRegister asm.Register, beforeAcquisitionTargetInstruction asm.Instruction) {
	node := a.newNode(LEAQ, OperandTypesMemoryToRegister)
	node.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction
	node.DstReg = destinationRegister
}
// CompileRegisterToRegisterWithMode implements assembler.CompileRegisterToRegisterWithMode
func (a *AssemblerImpl) CompileRegisterToRegisterWithMode(instruction asm.Instruction, from, to asm.Register, mode Mode) {
	node := a.newNode(instruction, OperandTypesRegisterToRegister)
	node.Mode = mode
	node.DstReg = to
	node.SrcReg = from
}
// CompileMemoryWithIndexToRegister implements assembler.CompileMemoryWithIndexToRegister
//
// The source memory location is [srcBaseReg + srcOffsetConst + srcIndex*srcScale].
func (a *AssemblerImpl) CompileMemoryWithIndexToRegister(instruction asm.Instruction, srcBaseReg asm.Register, srcOffsetConst asm.ConstantValue, srcIndex asm.Register, srcScale int16, dstReg asm.Register) {
	node := a.newNode(instruction, OperandTypesMemoryToRegister)
	node.DstReg = dstReg
	node.SrcMemScale = byte(srcScale)
	node.SrcMemIndex = srcIndex
	node.SrcConst = srcOffsetConst
	node.SrcReg = srcBaseReg
}
// CompileRegisterToMemoryWithIndex implements assembler.CompileRegisterToMemoryWithIndex
//
// The destination memory location is [dstBaseReg + dstOffsetConst + dstIndex*dstScale].
func (a *AssemblerImpl) CompileRegisterToMemoryWithIndex(instruction asm.Instruction, srcReg asm.Register, dstBaseReg asm.Register, dstOffsetConst asm.ConstantValue, dstIndex asm.Register, dstScale int16) {
	node := a.newNode(instruction, OperandTypesRegisterToMemory)
	node.DstMemScale = byte(dstScale)
	node.DstMemIndex = dstIndex
	node.DstConst = dstOffsetConst
	node.DstReg = dstBaseReg
	node.SrcReg = srcReg
}
// CompileRegisterToConst implements assembler.CompileRegisterToConst
func (a *AssemblerImpl) CompileRegisterToConst(instruction asm.Instruction, srcRegister asm.Register, value asm.ConstantValue) asm.Node {
	node := a.newNode(instruction, OperandTypesRegisterToConst)
	node.DstConst = value
	node.SrcReg = srcRegister
	return node
}
// CompileRegisterToNone implements assembler.CompileRegisterToNone
func (a *AssemblerImpl) CompileRegisterToNone(instruction asm.Instruction, register asm.Register) {
	node := a.newNode(instruction, OperandTypesRegisterToNone)
	node.SrcReg = register
}
// CompileNoneToRegister implements assembler.CompileNoneToRegister
func (a *AssemblerImpl) CompileNoneToRegister(instruction asm.Instruction, register asm.Register) {
	node := a.newNode(instruction, OperandTypesNoneToRegister)
	node.DstReg = register
}
// CompileNoneToMemory implements assembler.CompileNoneToMemory
//
// The destination memory location is [baseReg + offset].
func (a *AssemblerImpl) CompileNoneToMemory(instruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) {
	node := a.newNode(instruction, OperandTypesNoneToMemory)
	node.DstConst = offset
	node.DstReg = baseReg
}
// CompileConstToMemory implements assembler.CompileConstToMemory
//
// The destination memory location is [dstBaseReg + dstOffset].
// Note: the parameter was renamed from "dstbaseReg" to "dstBaseReg" to follow
// Go MixedCaps naming and match srcBaseReg/dstBaseReg used elsewhere in this
// file; callers are unaffected since Go arguments are positional.
func (a *AssemblerImpl) CompileConstToMemory(instruction asm.Instruction, value asm.ConstantValue, dstBaseReg asm.Register, dstOffset asm.ConstantValue) asm.Node {
	n := a.newNode(instruction, OperandTypesConstToMemory)
	n.SrcConst = value
	n.DstReg = dstBaseReg
	n.DstConst = dstOffset
	return n
}
// CompileMemoryToConst implements assembler.CompileMemoryToConst
//
// The source memory location is [srcBaseReg + srcOffset].
func (a *AssemblerImpl) CompileMemoryToConst(instruction asm.Instruction, srcBaseReg asm.Register, srcOffset asm.ConstantValue, value asm.ConstantValue) asm.Node {
	node := a.newNode(instruction, OperandTypesMemoryToConst)
	node.DstConst = value
	node.SrcConst = srcOffset
	node.SrcReg = srcBaseReg
	return node
}
// errorEncodingUnsupported returns the error reported when the node's
// instruction has no encoder for its operand-type combination.
func errorEncodingUnsupported(n *NodeImpl) error {
	return fmt.Errorf("%s is unsupported for %s type", InstructionName(n.Instruction), n.Types)
}
// encodeNoneToNone encodes instructions that take no operands (CDQ, CQO, NOP, RET).
func (a *AssemblerImpl) encodeNoneToNone(n *NodeImpl) (err error) {
	switch n.Instruction {
	case CDQ:
		// https://www.felixcloutier.com/x86/cwd:cdq:cqo
		err = a.Buf.WriteByte(0x99)
	case CQO:
		// CQO is CDQ with the REX.W prefix (64-bit operand size).
		// https://www.felixcloutier.com/x86/cwd:cdq:cqo
		_, err = a.Buf.Write([]byte{RexPrefixW, 0x99})
	case NOP:
		// Simply optimize out the NOP instructions.
	case RET:
		// https://www.felixcloutier.com/x86/ret
		err = a.Buf.WriteByte(0xc3)
	default:
		err = errorEncodingUnsupported(n)
	}
	return
}
// EncodeNoneToRegister encodes instructions whose only operand is a register:
// register-indirect JMP and the SETcc family.
func (a *AssemblerImpl) EncodeNoneToRegister(n *NodeImpl) (err error) {
	regBits, prefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM)
	if err != nil {
		return err
	}

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
	modRM := 0b11_000_000 | regBits // Mod = 0b11 means the operand is a register.
	if n.Instruction == JMP {
		// JMP's Opcode is defined as "FF /4" meaning that we have to have "4"
		// in 4-6th bits in the ModRM byte. https://www.felixcloutier.com/x86/jmp
		modRM |= 0b00_100_000
	} else if REG_SP <= n.DstReg && n.DstReg <= REG_DI {
		// If the destination is one byte length register, we need to have the default prefix.
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers
		prefix |= RexPrefixDefault
	}

	if prefix != RexPrefixNone {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Encoding
		if err = a.Buf.WriteByte(prefix); err != nil {
			return
		}
	}

	if n.Instruction == JMP {
		// https://www.felixcloutier.com/x86/jmp
		_, err = a.Buf.Write([]byte{0xff, modRM})
		return
	}

	// Every remaining supported instruction is a SETcc variant: a two-byte
	// opcode beginning with 0x0f followed by the ModRM byte.
	// https://www.felixcloutier.com/x86/setcc
	var secondOpcode byte
	switch n.Instruction {
	case SETCC:
		secondOpcode = 0x93
	case SETCS:
		secondOpcode = 0x92
	case SETEQ:
		secondOpcode = 0x94
	case SETGE:
		secondOpcode = 0x9d
	case SETGT:
		secondOpcode = 0x9f
	case SETHI:
		secondOpcode = 0x97
	case SETLE:
		secondOpcode = 0x9e
	case SETLS:
		secondOpcode = 0x96
	case SETLT:
		secondOpcode = 0x9c
	case SETNE:
		secondOpcode = 0x95
	case SETPC:
		secondOpcode = 0x9b
	case SETPS:
		secondOpcode = 0x9a
	default:
		return errorEncodingUnsupported(n)
	}
	_, err = a.Buf.Write([]byte{0x0f, secondOpcode, modRM})
	return
}
// EncodeNoneToMemory encodes instructions whose only operand is a memory
// location (INCQ, DECQ, and memory-indirect JMP).
func (a *AssemblerImpl) EncodeNoneToMemory(n *NodeImpl) (err error) {
	RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
	if err != nil {
		return err
	}

	var opcode byte
	switch n.Instruction {
	case INCQ:
		// https://www.felixcloutier.com/x86/inc
		RexPrefix |= RexPrefixW
		opcode = 0xff
	case DECQ:
		// https://www.felixcloutier.com/x86/dec
		RexPrefix |= RexPrefixW
		modRM |= 0b00_001_000 // DEC needs "/1" extension in ModRM.
		opcode = 0xff
	case JMP:
		// https://www.felixcloutier.com/x86/jmp
		modRM |= 0b00_100_000 // JMP needs "/4" extension in ModRM.
		opcode = 0xff
	default:
		return errorEncodingUnsupported(n)
	}

	if RexPrefix != RexPrefixNone {
		a.Buf.WriteByte(RexPrefix)
	}

	a.Buf.Write([]byte{opcode, modRM})

	if sbi != nil {
		// The SIB byte follows ModRM when the memory operand needs one.
		a.Buf.WriteByte(*sbi)
	}

	if displacementWidth != 0 {
		a.WriteConst(n.DstConst, displacementWidth)
	}
	return
}
// relativeJumpOpcode holds the opcode bytes for the short (8-bit displacement)
// and long (32-bit displacement) forms of a relative jump instruction.
type relativeJumpOpcode struct{ short, long []byte }

// instructionLen returns the total encoded length in bytes of the jump:
// the opcode bytes plus the displacement (1 byte when short, 4 when long).
func (o relativeJumpOpcode) instructionLen(short bool) int64 {
	if short {
		return int64(len(o.short)) + 1 // 8-bit displacement.
	}
	return int64(len(o.long)) + 4 // 32-bit displacement.
}
// relativeJumpOpcodes maps each jump instruction to its short and long opcode
// bytes. The displacement (1 or 4 bytes) follows the opcode; see
// relativeJumpOpcode.instructionLen.
var relativeJumpOpcodes = map[asm.Instruction]relativeJumpOpcode{
	// https://www.felixcloutier.com/x86/jcc
	JCC: {short: []byte{0x73}, long: []byte{0x0f, 0x83}},
	JCS: {short: []byte{0x72}, long: []byte{0x0f, 0x82}},
	JEQ: {short: []byte{0x74}, long: []byte{0x0f, 0x84}},
	JGE: {short: []byte{0x7d}, long: []byte{0x0f, 0x8d}},
	JGT: {short: []byte{0x7f}, long: []byte{0x0f, 0x8f}},
	JHI: {short: []byte{0x77}, long: []byte{0x0f, 0x87}},
	JLE: {short: []byte{0x7e}, long: []byte{0x0f, 0x8e}},
	JLS: {short: []byte{0x76}, long: []byte{0x0f, 0x86}},
	JLT: {short: []byte{0x7c}, long: []byte{0x0f, 0x8c}},
	JMI: {short: []byte{0x78}, long: []byte{0x0f, 0x88}},
	JNE: {short: []byte{0x75}, long: []byte{0x0f, 0x85}},
	JPC: {short: []byte{0x7b}, long: []byte{0x0f, 0x8b}},
	JPS: {short: []byte{0x7a}, long: []byte{0x0f, 0x8a}},
	// https://www.felixcloutier.com/x86/jmp
	JMP: {short: []byte{0xeb}, long: []byte{0xe9}},
}
// ResolveForwardRelativeJumps patches the displacement of every forward jump
// whose target is `target`, now that the target's offset in the binary is known.
//
// If a jump was optimistically encoded short (8-bit displacement) but the
// offset doesn't fit, ForceReAssemble is set so Assemble re-encodes the whole
// list with that jump in the long (32-bit) form.
func (a *AssemblerImpl) ResolveForwardRelativeJumps(target *NodeImpl) (err error) {
	offsetInBinary := int64(target.OffsetInBinary())
	for origin := range target.JumpOrigins {
		shortJump := origin.isForwardShortJump()
		op := relativeJumpOpcodes[origin.Instruction]
		instructionLen := op.instructionLen(shortJump)

		// Calculate the offset from the EIP (at the time of executing this jump instruction)
		// to the target instruction. This value is always >= 0 as here we only handle forward jumps.
		offset := offsetInBinary - (int64(origin.OffsetInBinary()) + instructionLen)
		if shortJump {
			if offset > math.MaxInt8 {
				// This forces reassemble in the outer loop inside AssemblerImpl.Assemble().
				a.ForceReAssemble = true
				// From the next reAssemble phases, this forward jump will be encoded long jump and
				// allocate 32-bit offset bytes by default. This means that this `origin` node
				// will always enter the "long jump offset encoding" block below
				origin.Flag ^= NodeFlagShortForwardJump
			} else {
				// Patch the single displacement byte, which is the last byte of the instruction.
				a.Buf.Bytes()[origin.OffsetInBinary()+uint64(instructionLen)-1] = byte(offset)
			}
		} else { // long jump offset encoding.
			if offset > math.MaxInt32 {
				return fmt.Errorf("too large jump offset %d for encoding %s", offset, InstructionName(origin.Instruction))
			}
			// Patch the 4-byte little-endian displacement, the last 4 bytes of the instruction.
			binary.LittleEndian.PutUint32(a.Buf.Bytes()[origin.OffsetInBinary()+uint64(instructionLen)-4:], uint32(offset))
		}
	}
	return nil
}
// EncodeRelativeJump encodes a relative jump instruction.
//
// Backward jumps can compute their exact displacement immediately; forward
// jumps register themselves in the target's JumpOrigins with a zero placeholder
// displacement, which is patched later by ResolveForwardRelativeJumps.
func (a *AssemblerImpl) EncodeRelativeJump(n *NodeImpl) (err error) {
	if n.JumpTarget == nil {
		err = fmt.Errorf("jump target must not be nil for relative %s", InstructionName(n.Instruction))
		return
	}

	op, ok := relativeJumpOpcodes[n.Instruction]
	if !ok {
		return errorEncodingUnsupported(n)
	}

	var isShortJump bool
	// offsetOfEIP means the offset of EIP register at the time of executing this jump instruction.
	// Relative jump instructions can be encoded with the signed 8-bit or 32-bit integer offsets from the EIP.
	var offsetOfEIP int64 = 0 // We set zero and resolve later once the target instruction is encoded for forward jumps
	if n.isBackwardJump() {
		// If this is the backward jump, we can calculate the exact offset now.
		offsetOfJumpInstruction := int64(n.JumpTarget.OffsetInBinary()) - int64(n.OffsetInBinary())
		// -2 is the length of the smallest short-form encoding (1-byte opcode + 1-byte offset).
		isShortJump = offsetOfJumpInstruction-2 >= math.MinInt8
		offsetOfEIP = offsetOfJumpInstruction - op.instructionLen(isShortJump)
	} else {
		// For forward jumps, we resolve the offset when we Encode the target node. See AssemblerImpl.ResolveForwardRelativeJumps.
		n.JumpTarget.JumpOrigins[n] = struct{}{}
		isShortJump = n.isForwardShortJump()
	}

	if offsetOfEIP < math.MinInt32 { // offsetOfEIP is always <= 0 as we don't calculate it for forward jump here.
		return fmt.Errorf("too large jump offset %d for encoding %s", offsetOfEIP, InstructionName(n.Instruction))
	}

	if isShortJump {
		a.Buf.Write(op.short)
		a.WriteConst(offsetOfEIP, 8)
	} else {
		a.Buf.Write(op.long)
		a.WriteConst(offsetOfEIP, 32)
	}
	return
}
// EncodeRegisterToNone encodes instructions that take a single register operand
// and no explicit destination: DIV, IDIV and MUL in their 32/64-bit forms.
// All of them share the 0xF7 opcode and differ only in the ModRM "/digit"
// extension and (for the 64-bit forms) the REX.W prefix.
func (a *AssemblerImpl) EncodeRegisterToNone(n *NodeImpl) (err error) {
	regBits, prefix, err := register3bits(n.SrcReg, registerSpecifierPositionModRMFieldRM)
	if err != nil {
		return err
	}

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
	modRM := 0b11_000_000 | // Specifying that operand is register.
		regBits

	var opcode byte
	switch n.Instruction {
	case DIVL:
		// https://www.felixcloutier.com/x86/div
		modRM |= 0b00_110_000 // DIV needs the "/6" extension.
		opcode = 0xf7
	case DIVQ:
		// https://www.felixcloutier.com/x86/div
		prefix |= RexPrefixW
		modRM |= 0b00_110_000
		opcode = 0xf7
	case IDIVL:
		// https://www.felixcloutier.com/x86/idiv
		modRM |= 0b00_111_000 // IDIV needs the "/7" extension.
		opcode = 0xf7
	case IDIVQ:
		// https://www.felixcloutier.com/x86/idiv
		prefix |= RexPrefixW
		modRM |= 0b00_111_000
		opcode = 0xf7
	case MULL:
		// https://www.felixcloutier.com/x86/mul
		modRM |= 0b00_100_000 // MUL needs the "/4" extension.
		opcode = 0xf7
	case MULQ:
		// https://www.felixcloutier.com/x86/mul
		prefix |= RexPrefixW
		modRM |= 0b00_100_000
		opcode = 0xf7
	default:
		// Return before touching the buffer: previously a zero opcode byte and
		// the modRM were still appended on this unsupported-instruction path.
		return errorEncodingUnsupported(n)
	}

	if prefix != RexPrefixNone {
		a.Buf.WriteByte(prefix)
	}
	a.Buf.Write([]byte{opcode, modRM})
	return nil
}
// registerToRegisterOpcode maps an instruction to the encoding information that
// EncodeRegisterToRegister uses for its register-to-register operand form.
var registerToRegisterOpcode = map[asm.Instruction]struct {
	// opcode is the one- to three-byte opcode sequence.
	opcode []byte
	// rPrefix is the REX prefix ORed into the encoding (e.g. RexPrefixW for 64-bit operands).
	rPrefix RexPrefix
	// mandatoryPrefix, when non-zero, is written before the REX prefix (0x66/0xf2/0xf3 for SSE forms).
	mandatoryPrefix byte
	// srcOnModRMReg, when true, places the source register on ModRM:reg (and the destination on ModRM:r/m).
	srcOnModRMReg bool
	// isSrc8bit marks instructions whose source operand is an 8-bit register; SP, BP, SI and DI
	// then require the default REX prefix to be encoded.
	isSrc8bit bool
	// needMode, when true, makes the encoder append NodeImpl.Mode as an 8-bit immediate (ROUNDSS/ROUNDSD).
	needMode bool
	// requireSrcFloat and requireDstFloat declare whether each operand must be a float (XMM) register.
	requireSrcFloat, requireDstFloat bool
}{
	// https://www.felixcloutier.com/x86/add
	ADDL: {opcode: []byte{0x1}, srcOnModRMReg: true},
	ADDQ: {opcode: []byte{0x1}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/and
	ANDL: {opcode: []byte{0x21}, srcOnModRMReg: true},
	ANDQ: {opcode: []byte{0x21}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/cmp
	CMPL: {opcode: []byte{0x39}},
	CMPQ: {opcode: []byte{0x39}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/cmovcc
	CMOVQCS: {opcode: []byte{0x0f, 0x42}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/addsd
	ADDSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x58}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/addss
	ADDSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x58}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/andpd
	ANDPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x54}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/andps
	ANDPS: {opcode: []byte{0x0f, 0x54}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/bsr
	BSRL: {opcode: []byte{0xf, 0xbd}},
	BSRQ: {opcode: []byte{0xf, 0xbd}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/comisd
	COMISD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x2f}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/comiss
	COMISS: {opcode: []byte{0x0f, 0x2f}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsd2ss
	CVTSD2SS: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5a}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsi2sd
	CVTSL2SD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2a}, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsi2sd
	CVTSQ2SD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2a}, rPrefix: RexPrefixW, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsi2ss
	CVTSL2SS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2a}, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsi2ss
	CVTSQ2SS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2a}, rPrefix: RexPrefixW, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtss2sd
	CVTSS2SD: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5a}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvttsd2si
	CVTTSD2SL: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2c}, requireSrcFloat: true},
	CVTTSD2SQ: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2c}, rPrefix: RexPrefixW, requireSrcFloat: true},
	// https://www.felixcloutier.com/x86/cvttss2si
	CVTTSS2SL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2c}, requireSrcFloat: true},
	CVTTSS2SQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2c}, rPrefix: RexPrefixW, requireSrcFloat: true},
	// https://www.felixcloutier.com/x86/divsd
	DIVSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5e}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/divss
	DIVSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5e}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/lzcnt
	LZCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbd}},
	LZCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbd}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/maxsd
	MAXSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5f}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/maxss
	MAXSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5f}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/minsd
	MINSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5d}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/minss
	MINSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5d}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVBLSX: {opcode: []byte{0x0f, 0xbe}, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movzx
	MOVBLZX: {opcode: []byte{0x0f, 0xb6}, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVBQSX: {opcode: []byte{0x0f, 0xbe}, rPrefix: RexPrefixW, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVLQSX: {opcode: []byte{0x63}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVWQSX: {opcode: []byte{0x0f, 0xbf}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVWLSX: {opcode: []byte{0x0f, 0xbf}},
	// https://www.felixcloutier.com/x86/mulss
	MULSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x59}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/mulsd
	MULSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x59}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/or
	ORL: {opcode: []byte{0x09}, srcOnModRMReg: true},
	ORQ: {opcode: []byte{0x09}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/orpd
	ORPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x56}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/orps
	ORPS: {opcode: []byte{0x0f, 0x56}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/popcnt
	POPCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xb8}},
	POPCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xb8}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/roundss
	ROUNDSS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0a}, needMode: true, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/roundsd
	ROUNDSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0b}, needMode: true, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/sqrtss
	SQRTSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x51}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/sqrtsd
	SQRTSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x51}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/sub
	SUBL: {opcode: []byte{0x29}, srcOnModRMReg: true},
	SUBQ: {opcode: []byte{0x29}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/subss
	SUBSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5c}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/subsd
	SUBSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5c}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/test
	TESTL: {opcode: []byte{0x85}, srcOnModRMReg: true},
	TESTQ: {opcode: []byte{0x85}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/tzcnt
	TZCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbc}},
	TZCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbc}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/ucomisd
	UCOMISD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x2e}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/ucomiss
	UCOMISS: {opcode: []byte{0x0f, 0x2e}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/xor
	XORL: {opcode: []byte{0x31}, srcOnModRMReg: true},
	XORQ: {opcode: []byte{0x31}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/xorpd
	XORPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x57}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/xorps
	XORPS: {opcode: []byte{0x0f, 0x57}, requireSrcFloat: true, requireDstFloat: true},
}
// RegisterToRegisterShiftOpcode maps the variable shift/rotate instructions
// (shift amount taken from CL) to their encoding. They all share the 0xD3
// opcode and are distinguished by the ModRM "/digit" extension.
var RegisterToRegisterShiftOpcode = map[asm.Instruction]struct {
	// opcode is the opcode byte sequence (always 0xd3 for this family).
	opcode []byte
	// rPrefix is the REX prefix ORed into the encoding (RexPrefixW for 64-bit operands).
	rPrefix RexPrefix
	// modRMExtension is the "/digit" opcode extension placed on ModRM:reg.
	modRMExtension byte
}{
	// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
	ROLL: {opcode: []byte{0xd3}},
	ROLQ: {opcode: []byte{0xd3}, rPrefix: RexPrefixW},
	RORL: {opcode: []byte{0xd3}, modRMExtension: 0b00_001_000},
	RORQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_001_000, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/sal:sar:shl:shr
	SARL: {opcode: []byte{0xd3}, modRMExtension: 0b00_111_000},
	SARQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_111_000, rPrefix: RexPrefixW},
	SHLL: {opcode: []byte{0xd3}, modRMExtension: 0b00_100_000},
	SHLQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_100_000, rPrefix: RexPrefixW},
	SHRL: {opcode: []byte{0xd3}, modRMExtension: 0b00_101_000},
	SHRQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_101_000, rPrefix: RexPrefixW},
}
// registerToRegisterMOVOpcode holds the encoding of one variant of a
// register-to-register MOV (integer/float operand combinations differ).
type registerToRegisterMOVOpcode struct {
	// opcode is the opcode byte sequence.
	opcode []byte
	// mandatoryPrefix, when non-zero, is written before the REX prefix.
	mandatoryPrefix byte
	// srcOnModRMReg, when true, places the source register on ModRM:reg.
	srcOnModRMReg bool
	// rPrefix is the REX prefix ORed into the encoding.
	rPrefix RexPrefix
}
// registerToRegisterMOVOpcodes maps MOVL/MOVQ to the four encodings selected by
// whether the source and destination are integer (i) or float/XMM (f) registers.
// MOVL has no f2f variant: a 32-bit float-to-float move is undefined here and
// rejected by EncodeRegisterToRegister.
var registerToRegisterMOVOpcodes = map[asm.Instruction]struct {
	i2i, i2f, f2i, f2f registerToRegisterMOVOpcode
}{
	MOVL: {
		// https://www.felixcloutier.com/x86/mov
		i2i: registerToRegisterMOVOpcode{opcode: []byte{0x89}, srcOnModRMReg: true},
		// https://www.felixcloutier.com/x86/movd:movq
		i2f: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x6e}, mandatoryPrefix: 0x66, srcOnModRMReg: false},
		f2i: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x7e}, mandatoryPrefix: 0x66, srcOnModRMReg: true},
	},
	MOVQ: {
		// https://www.felixcloutier.com/x86/mov
		i2i: registerToRegisterMOVOpcode{opcode: []byte{0x89}, srcOnModRMReg: true, rPrefix: RexPrefixW},
		// https://www.felixcloutier.com/x86/movd:movq
		i2f: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x6e}, mandatoryPrefix: 0x66, srcOnModRMReg: false, rPrefix: RexPrefixW},
		f2i: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x7e}, mandatoryPrefix: 0x66, srcOnModRMReg: true, rPrefix: RexPrefixW},
		// https://www.felixcloutier.com/x86/movq
		f2f: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x7e}, mandatoryPrefix: 0xf3},
	},
}
// EncodeRegisterToRegister encodes the register-to-register operand form of n.
//
// Three instruction families are dispatched in order:
//   - MOV variants (registerToRegisterMOVOpcodes), which pick an encoding by
//     whether each operand is an integer or a float (XMM) register;
//   - general ALU/SSE instructions (registerToRegisterOpcode);
//   - variable shifts/rotates (RegisterToRegisterShiftOpcode), which require CX
//     as the source register and an integer destination.
func (a *AssemblerImpl) EncodeRegisterToRegister(n *NodeImpl) (err error) {
	// Alias for readability
	inst := n.Instruction

	if op, ok := registerToRegisterMOVOpcodes[inst]; ok {
		var opcode registerToRegisterMOVOpcode
		srcIsFloat, dstIsFloat := IsFloatRegister(n.SrcReg), IsFloatRegister(n.DstReg)
		if srcIsFloat && dstIsFloat {
			if inst == MOVL {
				return errors.New("MOVL for float to float is undefined")
			}
			opcode = op.f2f
		} else if srcIsFloat && !dstIsFloat {
			opcode = op.f2i
		} else if !srcIsFloat && dstIsFloat {
			opcode = op.i2f
		} else {
			opcode = op.i2i
		}

		RexPrefix, modRM, err := n.GetRegisterToRegisterModRM(opcode.srcOnModRMReg)
		if err != nil {
			return err
		}
		RexPrefix |= opcode.rPrefix

		if opcode.mandatoryPrefix != 0 {
			a.Buf.WriteByte(opcode.mandatoryPrefix)
		}
		if RexPrefix != RexPrefixNone {
			a.Buf.WriteByte(RexPrefix)
		}
		a.Buf.Write(opcode.opcode)
		a.Buf.WriteByte(modRM)
		return nil
	} else if op, ok := registerToRegisterOpcode[inst]; ok {
		srcIsFloat, dstIsFloat := IsFloatRegister(n.SrcReg), IsFloatRegister(n.DstReg)
		if op.requireSrcFloat && !srcIsFloat {
			return fmt.Errorf("%s require float src register but got %s", InstructionName(inst), RegisterName(n.SrcReg))
		} else if op.requireDstFloat && !dstIsFloat {
			return fmt.Errorf("%s require float dst register but got %s", InstructionName(inst), RegisterName(n.DstReg))
		} else if !op.requireSrcFloat && srcIsFloat {
			return fmt.Errorf("%s require integer src register but got %s", InstructionName(inst), RegisterName(n.SrcReg))
		} else if !op.requireDstFloat && dstIsFloat {
			return fmt.Errorf("%s require integer dst register but got %s", InstructionName(inst), RegisterName(n.DstReg))
		}

		RexPrefix, modRM, err := n.GetRegisterToRegisterModRM(op.srcOnModRMReg)
		if err != nil {
			return err
		}
		RexPrefix |= op.rPrefix

		if op.isSrc8bit && REG_SP <= n.SrcReg && n.SrcReg <= REG_DI {
			// If an operand register is 8-bit length of SP, BP, DI, or SI register, we need to have the default prefix.
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers
			RexPrefix |= RexPrefixDefault
		}

		if op.mandatoryPrefix != 0 {
			a.Buf.WriteByte(op.mandatoryPrefix)
		}
		if RexPrefix != RexPrefixNone {
			a.Buf.WriteByte(RexPrefix)
		}
		a.Buf.Write(op.opcode)
		a.Buf.WriteByte(modRM)

		if op.needMode {
			a.WriteConst(int64(n.Mode), 8)
		}
		return nil
	} else if op, ok := RegisterToRegisterShiftOpcode[inst]; ok {
		if n.SrcReg != REG_CX {
			return fmt.Errorf("shifting instruction %s require CX register as src but got %s", InstructionName(inst), RegisterName(n.SrcReg))
		} else if IsFloatRegister(n.DstReg) {
			// Fix: report the offending destination register (this previously printed SrcReg).
			return fmt.Errorf("shifting instruction %s require integer register as dst but got %s", InstructionName(inst), RegisterName(n.DstReg))
		}

		reg3bits, RexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM)
		if err != nil {
			return err
		}
		RexPrefix |= op.rPrefix
		if RexPrefix != RexPrefixNone {
			a.Buf.WriteByte(RexPrefix)
		}

		// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
		modRM := 0b11_000_000 |
			(op.modRMExtension) |
			reg3bits
		// Write the opcode and ModRM separately: appending modRM to the
		// map-stored opcode slice could mutate the shared table if that slice
		// ever had spare capacity.
		a.Buf.Write(op.opcode)
		a.Buf.WriteByte(modRM)
		return nil
	} else {
		return errorEncodingUnsupported(n)
	}
}
// EncodeRegisterToMemory encodes instructions whose source is a register and
// whose destination is a memory location (CMP, MOV of various widths, and the
// shift/rotate family whose shift amount must be in CX).
//
// The memory operand (ModRM mode/displacement, optional SIB byte) comes from
// n.GetMemoryLocation; this function fills in the opcode, any mandatory or REX
// prefixes, and the ModRM reg field or "/digit" extension.
func (a *AssemblerImpl) EncodeRegisterToMemory(n *NodeImpl) (err error) {
	RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
	if err != nil {
		return err
	}

	var opcode []byte
	var mandatoryPrefix byte
	var isShiftInstruction bool
	switch n.Instruction {
	case CMPL:
		// https://www.felixcloutier.com/x86/cmp
		opcode = []byte{0x3b}
	case CMPQ:
		// https://www.felixcloutier.com/x86/cmp
		RexPrefix |= RexPrefixW
		opcode = []byte{0x3b}
	case MOVB:
		// https://www.felixcloutier.com/x86/mov
		opcode = []byte{0x88}
	case MOVL:
		if IsFloatRegister(n.SrcReg) {
			// https://www.felixcloutier.com/x86/movd:movq
			opcode = []byte{0x0f, 0x7e}
			mandatoryPrefix = 0x66
		} else {
			// https://www.felixcloutier.com/x86/mov
			opcode = []byte{0x89}
		}
	case MOVQ:
		if IsFloatRegister(n.SrcReg) {
			// https://www.felixcloutier.com/x86/movq
			opcode = []byte{0x0f, 0xd6}
			mandatoryPrefix = 0x66
		} else {
			// https://www.felixcloutier.com/x86/mov
			RexPrefix |= RexPrefixW
			opcode = []byte{0x89}
		}
	case MOVW:
		// https://www.felixcloutier.com/x86/mov
		// Note: Need 0x66 to indicate that the operand size is 16-bit.
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Operand-size_and_address-size_override_prefix
		mandatoryPrefix = 0x66
		opcode = []byte{0x89}
	case SARL:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		modRM |= 0b00_111_000 // SAR needs the "/7" extension.
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SARQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM |= 0b00_111_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHLL:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		modRM |= 0b00_100_000 // SHL needs the "/4" extension.
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHLQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM |= 0b00_100_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHRL:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		modRM |= 0b00_101_000 // SHR needs the "/5" extension.
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHRQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM |= 0b00_101_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case ROLL:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		opcode = []byte{0xd3} // ROL uses the "/0" extension: nothing to OR into modRM.
		isShiftInstruction = true
	case ROLQ:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		RexPrefix |= RexPrefixW
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case RORL:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		modRM |= 0b00_001_000 // ROR needs the "/1" extension.
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case RORQ:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		RexPrefix |= RexPrefixW
		opcode = []byte{0xd3}
		modRM |= 0b00_001_000
		isShiftInstruction = true
	default:
		return errorEncodingUnsupported(n)
	}

	if !isShiftInstruction {
		srcReg3Bits, prefix, err := register3bits(n.SrcReg, registerSpecifierPositionModRMFieldReg)
		if err != nil {
			return err
		}

		RexPrefix |= prefix
		modRM |= (srcReg3Bits << 3) // Place the source register on ModRM:reg
	} else {
		// Shifts encode the "/digit" extension on ModRM:reg instead of a register,
		// and the shift amount implicitly comes from CL.
		if n.SrcReg != REG_CX {
			return fmt.Errorf("shifting instruction %s require CX register as src but got %s", InstructionName(n.Instruction), RegisterName(n.SrcReg))
		}
	}

	if mandatoryPrefix != 0 {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Mandatory_prefix
		a.Buf.WriteByte(mandatoryPrefix)
	}

	if RexPrefix != RexPrefixNone {
		a.Buf.WriteByte(RexPrefix)
	}

	a.Buf.Write(opcode)

	a.Buf.WriteByte(modRM)

	if sbi != nil {
		a.Buf.WriteByte(*sbi)
	}

	if displacementWidth != 0 {
		a.WriteConst(n.DstConst, displacementWidth)
	}
	return
}
// EncodeRegisterToConst encodes the comparison of a register against an
// immediate constant (only CMPL/CMPQ are supported).
//
// CMP with AX and a 32-bit immediate uses the dedicated short form (0x3D);
// otherwise the 0x83 (8-bit immediate) or 0x81 (32-bit immediate) form with the
// "/7" ModRM extension is used.
func (a *AssemblerImpl) EncodeRegisterToConst(n *NodeImpl) (err error) {
	regBits, prefix, err := register3bits(n.SrcReg, registerSpecifierPositionModRMFieldRM)
	if err != nil {
		return err
	}

	switch n.Instruction {
	case CMPL, CMPQ:
		if n.Instruction == CMPQ {
			prefix |= RexPrefixW
		}
		if prefix != RexPrefixNone {
			a.Buf.WriteByte(prefix)
		}
		is8bitConst := fitInSigned8bit(n.DstConst)
		// https://www.felixcloutier.com/x86/cmp
		if n.SrcReg == REG_AX && !is8bitConst {
			a.Buf.Write([]byte{0x3d})
		} else {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM := 0b11_000_000 | // Specifying that operand is register.
				0b00_111_000 | // CMP with immediate needs "/7" extension.
				regBits
			if is8bitConst {
				a.Buf.Write([]byte{0x83, modRM})
			} else {
				a.Buf.Write([]byte{0x81, modRM})
			}
		}
		// Append the immediate with the width chosen above.
		if is8bitConst {
			a.WriteConst(n.DstConst, 8)
		} else {
			a.WriteConst(n.DstConst, 32)
		}
	default:
		// Return before touching the buffer: previously the immediate bytes were
		// still appended on this unsupported-instruction path.
		return errorEncodingUnsupported(n)
	}
	return nil
}
// encodeReadInstructionAddress emits a RIP-relative LEAQ that loads the binary
// address of a future instruction into n.DstReg.
//
// The 32-bit displacement is emitted as a zero placeholder here; a callback
// registered via AddOnGenerateCallBack patches it after assembly, once the
// offset of the target instruction (the node following the first node whose
// Instruction equals n.readInstructionAddressBeforeTargetInstruction) is known.
func (a *AssemblerImpl) encodeReadInstructionAddress(n *NodeImpl) error {
	dstReg3Bits, RexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldReg)
	if err != nil {
		return err
	}

	a.AddOnGenerateCallBack(func(code []byte) error {
		// Find the target instruction node.
		targetNode := n
		for ; targetNode != nil; targetNode = targetNode.Next {
			if targetNode.Instruction == n.readInstructionAddressBeforeTargetInstruction {
				targetNode = targetNode.Next
				break
			}
		}

		if targetNode == nil {
			return errors.New("BUG: target instruction not found for read instruction address")
		}

		offset := targetNode.OffsetInBinary() - (n.OffsetInBinary() + 7 /* 7 = the length of the LEAQ instruction */)
		if offset >= math.MaxInt32 {
			return errors.New("BUG: too large offset for LEAQ instruction")
		}

		// Patch the 4-byte displacement that follows the REX prefix, opcode and ModRM (3 bytes).
		binary.LittleEndian.PutUint32(code[n.OffsetInBinary()+3:], uint32(int32(offset)))
		return nil
	})

	// https://www.felixcloutier.com/x86/lea
	opcode := byte(0x8d)
	RexPrefix |= RexPrefixW

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#64-bit_addressing
	modRM := 0b00_000_101 | // Indicate "LEAQ [RIP + 32bit displacement], DstReg" encoding.
		(dstReg3Bits << 3) // Place the DstReg on ModRM:reg.

	a.Buf.Write([]byte{RexPrefix, opcode, modRM})
	a.WriteConst(int64(0), 32) // Preserve
	return nil
}
// EncodeMemoryToRegister encodes instructions whose source is a memory location
// and whose destination is a register (loads, sign/zero extensions, ALU ops
// reading from memory, LEAQ, and float loads/compares).
//
// LEAQ with readInstructionAddressBeforeTargetInstruction set is a special
// RIP-relative form handled by encodeReadInstructionAddress.
func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) {
	if n.Instruction == LEAQ && n.readInstructionAddressBeforeTargetInstruction != NONE {
		return a.encodeReadInstructionAddress(n)
	}

	RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
	if err != nil {
		return err
	}

	dstReg3Bits, prefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldReg)
	if err != nil {
		return err
	}

	RexPrefix |= prefix
	modRM |= (dstReg3Bits << 3) // Place the destination register on ModRM:reg

	var mandatoryPrefix byte
	var opcode []byte
	switch n.Instruction {
	case ADDL:
		// https://www.felixcloutier.com/x86/add
		opcode = []byte{0x03}
	case ADDQ:
		// https://www.felixcloutier.com/x86/add
		RexPrefix |= RexPrefixW
		opcode = []byte{0x03}
	case CMPL:
		// https://www.felixcloutier.com/x86/cmp
		opcode = []byte{0x39}
	case CMPQ:
		// https://www.felixcloutier.com/x86/cmp
		RexPrefix |= RexPrefixW
		opcode = []byte{0x39}
	case LEAQ:
		// https://www.felixcloutier.com/x86/lea
		RexPrefix |= RexPrefixW
		opcode = []byte{0x8d}
	case MOVBLSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		opcode = []byte{0x0f, 0xbe}
	case MOVBLZX:
		// https://www.felixcloutier.com/x86/movzx
		opcode = []byte{0x0f, 0xb6}
	case MOVBQSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		RexPrefix |= RexPrefixW
		opcode = []byte{0x0f, 0xbe}
	case MOVBQZX:
		// https://www.felixcloutier.com/x86/movzx
		RexPrefix |= RexPrefixW
		opcode = []byte{0x0f, 0xb6}
	case MOVLQSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		RexPrefix |= RexPrefixW
		opcode = []byte{0x63}
	case MOVLQZX:
		// https://www.felixcloutier.com/x86/mov
		// Note: MOVLQZX means zero extending 32bit reg to 64-bit reg and
		// that is semantically equivalent to MOV 32bit to 32bit.
		opcode = []byte{0x8B}
	case MOVL:
		// https://www.felixcloutier.com/x86/mov
		// Plain 32-bit load; picks the XMM (MOVD) form when the destination is a float register.
		if IsFloatRegister(n.DstReg) {
			// https://www.felixcloutier.com/x86/movd:movq
			opcode = []byte{0x0f, 0x6e}
			mandatoryPrefix = 0x66
		} else {
			// https://www.felixcloutier.com/x86/mov
			opcode = []byte{0x8B}
		}
	case MOVQ:
		if IsFloatRegister(n.DstReg) {
			// https://www.felixcloutier.com/x86/movq
			opcode = []byte{0x0f, 0x7e}
			mandatoryPrefix = 0xf3
		} else {
			// https://www.felixcloutier.com/x86/mov
			RexPrefix |= RexPrefixW
			opcode = []byte{0x8B}
		}
	case MOVWLSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		opcode = []byte{0x0f, 0xbf}
	case MOVWLZX:
		// https://www.felixcloutier.com/x86/movzx
		opcode = []byte{0x0f, 0xb7}
	case MOVWQSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		RexPrefix |= RexPrefixW
		opcode = []byte{0x0f, 0xbf}
	case MOVWQZX:
		// https://www.felixcloutier.com/x86/movzx
		RexPrefix |= RexPrefixW
		opcode = []byte{0x0f, 0xb7}
	case SUBQ:
		// https://www.felixcloutier.com/x86/sub
		RexPrefix |= RexPrefixW
		opcode = []byte{0x2b}
	case SUBSD:
		// https://www.felixcloutier.com/x86/subsd
		opcode = []byte{0x0f, 0x5c}
		mandatoryPrefix = 0xf2
	case SUBSS:
		// https://www.felixcloutier.com/x86/subss
		opcode = []byte{0x0f, 0x5c}
		mandatoryPrefix = 0xf3
	case UCOMISD:
		// https://www.felixcloutier.com/x86/ucomisd
		opcode = []byte{0x0f, 0x2e}
		mandatoryPrefix = 0x66
	case UCOMISS:
		// https://www.felixcloutier.com/x86/ucomiss
		opcode = []byte{0x0f, 0x2e}
	default:
		return errorEncodingUnsupported(n)
	}

	if mandatoryPrefix != 0 {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Mandatory_prefix
		a.Buf.WriteByte(mandatoryPrefix)
	}

	if RexPrefix != RexPrefixNone {
		a.Buf.WriteByte(RexPrefix)
	}

	a.Buf.Write(opcode)

	a.Buf.WriteByte(modRM)

	if sbi != nil {
		a.Buf.WriteByte(*sbi)
	}

	if displacementWidth != 0 {
		a.WriteConst(n.SrcConst, displacementWidth)
	}

	return
}
// EncodeConstToRegister encodes instructions whose source operand is an
// immediate constant and whose destination is a register.
//
// Supported: ADDQ, ANDQ, MOVL, MOVQ, SHLQ, SHRQ, PSLLL, PSLLQ, PSRLL, PSRLQ,
// XORL and XORQ. The PSLL*/PSRL* family requires a float (XMM) destination;
// every other instruction requires an integer destination. The immediate range
// is validated per instruction family before anything is written.
func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) {
	regBits, RexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM)
	if err != nil {
		return err
	}

	isFloatReg := IsFloatRegister(n.DstReg)
	switch n.Instruction {
	case PSLLL, PSLLQ, PSRLL, PSRLQ:
		if !isFloatReg {
			return fmt.Errorf("%s needs float register but got %s", InstructionName(n.Instruction), RegisterName(n.DstReg))
		}
	default:
		if isFloatReg {
			return fmt.Errorf("%s needs int register but got %s", InstructionName(n.Instruction), RegisterName(n.DstReg))
		}
	}

	// Validate the immediate's range: only MOVQ can take a full 64-bit constant,
	// shifts take an unsigned 8-bit count, and the XMM shifts a signed 8-bit one.
	if n.Instruction != MOVQ && !FitIn32bit(n.SrcConst) {
		return fmt.Errorf("constant must fit in 32-bit integer for %s, but got %d", InstructionName(n.Instruction), n.SrcConst)
	} else if (n.Instruction == SHLQ || n.Instruction == SHRQ) && (n.SrcConst < 0 || n.SrcConst > math.MaxUint8) {
		return fmt.Errorf("constant must fit in positive 8-bit integer for %s, but got %d", InstructionName(n.Instruction), n.SrcConst)
	} else if (n.Instruction == PSLLL ||
		n.Instruction == PSLLQ ||
		n.Instruction == PSRLL ||
		n.Instruction == PSRLQ) && (n.SrcConst < math.MinInt8 || n.SrcConst > math.MaxInt8) {
		return fmt.Errorf("constant must fit in signed 8-bit integer for %s, but got %d", InstructionName(n.Instruction), n.SrcConst)
	}

	isSigned8bitConst := fitInSigned8bit(n.SrcConst)
	switch inst := n.Instruction; inst {
	case ADDQ:
		// https://www.felixcloutier.com/x86/add
		RexPrefix |= RexPrefixW
		if n.DstReg == REG_AX && !isSigned8bitConst {
			a.Buf.Write([]byte{RexPrefix, 0x05}) // Short form for AX with a 32-bit immediate.
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				regBits
			if isSigned8bitConst {
				a.Buf.Write([]byte{RexPrefix, 0x83, modRM})
			} else {
				a.Buf.Write([]byte{RexPrefix, 0x81, modRM})
			}
		}
		if isSigned8bitConst {
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.WriteConst(n.SrcConst, 32)
		}
	case ANDQ:
		// https://www.felixcloutier.com/x86/and
		RexPrefix |= RexPrefixW
		if n.DstReg == REG_AX && !isSigned8bitConst {
			a.Buf.Write([]byte{RexPrefix, 0x25}) // Short form for AX with a 32-bit immediate.
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				0b00_100_000 | // AND with immediate needs "/4" extension.
				regBits
			if isSigned8bitConst {
				a.Buf.Write([]byte{RexPrefix, 0x83, modRM})
			} else {
				a.Buf.Write([]byte{RexPrefix, 0x81, modRM})
			}
		}
		// Reuse isSigned8bitConst rather than recomputing fitInSigned8bit.
		if isSigned8bitConst {
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.WriteConst(n.SrcConst, 32)
		}
	case MOVL:
		// https://www.felixcloutier.com/x86/mov
		if RexPrefix != RexPrefixNone {
			a.Buf.WriteByte(RexPrefix)
		}
		a.Buf.Write([]byte{0xb8 | regBits})
		a.WriteConst(n.SrcConst, 32)
	case MOVQ:
		// https://www.felixcloutier.com/x86/mov
		if FitIn32bit(n.SrcConst) {
			if n.SrcConst > math.MaxInt32 {
				// Fits in 32 bits but not as a signed int32: use the 32-bit
				// B8+rd form, which zero-extends into the 64-bit register.
				if RexPrefix != RexPrefixNone {
					a.Buf.WriteByte(RexPrefix)
				}
				a.Buf.Write([]byte{0xb8 | regBits})
			} else {
				// Sign-extended 32-bit immediate into the 64-bit register.
				RexPrefix |= RexPrefixW
				modRM := 0b11_000_000 | // Specifying that operand is register.
					regBits
				a.Buf.Write([]byte{RexPrefix, 0xc7, modRM})
			}
			a.WriteConst(n.SrcConst, 32)
		} else {
			// Full 64-bit immediate (MOVABS form).
			RexPrefix |= RexPrefixW
			a.Buf.Write([]byte{RexPrefix, 0xb8 | regBits})
			a.WriteConst(n.SrcConst, 64)
		}
	case SHLQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_100_000 | // SHL with immediate needs "/4" extension.
			regBits
		if n.SrcConst == 1 {
			// Shift-by-one has a dedicated opcode without an immediate byte.
			a.Buf.Write([]byte{RexPrefix, 0xd1, modRM})
		} else {
			a.Buf.Write([]byte{RexPrefix, 0xc1, modRM})
			a.WriteConst(n.SrcConst, 8)
		}
	case SHRQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_101_000 | // SHR with immediate needs "/5" extension.
			regBits
		if n.SrcConst == 1 {
			// Shift-by-one has a dedicated opcode without an immediate byte.
			a.Buf.Write([]byte{RexPrefix, 0xd1, modRM})
		} else {
			a.Buf.Write([]byte{RexPrefix, 0xc1, modRM})
			a.WriteConst(n.SrcConst, 8)
		}
	case PSLLL:
		// https://www.felixcloutier.com/x86/psllw:pslld:psllq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_110_000 | // PSLL with immediate needs "/6" extension.
			regBits
		if RexPrefix != RexPrefixNone {
			a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x72, modRM})
		} else {
			a.Buf.Write([]byte{0x66, 0x0f, 0x72, modRM})
		}
		a.WriteConst(n.SrcConst, 8) // Was duplicated in both branches above.
	case PSLLQ:
		// https://www.felixcloutier.com/x86/psllw:pslld:psllq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_110_000 | // PSLL with immediate needs "/6" extension.
			regBits
		if RexPrefix != RexPrefixNone {
			a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x73, modRM})
		} else {
			a.Buf.Write([]byte{0x66, 0x0f, 0x73, modRM})
		}
		a.WriteConst(n.SrcConst, 8)
	case PSRLL:
		// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_010_000 | // PSRL with immediate needs "/2" extension.
			regBits
		if RexPrefix != RexPrefixNone {
			a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x72, modRM})
		} else {
			a.Buf.Write([]byte{0x66, 0x0f, 0x72, modRM})
		}
		a.WriteConst(n.SrcConst, 8)
	case PSRLQ:
		// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_010_000 | // PSRL with immediate needs "/2" extension.
			regBits
		if RexPrefix != RexPrefixNone {
			a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x73, modRM})
		} else {
			a.Buf.Write([]byte{0x66, 0x0f, 0x73, modRM})
		}
		a.WriteConst(n.SrcConst, 8)
	case XORL, XORQ:
		// https://www.felixcloutier.com/x86/xor
		if inst == XORQ {
			RexPrefix |= RexPrefixW
		}
		if RexPrefix != RexPrefixNone {
			a.Buf.WriteByte(RexPrefix)
		}
		if n.DstReg == REG_AX && !isSigned8bitConst {
			a.Buf.Write([]byte{0x35}) // Short form for AX with a 32-bit immediate.
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				0b00_110_000 | // XOR with immediate needs "/6" extension.
				regBits
			if isSigned8bitConst {
				a.Buf.Write([]byte{0x83, modRM})
			} else {
				a.Buf.Write([]byte{0x81, modRM})
			}
		}
		if isSigned8bitConst {
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.WriteConst(n.SrcConst, 32)
		}
	default:
		err = errorEncodingUnsupported(n)
	}
	return
}
// EncodeMemoryToConst encodes the comparison of a memory operand against an
// immediate constant; only CMPL is supported.
func (a *AssemblerImpl) EncodeMemoryToConst(n *NodeImpl) (err error) {
	if !FitIn32bit(n.DstConst) {
		return fmt.Errorf("too large target const %d for %s", n.DstConst, InstructionName(n.Instruction))
	}

	RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
	if err != nil {
		return err
	}

	if n.Instruction != CMPL {
		return errorEncodingUnsupported(n)
	}

	// https://www.felixcloutier.com/x86/cmp
	// CMP with an immediate needs the "/7" ModRM extension; the 0x83 form takes
	// an 8-bit immediate, the 0x81 form a 32-bit one.
	modRM |= 0b00_111_000
	imm := n.DstConst
	opcode, constWidth := byte(0x81), byte(32)
	if fitInSigned8bit(imm) {
		opcode, constWidth = 0x83, 8
	}

	if RexPrefix != RexPrefixNone {
		a.Buf.WriteByte(RexPrefix)
	}
	a.Buf.Write([]byte{opcode, modRM})
	if sbi != nil {
		a.Buf.WriteByte(*sbi)
	}
	if displacementWidth != 0 {
		a.WriteConst(n.SrcConst, displacementWidth)
	}
	a.WriteConst(imm, constWidth)
	return nil
}
// EncodeConstToMemory stores an immediate constant into the memory location
// described by n; supports MOVB, MOVL and MOVQ.
func (a *AssemblerImpl) EncodeConstToMemory(n *NodeImpl) (err error) {
	RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
	if err != nil {
		return err
	}

	instruction, constant := n.Instruction, n.SrcConst
	if instruction == MOVB && !fitInSigned8bit(constant) {
		return fmt.Errorf("too large load target const %d for MOVB", constant)
	} else if !FitIn32bit(constant) {
		return fmt.Errorf("too large load target const %d for %s", constant, InstructionName(n.Instruction))
	}

	var opcode, constWidth byte
	switch instruction {
	case MOVB:
		opcode, constWidth = 0xc6, 8
	case MOVL:
		opcode, constWidth = 0xc7, 32
	case MOVQ:
		// Storing into a 64-bit location still takes a sign-extended 32-bit immediate.
		RexPrefix |= RexPrefixW
		opcode, constWidth = 0xc7, 32
	default:
		return errorEncodingUnsupported(n)
	}

	if RexPrefix != RexPrefixNone {
		a.Buf.WriteByte(RexPrefix)
	}
	a.Buf.Write([]byte{opcode, modRM})
	if sbi != nil {
		a.Buf.WriteByte(*sbi)
	}
	if displacementWidth != 0 {
		a.WriteConst(n.DstConst, displacementWidth)
	}
	a.WriteConst(constant, constWidth)
	return nil
}
// WriteConst appends v to the assembler's buffer as a little-endian integer of
// the given bit width. length must be one of 8, 32 or 64; any other value is a
// caller bug and panics.
//
// Fixed-size stack arrays are used instead of make([]byte, n) (resolving the
// previous TODO): bytes.Buffer.Write copies and does not retain its argument,
// so the scratch arrays need not escape to the heap.
func (a *AssemblerImpl) WriteConst(v int64, length byte) {
	switch length {
	case 8:
		a.Buf.WriteByte(byte(int8(v)))
	case 32:
		var b [4]byte
		binary.LittleEndian.PutUint32(b[:], uint32(int32(v)))
		a.Buf.Write(b[:])
	case 64:
		var b [8]byte
		binary.LittleEndian.PutUint64(b[:], uint64(v))
		a.Buf.Write(b[:])
	default:
		panic("BUG: length must be one of 8, 32 or 64")
	}
}
// GetMemoryLocation resolves this node's memory operand (whichever of src/dst
// has OperandTypeMemory; dst takes precedence) into the pieces needed to encode
// an x86-64 addressing mode:
//
//   - p: REX prefix bits required by the base/index registers (RexPrefixNone if none).
//   - modRM: ModR/M byte with the mod and r/m fields set; the caller ORs in the
//     reg field (or an opcode extension).
//   - sbi: SIB byte to emit after ModR/M, or nil when no SIB byte is needed.
//   - displacementWidth: bit width (0, 8 or 32) of the displacement the caller
//     must write after ModR/M (and SIB, if present).
func (n *NodeImpl) GetMemoryLocation() (p RexPrefix, modRM byte, sbi *byte, displacementWidth byte, err error) {
	var baseReg, indexReg asm.Register
	var offset asm.ConstantValue
	var scale byte
	if n.Types.dst == OperandTypeMemory {
		baseReg, offset, indexReg, scale = n.DstReg, n.DstConst, n.DstMemIndex, n.DstMemScale
	} else if n.Types.src == OperandTypeMemory {
		baseReg, offset, indexReg, scale = n.SrcReg, n.SrcConst, n.SrcMemIndex, n.SrcMemScale
	} else {
		err = fmt.Errorf("memory location is not supported for %s", n.Types)
		return
	}
	// x86-64 displacements are at most 32-bit wide.
	if !FitIn32bit(offset) {
		err = errors.New("offset does not fit in 32-bit integer")
		return
	}
	if baseReg == asm.NilRegister && indexReg != asm.NilRegister {
		// [(index*scale) + displacement] addressing is possible, but we haven't used it for now.
		err = errors.New("addressing without base register but with index is not implemented")
	} else if baseReg == asm.NilRegister {
		// Absolute [displacement] addressing, encoded via a SIB byte with no base and no index.
		modRM = 0b00_000_100 // Indicate that the memory location is specified by SIB.
		sbiValue := byte(0b00_100_101)
		sbi = &sbiValue
		displacementWidth = 32
	} else if indexReg == asm.NilRegister {
		// [base + displacement] addressing (no index register).
		modRM, p, err = register3bits(baseReg, registerSpecifierPositionModRMFieldRM)
		if err != nil {
			return
		}
		// Create ModR/M byte so that this instruction takes [R/M + displacement] operand if displacement !=0
		// and otherwise [R/M].
		withoutDisplacement := offset == 0 &&
			// If the target register is R13 or BP, we have to keep [R/M + displacement] even if the value
			// is zero, since the plain [R/M] operand is not defined for these two registers.
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing
			baseReg != REG_R13 && baseReg != REG_BP
		if withoutDisplacement {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b00_000_000 // Specifying that operand is memory without displacement
			displacementWidth = 0
		} else if fitInSigned8bit(offset) {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b01_000_000 // Specifying that operand is memory + 8bit displacement.
			displacementWidth = 8
		} else {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b10_000_000 // Specifying that operand is memory + 32bit displacement.
			displacementWidth = 32
		}
		// For SP and R12 register, we have [SIB + displacement] if the const is non-zero, otherwise [SIB].
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing
		//
		// Therefore we emit the SIB byte before the const so that [SIB + displacement] ends up [register + displacement].
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2
		if baseReg == REG_SP || baseReg == REG_R12 {
			sbiValue := byte(0b00_100_100)
			sbi = &sbiValue
		}
	} else {
		// [base + index*scale + displacement] addressing: always requires a SIB byte.
		if indexReg == REG_SP {
			err = errors.New("SP cannot be used for SIB index")
			return
		}
		modRM = 0b00_000_100 // Indicate that the memory location is specified by SIB.
		withoutDisplacement := offset == 0 &&
			// For R13 and BP, base registers cannot be encoded "without displacement" mod (i.e. 0b00 mod).
			baseReg != REG_R13 && baseReg != REG_BP
		if withoutDisplacement {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b00_000_000 // Specifying that operand is SIB without displacement
			displacementWidth = 0
		} else if fitInSigned8bit(offset) {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b01_000_000 // Specifying that operand is SIB + 8bit displacement.
			displacementWidth = 8
		} else {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b10_000_000 // Specifying that operand is SIB + 32bit displacement.
			displacementWidth = 32
		}
		var baseRegBits byte
		baseRegBits, p, err = register3bits(baseReg, registerSpecifierPositionModRMFieldRM)
		if err != nil {
			return
		}
		var indexRegBits byte
		var indexRegPrefix RexPrefix
		indexRegBits, indexRegPrefix, err = register3bits(indexReg, registerSpecifierPositionSIBIndex)
		if err != nil {
			return
		}
		p |= indexRegPrefix
		// SIB byte layout: scale(2 bits) | index(3 bits) | base(3 bits).
		sbiValue := baseRegBits | (indexRegBits << 3)
		switch scale {
		case 1:
			sbiValue |= 0b00_000_000
		case 2:
			sbiValue |= 0b01_000_000
		case 4:
			sbiValue |= 0b10_000_000
		case 8:
			sbiValue |= 0b11_000_000
		default:
			err = fmt.Errorf("scale in SIB must be one of 1, 2, 4, 8 but got %d", scale)
			return
		}
		sbi = &sbiValue
	}
	return
}
// GetRegisterToRegisterModRM builds the REX prefix and ModR/M byte for a
// register-to-register instruction. srcOnModRMReg selects whether the source
// register occupies ModRM:reg (true) or ModRM:r/m (false).
//
// TODO: srcOnModRMReg can be deleted after golang-asm removal. This is necessary to match our implementation
// with golang-asm, but in practice, there are equivalent opcodes to always have src on ModRM:reg without ambiguity.
func (n *NodeImpl) GetRegisterToRegisterModRM(srcOnModRMReg bool) (RexPrefix, modRM byte, err error) {
	// Decide which ModR/M field each operand occupies.
	srcPos := registerSpecifierPositionModRMFieldReg
	dstPos := registerSpecifierPositionModRMFieldRM
	if !srcOnModRMReg {
		srcPos, dstPos = registerSpecifierPositionModRMFieldRM, registerSpecifierPositionModRMFieldReg
	}

	var srcBits byte
	srcBits, RexPrefix, err = register3bits(n.SrcReg, srcPos)
	if err != nil {
		return
	}

	var dstBits, dstPrefix byte
	dstBits, dstPrefix, err = register3bits(n.DstReg, dstPos)
	if err != nil {
		return
	}
	RexPrefix |= dstPrefix

	reg3bits, rm3bits := srcBits, dstBits
	if !srcOnModRMReg {
		reg3bits, rm3bits = dstBits, srcBits
	}

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
	modRM = 0b11_000_000 | // mod=0b11: both operands are registers.
		(reg3bits << 3) |
		rm3bits
	return
}
// RexPrefix represents REX prefix https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix
type RexPrefix = byte

// REX prefixes are independent of each other and can be combined with OR.
const (
	RexPrefixNone RexPrefix = 0x0000_0000 // Indicates that the instruction doesn't need RexPrefix.
	RexPrefixDefault RexPrefix = 0b0100_0000 // The fixed 0b0100 upper nibble present in every emitted REX byte.
	RexPrefixW RexPrefix = 0b0000_1000 | RexPrefixDefault // REX.W: selects the 64-bit operand form (used by the *Q instructions).
	RexPrefixR RexPrefix = 0b0000_0100 | RexPrefixDefault // REX.R: high bit of the ModR/M reg field (see register3bits).
	RexPrefixX RexPrefix = 0b0000_0010 | RexPrefixDefault // REX.X: high bit of the SIB index field (see register3bits).
	RexPrefixB RexPrefix = 0b0000_0001 | RexPrefixDefault // REX.B: high bit of the ModR/M r/m field (see register3bits).
)
// registerSpecifierPosition represents the position in the instruction bytes where an operand register is placed.
type registerSpecifierPosition byte

const (
	// registerSpecifierPositionModRMFieldReg places the register in the ModR/M byte's reg field.
	registerSpecifierPositionModRMFieldReg registerSpecifierPosition = iota
	// registerSpecifierPositionModRMFieldRM places the register in the ModR/M byte's r/m field.
	registerSpecifierPositionModRMFieldRM
	// registerSpecifierPositionSIBIndex places the register in the SIB byte's index field.
	registerSpecifierPositionSIBIndex
)
// register3bits returns the 3-bit encoding of reg for the given field position
// in the instruction, plus the REX prefix bit required when reg is one of the
// "extended" registers (R8-R15 / X8-X15). Returns an error for any register not
// covered by the encoding table below.
func register3bits(reg asm.Register, registerSpecifierPosition registerSpecifierPosition) (bits byte, prefix RexPrefix, err error) {
	prefix = RexPrefixNone
	if REG_R8 <= reg && reg <= REG_R15 || REG_X8 <= reg && reg <= REG_X15 {
		// The extended registers need the position-dependent REX bit (R, B or X).
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix
		switch registerSpecifierPosition {
		case registerSpecifierPositionModRMFieldReg:
			prefix = RexPrefixR
		case registerSpecifierPositionModRMFieldRM:
			prefix = RexPrefixB
		case registerSpecifierPositionSIBIndex:
			prefix = RexPrefixX
		}
	}
	// Low 3 bits of the register number; the REX bit above disambiguates e.g. AX vs R8.
	// https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers
	switch reg {
	case REG_AX, REG_R8, REG_X0, REG_X8:
		bits = 0b000
	case REG_CX, REG_R9, REG_X1, REG_X9:
		bits = 0b001
	case REG_DX, REG_R10, REG_X2, REG_X10:
		bits = 0b010
	case REG_BX, REG_R11, REG_X3, REG_X11:
		bits = 0b011
	case REG_SP, REG_R12, REG_X4, REG_X12:
		bits = 0b100
	case REG_BP, REG_R13, REG_X5, REG_X13:
		bits = 0b101
	case REG_SI, REG_R14, REG_X6, REG_X14:
		bits = 0b110
	case REG_DI, REG_R15, REG_X7, REG_X15:
		bits = 0b111
	default:
		err = fmt.Errorf("invalid register [%s]", RegisterName(reg))
	}
	return
}
// FitIn32bit reports whether v is representable in 32 bits, accepting both the
// signed and the unsigned interpretation, i.e. the range
// [math.MinInt32, math.MaxUint32].
func FitIn32bit(v int64) bool {
	switch {
	case v < math.MinInt32:
		return false
	case v > math.MaxUint32:
		return false
	default:
		return true
	}
}
func fitInSigned8bit(v int64) bool {
return math.MinInt8 <= v && v <= math.MaxInt8
}
// IsFloatRegister reports whether r is one of the floating-point (XMM)
// registers, i.e. in the range REG_X0 through REG_X15.
func IsFloatRegister(r asm.Register) bool {
	return r >= REG_X0 && r <= REG_X15
}