During #425, @neilalexander gave constructive feedback that the API is both moving fast, and not good enough yet. This attempts to reduce the incidental complexity at the cost of a little conflation. ### odd presence of `wasm` and `wasi` packages -> `api` package We had public API packages in wasm and wasi, which helped us avoid leaking too many internals as public. That these had names that look like there should be implementations in them caused unnecessary confusion. This squashes both into one package "api" which has no package collision with anything. We've long struggled with the poorly specified and non-uniformly implemented WASI specification. Trying to bring visibility to its constraints knowing they are routinely invalid taints our API for no good reason. This removes all `WASI` commands in favor of a default that invokes the function `_start` if it exists. In doing so, there's only one path to start a module. Moreover, this puts all wasi code in a top-level package "wasi" as it isn't re-imported by any internal types. ### Reuse of Module for pre and post instantiation to `Binary` -> `Module` Module is defined by WebAssembly in many phases, from decoded to instantiated. However, using the same noun in multiple packages is very confusing. We at one point tried names like "DecodedModule" or "InstantiatedModule", but this is a fool's errand. By deviating slightly from the spec we can make it unambiguous what a module is. This makes the result of compilation a `Binary`, retaining `Module` for an instantiated one. In doing so, there's no longer any name conflict whatsoever. ### Confusion about config -> `ModuleConfig` Also caused by splitting wasm into wasm+wasi is configuration. This conflates both into the same type `ModuleConfig` as it is simpler than trying to explain a "will never be finished" api of wasi snapshot-01 in routine use of WebAssembly. In other words, this further moves WASI out of the foreground as it has been nothing but burden. 
```diff --- a/README.md +++ b/README.md @@ -49,8 +49,8 @@ For example, here's how you can allow WebAssembly modules to read -wm, err := r.InstantiateModule(wazero.WASISnapshotPreview1()) -defer wm.Close() +wm, err := wasi.InstantiateSnapshotPreview1(r) +defer wm.Close() -sysConfig := wazero.NewSysConfig().WithFS(os.DirFS("/work/home")) -module, err := wazero.StartWASICommandWithConfig(r, compiled, sysConfig) +config := wazero.ModuleConfig().WithFS(os.DirFS("/work/home")) +module, err := r.InstantiateModule(binary, config) defer module.Close() ... ```
2145 lines
72 KiB
Go
2145 lines
72 KiB
Go
package asm_amd64
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"math"
|
|
|
|
"github.com/tetratelabs/wazero/internal/asm"
|
|
)
|
|
|
|
// NodeImpl implements asm.Node for amd64.
type NodeImpl struct {
	// NOTE: fields here are exported for testing with the amd64_debug package.

	// Instruction is the instruction this node encodes.
	Instruction asm.Instruction

	// OffsetInBinaryField is the offset of this instruction in the assembled binary.
	// The "Field" suffix dodges the name conflict with the OffsetInBinary method.
	OffsetInBinaryField asm.NodeOffsetInBinary

	// JumpTarget holds the target node in the linked list for the jump-kind instruction.
	JumpTarget *NodeImpl
	// Flag holds per-node encoding state bits (see NodeFlag).
	Flag NodeFlag
	// Next holds the next node from this node in the assembled linked list.
	Next *NodeImpl

	// Types describes where the source and destination operands live; which of
	// the operand fields below are meaningful depends on this value.
	Types                    OperandTypes
	SrcReg, DstReg           asm.Register
	SrcConst, DstConst       asm.ConstantValue
	SrcMemIndex, DstMemIndex asm.Register
	SrcMemScale, DstMemScale byte

	// Mode is an instruction-specific modifier byte (set by CompileRegisterToRegisterWithMode).
	Mode byte

	// readInstructionAddressBeforeTargetInstruction holds the instruction right before the target of
	// read instruction address instruction. See asm.assemblerBase.CompileReadInstructionAddress.
	readInstructionAddressBeforeTargetInstruction asm.Instruction

	// JumpOrigins hold all the nodes trying to jump into this node. In other words, all the nodes with .JumpTarget == this.
	JumpOrigins map[*NodeImpl]struct{}
}
|
|
|
|
// NodeFlag is a bitset of per-node encoding state.
type NodeFlag byte

const (
	// NodeFlagInitializedForEncoding is always set to indicate that node is already initialized. Notably, this is used to judge
	// whether a jump is backward or forward before encoding.
	NodeFlagInitializedForEncoding NodeFlag = (1 << iota)
	// NodeFlagBackwardJump marks a jump node whose target was seen earlier in the list.
	NodeFlagBackwardJump
	// NodeFlagShortForwardJump is only used by forward branch jumps, which means .JumpTarget != nil and
	// the target node is encoded after this node. It is set optimistically so that we encode all the jumps with JumpTarget
	// as short jumps (i.e. relative signed 8-bit integer offset jump) and try to encode as small as possible.
	NodeFlagShortForwardJump
)
|
|
|
|
func (n *NodeImpl) isInitializedForEncoding() bool {
|
|
return n.Flag&NodeFlagInitializedForEncoding != 0
|
|
}
|
|
|
|
func (n *NodeImpl) isJumpNode() bool {
|
|
return n.JumpTarget != nil
|
|
}
|
|
|
|
func (n *NodeImpl) isBackwardJump() bool {
|
|
return n.isJumpNode() && (n.Flag&NodeFlagBackwardJump != 0)
|
|
}
|
|
|
|
func (n *NodeImpl) isForwardJump() bool {
|
|
return n.isJumpNode() && (n.Flag&NodeFlagBackwardJump == 0)
|
|
}
|
|
|
|
func (n *NodeImpl) isForwardShortJump() bool {
|
|
return n.isForwardJump() && n.Flag&NodeFlagShortForwardJump != 0
|
|
}
|
|
|
|
// AssignJumpTarget implements asm.Node.AssignJumpTarget.
|
|
func (n *NodeImpl) AssignJumpTarget(target asm.Node) {
|
|
n.JumpTarget = target.(*NodeImpl)
|
|
}
|
|
|
|
// AssignDestinationConstant implements asm.Node.AssignDestinationConstant.
|
|
func (n *NodeImpl) AssignDestinationConstant(value asm.ConstantValue) {
|
|
n.DstConst = value
|
|
}
|
|
|
|
// AssignSourceConstant implements asm.Node.AssignSourceConstant.
|
|
func (n *NodeImpl) AssignSourceConstant(value asm.ConstantValue) {
|
|
n.SrcConst = value
|
|
}
|
|
|
|
// OffsetInBinary implements asm.Node.OffsetInBinary.
|
|
func (n *NodeImpl) OffsetInBinary() asm.NodeOffsetInBinary {
|
|
return n.OffsetInBinaryField
|
|
}
|
|
|
|
// String implements fmt.Stringer.
//
// This is for debugging purpose, and the format is almost the same as the AT&T assembly syntax,
// meaning that this should look like "INSTRUCTION ${from}, ${to}" where each operand
// might be embraced by '[]' to represent the memory location.
func (n *NodeImpl) String() (ret string) {
	instName := InstructionName(n.Instruction)
	// Render according to which operand fields this node's Types combination uses.
	switch n.Types {
	case OperandTypesNoneToNone:
		ret = instName
	case OperandTypesNoneToRegister:
		ret = fmt.Sprintf("%s %s", instName, RegisterName(n.DstReg))
	case OperandTypesNoneToMemory:
		// A destination index register (scaled addressing) is only printed when present.
		if n.DstMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s [%s + 0x%x + %s*0x%x]", instName,
				RegisterName(n.DstReg), n.DstConst, RegisterName(n.DstMemIndex), n.DstMemScale)
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x]", instName, RegisterName(n.DstReg), n.DstConst)
		}
	case OperandTypesNoneToBranch:
		ret = fmt.Sprintf("%s {%v}", instName, n.JumpTarget)
	case OperandTypesRegisterToNone:
		ret = fmt.Sprintf("%s %s", instName, RegisterName(n.SrcReg))
	case OperandTypesRegisterToRegister:
		ret = fmt.Sprintf("%s %s, %s", instName, RegisterName(n.SrcReg), RegisterName(n.DstReg))
	case OperandTypesRegisterToMemory:
		if n.DstMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s %s, [%s + 0x%x + %s*0x%x]", instName, RegisterName(n.SrcReg),
				RegisterName(n.DstReg), n.DstConst, RegisterName(n.DstMemIndex), n.DstMemScale)
		} else {
			ret = fmt.Sprintf("%s %s, [%s + 0x%x]", instName, RegisterName(n.SrcReg), RegisterName(n.DstReg), n.DstConst)
		}
	case OperandTypesRegisterToConst:
		ret = fmt.Sprintf("%s %s, 0x%x", instName, RegisterName(n.SrcReg), n.DstConst)
	case OperandTypesMemoryToRegister:
		// NOTE(review): the indexed form prints the offset with %d (decimal)
		// while most other cases use 0x%x — presumably unintentional; confirm
		// before normalizing since this is debug-only output.
		if n.SrcMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s [%s + %d + %s*0x%x], %s", instName,
				RegisterName(n.SrcReg), n.SrcConst, RegisterName(n.SrcMemIndex), n.SrcMemScale, RegisterName(n.DstReg))
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x], %s", instName, RegisterName(n.SrcReg), n.SrcConst, RegisterName(n.DstReg))
		}
	case OperandTypesMemoryToConst:
		if n.SrcMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s [%s + %d + %s*0x%x], 0x%x", instName,
				RegisterName(n.SrcReg), n.SrcConst, RegisterName(n.SrcMemIndex), n.SrcMemScale, n.DstConst)
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x], 0x%x", instName, RegisterName(n.SrcReg), n.SrcConst, n.DstConst)
		}
	case OperandTypesConstToMemory:
		if n.DstMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s 0x%x, [%s + 0x%x + %s*0x%x]", instName, n.SrcConst,
				RegisterName(n.DstReg), n.DstConst, RegisterName(n.DstMemIndex), n.DstMemScale)
		} else {
			ret = fmt.Sprintf("%s 0x%x, [%s + 0x%x]", instName, n.SrcConst, RegisterName(n.DstReg), n.DstConst)
		}
	case OperandTypesConstToRegister:
		ret = fmt.Sprintf("%s 0x%x, %s", instName, n.SrcConst, RegisterName(n.DstReg))
	}
	return
}
|
|
|
|
// OperandType represents where an operand is placed for an instruction.
// Note: this is almost the same as obj.AddrType in GO assembler.
type OperandType byte

const (
	OperandTypeNone OperandType = iota
	OperandTypeRegister
	OperandTypeMemory
	OperandTypeConst
	OperandTypeBranch
)

// String implements fmt.Stringer. Unknown values render as the empty string.
func (o OperandType) String() string {
	switch o {
	case OperandTypeNone:
		return "none"
	case OperandTypeRegister:
		return "register"
	case OperandTypeMemory:
		return "memory"
	case OperandTypeConst:
		return "const"
	case OperandTypeBranch:
		return "branch"
	default:
		return ""
	}
}
|
|
|
|
// OperandTypes represents the only combinations of two OperandTypes used by wazero
type OperandTypes struct{ src, dst OperandType }

// All the source/destination operand-type combinations an instruction may use.
var (
	OperandTypesNoneToNone         = OperandTypes{OperandTypeNone, OperandTypeNone}
	OperandTypesNoneToRegister     = OperandTypes{OperandTypeNone, OperandTypeRegister}
	OperandTypesNoneToMemory       = OperandTypes{OperandTypeNone, OperandTypeMemory}
	OperandTypesNoneToBranch       = OperandTypes{OperandTypeNone, OperandTypeBranch}
	OperandTypesRegisterToNone     = OperandTypes{OperandTypeRegister, OperandTypeNone}
	OperandTypesRegisterToRegister = OperandTypes{OperandTypeRegister, OperandTypeRegister}
	OperandTypesRegisterToMemory   = OperandTypes{OperandTypeRegister, OperandTypeMemory}
	OperandTypesRegisterToConst    = OperandTypes{OperandTypeRegister, OperandTypeConst}
	OperandTypesMemoryToRegister   = OperandTypes{OperandTypeMemory, OperandTypeRegister}
	OperandTypesMemoryToConst      = OperandTypes{OperandTypeMemory, OperandTypeConst}
	OperandTypesConstToRegister    = OperandTypes{OperandTypeConst, OperandTypeRegister}
	OperandTypesConstToMemory      = OperandTypes{OperandTypeConst, OperandTypeMemory}
)
|
|
|
|
// String implements fmt.Stringer
|
|
func (o OperandTypes) String() string {
|
|
return fmt.Sprintf("from:%s,to:%s", o.src, o.dst)
|
|
}
|
|
|
|
// AssemblerImpl implements Assembler.
type AssemblerImpl struct {
	asm.BaseAssemblerImpl
	// EnablePadding controls whether NOP padding is inserted before certain
	// instructions to avoid Intel's jump-conditional-code erratum (see maybeNOPPadding).
	EnablePadding bool
	// Root and Current are the head and tail of the node linked list.
	Root, Current *NodeImpl
	// Buf accumulates the encoded machine code.
	Buf *bytes.Buffer
	// ForceReAssemble is set when encoding must restart from scratch, e.g. when a
	// forward jump assumed short turns out to need the long form.
	ForceReAssemble bool
}
|
|
|
|
func NewAssemblerImpl() *AssemblerImpl {
|
|
return &AssemblerImpl{Buf: bytes.NewBuffer(nil), EnablePadding: true}
|
|
}
|
|
|
|
// newNode creates a new Node and appends it into the linked list.
|
|
func (a *AssemblerImpl) newNode(instruction asm.Instruction, types OperandTypes) *NodeImpl {
|
|
n := &NodeImpl{
|
|
Instruction: instruction,
|
|
Next: nil,
|
|
Types: types,
|
|
JumpOrigins: map[*NodeImpl]struct{}{},
|
|
}
|
|
|
|
a.addNode(n)
|
|
return n
|
|
}
|
|
|
|
// addNode appends the new node into the linked list.
|
|
func (a *AssemblerImpl) addNode(node *NodeImpl) {
|
|
if a.Root == nil {
|
|
a.Root = node
|
|
a.Current = node
|
|
} else {
|
|
parent := a.Current
|
|
parent.Next = node
|
|
a.Current = node
|
|
}
|
|
|
|
for _, o := range a.SetBranchTargetOnNextNodes {
|
|
origin := o.(*NodeImpl)
|
|
origin.JumpTarget = node
|
|
}
|
|
a.SetBranchTargetOnNextNodes = nil
|
|
}
|
|
|
|
// EncodeNode encodes the given node into writer.
//
// It dispatches to the per-operand-type-combination encoder; an unknown
// combination yields an error.
func (a *AssemblerImpl) EncodeNode(n *NodeImpl) (err error) {
	switch n.Types {
	case OperandTypesNoneToNone:
		err = a.encodeNoneToNone(n)
	case OperandTypesNoneToRegister:
		err = a.EncodeNoneToRegister(n)
	case OperandTypesNoneToMemory:
		err = a.EncodeNoneToMemory(n)
	case OperandTypesNoneToBranch:
		// Branching operand can be encoded as relative jumps.
		err = a.EncodeRelativeJump(n)
	case OperandTypesRegisterToNone:
		err = a.EncodeRegisterToNone(n)
	case OperandTypesRegisterToRegister:
		err = a.EncodeRegisterToRegister(n)
	case OperandTypesRegisterToMemory:
		err = a.EncodeRegisterToMemory(n)
	case OperandTypesRegisterToConst:
		err = a.EncodeRegisterToConst(n)
	case OperandTypesMemoryToRegister:
		err = a.EncodeMemoryToRegister(n)
	case OperandTypesConstToRegister:
		err = a.EncodeConstToRegister(n)
	case OperandTypesConstToMemory:
		err = a.EncodeConstToMemory(n)
	case OperandTypesMemoryToConst:
		err = a.EncodeMemoryToConst(n)
	default:
		err = fmt.Errorf("encoder undefined for [%s] operand type", n.Types)
	}
	return
}
|
|
|
|
// Assemble implements asm.AssemblerBase
|
|
func (a *AssemblerImpl) Assemble() ([]byte, error) {
|
|
a.InitializeNodesForEncoding()
|
|
|
|
// Continue encoding until we are not forced to re-assemble which happens when
|
|
// an short relative jump ends up the offset larger than 8-bit length.
|
|
for {
|
|
err := a.Encode()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if !a.ForceReAssemble {
|
|
break
|
|
} else {
|
|
// We reset the length of buffer but don't delete the underlying slice since
|
|
// the binary size will roughly the same after reassemble.
|
|
a.Buf.Reset()
|
|
// Reset the re-assemble Flag in order to avoid the infinite loop!
|
|
a.ForceReAssemble = false
|
|
}
|
|
}
|
|
|
|
code := a.Buf.Bytes()
|
|
for _, cb := range a.OnGenerateCallbacks {
|
|
if err := cb(code); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
return code, nil
|
|
}
|
|
|
|
// InitializeNodesForEncoding initializes NodeImpl.Flag and determines whether each
// jump is a forward or backward jump.
func (a *AssemblerImpl) InitializeNodesForEncoding() {
	var count int
	for n := a.Root; n != nil; n = n.Next {
		count++
		n.Flag |= NodeFlagInitializedForEncoding
		if target := n.JumpTarget; target != nil {
			if target.isInitializedForEncoding() {
				// This means the target exists behind.
				n.Flag |= NodeFlagBackwardJump
			} else {
				// Otherwise, this is forward jump.
				// We start with assuming that the jump can be short (8-bit displacement).
				// If it doesn't fit, we change this Flag in resolveRelativeForwardJump.
				n.Flag |= NodeFlagShortForwardJump
			}
		}
	}

	// Roughly allocate the buffer by assuming an instruction has 5-bytes length on average.
	a.Buf.Grow(count * 5)
}
|
|
|
|
// Encode walks the node linked list once, emitting each instruction into a.Buf:
// optional NOP padding first, then the instruction itself, then resolution of
// any forward jumps that target the just-encoded node.
func (a *AssemblerImpl) Encode() (err error) {
	for n := a.Root; n != nil; n = n.Next {
		// If an instruction needs NOP padding, we do so before encoding it.
		// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
		if a.EnablePadding {
			if err = a.maybeNOPPadding(n); err != nil {
				return
			}
		}

		// After the padding, we can finalize the offset of this instruction in the binary.
		n.OffsetInBinaryField = (uint64(a.Buf.Len()))

		if err := a.EncodeNode(n); err != nil {
			return fmt.Errorf("%w: %v", err, n)
		}

		// Now that this node's offset is final, resolve the forward jumps targeting it.
		err = a.ResolveForwardRelativeJumps(n)
		if err != nil {
			err = fmt.Errorf("invalid relative forward jumps: %w", err)
			break
		}
	}
	return
}
|
|
|
|
// maybeNOPPadding maybe appends NOP instructions before the node `n`.
// This is necessary to avoid Intel's jump erratum:
// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
func (a *AssemblerImpl) maybeNOPPadding(n *NodeImpl) (err error) {
	var instructionLen int32

	// See Section 2.1 for when we have to pad NOP:
	// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
	switch n.Instruction {
	case RET, JMP, JCC, JCS, JEQ, JGE, JGT, JHI, JLE, JLS, JLT, JMI, JNE, JPC, JPS:
		// In order to know the instruction length before writing into the binary,
		// we try encoding it with the temporary buffer.
		saved := a.Buf
		a.Buf = bytes.NewBuffer(nil)

		// Assign the temporary offset which may or may not be correct depending on the padding decision.
		n.OffsetInBinaryField = uint64(saved.Len())

		// Encode the node and get the instruction length.
		if err = a.EncodeNode(n); err != nil {
			return
		}
		instructionLen = int32(a.Buf.Len())

		// Revert the temporary buffer.
		a.Buf = saved
	case // The possible fused jump instructions if the next node is a conditional jump instruction.
		CMPL, CMPQ, TESTL, TESTQ, ADDL, ADDQ, SUBL, SUBQ, ANDL, ANDQ, INCQ, DECQ:
		instructionLen, err = a.fusedInstructionLength(n)
		if err != nil {
			return err
		}
	}

	if instructionLen == 0 {
		// Not a (potentially fused) jump instruction — no padding needed.
		return
	}

	const boundaryInBytes int32 = 32
	const mask int32 = boundaryInBytes - 1

	var padNum int
	currentPos := int32(a.Buf.Len())
	if used := currentPos & mask; used+instructionLen >= boundaryInBytes {
		// The instruction would cross or end on a 32-byte boundary, so pad
		// up to the next boundary before encoding it.
		padNum = int(boundaryInBytes - used)
	}

	a.padNOP(padNum)
	return
}
|
|
|
|
// fusedInstructionLength returns the length of "macro fused instruction" if the
// instruction sequence starting from `n` can be fused by processor. Otherwise,
// returns zero.
func (a *AssemblerImpl) fusedInstructionLength(n *NodeImpl) (ret int32, err error) {
	// Find the next non-NOP instruction.
	next := n.Next
	for ; next != nil && next.Instruction == NOP; next = next.Next {
	}

	if next == nil {
		return
	}

	inst, jmpInst := n.Instruction, next.Instruction

	if !(jmpInst == JCC || jmpInst == JCS || jmpInst == JEQ || jmpInst == JGE || jmpInst == JGT ||
		jmpInst == JHI || jmpInst == JLE || jmpInst == JLS || jmpInst == JLT || jmpInst == JMI ||
		jmpInst == JNE || jmpInst == JPC || jmpInst == JPS) {
		// If the next instruction is not jump kind, the instruction will not be fused.
		return
	}

	// How to determine whether or not the instruction can be fused is described in
	// Section 3.4.2.2 of "Intel Optimization Manual":
	// https://www.intel.com/content/dam/doc/manual/64-ia-32-architectures-optimization-manual.pdf
	isTest := inst == TESTL || inst == TESTQ
	isCmp := inst == CMPQ || inst == CMPL
	isTestCmp := isTest || isCmp
	if isTestCmp && ((n.Types.src == OperandTypeMemory && n.Types.dst == OperandTypeConst) ||
		(n.Types.src == OperandTypeConst && n.Types.dst == OperandTypeMemory)) {
		// The manual says: "CMP and TEST can not be fused when comparing MEM-IMM".
		return
	}

	// Implement the decision according to the table 3-1 in the manual.
	isAnd := inst == ANDL || inst == ANDQ
	if !isTest && !isAnd {
		if jmpInst == JMI || jmpInst == JPL || jmpInst == JPS || jmpInst == JPC {
			// These jumps are only fused for TEST or AND.
			return
		}
		isAdd := inst == ADDL || inst == ADDQ
		isSub := inst == SUBL || inst == SUBQ
		if !isCmp && !isAdd && !isSub {
			if jmpInst == JCS || jmpInst == JCC || jmpInst == JHI || jmpInst == JLS {
				// These jumps are only fused for TEST, AND, CMP, ADD, or SUB.
				return
			}
		}
	}

	// Now the instruction is ensured to be fused by the processor.
	// In order to know the fused instruction length before writing into the binary,
	// we try encoding it with the temporary buffer.
	saved := a.Buf
	savedLen := uint64(saved.Len())
	a.Buf = bytes.NewBuffer(nil)

	for _, fused := range []*NodeImpl{n, next} {
		// Assign the temporary offset which may or may not be correct depending on the padding decision.
		fused.OffsetInBinaryField = savedLen + uint64(a.Buf.Len())

		// Encode the node into the temporary buffer.
		err = a.EncodeNode(fused)
		if err != nil {
			return
		}
	}

	ret = int32(a.Buf.Len())

	// Revert the temporary buffer.
	a.Buf = saved
	return
}
|
|
|
|
// nopOpcodes is the multi byte NOP instructions table derived from section 5.8 "Code Padding with Operand-Size Override and Multibyte NOP"
// in "AMD Software Optimization Guide for AMD Family 15h Processors" https://www.amd.com/system/files/TechDocs/47414_15h_sw_opt_guide.pdf
//
// Entry i holds the (i+1)-byte NOP encoding in its first i+1 bytes.
//
// Note: We use up to 9 bytes NOP variant to line our implementation with Go's assembler.
// TODO: After golang-asm removal, add 9, 10 and 11 bytes variants.
var nopOpcodes = [][9]byte{
	{0x90},
	{0x66, 0x90},
	{0x0f, 0x1f, 0x00},
	{0x0f, 0x1f, 0x40, 0x00},
	{0x0f, 0x1f, 0x44, 0x00, 0x00},
	{0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
	{0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
}
|
|
|
|
func (a *AssemblerImpl) padNOP(num int) {
|
|
for num > 0 {
|
|
singleNopNum := num
|
|
if singleNopNum > len(nopOpcodes) {
|
|
singleNopNum = len(nopOpcodes)
|
|
}
|
|
a.Buf.Write(nopOpcodes[singleNopNum-1][:singleNopNum])
|
|
num -= singleNopNum
|
|
}
|
|
}
|
|
|
|
// CompileStandAlone implements asm.AssemblerBase.CompileStandAlone
|
|
func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node {
|
|
return a.newNode(instruction, OperandTypesNoneToNone)
|
|
}
|
|
|
|
// CompileConstToRegister implements asm.AssemblerBase.CompileConstToRegister
|
|
func (a *AssemblerImpl) CompileConstToRegister(instruction asm.Instruction, value asm.ConstantValue, destinationReg asm.Register) (inst asm.Node) {
|
|
n := a.newNode(instruction, OperandTypesConstToRegister)
|
|
n.SrcConst = value
|
|
n.DstReg = destinationReg
|
|
return n
|
|
}
|
|
|
|
// CompileRegisterToRegister implements asm.AssemblerBase.CompileRegisterToRegister
|
|
func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) {
|
|
n := a.newNode(instruction, OperandTypesRegisterToRegister)
|
|
n.SrcReg = from
|
|
n.DstReg = to
|
|
}
|
|
|
|
// CompileMemoryToRegister implements asm.AssemblerBase.CompileMemoryToRegister
|
|
func (a *AssemblerImpl) CompileMemoryToRegister(instruction asm.Instruction, sourceBaseReg asm.Register, sourceOffsetConst asm.ConstantValue, destinationReg asm.Register) {
|
|
n := a.newNode(instruction, OperandTypesMemoryToRegister)
|
|
n.SrcReg = sourceBaseReg
|
|
n.SrcConst = sourceOffsetConst
|
|
n.DstReg = destinationReg
|
|
}
|
|
|
|
// CompileRegisterToMemory implements asm.AssemblerBase.CompileRegisterToMemory
|
|
func (a *AssemblerImpl) CompileRegisterToMemory(instruction asm.Instruction, sourceRegister asm.Register, destinationBaseRegister asm.Register, destinationOffsetConst asm.ConstantValue) {
|
|
n := a.newNode(instruction, OperandTypesRegisterToMemory)
|
|
n.SrcReg = sourceRegister
|
|
n.DstReg = destinationBaseRegister
|
|
n.DstConst = destinationOffsetConst
|
|
}
|
|
|
|
// CompileJump implements asm.AssemblerBase.CompileJump
|
|
func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node {
|
|
return a.newNode(jmpInstruction, OperandTypesNoneToBranch)
|
|
}
|
|
|
|
// CompileJumpToMemory implements asm.AssemblerBase.CompileJumpToMemory
|
|
func (a *AssemblerImpl) CompileJumpToMemory(jmpInstruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) {
|
|
n := a.newNode(jmpInstruction, OperandTypesNoneToMemory)
|
|
n.DstReg = baseReg
|
|
n.DstConst = offset
|
|
}
|
|
|
|
// CompileJumpToRegister implements asm.AssemblerBase.CompileJumpToRegister
|
|
func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) {
|
|
n := a.newNode(jmpInstruction, OperandTypesNoneToRegister)
|
|
n.DstReg = reg
|
|
}
|
|
|
|
// CompileReadInstructionAddress implements asm.AssemblerBase.CompileReadInstructionAddress
|
|
func (a *AssemblerImpl) CompileReadInstructionAddress(destinationRegister asm.Register, beforeAcquisitionTargetInstruction asm.Instruction) {
|
|
n := a.newNode(LEAQ, OperandTypesMemoryToRegister)
|
|
n.DstReg = destinationRegister
|
|
n.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction
|
|
}
|
|
|
|
// CompileRegisterToRegisterWithMode implements assembler.CompileRegisterToRegisterWithMode
|
|
func (a *AssemblerImpl) CompileRegisterToRegisterWithMode(instruction asm.Instruction, from, to asm.Register, mode Mode) {
|
|
n := a.newNode(instruction, OperandTypesRegisterToRegister)
|
|
n.SrcReg = from
|
|
n.DstReg = to
|
|
n.Mode = mode
|
|
}
|
|
|
|
// CompileMemoryWithIndexToRegister implements assembler.CompileMemoryWithIndexToRegister
|
|
func (a *AssemblerImpl) CompileMemoryWithIndexToRegister(instruction asm.Instruction, srcBaseReg asm.Register, srcOffsetConst asm.ConstantValue, srcIndex asm.Register, srcScale int16, dstReg asm.Register) {
|
|
n := a.newNode(instruction, OperandTypesMemoryToRegister)
|
|
n.SrcReg = srcBaseReg
|
|
n.SrcConst = srcOffsetConst
|
|
n.SrcMemIndex = srcIndex
|
|
n.SrcMemScale = byte(srcScale)
|
|
n.DstReg = dstReg
|
|
}
|
|
|
|
// CompileRegisterToMemoryWithIndex implements assembler.CompileRegisterToMemoryWithIndex
|
|
func (a *AssemblerImpl) CompileRegisterToMemoryWithIndex(instruction asm.Instruction, srcReg asm.Register, dstBaseReg asm.Register, dstOffsetConst asm.ConstantValue, dstIndex asm.Register, dstScale int16) {
|
|
n := a.newNode(instruction, OperandTypesRegisterToMemory)
|
|
n.SrcReg = srcReg
|
|
n.DstReg = dstBaseReg
|
|
n.DstConst = dstOffsetConst
|
|
n.DstMemIndex = dstIndex
|
|
n.DstMemScale = byte(dstScale)
|
|
}
|
|
|
|
// CompileRegisterToConst implements assembler.CompileRegisterToConst
|
|
func (a *AssemblerImpl) CompileRegisterToConst(instruction asm.Instruction, srcRegister asm.Register, value asm.ConstantValue) asm.Node {
|
|
n := a.newNode(instruction, OperandTypesRegisterToConst)
|
|
n.SrcReg = srcRegister
|
|
n.DstConst = value
|
|
return n
|
|
}
|
|
|
|
// CompileRegisterToNone implements assembler.CompileRegisterToNone
|
|
func (a *AssemblerImpl) CompileRegisterToNone(instruction asm.Instruction, register asm.Register) {
|
|
n := a.newNode(instruction, OperandTypesRegisterToNone)
|
|
n.SrcReg = register
|
|
}
|
|
|
|
// CompileNoneToRegister implements assembler.CompileNoneToRegister
|
|
func (a *AssemblerImpl) CompileNoneToRegister(instruction asm.Instruction, register asm.Register) {
|
|
n := a.newNode(instruction, OperandTypesNoneToRegister)
|
|
n.DstReg = register
|
|
}
|
|
|
|
// CompileNoneToMemory implements assembler.CompileNoneToMemory
|
|
func (a *AssemblerImpl) CompileNoneToMemory(instruction asm.Instruction, baseReg asm.Register, offset asm.ConstantValue) {
|
|
n := a.newNode(instruction, OperandTypesNoneToMemory)
|
|
n.DstReg = baseReg
|
|
n.DstConst = offset
|
|
}
|
|
|
|
// CompileConstToMemory implements assembler.CompileConstToMemory
|
|
func (a *AssemblerImpl) CompileConstToMemory(instruction asm.Instruction, value asm.ConstantValue, dstbaseReg asm.Register, dstOffset asm.ConstantValue) asm.Node {
|
|
n := a.newNode(instruction, OperandTypesConstToMemory)
|
|
n.SrcConst = value
|
|
n.DstReg = dstbaseReg
|
|
n.DstConst = dstOffset
|
|
return n
|
|
}
|
|
|
|
// CompileMemoryToConst implements assembler.CompileMemoryToConst
|
|
func (a *AssemblerImpl) CompileMemoryToConst(instruction asm.Instruction, srcBaseReg asm.Register, srcOffset asm.ConstantValue, value asm.ConstantValue) asm.Node {
|
|
n := a.newNode(instruction, OperandTypesMemoryToConst)
|
|
n.SrcReg = srcBaseReg
|
|
n.SrcConst = srcOffset
|
|
n.DstConst = value
|
|
return n
|
|
}
|
|
|
|
func errorEncodingUnsupported(n *NodeImpl) error {
|
|
return fmt.Errorf("%s is unsupported for %s type", InstructionName(n.Instruction), n.Types)
|
|
}
|
|
|
|
// encodeNoneToNone encodes an instruction that takes no operands (CDQ, CQO,
// NOP, RET) directly into the buffer.
func (a *AssemblerImpl) encodeNoneToNone(n *NodeImpl) (err error) {
	switch n.Instruction {
	case CDQ:
		// https://www.felixcloutier.com/x86/cwd:cdq:cqo
		err = a.Buf.WriteByte(0x99)
	case CQO:
		// CQO is CDQ with the REX.W prefix (64-bit operand size).
		// https://www.felixcloutier.com/x86/cwd:cdq:cqo
		_, err = a.Buf.Write([]byte{RexPrefixW, 0x99})
	case NOP:
		// Simply optimize out the NOP instructions.
	case RET:
		// https://www.felixcloutier.com/x86/ret
		err = a.Buf.WriteByte(0xc3)
	default:
		err = errorEncodingUnsupported(n)
	}
	return
}
|
|
|
|
// EncodeNoneToRegister encodes an instruction whose only operand is the
// destination register (register-indirect JMP and the SETcc family).
func (a *AssemblerImpl) EncodeNoneToRegister(n *NodeImpl) (err error) {
	regBits, prefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM)
	if err != nil {
		return err
	}

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
	modRM := 0b11_000_000 | // Specifying that operand is register.
		regBits
	if n.Instruction == JMP {
		// JMP's Opcode is defined as "FF /4" meaning that we have to have "4"
		// in 4-6th bits in the ModRM byte. https://www.felixcloutier.com/x86/jmp
		modRM |= 0b00_100_000
	} else {
		if REG_SP <= n.DstReg && n.DstReg <= REG_DI {
			// If the destination is one byte length register, we need to have the default prefix.
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers
			prefix |= RexPrefixDefault
		}
	}

	if prefix != RexPrefixNone {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Encoding
		if err = a.Buf.WriteByte(prefix); err != nil {
			return
		}
	}

	// SETcc opcodes are 0x0f 0x9X; the low nibble selects the condition.
	switch n.Instruction {
	case JMP:
		// https://www.felixcloutier.com/x86/jmp
		_, err = a.Buf.Write([]byte{0xff, modRM})
	case SETCC:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x93, modRM})
	case SETCS:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x92, modRM})
	case SETEQ:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x94, modRM})
	case SETGE:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x9d, modRM})
	case SETGT:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x9f, modRM})
	case SETHI:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x97, modRM})
	case SETLE:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x9e, modRM})
	case SETLS:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x96, modRM})
	case SETLT:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x9c, modRM})
	case SETNE:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x95, modRM})
	case SETPC:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x9b, modRM})
	case SETPS:
		// https://www.felixcloutier.com/x86/setcc
		_, err = a.Buf.Write([]byte{0x0f, 0x9a, modRM})
	default:
		err = errorEncodingUnsupported(n)
	}
	return
}
|
|
|
|
// EncodeNoneToMemory encodes an instruction whose only operand is a memory
// location (INCQ, DECQ, and memory-indirect JMP).
func (a *AssemblerImpl) EncodeNoneToMemory(n *NodeImpl) (err error) {
	RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
	if err != nil {
		return err
	}

	var opcode byte
	switch n.Instruction {
	case INCQ:
		// https://www.felixcloutier.com/x86/inc
		RexPrefix |= RexPrefixW
		opcode = 0xff
	case DECQ:
		// https://www.felixcloutier.com/x86/dec
		RexPrefix |= RexPrefixW
		modRM |= 0b00_001_000 // DEC needs "/1" extension in ModRM.
		opcode = 0xff
	case JMP:
		// https://www.felixcloutier.com/x86/jmp
		modRM |= 0b00_100_000 // JMP needs "/4" extension in ModRM.
		opcode = 0xff
	default:
		return errorEncodingUnsupported(n)
	}

	if RexPrefix != RexPrefixNone {
		a.Buf.WriteByte(RexPrefix)
	}

	a.Buf.Write([]byte{opcode, modRM})

	// Optional SIB byte, present only for scaled-index addressing.
	if sbi != nil {
		a.Buf.WriteByte(*sbi)
	}

	// Displacement follows, its width (0/1/4 bytes) chosen by GetMemoryLocation.
	if displacementWidth != 0 {
		a.WriteConst(n.DstConst, displacementWidth)
	}
	return
}
|
|
|
|
// relativeJumpOpcode holds the short (8-bit offset) and long (32-bit offset)
// opcode encodings of a relative jump instruction.
type relativeJumpOpcode struct{ short, long []byte }

// instructionLen returns the total encoded size in bytes of the jump in the
// chosen form: the opcode bytes plus the trailing offset immediate.
func (o relativeJumpOpcode) instructionLen(short bool) int64 {
	if short {
		return int64(len(o.short)) + 1 // 8-bit offset = 1 byte
	}
	return int64(len(o.long)) + 4 // 32-bit offset = 4 bytes
}
|
|
|
|
// relativeJumpOpcodes maps each relative-jump instruction to its short (rel8)
// and long (rel32) opcode encodings. Conditional jumps use the 0x7x short /
// 0x0f 0x8x long pairs; unconditional JMP uses 0xeb / 0xe9.
var relativeJumpOpcodes = map[asm.Instruction]relativeJumpOpcode{
	// https://www.felixcloutier.com/x86/jcc
	JCC: {short: []byte{0x73}, long: []byte{0x0f, 0x83}},
	JCS: {short: []byte{0x72}, long: []byte{0x0f, 0x82}},
	JEQ: {short: []byte{0x74}, long: []byte{0x0f, 0x84}},
	JGE: {short: []byte{0x7d}, long: []byte{0x0f, 0x8d}},
	JGT: {short: []byte{0x7f}, long: []byte{0x0f, 0x8f}},
	JHI: {short: []byte{0x77}, long: []byte{0x0f, 0x87}},
	JLE: {short: []byte{0x7e}, long: []byte{0x0f, 0x8e}},
	JLS: {short: []byte{0x76}, long: []byte{0x0f, 0x86}},
	JLT: {short: []byte{0x7c}, long: []byte{0x0f, 0x8c}},
	JMI: {short: []byte{0x78}, long: []byte{0x0f, 0x88}},
	JNE: {short: []byte{0x75}, long: []byte{0x0f, 0x85}},
	JPC: {short: []byte{0x7b}, long: []byte{0x0f, 0x8b}},
	JPS: {short: []byte{0x7a}, long: []byte{0x0f, 0x8a}},
	// https://www.felixcloutier.com/x86/jmp
	JMP: {short: []byte{0xeb}, long: []byte{0xe9}},
}
|
|
|
|
// ResolveForwardRelativeJumps patches the offset immediates of all forward
// relative jumps that recorded `target` as their destination (via
// target.JumpOrigins). It is called once the target instruction's final
// binary offset is known. If a jump that was assumed short turns out to need
// a 32-bit offset, the assembler is flagged for another assembly pass instead
// of patching in place.
func (a *AssemblerImpl) ResolveForwardRelativeJumps(target *NodeImpl) (err error) {
	offsetInBinary := int64(target.OffsetInBinary())
	for origin := range target.JumpOrigins {
		shortJump := origin.isForwardShortJump()
		op := relativeJumpOpcodes[origin.Instruction]
		instructionLen := op.instructionLen(shortJump)

		// Calculate the offset from the EIP (at the time of executing this jump instruction)
		// to the target instruction. This value is always >= 0 as here we only handle forward jumps.
		offset := offsetInBinary - (int64(origin.OffsetInBinary()) + instructionLen)
		if shortJump {
			if offset > math.MaxInt8 {
				// The offset doesn't fit in the signed 8-bit immediate of a short jump.
				// This forces reassemble in the outer loop inside of AssemblerImpl.Assemble().
				a.ForceReAssemble = true
				// From the next reAssemble phases, this forward jump will be encoded long jump and
				// allocate 32-bit offset bytes by default. This means that this `origin` node
				// will always enter the "long jump offset encoding" block below
				origin.Flag ^= NodeFlagShortForwardJump
			} else {
				// Patch the single offset byte at the end of the short-jump encoding.
				a.Buf.Bytes()[origin.OffsetInBinary()+uint64(instructionLen)-1] = byte(offset)
			}
		} else { // long jump offset encoding.
			if offset > math.MaxInt32 {
				return fmt.Errorf("too large jump offset %d for encoding %s", offset, InstructionName(origin.Instruction))
			}
			// Patch the 4 offset bytes at the end of the long-jump encoding.
			binary.LittleEndian.PutUint32(a.Buf.Bytes()[origin.OffsetInBinary()+uint64(instructionLen)-4:], uint32(offset))
		}
	}
	return nil
}
|
|
|
|
// EncodeRelativeJump encodes a relative jump (conditional or unconditional)
// to n.JumpTarget. Backward jumps are encoded with their final offset
// immediately; forward jumps are emitted with a zero offset placeholder and
// registered on the target's JumpOrigins set to be patched later by
// ResolveForwardRelativeJumps.
func (a *AssemblerImpl) EncodeRelativeJump(n *NodeImpl) (err error) {
	if n.JumpTarget == nil {
		err = fmt.Errorf("jump target must not be nil for relative %s", InstructionName(n.Instruction))
		return
	}

	op, ok := relativeJumpOpcodes[n.Instruction]
	if !ok {
		return errorEncodingUnsupported(n)
	}

	var isShortJump bool
	// offsetOfEIP means the offset of EIP register at the time of executing this jump instruction.
	// Relative jump instructions can be encoded with the signed 8-bit or 32-bit integer offsets from the EIP.
	var offsetOfEIP int64 = 0 // We set zero and resolve later once the target instruction is encoded for forward jumps
	if n.isBackwardJump() {
		// If this is the backward jump, we can calculate the exact offset now.
		offsetOfJumpInstruction := int64(n.JumpTarget.OffsetInBinary()) - int64(n.OffsetInBinary())
		// -2 accounts for the smallest possible encoding of this jump
		// (1-byte opcode + 1-byte offset) when testing whether the
		// destination is reachable with a signed 8-bit offset.
		isShortJump = offsetOfJumpInstruction-2 >= math.MinInt8
		offsetOfEIP = offsetOfJumpInstruction - op.instructionLen(isShortJump)
	} else {
		// For forward jumps, we resolve the offset when we Encode the target node. See AssemblerImpl.ResolveForwardRelativeJumps.
		n.JumpTarget.JumpOrigins[n] = struct{}{}
		isShortJump = n.isForwardShortJump()
	}

	if offsetOfEIP < math.MinInt32 { // offsetOfEIP is always <= 0 as we don't calculate it for forward jump here.
		return fmt.Errorf("too large jump offset %d for encoding %s", offsetOfEIP, InstructionName(n.Instruction))
	}

	if isShortJump {
		a.Buf.Write(op.short)
		a.WriteConst(offsetOfEIP, 8)
	} else {
		a.Buf.Write(op.long)
		a.WriteConst(offsetOfEIP, 32)
	}
	return
}
|
|
|
|
func (a *AssemblerImpl) EncodeRegisterToNone(n *NodeImpl) (err error) {
|
|
regBits, prefix, err := register3bits(n.SrcReg, registerSpecifierPositionModRMFieldRM)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
|
|
modRM := 0b11_000_000 | // Specifying that opeand is register.
|
|
regBits
|
|
|
|
var opcode byte
|
|
switch n.Instruction {
|
|
case DIVL:
|
|
// https://www.felixcloutier.com/x86/div
|
|
modRM |= 0b00_110_000
|
|
opcode = 0xf7
|
|
case DIVQ:
|
|
// https://www.felixcloutier.com/x86/div
|
|
prefix |= RexPrefixW
|
|
modRM |= 0b00_110_000
|
|
opcode = 0xf7
|
|
case IDIVL:
|
|
// https://www.felixcloutier.com/x86/idiv
|
|
modRM |= 0b00_111_000
|
|
opcode = 0xf7
|
|
case IDIVQ:
|
|
// https://www.felixcloutier.com/x86/idiv
|
|
prefix |= RexPrefixW
|
|
modRM |= 0b00_111_000
|
|
opcode = 0xf7
|
|
case MULL:
|
|
// https://www.felixcloutier.com/x86/mul
|
|
modRM |= 0b00_100_000
|
|
opcode = 0xf7
|
|
case MULQ:
|
|
// https://www.felixcloutier.com/x86/mul
|
|
prefix |= RexPrefixW
|
|
modRM |= 0b00_100_000
|
|
opcode = 0xf7
|
|
default:
|
|
err = errorEncodingUnsupported(n)
|
|
}
|
|
|
|
if prefix != RexPrefixNone {
|
|
a.Buf.WriteByte(prefix)
|
|
}
|
|
|
|
a.Buf.Write([]byte{opcode, modRM})
|
|
return
|
|
}
|
|
|
|
// registerToRegisterOpcode maps instructions to the information needed to
// encode their register-to-register form.
var registerToRegisterOpcode = map[asm.Instruction]struct {
	// opcode is the instruction's opcode byte sequence.
	opcode []byte
	// rPrefix is OR-ed into the REX prefix (e.g. RexPrefixW for 64-bit operands).
	rPrefix RexPrefix
	// mandatoryPrefix, when non-zero, is written before the REX prefix.
	mandatoryPrefix byte
	// srcOnModRMReg is passed to GetRegisterToRegisterModRM to select which
	// operand goes in the ModRM.reg field.
	srcOnModRMReg bool
	// isSrc8bit marks instructions whose source operand is an 8-bit register
	// (these need RexPrefixDefault for SP/BP/SI/DI).
	isSrc8bit bool
	// needMode appends an 8-bit immediate taken from NodeImpl.Mode
	// (e.g. the rounding mode for ROUNDSS/ROUNDSD).
	needMode bool
	// requireSrcFloat / requireDstFloat constrain the operands to
	// floating-point (XMM) or integer registers respectively.
	requireSrcFloat, requireDstFloat bool
}{
	// https://www.felixcloutier.com/x86/add
	ADDL: {opcode: []byte{0x1}, srcOnModRMReg: true},
	ADDQ: {opcode: []byte{0x1}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/and
	ANDL: {opcode: []byte{0x21}, srcOnModRMReg: true},
	ANDQ: {opcode: []byte{0x21}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/cmp
	CMPL: {opcode: []byte{0x39}},
	CMPQ: {opcode: []byte{0x39}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/cmovcc
	CMOVQCS: {opcode: []byte{0x0f, 0x42}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/addsd
	ADDSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x58}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/addss
	ADDSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x58}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/andpd
	ANDPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x54}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/andps
	ANDPS: {opcode: []byte{0x0f, 0x54}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/bsr
	BSRL: {opcode: []byte{0xf, 0xbd}},
	BSRQ: {opcode: []byte{0xf, 0xbd}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/comisd
	COMISD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x2f}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/comiss
	COMISS: {opcode: []byte{0x0f, 0x2f}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsd2ss
	CVTSD2SS: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5a}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsi2sd
	CVTSL2SD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2a}, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsi2sd
	CVTSQ2SD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2a}, rPrefix: RexPrefixW, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsi2ss
	CVTSL2SS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2a}, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtsi2ss
	CVTSQ2SS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2a}, rPrefix: RexPrefixW, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvtss2sd
	CVTSS2SD: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5a}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/cvttsd2si
	CVTTSD2SL: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2c}, requireSrcFloat: true},
	CVTTSD2SQ: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2c}, rPrefix: RexPrefixW, requireSrcFloat: true},
	// https://www.felixcloutier.com/x86/cvttss2si
	CVTTSS2SL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2c}, requireSrcFloat: true},
	CVTTSS2SQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2c}, rPrefix: RexPrefixW, requireSrcFloat: true},
	// https://www.felixcloutier.com/x86/divsd
	DIVSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5e}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/divss
	DIVSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5e}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/lzcnt
	LZCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbd}},
	LZCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbd}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/maxsd
	MAXSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5f}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/maxss
	MAXSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5f}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/minsd
	MINSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5d}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/minss
	MINSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5d}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVBLSX: {opcode: []byte{0x0f, 0xbe}, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movzx
	MOVBLZX: {opcode: []byte{0x0f, 0xb6}, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVBQSX: {opcode: []byte{0x0f, 0xbe}, rPrefix: RexPrefixW, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVLQSX: {opcode: []byte{0x63}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVWQSX: {opcode: []byte{0x0f, 0xbf}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVWLSX: {opcode: []byte{0x0f, 0xbf}},
	// https://www.felixcloutier.com/x86/mulss
	MULSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x59}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/mulsd
	MULSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x59}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/or
	ORL: {opcode: []byte{0x09}, srcOnModRMReg: true},
	ORQ: {opcode: []byte{0x09}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/orpd
	ORPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x56}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/orps
	ORPS: {opcode: []byte{0x0f, 0x56}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/popcnt
	POPCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xb8}},
	POPCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xb8}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/roundss
	ROUNDSS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0a}, needMode: true, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/roundsd
	ROUNDSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0b}, needMode: true, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/sqrtss
	SQRTSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x51}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/sqrtsd
	SQRTSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x51}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/sub
	SUBL: {opcode: []byte{0x29}, srcOnModRMReg: true},
	SUBQ: {opcode: []byte{0x29}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/subss
	SUBSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5c}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/subsd
	SUBSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5c}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/test
	TESTL: {opcode: []byte{0x85}, srcOnModRMReg: true},
	TESTQ: {opcode: []byte{0x85}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/tzcnt
	TZCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbc}},
	TZCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbc}, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/ucomisd
	UCOMISD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x2e}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/ucomiss
	UCOMISS: {opcode: []byte{0x0f, 0x2e}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/xor
	XORL: {opcode: []byte{0x31}, srcOnModRMReg: true},
	XORQ: {opcode: []byte{0x31}, rPrefix: RexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/xorpd
	XORPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x57}, requireSrcFloat: true, requireDstFloat: true},
	// https://www.felixcloutier.com/x86/xorps
	XORPS: {opcode: []byte{0x0f, 0x57}, requireSrcFloat: true, requireDstFloat: true},
}
|
|
|
|
// RegisterToRegisterShiftOpcode maps the shift/rotate instructions whose
// count operand is the CL register (opcode 0xd3, variant selected by the
// ModRM reg-field extension).
var RegisterToRegisterShiftOpcode = map[asm.Instruction]struct {
	// opcode is the instruction's opcode byte sequence.
	opcode []byte
	// rPrefix is OR-ed into the REX prefix (RexPrefixW for 64-bit forms).
	rPrefix RexPrefix
	// modRMExtension is the "/n" reg-field extension OR-ed into ModRM.
	modRMExtension byte
}{
	// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
	// ROL is "/0", so no extension bits are needed.
	ROLL: {opcode: []byte{0xd3}},
	ROLQ: {opcode: []byte{0xd3}, rPrefix: RexPrefixW},
	RORL: {opcode: []byte{0xd3}, modRMExtension: 0b00_001_000},
	RORQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_001_000, rPrefix: RexPrefixW},
	// https://www.felixcloutier.com/x86/sal:sar:shl:shr
	SARL: {opcode: []byte{0xd3}, modRMExtension: 0b00_111_000},
	SARQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_111_000, rPrefix: RexPrefixW},
	SHLL: {opcode: []byte{0xd3}, modRMExtension: 0b00_100_000},
	SHLQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_100_000, rPrefix: RexPrefixW},
	SHRL: {opcode: []byte{0xd3}, modRMExtension: 0b00_101_000},
	SHRQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_101_000, rPrefix: RexPrefixW},
}
|
|
|
|
// registerToRegisterMOVOpcode describes one encoding variant of a MOV
// between registers.
type registerToRegisterMOVOpcode struct {
	// opcode is the instruction's opcode byte sequence.
	opcode []byte
	// mandatoryPrefix, when non-zero, is written before the REX prefix.
	mandatoryPrefix byte
	// srcOnModRMReg selects which operand goes in the ModRM.reg field.
	srcOnModRMReg bool
	// rPrefix is OR-ed into the REX prefix (RexPrefixW for 64-bit forms).
	rPrefix RexPrefix
}
|
|
|
|
// registerToRegisterMOVOpcodes maps MOVL/MOVQ to their encodings for each
// combination of integer (i) and float/XMM (f) source and destination.
// MOVL deliberately has no f2f entry (zero value): float-to-float MOVL is
// rejected as undefined by the encoder.
var registerToRegisterMOVOpcodes = map[asm.Instruction]struct {
	i2i, i2f, f2i, f2f registerToRegisterMOVOpcode
}{
	MOVL: {
		// https://www.felixcloutier.com/x86/mov
		i2i: registerToRegisterMOVOpcode{opcode: []byte{0x89}, srcOnModRMReg: true},
		// https://www.felixcloutier.com/x86/movd:movq
		i2f: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x6e}, mandatoryPrefix: 0x66, srcOnModRMReg: false},
		f2i: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x7e}, mandatoryPrefix: 0x66, srcOnModRMReg: true},
	},
	MOVQ: {
		// https://www.felixcloutier.com/x86/mov
		i2i: registerToRegisterMOVOpcode{opcode: []byte{0x89}, srcOnModRMReg: true, rPrefix: RexPrefixW},
		// https://www.felixcloutier.com/x86/movd:movq
		i2f: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x6e}, mandatoryPrefix: 0x66, srcOnModRMReg: false, rPrefix: RexPrefixW},
		f2i: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x7e}, mandatoryPrefix: 0x66, srcOnModRMReg: true, rPrefix: RexPrefixW},
		// https://www.felixcloutier.com/x86/movq
		f2f: registerToRegisterMOVOpcode{opcode: []byte{0x0f, 0x7e}, mandatoryPrefix: 0xf3},
	},
}
|
|
|
|
func (a *AssemblerImpl) EncodeRegisterToRegister(n *NodeImpl) (err error) {
|
|
// Alias for readability
|
|
inst := n.Instruction
|
|
|
|
if op, ok := registerToRegisterMOVOpcodes[inst]; ok {
|
|
var opcode registerToRegisterMOVOpcode
|
|
srcIsFloat, dstIsFloat := IsFloatRegister(n.SrcReg), IsFloatRegister(n.DstReg)
|
|
if srcIsFloat && dstIsFloat {
|
|
if inst == MOVL {
|
|
return errors.New("MOVL for float to float is undefined")
|
|
}
|
|
opcode = op.f2f
|
|
} else if srcIsFloat && !dstIsFloat {
|
|
opcode = op.f2i
|
|
} else if !srcIsFloat && dstIsFloat {
|
|
opcode = op.i2f
|
|
} else {
|
|
opcode = op.i2i
|
|
}
|
|
|
|
RexPrefix, modRM, err := n.GetRegisterToRegisterModRM(opcode.srcOnModRMReg)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
RexPrefix |= opcode.rPrefix
|
|
|
|
if opcode.mandatoryPrefix != 0 {
|
|
a.Buf.WriteByte(opcode.mandatoryPrefix)
|
|
}
|
|
|
|
if RexPrefix != RexPrefixNone {
|
|
a.Buf.WriteByte(RexPrefix)
|
|
}
|
|
a.Buf.Write(opcode.opcode)
|
|
|
|
a.Buf.WriteByte(modRM)
|
|
return nil
|
|
} else if op, ok := registerToRegisterOpcode[inst]; ok {
|
|
srcIsFloat, dstIsFloat := IsFloatRegister(n.SrcReg), IsFloatRegister(n.DstReg)
|
|
if op.requireSrcFloat && !srcIsFloat {
|
|
return fmt.Errorf("%s require float src register but got %s", InstructionName(inst), RegisterName(n.SrcReg))
|
|
} else if op.requireDstFloat && !dstIsFloat {
|
|
return fmt.Errorf("%s require float dst register but got %s", InstructionName(inst), RegisterName(n.DstReg))
|
|
} else if !op.requireSrcFloat && srcIsFloat {
|
|
return fmt.Errorf("%s require integer src register but got %s", InstructionName(inst), RegisterName(n.SrcReg))
|
|
} else if !op.requireDstFloat && dstIsFloat {
|
|
return fmt.Errorf("%s require integer dst register but got %s", InstructionName(inst), RegisterName(n.DstReg))
|
|
}
|
|
|
|
RexPrefix, modRM, err := n.GetRegisterToRegisterModRM(op.srcOnModRMReg)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
RexPrefix |= op.rPrefix
|
|
|
|
if op.isSrc8bit && REG_SP <= n.SrcReg && n.SrcReg <= REG_DI {
|
|
// If an operand register is 8-bit length of SP, BP, DI, or SI register, we need to have the default prefix.
|
|
// https: //wiki.osdev.org/X86-64_Instruction_Encoding#Registers
|
|
RexPrefix |= RexPrefixDefault
|
|
}
|
|
|
|
if op.mandatoryPrefix != 0 {
|
|
a.Buf.WriteByte(op.mandatoryPrefix)
|
|
}
|
|
|
|
if RexPrefix != RexPrefixNone {
|
|
a.Buf.WriteByte(RexPrefix)
|
|
}
|
|
a.Buf.Write(op.opcode)
|
|
|
|
a.Buf.WriteByte(modRM)
|
|
|
|
if op.needMode {
|
|
a.WriteConst(int64(n.Mode), 8)
|
|
}
|
|
return nil
|
|
} else if op, ok := RegisterToRegisterShiftOpcode[inst]; ok {
|
|
if n.SrcReg != REG_CX {
|
|
return fmt.Errorf("shifting instruction %s require CX register as src but got %s", InstructionName(inst), RegisterName(n.SrcReg))
|
|
} else if IsFloatRegister(n.DstReg) {
|
|
return fmt.Errorf("shifting instruction %s require integer register as dst but got %s", InstructionName(inst), RegisterName(n.SrcReg))
|
|
}
|
|
|
|
reg3bits, RexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
RexPrefix |= op.rPrefix
|
|
if RexPrefix != RexPrefixNone {
|
|
a.Buf.WriteByte(RexPrefix)
|
|
}
|
|
|
|
// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
|
|
modRM := 0b11_000_000 |
|
|
(op.modRMExtension) |
|
|
reg3bits
|
|
a.Buf.Write(append(op.opcode, modRM))
|
|
return nil
|
|
} else {
|
|
return errorEncodingUnsupported(n)
|
|
}
|
|
}
|
|
|
|
// EncodeRegisterToMemory encodes n as an instruction whose source is a
// register and whose destination is a memory location (described by
// GetMemoryLocation). For non-shift instructions the source register is
// placed in ModRM.reg; shift/rotate instructions instead require CX as the
// (implicit CL count) source and leave ModRM.reg as the "/n" extension.
func (a *AssemblerImpl) EncodeRegisterToMemory(n *NodeImpl) (err error) {
	RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
	if err != nil {
		return err
	}

	var opcode []byte
	var mandatoryPrefix byte
	var isShiftInstruction bool
	switch n.Instruction {
	case CMPL:
		// https://www.felixcloutier.com/x86/cmp
		opcode = []byte{0x3b}
	case CMPQ:
		// https://www.felixcloutier.com/x86/cmp
		RexPrefix |= RexPrefixW
		opcode = []byte{0x3b}
	case MOVB:
		// https://www.felixcloutier.com/x86/mov
		opcode = []byte{0x88}
	case MOVL:
		if IsFloatRegister(n.SrcReg) {
			// https://www.felixcloutier.com/x86/movd:movq
			opcode = []byte{0x0f, 0x7e}
			mandatoryPrefix = 0x66
		} else {
			// https://www.felixcloutier.com/x86/mov
			opcode = []byte{0x89}
		}
	case MOVQ:
		if IsFloatRegister(n.SrcReg) {
			// https://www.felixcloutier.com/x86/movq
			opcode = []byte{0x0f, 0xd6}
			mandatoryPrefix = 0x66
		} else {
			// https://www.felixcloutier.com/x86/mov
			RexPrefix |= RexPrefixW
			opcode = []byte{0x89}
		}
	case MOVW:
		// https://www.felixcloutier.com/x86/mov
		// Note: Need 0x66 to indicate that the operand size is 16-bit.
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Operand-size_and_address-size_override_prefix
		mandatoryPrefix = 0x66
		opcode = []byte{0x89}
	case SARL:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		modRM |= 0b00_111_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SARQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM |= 0b00_111_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHLL:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		modRM |= 0b00_100_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHLQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM |= 0b00_100_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHRL:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		modRM |= 0b00_101_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHRQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM |= 0b00_101_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case ROLL:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		// ROL is "/0": no ModRM extension bits needed.
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case ROLQ:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		RexPrefix |= RexPrefixW
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case RORL:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		modRM |= 0b00_001_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case RORQ:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		RexPrefix |= RexPrefixW
		opcode = []byte{0xd3}
		modRM |= 0b00_001_000
		isShiftInstruction = true
	default:
		return errorEncodingUnsupported(n)
	}

	if !isShiftInstruction {
		// Place the source register in ModRM.reg.
		srcReg3Bits, prefix, err := register3bits(n.SrcReg, registerSpecifierPositionModRMFieldReg)
		if err != nil {
			return err
		}

		RexPrefix |= prefix
		modRM |= (srcReg3Bits << 3) // Place the source register on ModRM:reg
	} else {
		// Shifts take their count implicitly in CL, so ModRM.reg stays
		// the "/n" extension and the source must be CX.
		if n.SrcReg != REG_CX {
			return fmt.Errorf("shifting instruction %s require CX register as src but got %s", InstructionName(n.Instruction), RegisterName(n.SrcReg))
		}
	}

	if mandatoryPrefix != 0 {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Mandatory_prefix
		a.Buf.WriteByte(mandatoryPrefix)
	}

	if RexPrefix != RexPrefixNone {
		a.Buf.WriteByte(RexPrefix)
	}
	a.Buf.Write(opcode)

	a.Buf.WriteByte(modRM)

	// SIB byte, if the addressing mode requires one.
	if sbi != nil {
		a.Buf.WriteByte(*sbi)
	}

	// Displacement immediate, if any.
	if displacementWidth != 0 {
		a.WriteConst(n.DstConst, displacementWidth)
	}
	return
}
|
|
|
|
func (a *AssemblerImpl) EncodeRegisterToConst(n *NodeImpl) (err error) {
|
|
regBits, prefix, err := register3bits(n.SrcReg, registerSpecifierPositionModRMFieldRM)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
switch n.Instruction {
|
|
case CMPL, CMPQ:
|
|
if n.Instruction == CMPQ {
|
|
prefix |= RexPrefixW
|
|
}
|
|
if prefix != RexPrefixNone {
|
|
a.Buf.WriteByte(prefix)
|
|
}
|
|
is8bitConst := fitInSigned8bit(n.DstConst)
|
|
// https://www.felixcloutier.com/x86/cmp
|
|
if n.SrcReg == REG_AX && !is8bitConst {
|
|
a.Buf.Write([]byte{0x3d})
|
|
} else {
|
|
// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
|
|
modRM := 0b11_000_000 | // Specifying that opeand is register.
|
|
0b00_111_000 | // CMP with immediate needs "/7" extension.
|
|
regBits
|
|
if is8bitConst {
|
|
a.Buf.Write([]byte{0x83, modRM})
|
|
} else {
|
|
a.Buf.Write([]byte{0x81, modRM})
|
|
}
|
|
}
|
|
default:
|
|
err = errorEncodingUnsupported(n)
|
|
}
|
|
|
|
if fitInSigned8bit(n.DstConst) {
|
|
a.WriteConst(n.DstConst, 8)
|
|
} else {
|
|
a.WriteConst(n.DstConst, 32)
|
|
}
|
|
return
|
|
}
|
|
|
|
// encodeReadInstructionAddress emits "LEAQ [RIP + disp32], DstReg" with a
// zero displacement placeholder, and registers a post-generation callback
// that patches the displacement so DstReg receives the absolute address of
// the instruction immediately following the first node (at or after n) whose
// Instruction matches n.readInstructionAddressBeforeTargetInstruction.
func (a *AssemblerImpl) encodeReadInstructionAddress(n *NodeImpl) error {
	dstReg3Bits, RexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldReg)
	if err != nil {
		return err
	}

	a.AddOnGenerateCallBack(func(code []byte) error {
		// Find the target instruction node.
		targetNode := n
		for ; targetNode != nil; targetNode = targetNode.Next {
			if targetNode.Instruction == n.readInstructionAddressBeforeTargetInstruction {
				// The address we want is of the instruction *after* the match.
				targetNode = targetNode.Next
				break
			}
		}

		if targetNode == nil {
			return errors.New("BUG: target instruction not found for read instruction address")
		}

		// RIP-relative displacement: target minus the end of this LEAQ.
		// Note the arithmetic is in unsigned offsets; a target placed before
		// this instruction would wrap around and be caught by the size check.
		offset := targetNode.OffsetInBinary() - (n.OffsetInBinary() + 7 /* 7 = the length of the LEAQ instruction */)
		if offset >= math.MaxInt32 {
			return errors.New("BUG: too large offset for LEAQ instruction")
		}

		// Patch the 4 displacement bytes (after REX + opcode + ModRM).
		binary.LittleEndian.PutUint32(code[n.OffsetInBinary()+3:], uint32(int32(offset)))
		return nil
	})

	// https://www.felixcloutier.com/x86/lea
	opcode := byte(0x8d)
	RexPrefix |= RexPrefixW

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#64-bit_addressing
	modRM := 0b00_000_101 | // Indicate "LEAQ [RIP + 32bit displacement], DstReg" encoding.
		(dstReg3Bits << 3) // Place the DstReg on ModRM:reg.

	a.Buf.Write([]byte{RexPrefix, opcode, modRM})
	a.WriteConst(int64(0), 32) // Preserve space for the displacement patched by the callback above.
	return nil
}
|
|
|
|
// EncodeMemoryToRegister encodes n as an instruction whose source is a memory
// location (described by GetMemoryLocation, displacement in n.SrcConst) and
// whose destination is a register placed in ModRM.reg. LEAQ with a pending
// readInstructionAddress request is delegated to encodeReadInstructionAddress.
func (a *AssemblerImpl) EncodeMemoryToRegister(n *NodeImpl) (err error) {
	if n.Instruction == LEAQ && n.readInstructionAddressBeforeTargetInstruction != NONE {
		return a.encodeReadInstructionAddress(n)
	}

	RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
	if err != nil {
		return err
	}

	dstReg3Bits, prefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldReg)
	if err != nil {
		return err
	}

	RexPrefix |= prefix
	modRM |= (dstReg3Bits << 3) // Place the destination register on ModRM:reg

	var mandatoryPrefix byte
	var opcode []byte
	switch n.Instruction {
	case ADDL:
		// https://www.felixcloutier.com/x86/add
		opcode = []byte{0x03}
	case ADDQ:
		// https://www.felixcloutier.com/x86/add
		RexPrefix |= RexPrefixW
		opcode = []byte{0x03}
	case CMPL:
		// https://www.felixcloutier.com/x86/cmp
		// NOTE(review): 0x39 encodes "CMP r/m, reg" (register on the right),
		// while EncodeRegisterToMemory uses 0x3b ("CMP reg, r/m") — verify
		// the operand-order convention intended by callers.
		opcode = []byte{0x39}
	case CMPQ:
		// https://www.felixcloutier.com/x86/cmp
		RexPrefix |= RexPrefixW
		opcode = []byte{0x39}
	case LEAQ:
		// https://www.felixcloutier.com/x86/lea
		RexPrefix |= RexPrefixW
		opcode = []byte{0x8d}
	case MOVBLSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		opcode = []byte{0x0f, 0xbe}
	case MOVBLZX:
		// https://www.felixcloutier.com/x86/movzx
		opcode = []byte{0x0f, 0xb6}
	case MOVBQSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		RexPrefix |= RexPrefixW
		opcode = []byte{0x0f, 0xbe}
	case MOVBQZX:
		// https://www.felixcloutier.com/x86/movzx
		RexPrefix |= RexPrefixW
		opcode = []byte{0x0f, 0xb6}
	case MOVLQSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		RexPrefix |= RexPrefixW
		opcode = []byte{0x63}
	case MOVLQZX:
		// https://www.felixcloutier.com/x86/mov
		// Note: MOVLQZX means zero extending 32bit reg to 64-bit reg and
		// that is semantically equivalent to MOV 32bit to 32bit.
		opcode = []byte{0x8B}
	case MOVL:
		if IsFloatRegister(n.DstReg) {
			// https://www.felixcloutier.com/x86/movd:movq
			opcode = []byte{0x0f, 0x6e}
			mandatoryPrefix = 0x66
		} else {
			// https://www.felixcloutier.com/x86/mov
			opcode = []byte{0x8B}
		}
	case MOVQ:
		if IsFloatRegister(n.DstReg) {
			// https://www.felixcloutier.com/x86/movq
			opcode = []byte{0x0f, 0x7e}
			mandatoryPrefix = 0xf3
		} else {
			// https://www.felixcloutier.com/x86/mov
			RexPrefix |= RexPrefixW
			opcode = []byte{0x8B}
		}
	case MOVWLSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		opcode = []byte{0x0f, 0xbf}
	case MOVWLZX:
		// https://www.felixcloutier.com/x86/movzx
		opcode = []byte{0x0f, 0xb7}
	case MOVWQSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		RexPrefix |= RexPrefixW
		opcode = []byte{0x0f, 0xbf}
	case MOVWQZX:
		// https://www.felixcloutier.com/x86/movzx
		RexPrefix |= RexPrefixW
		opcode = []byte{0x0f, 0xb7}
	case SUBQ:
		// https://www.felixcloutier.com/x86/sub
		RexPrefix |= RexPrefixW
		opcode = []byte{0x2b}
	case SUBSD:
		// https://www.felixcloutier.com/x86/subsd
		opcode = []byte{0x0f, 0x5c}
		mandatoryPrefix = 0xf2
	case SUBSS:
		// https://www.felixcloutier.com/x86/subss
		opcode = []byte{0x0f, 0x5c}
		mandatoryPrefix = 0xf3
	case UCOMISD:
		// https://www.felixcloutier.com/x86/ucomisd
		opcode = []byte{0x0f, 0x2e}
		mandatoryPrefix = 0x66
	case UCOMISS:
		// https://www.felixcloutier.com/x86/ucomiss
		opcode = []byte{0x0f, 0x2e}
	default:
		return errorEncodingUnsupported(n)
	}

	if mandatoryPrefix != 0 {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Mandatory_prefix
		a.Buf.WriteByte(mandatoryPrefix)
	}

	if RexPrefix != RexPrefixNone {
		a.Buf.WriteByte(RexPrefix)
	}

	a.Buf.Write(opcode)

	a.Buf.WriteByte(modRM)

	// SIB byte, if the addressing mode requires one.
	if sbi != nil {
		a.Buf.WriteByte(*sbi)
	}

	// Displacement immediate, if any.
	if displacementWidth != 0 {
		a.WriteConst(n.SrcConst, displacementWidth)
	}

	return
}
|
|
|
|
// EncodeConstToRegister encodes an instruction whose source operand is the
// immediate n.SrcConst and whose destination is the register n.DstReg,
// appending the machine code to a.Buf.
//
// It returns an error if the destination register class doesn't match the
// instruction, the immediate is out of range for it, or the instruction is
// not supported by this encoder.
func (a *AssemblerImpl) EncodeConstToRegister(n *NodeImpl) (err error) {
	// The destination register lands in ModRM:r/m; extended registers
	// (R8-R15 / X8-X15) additionally require a REX extension bit.
	regBits, RexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldRM)
	if err != nil {
		return err
	}

	// The packed shifts operate on XMM registers; every other instruction
	// handled below expects a general-purpose (integer) register.
	isFloatReg := IsFloatRegister(n.DstReg)
	switch n.Instruction {
	case PSLLL, PSLLQ, PSRLL, PSRLQ:
		if !isFloatReg {
			return fmt.Errorf("%s needs float register but got %s", InstructionName(n.Instruction), RegisterName(n.DstReg))
		}
	default:
		if isFloatReg {
			return fmt.Errorf("%s needs int register but got %s", InstructionName(n.Instruction), RegisterName(n.DstReg))
		}
	}

	// Validate the immediate range per instruction: only MOVQ accepts a full
	// 64-bit constant, and the shift instructions take an 8-bit count.
	if n.Instruction != MOVQ && !FitIn32bit(n.SrcConst) {
		return fmt.Errorf("constant must fit in 32-bit integer for %s, but got %d", InstructionName(n.Instruction), n.SrcConst)
	} else if (n.Instruction == SHLQ || n.Instruction == SHRQ) && (n.SrcConst < 0 || n.SrcConst > math.MaxUint8) {
		return fmt.Errorf("constant must fit in positive 8-bit integer for %s, but got %d", InstructionName(n.Instruction), n.SrcConst)
	} else if (n.Instruction == PSLLL ||
		n.Instruction == PSLLQ ||
		n.Instruction == PSRLL ||
		n.Instruction == PSRLQ) && (n.SrcConst < math.MinInt8 || n.SrcConst > math.MaxInt8) {
		return fmt.Errorf("constant must fit in signed 8-bit integer for %s, but got %d", InstructionName(n.Instruction), n.SrcConst)
	}

	// Immediates that fit in a signed byte get the shorter imm8 form
	// (opcode 0x83 rather than 0x81) where one exists.
	isSigned8bitConst := fitInSigned8bit(n.SrcConst)
	switch inst := n.Instruction; inst {
	case ADDQ:
		// https://www.felixcloutier.com/x86/add
		RexPrefix |= RexPrefixW // 64-bit operand size.
		if n.DstReg == REG_AX && !isSigned8bitConst {
			// Short form ADD RAX, imm32 (opcode 0x05): no ModRM byte needed.
			a.Buf.Write([]byte{RexPrefix, 0x05})
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				regBits
			if isSigned8bitConst {
				a.Buf.Write([]byte{RexPrefix, 0x83, modRM})
			} else {
				a.Buf.Write([]byte{RexPrefix, 0x81, modRM})
			}
		}
		if isSigned8bitConst {
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.WriteConst(n.SrcConst, 32)
		}
	case ANDQ:
		// https://www.felixcloutier.com/x86/and
		RexPrefix |= RexPrefixW // 64-bit operand size.
		if n.DstReg == REG_AX && !isSigned8bitConst {
			// Short form AND RAX, imm32 (opcode 0x25): no ModRM byte needed.
			a.Buf.Write([]byte{RexPrefix, 0x25})
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				0b00_100_000 | // AND with immediate needs "/4" extension.
				regBits
			if isSigned8bitConst {
				a.Buf.Write([]byte{RexPrefix, 0x83, modRM})
			} else {
				a.Buf.Write([]byte{RexPrefix, 0x81, modRM})
			}
		}
		if fitInSigned8bit(n.SrcConst) {
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.WriteConst(n.SrcConst, 32)
		}
	case MOVL:
		// https://www.felixcloutier.com/x86/mov
		if RexPrefix != RexPrefixNone {
			a.Buf.WriteByte(RexPrefix)
		}
		// MOV r32, imm32 (0xb8+r): the register is encoded in the opcode byte.
		a.Buf.Write([]byte{0xb8 | regBits})
		a.WriteConst(n.SrcConst, 32)
	case MOVQ:
		// https://www.felixcloutier.com/x86/mov
		if FitIn32bit(n.SrcConst) {
			if n.SrcConst > math.MaxInt32 {
				// Fits in 32 bits but not as a signed int32: use MOV r32, imm32
				// (0xb8+r), which zero-extends into the 64-bit register, so no
				// REX.W is needed.
				if RexPrefix != RexPrefixNone {
					a.Buf.WriteByte(RexPrefix)
				}
				a.Buf.Write([]byte{0xb8 | regBits})
			} else {
				// MOV r/m64, imm32 (REX.W + 0xc7 /0): sign-extends the immediate.
				RexPrefix |= RexPrefixW
				modRM := 0b11_000_000 | // Specifying that operand is register.
					regBits
				a.Buf.Write([]byte{RexPrefix, 0xc7, modRM})
			}
			a.WriteConst(n.SrcConst, 32)
		} else {
			// Full 64-bit immediate: MOV r64, imm64 (REX.W + 0xb8+r).
			RexPrefix |= RexPrefixW
			a.Buf.Write([]byte{RexPrefix, 0xb8 | regBits})
			a.WriteConst(n.SrcConst, 64)
		}
	case SHLQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_100_000 | // SHL with immediate needs "/4" extension.
			regBits
		if n.SrcConst == 1 {
			// Shift-by-one has a dedicated opcode (0xd1) without an immediate.
			a.Buf.Write([]byte{RexPrefix, 0xd1, modRM})
		} else {
			a.Buf.Write([]byte{RexPrefix, 0xc1, modRM})
			a.WriteConst(n.SrcConst, 8)
		}
	case SHRQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		RexPrefix |= RexPrefixW
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_101_000 | // SHR with immediate needs "/5" extension.
			regBits
		if n.SrcConst == 1 {
			// Shift-by-one has a dedicated opcode (0xd1) without an immediate.
			a.Buf.Write([]byte{RexPrefix, 0xd1, modRM})
		} else {
			a.Buf.Write([]byte{RexPrefix, 0xc1, modRM})
			a.WriteConst(n.SrcConst, 8)
		}
	case PSLLL:
		// https://www.felixcloutier.com/x86/psllw:pslld:psllq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_110_000 | // PSLL with immediate needs "/6" extension.
			regBits
		if RexPrefix != RexPrefixNone {
			a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x72, modRM})
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.Buf.Write([]byte{0x66, 0x0f, 0x72, modRM})
			a.WriteConst(n.SrcConst, 8)
		}
	case PSLLQ:
		// https://www.felixcloutier.com/x86/psllw:pslld:psllq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_110_000 | // PSLL with immediate needs "/6" extension.
			regBits
		if RexPrefix != RexPrefixNone {
			a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x73, modRM})
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.Buf.Write([]byte{0x66, 0x0f, 0x73, modRM})
			a.WriteConst(n.SrcConst, 8)
		}
	case PSRLL:
		// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_010_000 | // PSRL with immediate needs "/2" extension.
			regBits
		if RexPrefix != RexPrefixNone {
			a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x72, modRM})
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.Buf.Write([]byte{0x66, 0x0f, 0x72, modRM})
			a.WriteConst(n.SrcConst, 8)
		}
	case PSRLQ:
		// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_010_000 | // PSRL with immediate needs "/2" extension.
			regBits
		if RexPrefix != RexPrefixNone {
			a.Buf.Write([]byte{0x66, RexPrefix, 0x0f, 0x73, modRM})
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.Buf.Write([]byte{0x66, 0x0f, 0x73, modRM})
			a.WriteConst(n.SrcConst, 8)
		}
	case XORL, XORQ:
		// https://www.felixcloutier.com/x86/xor
		if inst == XORQ {
			RexPrefix |= RexPrefixW // 64-bit operand size only for XORQ.
		}
		if RexPrefix != RexPrefixNone {
			a.Buf.WriteByte(RexPrefix)
		}
		if n.DstReg == REG_AX && !isSigned8bitConst {
			// Short form XOR EAX/RAX, imm32 (opcode 0x35): no ModRM byte needed.
			a.Buf.Write([]byte{0x35})
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				0b00_110_000 | // XOR with immediate needs "/6" extension.
				regBits
			if isSigned8bitConst {
				a.Buf.Write([]byte{0x83, modRM})
			} else {
				a.Buf.Write([]byte{0x81, modRM})
			}
		}
		if fitInSigned8bit(n.SrcConst) {
			a.WriteConst(n.SrcConst, 8)
		} else {
			a.WriteConst(n.SrcConst, 32)
		}
	default:
		err = errorEncodingUnsupported(n)
	}
	return
}
|
|
|
|
func (a *AssemblerImpl) EncodeMemoryToConst(n *NodeImpl) (err error) {
|
|
if !FitIn32bit(n.DstConst) {
|
|
return fmt.Errorf("too large target const %d for %s", n.DstConst, InstructionName(n.Instruction))
|
|
}
|
|
|
|
RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Alias for readability.
|
|
c := n.DstConst
|
|
|
|
var opcode, constWidth byte
|
|
switch n.Instruction {
|
|
case CMPL:
|
|
// https://www.felixcloutier.com/x86/cmp
|
|
if fitInSigned8bit(c) {
|
|
opcode = 0x83
|
|
constWidth = 8
|
|
} else {
|
|
opcode = 0x81
|
|
constWidth = 32
|
|
}
|
|
modRM |= 0b00_111_000
|
|
default:
|
|
return errorEncodingUnsupported(n)
|
|
}
|
|
|
|
if RexPrefix != RexPrefixNone {
|
|
a.Buf.WriteByte(RexPrefix)
|
|
}
|
|
|
|
a.Buf.Write([]byte{opcode, modRM})
|
|
|
|
if sbi != nil {
|
|
a.Buf.WriteByte(*sbi)
|
|
}
|
|
|
|
if displacementWidth != 0 {
|
|
a.WriteConst(n.SrcConst, displacementWidth)
|
|
}
|
|
|
|
a.WriteConst(c, constWidth)
|
|
return
|
|
}
|
|
|
|
func (a *AssemblerImpl) EncodeConstToMemory(n *NodeImpl) (err error) {
|
|
RexPrefix, modRM, sbi, displacementWidth, err := n.GetMemoryLocation()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Alias for readability.
|
|
inst := n.Instruction
|
|
c := n.SrcConst
|
|
|
|
if inst == MOVB && !fitInSigned8bit(c) {
|
|
return fmt.Errorf("too large load target const %d for MOVB", c)
|
|
} else if !FitIn32bit(c) {
|
|
return fmt.Errorf("too large load target const %d for %s", c, InstructionName(n.Instruction))
|
|
}
|
|
|
|
var constWidth, opcode byte
|
|
switch inst {
|
|
case MOVB:
|
|
opcode = 0xc6
|
|
constWidth = 8
|
|
case MOVL:
|
|
opcode = 0xc7
|
|
constWidth = 32
|
|
case MOVQ:
|
|
RexPrefix |= RexPrefixW
|
|
opcode = 0xc7
|
|
constWidth = 32
|
|
default:
|
|
return errorEncodingUnsupported(n)
|
|
}
|
|
|
|
if RexPrefix != RexPrefixNone {
|
|
a.Buf.WriteByte(RexPrefix)
|
|
}
|
|
|
|
a.Buf.Write([]byte{opcode, modRM})
|
|
|
|
if sbi != nil {
|
|
a.Buf.WriteByte(*sbi)
|
|
}
|
|
|
|
if displacementWidth != 0 {
|
|
a.WriteConst(n.DstConst, displacementWidth)
|
|
}
|
|
|
|
a.WriteConst(c, constWidth)
|
|
return
|
|
}
|
|
|
|
func (a *AssemblerImpl) WriteConst(v int64, length byte) {
|
|
switch length {
|
|
case 8:
|
|
a.Buf.WriteByte(byte(int8(v)))
|
|
case 32:
|
|
// TODO: any way to directly put little endian bytes into bytes.Buffer?
|
|
offsetBytes := make([]byte, 4)
|
|
binary.LittleEndian.PutUint32(offsetBytes, uint32(int32(v)))
|
|
a.Buf.Write(offsetBytes)
|
|
case 64:
|
|
// TODO: any way to directly put little endian bytes into bytes.Buffer?
|
|
offsetBytes := make([]byte, 8)
|
|
binary.LittleEndian.PutUint64(offsetBytes, uint64(v))
|
|
a.Buf.Write(offsetBytes)
|
|
default:
|
|
panic("BUG: length must be one of 8, 32 or 64")
|
|
}
|
|
}
|
|
|
|
// GetMemoryLocation resolves this node's memory operand (whichever of dst/src
// has OperandTypeMemory) into the pieces needed to encode it:
//
//	p                 - REX prefix bits required by the base/index registers.
//	modRM             - ModRM byte with Mod and R/M filled in; the Reg field
//	                    is left zero for the caller to OR its extension into.
//	sbi               - SIB byte, or nil when no SIB byte is needed.
//	displacementWidth - 0, 8 or 32: bits of displacement the caller must emit
//	                    after the ModRM/SIB bytes.
//
// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing
func (n *NodeImpl) GetMemoryLocation() (p RexPrefix, modRM byte, sbi *byte, displacementWidth byte, err error) {
	var baseReg, indexReg asm.Register
	var offset asm.ConstantValue
	var scale byte
	if n.Types.dst == OperandTypeMemory {
		baseReg, offset, indexReg, scale = n.DstReg, n.DstConst, n.DstMemIndex, n.DstMemScale
	} else if n.Types.src == OperandTypeMemory {
		baseReg, offset, indexReg, scale = n.SrcReg, n.SrcConst, n.SrcMemIndex, n.SrcMemScale
	} else {
		err = fmt.Errorf("memory location is not supported for %s", n.Types)
		return
	}

	// Displacements are encoded in at most 32 bits.
	if !FitIn32bit(offset) {
		err = errors.New("offset does not fit in 32-bit integer")
		return
	}

	if baseReg == asm.NilRegister && indexReg != asm.NilRegister {
		// [(index*scale) + displacement] addressing is possible, but we haven't used it for now.
		err = errors.New("addressing without base register but with index is not implemented")
	} else if baseReg == asm.NilRegister {
		// Absolute [displacement] addressing: ModRM selects SIB, and SIB value
		// 0b00_100_101 means "no base, no index", always with a 32-bit displacement.
		modRM = 0b00_000_100 // Indicate that the memory location is specified by SIB.
		sbiValue := byte(0b00_100_101)
		sbi = &sbiValue
		displacementWidth = 32
	} else if indexReg == asm.NilRegister {
		// [base + displacement] addressing (no index register).
		modRM, p, err = register3bits(baseReg, registerSpecifierPositionModRMFieldRM)
		if err != nil {
			return
		}

		// Create ModR/M byte so that this instruction takes [R/M + displacement]
		// operand if displacement != 0, and otherwise [R/M].
		withoutDisplacement := offset == 0 &&
			// If the target register is R13 or BP, we have to keep [R/M + displacement]
			// even if the value is zero, since the plain [R/M] form is not defined
			// for these two registers.
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing
			baseReg != REG_R13 && baseReg != REG_BP
		if withoutDisplacement {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b00_000_000 // Specifying that operand is memory without displacement
			displacementWidth = 0
		} else if fitInSigned8bit(offset) {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b01_000_000 // Specifying that operand is memory + 8bit displacement.
			displacementWidth = 8
		} else {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b10_000_000 // Specifying that operand is memory + 32bit displacement.
			displacementWidth = 32
		}

		// For SP and R12 register, we have [SIB + displacement] if the const is non-zero, otherwise [SIB].
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing
		//
		// Therefore we emit the SIB byte before the const so that [SIB + displacement] ends up [register + displacement].
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2
		if baseReg == REG_SP || baseReg == REG_R12 {
			sbiValue := byte(0b00_100_100)
			sbi = &sbiValue
		}
	} else {
		// [base + index*scale + displacement] addressing: requires a SIB byte.
		if indexReg == REG_SP {
			err = errors.New("SP cannot be used for SIB index")
			return
		}

		modRM = 0b00_000_100 // Indicate that the memory location is specified by SIB.

		withoutDisplacement := offset == 0 &&
			// For R13 and BP, base registers cannot be encoded "without displacement" mod (i.e. 0b00 mod).
			baseReg != REG_R13 && baseReg != REG_BP
		if withoutDisplacement {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b00_000_000 // Specifying that operand is SIB without displacement
			displacementWidth = 0
		} else if fitInSigned8bit(offset) {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b01_000_000 // Specifying that operand is SIB + 8bit displacement.
			displacementWidth = 8
		} else {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b10_000_000 // Specifying that operand is SIB + 32bit displacement.
			displacementWidth = 32
		}

		var baseRegBits byte
		baseRegBits, p, err = register3bits(baseReg, registerSpecifierPositionModRMFieldRM)
		if err != nil {
			return
		}

		var indexRegBits byte
		var indexRegPrefix RexPrefix
		indexRegBits, indexRegPrefix, err = register3bits(indexReg, registerSpecifierPositionSIBIndex)
		if err != nil {
			return
		}
		p |= indexRegPrefix

		// SIB byte layout: scale(2 bits) | index(3 bits) | base(3 bits).
		sbiValue := baseRegBits | (indexRegBits << 3)
		switch scale {
		case 1:
			sbiValue |= 0b00_000_000
		case 2:
			sbiValue |= 0b01_000_000
		case 4:
			sbiValue |= 0b10_000_000
		case 8:
			sbiValue |= 0b11_000_000
		default:
			err = fmt.Errorf("scale in SIB must be one of 1, 2, 4, 8 but got %d", scale)
			return
		}

		sbi = &sbiValue
	}
	return
}
|
|
|
|
// TODO: srcOnModRMReg can be deleted after golang-asm removal. This is necessary to match our implementation
|
|
// with golang-asm, but in practice, there are equivalent opcodes to always have src on ModRM:reg without ambiguity.
|
|
func (n *NodeImpl) GetRegisterToRegisterModRM(srcOnModRMReg bool) (RexPrefix, modRM byte, err error) {
|
|
var reg3bits, rm3bits byte
|
|
if srcOnModRMReg {
|
|
reg3bits, RexPrefix, err = register3bits(n.SrcReg,
|
|
// Indicate that SrcReg will be specified by ModRM:reg.
|
|
registerSpecifierPositionModRMFieldReg)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
var dstRexPrefix byte
|
|
rm3bits, dstRexPrefix, err = register3bits(n.DstReg,
|
|
// Indicate that DstReg will be specified by ModRM:r/m.
|
|
registerSpecifierPositionModRMFieldRM)
|
|
if err != nil {
|
|
return
|
|
}
|
|
RexPrefix |= dstRexPrefix
|
|
} else {
|
|
rm3bits, RexPrefix, err = register3bits(n.SrcReg,
|
|
// Indicate that SrcReg will be specified by ModRM:r/m.
|
|
registerSpecifierPositionModRMFieldRM)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
var dstRexPrefix byte
|
|
reg3bits, dstRexPrefix, err = register3bits(n.DstReg,
|
|
// Indicate that DstReg will be specified by ModRM:reg.
|
|
registerSpecifierPositionModRMFieldReg)
|
|
if err != nil {
|
|
return
|
|
}
|
|
RexPrefix |= dstRexPrefix
|
|
}
|
|
|
|
// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
|
|
modRM = 0b11_000_000 | // Specifying that dst opeand is register.
|
|
(reg3bits << 3) |
|
|
rm3bits
|
|
|
|
return
|
|
}
|
|
|
|
// RexPrefix represents REX prefix https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix
type RexPrefix = byte

// REX prefixes are independent of each other and can be combined with OR.
// (RexPrefixNone was previously written 0x0000_0000 — hex masquerading as
// the binary notation used by its siblings; the value is unchanged.)
const (
	// RexPrefixNone indicates that the instruction doesn't need RexPrefix.
	RexPrefixNone RexPrefix = 0b0000_0000
	// RexPrefixDefault is the fixed 0b0100 marker every emitted REX byte carries.
	RexPrefixDefault RexPrefix = 0b0100_0000
	// RexPrefixW (REX.W) selects a 64-bit operand size.
	RexPrefixW RexPrefix = 0b0000_1000 | RexPrefixDefault
	// RexPrefixR (REX.R) extends the ModRM:reg field to registers 8-15.
	RexPrefixR RexPrefix = 0b0000_0100 | RexPrefixDefault
	// RexPrefixX (REX.X) extends the SIB index field to registers 8-15.
	RexPrefixX RexPrefix = 0b0000_0010 | RexPrefixDefault
	// RexPrefixB (REX.B) extends the ModRM:r/m (or SIB base) field to registers 8-15.
	RexPrefixB RexPrefix = 0b0000_0001 | RexPrefixDefault
)
|
|
|
|
// registerSpecifierPosition represents the position in the instruction bytes where an operand register is placed.
// register3bits uses it to decide which REX extension bit an extended register requires.
type registerSpecifierPosition byte

const (
	// registerSpecifierPositionModRMFieldReg: the register occupies the ModRM "reg" field.
	registerSpecifierPositionModRMFieldReg registerSpecifierPosition = iota
	// registerSpecifierPositionModRMFieldRM: the register occupies the ModRM "r/m" field.
	registerSpecifierPositionModRMFieldRM
	// registerSpecifierPositionSIBIndex: the register occupies the SIB "index" field.
	registerSpecifierPositionSIBIndex
)
|
|
|
|
func register3bits(reg asm.Register, registerSpecifierPosition registerSpecifierPosition) (bits byte, prefix RexPrefix, err error) {
|
|
prefix = RexPrefixNone
|
|
if REG_R8 <= reg && reg <= REG_R15 || REG_X8 <= reg && reg <= REG_X15 {
|
|
// https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix
|
|
switch registerSpecifierPosition {
|
|
case registerSpecifierPositionModRMFieldReg:
|
|
prefix = RexPrefixR
|
|
case registerSpecifierPositionModRMFieldRM:
|
|
prefix = RexPrefixB
|
|
case registerSpecifierPositionSIBIndex:
|
|
prefix = RexPrefixX
|
|
}
|
|
}
|
|
|
|
// https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers
|
|
switch reg {
|
|
case REG_AX, REG_R8, REG_X0, REG_X8:
|
|
bits = 0b000
|
|
case REG_CX, REG_R9, REG_X1, REG_X9:
|
|
bits = 0b001
|
|
case REG_DX, REG_R10, REG_X2, REG_X10:
|
|
bits = 0b010
|
|
case REG_BX, REG_R11, REG_X3, REG_X11:
|
|
bits = 0b011
|
|
case REG_SP, REG_R12, REG_X4, REG_X12:
|
|
bits = 0b100
|
|
case REG_BP, REG_R13, REG_X5, REG_X13:
|
|
bits = 0b101
|
|
case REG_SI, REG_R14, REG_X6, REG_X14:
|
|
bits = 0b110
|
|
case REG_DI, REG_R15, REG_X7, REG_X15:
|
|
bits = 0b111
|
|
default:
|
|
err = fmt.Errorf("invalid register [%s]", RegisterName(reg))
|
|
}
|
|
return
|
|
}
|
|
|
|
// FitIn32bit reports whether v is encodable in 32 bits when interpreted as
// either a signed int32 or an unsigned uint32, i.e. anywhere in
// [math.MinInt32, math.MaxUint32].
func FitIn32bit(v int64) bool {
	if v < math.MinInt32 {
		return false
	}
	return v <= math.MaxUint32
}
|
|
|
|
// fitInSigned8bit reports whether v fits in a signed 8-bit integer, i.e. is
// within [math.MinInt8, math.MaxInt8].
func fitInSigned8bit(v int64) bool {
	if v < math.MinInt8 {
		return false
	}
	return v <= math.MaxInt8
}
|
|
|
|
func IsFloatRegister(r asm.Register) bool {
|
|
return REG_X0 <= r && r <= REG_X15
|
|
}
|