package amd64

import (
	"encoding/binary"
	"errors"
	"fmt"
	"math"

	"github.com/tetratelabs/wazero/internal/asm"
)

// nodeImpl implements asm.Node for amd64.
type nodeImpl struct {
	// jumpTarget holds the target node in the linked list for the jump-kind instruction.
	jumpTarget *nodeImpl

	// prev and next hold the prev/next node from this node in the assembled linked list.
	prev, next *nodeImpl

	// forwardJumpOrigins holds all the nodes trying to jump into this node as a
	// singly linked list. In other words, all the nodes with .jumpTarget == this.
	forwardJumpOrigins *nodeImpl

	staticConst *asm.StaticConst

	dstConst asm.ConstantValue
	offsetInBinary asm.NodeOffsetInBinary
	srcConst asm.ConstantValue
	instruction asm.Instruction

	// readInstructionAddressBeforeTargetInstruction holds the instruction right before the target of
	// the read-instruction-address instruction. See asm.assemblerBase.CompileReadInstructionAddress.
	readInstructionAddressBeforeTargetInstruction asm.Instruction
	flag nodeFlag
	types operandTypes
	srcReg, dstReg asm.Register
	srcMemIndex, dstMemIndex asm.Register
	srcMemScale, dstMemScale byte
	arg byte

	// staticConstReferrersAdded is true if this node has already been added into AssemblerImpl.staticConstReferrers.
	// Only used when staticConst is not nil. Through re-assembly, we might end up adding the same node multiple
	// times, which causes unnecessary allocations, so we use this flag to make sure it is done only once.
	staticConstReferrersAdded bool
}

type nodeFlag byte

const (
	// nodeFlagInitializedForEncoding is always set to indicate that the node is already initialized. Notably, this is used to judge
	// whether a jump is backward or forward before encoding.
	nodeFlagInitializedForEncoding nodeFlag = 1 << iota
	nodeFlagBackwardJump
	// nodeFlagShortForwardJump is only used by forward branch jumps, which means .jumpTarget != nil and
	// the target node is encoded after this node. It is set by default, meaning that we first encode all the jumps with
	// jumpTarget as short jumps (i.e. relative signed 8-bit integer offset jumps) to keep the encoding as small as
	// possible, and clear the flag in resolveForwardRelativeJumps once the offset turns out not to fit in 8 bits.
	nodeFlagShortForwardJump
)

func (n *nodeImpl) isInitializedForEncoding() bool {
	return n.flag&nodeFlagInitializedForEncoding != 0
}

func (n *nodeImpl) isJumpNode() bool {
	return n.jumpTarget != nil
}

func (n *nodeImpl) isBackwardJump() bool {
	return n.isJumpNode() && (n.flag&nodeFlagBackwardJump != 0)
}

func (n *nodeImpl) isForwardJump() bool {
	return n.isJumpNode() && (n.flag&nodeFlagBackwardJump == 0)
}

func (n *nodeImpl) isForwardShortJump() bool {
	return n.isForwardJump() && n.flag&nodeFlagShortForwardJump != 0
}

// AssignJumpTarget implements asm.Node.AssignJumpTarget.
func (n *nodeImpl) AssignJumpTarget(target asm.Node) {
	n.jumpTarget = target.(*nodeImpl)
}

// AssignDestinationConstant implements asm.Node.AssignDestinationConstant.
func (n *nodeImpl) AssignDestinationConstant(value asm.ConstantValue) {
	n.dstConst = value
}

// AssignSourceConstant implements asm.Node.AssignSourceConstant.
func (n *nodeImpl) AssignSourceConstant(value asm.ConstantValue) {
	n.srcConst = value
}

// OffsetInBinary implements asm.Node.OffsetInBinary.
func (n *nodeImpl) OffsetInBinary() asm.NodeOffsetInBinary {
	return n.offsetInBinary
}

// String implements fmt.Stringer.
//
// This is for debugging purposes, and the format is almost the same as the AT&T assembly syntax,
// meaning that this should look like "INSTRUCTION ${from}, ${to}" where each operand
// may be wrapped in '[]' to represent a memory location.
func (n *nodeImpl) String() (ret string) {
	instName := InstructionName(n.instruction)
	switch n.types {
	case operandTypesNoneToNone:
		ret = instName
	case operandTypesNoneToRegister:
		ret = fmt.Sprintf("%s %s", instName, RegisterName(n.dstReg))
	case operandTypesNoneToMemory:
		if n.dstMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s [%s + 0x%x + %s*0x%x]", instName,
				RegisterName(n.dstReg), n.dstConst, RegisterName(n.dstMemIndex), n.dstMemScale)
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x]", instName, RegisterName(n.dstReg), n.dstConst)
		}
	case operandTypesNoneToBranch:
		ret = fmt.Sprintf("%s {%v}", instName, n.jumpTarget)
	case operandTypesRegisterToNone:
		ret = fmt.Sprintf("%s %s", instName, RegisterName(n.srcReg))
	case operandTypesRegisterToRegister:
		ret = fmt.Sprintf("%s %s, %s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg))
	case operandTypesRegisterToMemory:
		if n.dstMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s %s, [%s + 0x%x + %s*0x%x]", instName, RegisterName(n.srcReg),
				RegisterName(n.dstReg), n.dstConst, RegisterName(n.dstMemIndex), n.dstMemScale)
		} else {
			ret = fmt.Sprintf("%s %s, [%s + 0x%x]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.dstConst)
		}
	case operandTypesRegisterToConst:
		ret = fmt.Sprintf("%s %s, 0x%x", instName, RegisterName(n.srcReg), n.dstConst)
	case operandTypesMemoryToRegister:
		if n.srcMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s [%s + %#x + %s*%#x], %s", instName,
				RegisterName(n.srcReg), n.srcConst, RegisterName(n.srcMemIndex), n.srcMemScale, RegisterName(n.dstReg))
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x], %s", instName, RegisterName(n.srcReg), n.srcConst, RegisterName(n.dstReg))
		}
	case operandTypesMemoryToConst:
		if n.srcMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s [%s + %#x + %s*0x%x], 0x%x", instName,
				RegisterName(n.srcReg), n.srcConst, RegisterName(n.srcMemIndex), n.srcMemScale, n.dstConst)
		} else {
			ret = fmt.Sprintf("%s [%s + %#x], 0x%x", instName, RegisterName(n.srcReg), n.srcConst, n.dstConst)
		}
	case operandTypesConstToMemory:
		if n.dstMemIndex != asm.NilRegister {
			ret = fmt.Sprintf("%s 0x%x, [%s + 0x%x + %s*0x%x]", instName, n.srcConst,
				RegisterName(n.dstReg), n.dstConst, RegisterName(n.dstMemIndex), n.dstMemScale)
		} else {
			ret = fmt.Sprintf("%s 0x%x, [%s + 0x%x]", instName, n.srcConst, RegisterName(n.dstReg), n.dstConst)
		}
	case operandTypesConstToRegister:
		ret = fmt.Sprintf("%s 0x%x, %s", instName, n.srcConst, RegisterName(n.dstReg))
	case operandTypesStaticConstToRegister:
		ret = fmt.Sprintf("%s $%#x, %s", instName, n.staticConst.Raw, RegisterName(n.dstReg))
	case operandTypesRegisterToStaticConst:
		ret = fmt.Sprintf("%s %s, $%#x", instName, RegisterName(n.srcReg), n.staticConst.Raw)
	}
	return
}

type operandTypes byte

const (
	operandTypesNoneToNone operandTypes = iota
	operandTypesNoneToRegister
	operandTypesNoneToMemory
	operandTypesNoneToBranch
	operandTypesRegisterToNone
	operandTypesRegisterToRegister
	operandTypesRegisterToMemory
	operandTypesRegisterToConst
	operandTypesMemoryToRegister
	operandTypesMemoryToConst
	operandTypesConstToRegister
	operandTypesConstToMemory
	operandTypesStaticConstToRegister
	operandTypesRegisterToStaticConst
)

// String implements fmt.Stringer.
func (o operandTypes) String() (ret string) {
	switch o {
	case operandTypesNoneToNone:
		ret = "NoneToNone"
	case operandTypesNoneToRegister:
		ret = "NoneToRegister"
	case operandTypesNoneToMemory:
		ret = "NoneToMemory"
	case operandTypesNoneToBranch:
		ret = "NoneToBranch"
	case operandTypesRegisterToNone:
		ret = "RegisterToNone"
	case operandTypesRegisterToRegister:
		ret = "RegisterToRegister"
	case operandTypesRegisterToMemory:
		ret = "RegisterToMemory"
	case operandTypesRegisterToConst:
		ret = "RegisterToConst"
	case operandTypesMemoryToRegister:
		ret = "MemoryToRegister"
	case operandTypesMemoryToConst:
		ret = "MemoryToConst"
	case operandTypesConstToRegister:
		ret = "ConstToRegister"
	case operandTypesConstToMemory:
		ret = "ConstToMemory"
	case operandTypesStaticConstToRegister:
		ret = "StaticConstToRegister"
	case operandTypesRegisterToStaticConst:
		ret = "RegisterToStaticConst"
	}
	return
}

type (
	// AssemblerImpl implements Assembler.
	AssemblerImpl struct {
		root *nodeImpl
		current *nodeImpl
		asm.BaseAssemblerImpl
		readInstructionAddressNodes []*nodeImpl

		// staticConstReferrers maintains the list of static const referrers which require
		// offset resolution after the binary layout is finalized.
		staticConstReferrers []staticConstReferrer

		nodePool nodePool
		pool asm.StaticConstPool

		// MaxDisplacementForConstantPool is fixed to defaultMaxDisplacementForConstantPool,
		// but we have it as an exported field here for testability.
		MaxDisplacementForConstantPool int

		forceReAssemble bool
	}

	// staticConstReferrer represents a referrer of an asm.StaticConst.
	staticConstReferrer struct {
		n *nodeImpl
		// instLen is the encoded length of the instruction for `n`.
		instLen int
	}
)

func NewAssembler() *AssemblerImpl {
	return &AssemblerImpl{
		nodePool: nodePool{index: nodePageSize},
		pool: asm.NewStaticConstPool(),
		MaxDisplacementForConstantPool: defaultMaxDisplacementForConstantPool,
	}
}
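
// For illustration only: a typical lifecycle of the assembler looks roughly like
// the sketch below (hypothetical usage; `buf` stands for whatever asm.Buffer the
// caller owns, and the exact instructions depend on the caller):
//
//	a := NewAssembler()
//	a.CompileConstToRegister(MOVQ, 1, RegAX) // append instructions to the list.
//	a.CompileStandAlone(RET)
//	if err := a.Assemble(buf); err != nil { /* handle error */ }
//	a.Reset() // recycle the node pool and constant pool for the next function.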

const nodePageSize = 128

type nodePage = [nodePageSize]nodeImpl

// nodePool is the central allocation pool for nodeImpl used by a single AssemblerImpl.
// This reduces allocations across compilations by reusing the AssemblerImpl.
type nodePool struct {
	pages []*nodePage
	index int
}

// allocNode allocates a new nodeImpl for use from the pool.
// This expands the pool if there is no space left for it.
func (n *nodePool) allocNode() *nodeImpl {
	if n.index == nodePageSize {
		if len(n.pages) == cap(n.pages) {
			n.pages = append(n.pages, new(nodePage))
		} else {
			i := len(n.pages)
			n.pages = n.pages[:i+1]
			if n.pages[i] == nil {
				n.pages[i] = new(nodePage)
			}
		}
		n.index = 0
	}
	ret := &n.pages[len(n.pages)-1][n.index]
	n.index++
	return ret
}
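
// For example, the very first allocNode call on a fresh pool (index == nodePageSize)
// appends a new 128-node page and returns its first element; the following 127 calls
// only increment the index into that same page, so the amortized cost is near zero.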

func (n *nodePool) reset() {
	for _, ns := range n.pages {
		pages := ns[:]
		for i := range pages {
			pages[i] = nodeImpl{}
		}
	}
	n.pages = n.pages[:0]
	n.index = nodePageSize
}

// AllocateNOP implements asm.AssemblerBase.
func (a *AssemblerImpl) AllocateNOP() asm.Node {
	n := a.nodePool.allocNode()
	n.instruction = NOP
	n.types = operandTypesNoneToNone
	return n
}

// Add implements asm.AssemblerBase.
func (a *AssemblerImpl) Add(n asm.Node) {
	a.addNode(n.(*nodeImpl))
}

// Reset implements asm.AssemblerBase.
func (a *AssemblerImpl) Reset() {
	pool := a.pool
	pool.Reset()
	*a = AssemblerImpl{
		nodePool: a.nodePool,
		pool: pool,
		readInstructionAddressNodes: a.readInstructionAddressNodes[:0],
		staticConstReferrers: a.staticConstReferrers[:0],
		BaseAssemblerImpl: asm.BaseAssemblerImpl{
			SetBranchTargetOnNextNodes: a.SetBranchTargetOnNextNodes[:0],
			JumpTableEntries: a.JumpTableEntries[:0],
		},
	}
	a.nodePool.reset()
}

// newNode creates a new Node and appends it into the linked list.
func (a *AssemblerImpl) newNode(instruction asm.Instruction, types operandTypes) *nodeImpl {
	n := a.nodePool.allocNode()
	n.instruction = instruction
	n.types = types
	a.addNode(n)
	return n
}

// addNode appends the new node into the linked list.
func (a *AssemblerImpl) addNode(node *nodeImpl) {
	if a.root == nil {
		a.root = node
		a.current = node
	} else {
		parent := a.current
		parent.next = node
		node.prev = parent
		a.current = node
	}

	for _, o := range a.SetBranchTargetOnNextNodes {
		origin := o.(*nodeImpl)
		origin.jumpTarget = node
	}
	// Reuse the underlying slice to avoid re-allocations.
	a.SetBranchTargetOnNextNodes = a.SetBranchTargetOnNextNodes[:0]
}
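
// For example (hypothetical flow): after CompileJump returns a branch node J,
// the caller records J in SetBranchTargetOnNextNodes via the base assembler; the
// next addNode call then wires J.jumpTarget to the newly appended node, i.e. the
// jump lands on whatever instruction is compiled next.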

// encodeNode encodes the given node into writer.
func (a *AssemblerImpl) encodeNode(buf asm.Buffer, n *nodeImpl) (err error) {
	switch n.types {
	case operandTypesNoneToNone:
		err = a.encodeNoneToNone(buf, n)
	case operandTypesNoneToRegister:
		err = a.encodeNoneToRegister(buf, n)
	case operandTypesNoneToMemory:
		err = a.encodeNoneToMemory(buf, n)
	case operandTypesNoneToBranch:
		// Branching operand can be encoded as relative jumps.
		err = a.encodeRelativeJump(buf, n)
	case operandTypesRegisterToNone:
		err = a.encodeRegisterToNone(buf, n)
	case operandTypesRegisterToRegister:
		err = a.encodeRegisterToRegister(buf, n)
	case operandTypesRegisterToMemory:
		err = a.encodeRegisterToMemory(buf, n)
	case operandTypesRegisterToConst:
		err = a.encodeRegisterToConst(buf, n)
	case operandTypesMemoryToRegister:
		err = a.encodeMemoryToRegister(buf, n)
	case operandTypesMemoryToConst:
		err = a.encodeMemoryToConst(buf, n)
	case operandTypesConstToRegister:
		err = a.encodeConstToRegister(buf, n)
	case operandTypesConstToMemory:
		err = a.encodeConstToMemory(buf, n)
	case operandTypesStaticConstToRegister:
		err = a.encodeStaticConstToRegister(buf, n)
	case operandTypesRegisterToStaticConst:
		err = a.encodeRegisterToStaticConst(buf, n)
	default:
		err = fmt.Errorf("encoder undefined for [%s] operand type", n.types)
	}
	if err != nil {
		err = fmt.Errorf("%w: %s", err, n) // Ensure the error is debuggable by including the string value of the node.
	}
	return
}

// Assemble implements asm.AssemblerBase.
func (a *AssemblerImpl) Assemble(buf asm.Buffer) error {
	a.initializeNodesForEncoding()

	// Continue encoding until we are no longer forced to re-assemble, which happens when
	// a short relative jump ends up with an offset that does not fit in 8 bits.
	for {
		err := a.encode(buf)
		if err != nil {
			return err
		}

		if !a.forceReAssemble {
			break
		} else {
			// We reset the length of the buffer but don't release the underlying slice since
			// the binary size will be roughly the same after re-assembly.
			buf.Reset()
			// Reset the re-assemble flag in order to avoid an infinite loop!
			a.forceReAssemble = false
		}
	}

	code := buf.Bytes()
	for _, n := range a.readInstructionAddressNodes {
		if err := a.finalizeReadInstructionAddressNode(code, n); err != nil {
			return err
		}
	}

	// Now that we've finished the layout, fill out the static const offsets.
	for i := range a.staticConstReferrers {
		ref := &a.staticConstReferrers[i]
		n, instLen := ref.n, ref.instLen
		// Calculate the displacement between the RIP (the offset _after_ n) and the static constant.
		displacement := int(n.staticConst.OffsetInBinary) - int(n.OffsetInBinary()) - instLen
		// The offset must be stored in the last 4 bytes of the instruction encoded for this n.
		// See AssemblerImpl.encodeStaticConstImpl for detail.
		displacementOffsetInInstruction := n.OffsetInBinary() + uint64(instLen-4)
		binary.LittleEndian.PutUint32(code[displacementOffsetInInstruction:], uint32(int32(displacement)))
	}

	return a.FinalizeJumpTableEntry(code)
}
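
// To illustrate the displacement arithmetic above with hypothetical numbers: a
// 7-byte instruction at offset 0x10 referring to a constant laid out at offset
// 0x40 stores 0x40 - 0x10 - 7 = 0x29 in its last 4 bytes, i.e. the distance from
// the RIP value (which points right after the instruction) to the constant.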

// initializeNodesForEncoding initializes nodeImpl.flag and determines whether each
// jump is a forward or a backward jump.
func (a *AssemblerImpl) initializeNodesForEncoding() {
	for n := a.root; n != nil; n = n.next {
		n.flag |= nodeFlagInitializedForEncoding
		if target := n.jumpTarget; target != nil {
			if target.isInitializedForEncoding() {
				// This means the target exists behind this node, i.e. this is a backward jump.
				n.flag |= nodeFlagBackwardJump
			} else {
				// Otherwise, this is a forward jump.
				// We start with assuming that the jump can be short (8-bit displacement).
				// If it doesn't fit, we change this flag in resolveForwardRelativeJumps.
				n.flag |= nodeFlagShortForwardJump

				// If the target node is also a branching instruction, we replace the target with a NOP
				// node so that target.forwardJumpOrigins is not used both as a jump destination and as a list of jump origins.
				if target.types == operandTypesNoneToBranch {
					// Allocate the NOP node from the pool.
					nop := a.nodePool.allocNode()
					nop.instruction = NOP
					nop.types = operandTypesNoneToNone
					// Insert it between target.prev and target: [target.prev, target] -> [target.prev, nop, target]
					prev := target.prev
					nop.prev = prev
					prev.next = nop
					nop.next = target
					target.prev = nop
					n.jumpTarget = nop
					target = nop
				}

				// Prepend this node `n` to the singly linked list of forward jump origins
				// headed by `target.forwardJumpOrigins`.
				n.forwardJumpOrigins = target.forwardJumpOrigins
				target.forwardJumpOrigins = n
			}
		}
	}
}
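
// To illustrate: when walking [J1 -> L1 -> J2] where J1 jumps forward to L1 and J2
// jumps back to J1, J1 is classified as a forward jump because L1 is not yet flagged
// as initialized when J1 is visited, while J2 is a backward jump because J1 already
// carries nodeFlagInitializedForEncoding at that point.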

func (a *AssemblerImpl) encode(buf asm.Buffer) error {
	for n := a.root; n != nil; n = n.next {
		// If an instruction needs NOP padding, we do so before encoding it.
		//
		// This is necessary to avoid Intel's jump erratum; see Section 2.1 of
		// the following document for when we have to pad NOP:
		// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
		//
		// This logic used to be implemented in a function called maybeNOPPadding,
		// but the complexity of the logic made it impossible for the compiler to
		// inline. Since this function is on a hot code path, we inlined the
		// initial checks to skip the function call when instructions do not need
		// NOP padding.
		switch info := nopPaddingInfo[n.instruction]; {
		case info.jmp:
			if err := a.encodeJmpNOPPadding(buf, n); err != nil {
				return err
			}
		case info.onNextJmp:
			if err := a.encodeOnNextJmpNOPPadding(buf, n); err != nil {
				return err
			}
		}

		// After the padding, we can finalize the offset of this instruction in the binary.
		n.offsetInBinary = uint64(buf.Len())

		if err := a.encodeNode(buf, n); err != nil {
			return err
		}

		if n.forwardJumpOrigins != nil {
			if err := a.resolveForwardRelativeJumps(buf, n); err != nil {
				return fmt.Errorf("invalid relative forward jumps: %w", err)
			}
		}

		a.maybeFlushConstants(buf, n.next == nil)
	}
	return nil
}

var nopPaddingInfo = [instructionEnd]struct {
	jmp, onNextJmp bool
}{
	RET: {jmp: true},
	JMP: {jmp: true},
	JCC: {jmp: true},
	JCS: {jmp: true},
	JEQ: {jmp: true},
	JGE: {jmp: true},
	JGT: {jmp: true},
	JHI: {jmp: true},
	JLE: {jmp: true},
	JLS: {jmp: true},
	JLT: {jmp: true},
	JMI: {jmp: true},
	JNE: {jmp: true},
	JPC: {jmp: true},
	JPS: {jmp: true},
	// Instructions that may be macro-fused if the next node is a conditional jump instruction.
	CMPL: {onNextJmp: true},
	CMPQ: {onNextJmp: true},
	TESTL: {onNextJmp: true},
	TESTQ: {onNextJmp: true},
	ADDL: {onNextJmp: true},
	ADDQ: {onNextJmp: true},
	SUBL: {onNextJmp: true},
	SUBQ: {onNextJmp: true},
	ANDL: {onNextJmp: true},
	ANDQ: {onNextJmp: true},
	INCQ: {onNextJmp: true},
	DECQ: {onNextJmp: true},
}

func (a *AssemblerImpl) encodeJmpNOPPadding(buf asm.Buffer, n *nodeImpl) error {
	// In order to know the instruction length before writing into the binary,
	// we try encoding it.
	prevLen := buf.Len()

	// Assign the temporary offset which may or may not be correct depending on the padding decision.
	n.offsetInBinary = uint64(prevLen)

	// Encode the node and get the instruction length.
	if err := a.encodeNode(buf, n); err != nil {
		return err
	}
	instructionLen := int32(buf.Len() - prevLen)

	// Revert the written bytes.
	buf.Truncate(prevLen)
	return a.encodeNOPPadding(buf, instructionLen)
}

func (a *AssemblerImpl) encodeOnNextJmpNOPPadding(buf asm.Buffer, n *nodeImpl) error {
	instructionLen, err := a.fusedInstructionLength(buf, n)
	if err != nil {
		return err
	}
	return a.encodeNOPPadding(buf, instructionLen)
}

// encodeNOPPadding appends NOP instructions, if necessary, so that the next instruction
// of length instructionLen does not cross a 32-byte boundary.
// This is necessary to avoid Intel's jump erratum:
// https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
func (a *AssemblerImpl) encodeNOPPadding(buf asm.Buffer, instructionLen int32) error {
	const boundaryInBytes int32 = 32
	const mask = boundaryInBytes - 1
	var padNum int
	currentPos := int32(buf.Len())
	if used := currentPos & mask; used+instructionLen >= boundaryInBytes {
		padNum = int(boundaryInBytes - used)
	}
	a.padNOP(buf, padNum)
	return nil
}
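
// A worked example: with buf.Len() == 60 and instructionLen == 6, used == 60&31 == 28
// and 28+6 >= 32, so padNum == 32-28 == 4; four bytes of NOP move the instruction to
// offset 64, where its bytes [64, 70) no longer cross a 32-byte boundary.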

// fusedInstructionLength returns the length of the "macro-fused instruction" if the
// instruction sequence starting from `n` can be fused by the processor. Otherwise,
// it returns zero.
func (a *AssemblerImpl) fusedInstructionLength(buf asm.Buffer, n *nodeImpl) (ret int32, err error) {
	// Find the next non-NOP instruction.
	next := n.next
	for ; next != nil && next.instruction == NOP; next = next.next {
	}

	if next == nil {
		return
	}

	inst, jmpInst := n.instruction, next.instruction

	if !nopPaddingInfo[jmpInst].jmp {
		// If the next instruction is not a jump kind, the instructions will not be fused.
		return
	}

	// How to determine whether the instructions can be fused is described in
	// Section 3.4.2.2 of the "Intel Optimization Manual":
	// https://www.intel.com/content/dam/doc/manual/64-ia-32-architectures-optimization-manual.pdf
	isTest := inst == TESTL || inst == TESTQ
	isCmp := inst == CMPQ || inst == CMPL
	isTestCmp := isTest || isCmp
	if isTestCmp && (n.types == operandTypesMemoryToConst || n.types == operandTypesConstToMemory) {
		// The manual says: "CMP and TEST can not be fused when comparing MEM-IMM".
		return
	}

	// Implement the decision according to Table 3-1 in the manual.
	isAnd := inst == ANDL || inst == ANDQ
	if !isTest && !isAnd {
		if jmpInst == JMI || jmpInst == JPL || jmpInst == JPS || jmpInst == JPC {
			// These jumps are only fused for TEST or AND.
			return
		}
		isAdd := inst == ADDL || inst == ADDQ
		isSub := inst == SUBL || inst == SUBQ
		if !isCmp && !isAdd && !isSub {
			if jmpInst == JCS || jmpInst == JCC || jmpInst == JHI || jmpInst == JLS {
				// These jumps are only fused for TEST, AND, CMP, ADD, or SUB.
				return
			}
		}
	}

	// Now the instruction pair is ensured to be fused by the processor.
	// In order to know the fused instruction length before writing into the binary,
	// we try encoding it.
	savedLen := uint64(buf.Len())

	// Encode the nodes into the buffer.
	if err = a.encodeNode(buf, n); err != nil {
		return
	}
	if err = a.encodeNode(buf, next); err != nil {
		return
	}

	ret = int32(uint64(buf.Len()) - savedLen)

	// Revert the written bytes.
	buf.Truncate(int(savedLen))
	return
}
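
// For example, CMPQ (reg, reg) followed by JEQ is macro-fusable, so this returns
// the combined encoded length of the pair, whereas CMPQ comparing memory against
// an immediate followed by JEQ returns zero per the MEM-IMM restriction above.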

// nopOpcodes is the multi-byte NOP instruction table derived from section 5.8 "Code Padding with Operand-Size Override and Multibyte NOP"
// in the "AMD Software Optimization Guide for AMD Family 15h Processors" https://www.amd.com/system/files/TechDocs/47414_15h_sw_opt_guide.pdf
var nopOpcodes = [][11]byte{
	{0x90},
	{0x66, 0x90},
	{0x0f, 0x1f, 0x00},
	{0x0f, 0x1f, 0x40, 0x00},
	{0x0f, 0x1f, 0x44, 0x00, 0x00},
	{0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
	{0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
}

func (a *AssemblerImpl) padNOP(buf asm.Buffer, num int) {
	for num > 0 {
		singleNopNum := num
		if singleNopNum > len(nopOpcodes) {
			singleNopNum = len(nopOpcodes)
		}
		buf.AppendBytes(nopOpcodes[singleNopNum-1][:singleNopNum])
		num -= singleNopNum
	}
}
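
// For example, padNOP(buf, 13) emits the 11-byte NOP followed by the 2-byte NOP
// (0x66 0x90) rather than thirteen single-byte 0x90s, which decodes faster.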

// CompileStandAlone implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node {
	return a.newNode(instruction, operandTypesNoneToNone)
}

// CompileConstToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileConstToRegister(
	instruction asm.Instruction,
	value asm.ConstantValue,
	destinationReg asm.Register,
) (inst asm.Node) {
	n := a.newNode(instruction, operandTypesConstToRegister)
	n.srcConst = value
	n.dstReg = destinationReg
	return n
}

// CompileRegisterToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) {
	n := a.newNode(instruction, operandTypesRegisterToRegister)
	n.srcReg = from
	n.dstReg = to
}

// CompileMemoryToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileMemoryToRegister(
	instruction asm.Instruction,
	sourceBaseReg asm.Register,
	sourceOffsetConst asm.ConstantValue,
	destinationReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.srcReg = sourceBaseReg
	n.srcConst = sourceOffsetConst
	n.dstReg = destinationReg
}

// CompileRegisterToMemory implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileRegisterToMemory(
	instruction asm.Instruction,
	sourceRegister, destinationBaseRegister asm.Register,
	destinationOffsetConst asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesRegisterToMemory)
	n.srcReg = sourceRegister
	n.dstReg = destinationBaseRegister
	n.dstConst = destinationOffsetConst
}

// CompileJump implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node {
	return a.newNode(jmpInstruction, operandTypesNoneToBranch)
}

// CompileJumpToMemory implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileJumpToMemory(
	jmpInstruction asm.Instruction,
	baseReg asm.Register,
	offset asm.ConstantValue,
) {
	n := a.newNode(jmpInstruction, operandTypesNoneToMemory)
	n.dstReg = baseReg
	n.dstConst = offset
}

// CompileJumpToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) {
	n := a.newNode(jmpInstruction, operandTypesNoneToRegister)
	n.dstReg = reg
}

// CompileReadInstructionAddress implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileReadInstructionAddress(
	destinationRegister asm.Register,
	beforeAcquisitionTargetInstruction asm.Instruction,
) {
	n := a.newNode(LEAQ, operandTypesMemoryToRegister)
	n.dstReg = destinationRegister
	n.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction
}

// CompileRegisterToRegisterWithArg implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileRegisterToRegisterWithArg(
	instruction asm.Instruction,
	from, to asm.Register,
	arg byte,
) {
	n := a.newNode(instruction, operandTypesRegisterToRegister)
	n.srcReg = from
	n.dstReg = to
	n.arg = arg
}

// CompileMemoryWithIndexToRegister implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileMemoryWithIndexToRegister(
	instruction asm.Instruction,
	srcBaseReg asm.Register,
	srcOffsetConst asm.ConstantValue,
	srcIndex asm.Register,
	srcScale int16,
	dstReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.srcReg = srcBaseReg
	n.srcConst = srcOffsetConst
	n.srcMemIndex = srcIndex
	n.srcMemScale = byte(srcScale)
	n.dstReg = dstReg
}

// CompileMemoryWithIndexAndArgToRegister implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileMemoryWithIndexAndArgToRegister(
	instruction asm.Instruction,
	srcBaseReg asm.Register,
	srcOffsetConst asm.ConstantValue,
	srcIndex asm.Register,
	srcScale int16,
	dstReg asm.Register,
	arg byte,
) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.srcReg = srcBaseReg
	n.srcConst = srcOffsetConst
	n.srcMemIndex = srcIndex
	n.srcMemScale = byte(srcScale)
	n.dstReg = dstReg
	n.arg = arg
}

// CompileRegisterToMemoryWithIndex implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileRegisterToMemoryWithIndex(
	instruction asm.Instruction,
	srcReg, dstBaseReg asm.Register,
	dstOffsetConst asm.ConstantValue,
	dstIndex asm.Register,
	dstScale int16,
) {
	n := a.newNode(instruction, operandTypesRegisterToMemory)
	n.srcReg = srcReg
	n.dstReg = dstBaseReg
	n.dstConst = dstOffsetConst
	n.dstMemIndex = dstIndex
	n.dstMemScale = byte(dstScale)
}

// CompileRegisterToMemoryWithIndexAndArg implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileRegisterToMemoryWithIndexAndArg(
	instruction asm.Instruction,
	srcReg, dstBaseReg asm.Register,
	dstOffsetConst asm.ConstantValue,
	dstIndex asm.Register,
	dstScale int16,
	arg byte,
) {
	n := a.newNode(instruction, operandTypesRegisterToMemory)
	n.srcReg = srcReg
	n.dstReg = dstBaseReg
	n.dstConst = dstOffsetConst
	n.dstMemIndex = dstIndex
	n.dstMemScale = byte(dstScale)
	n.arg = arg
}

// CompileRegisterToConst implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileRegisterToConst(
	instruction asm.Instruction,
	srcRegister asm.Register,
	value asm.ConstantValue,
) asm.Node {
	n := a.newNode(instruction, operandTypesRegisterToConst)
	n.srcReg = srcRegister
	n.dstConst = value
	return n
}

// CompileRegisterToNone implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileRegisterToNone(instruction asm.Instruction, register asm.Register) {
	n := a.newNode(instruction, operandTypesRegisterToNone)
	n.srcReg = register
}

// CompileNoneToRegister implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileNoneToRegister(instruction asm.Instruction, register asm.Register) {
	n := a.newNode(instruction, operandTypesNoneToRegister)
	n.dstReg = register
}

// CompileNoneToMemory implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileNoneToMemory(
	instruction asm.Instruction,
	baseReg asm.Register,
	offset asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesNoneToMemory)
	n.dstReg = baseReg
	n.dstConst = offset
}

// CompileConstToMemory implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileConstToMemory(
	instruction asm.Instruction,
	value asm.ConstantValue,
	dstbaseReg asm.Register,
	dstOffset asm.ConstantValue,
) asm.Node {
	n := a.newNode(instruction, operandTypesConstToMemory)
	n.srcConst = value
	n.dstReg = dstbaseReg
	n.dstConst = dstOffset
	return n
}

// CompileMemoryToConst implements the same method as documented on amd64.Assembler.
func (a *AssemblerImpl) CompileMemoryToConst(
	instruction asm.Instruction,
	srcBaseReg asm.Register,
	srcOffset, value asm.ConstantValue,
) asm.Node {
	n := a.newNode(instruction, operandTypesMemoryToConst)
	n.srcReg = srcBaseReg
	n.srcConst = srcOffset
	n.dstConst = value
	return n
}

func errorEncodingUnsupported(n *nodeImpl) error {
	return fmt.Errorf("%s is unsupported for %s type", InstructionName(n.instruction), n.types)
}

func (a *AssemblerImpl) encodeNoneToNone(buf asm.Buffer, n *nodeImpl) (err error) {
	// Throughout the encoding methods, we use this pair of a base offset and a
	// code buffer to write instructions.
	//
	// The code buffer is allocated at the end of the current buffer to a size
	// large enough to hold all the bytes that may be written by the method.
	//
	// We use Go's append builtin to write to the buffer because it allows the
	// compiler to generate much better code than if we made calls to write
	// methods to mutate an encapsulated byte slice.
	//
	// At the end of the method, we truncate the buffer size back to the base
	// plus the length of the code buffer so the end of the buffer points right
	// after the last byte that was written.
	base := buf.Len()
	code := buf.Append(4)[:0]

	switch n.instruction {
	case CDQ:
		// https://www.felixcloutier.com/x86/cwd:cdq:cqo
		code = append(code, 0x99)
	case CQO:
		// https://www.felixcloutier.com/x86/cwd:cdq:cqo
		code = append(code, rexPrefixW, 0x99)
	case NOP:
		// Simply optimize out the NOP instructions.
	case RET:
		// https://www.felixcloutier.com/x86/ret
		code = append(code, 0xc3)
	case UD2:
		// https://mudongliang.github.io/x86/html/file_module_x86_id_318.html
		code = append(code, 0x0f, 0x0b)
	case REPMOVSQ:
		code = append(code, 0xf3, rexPrefixW, 0xa5)
	case REPSTOSQ:
		code = append(code, 0xf3, rexPrefixW, 0xab)
	case STD:
		code = append(code, 0xfd)
	case CLD:
		code = append(code, 0xfc)
	default:
		err = errorEncodingUnsupported(n)
	}

	buf.Truncate(base + len(code))
	return
}

func (a *AssemblerImpl) encodeNoneToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
	regBits, prefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldRM)

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
	modRM := 0b11_000_000 | // Specifying that the operand is a register.
		regBits
	if n.instruction == JMP {
		// JMP's opcode is defined as "FF /4" meaning that we have to have "4"
		// in the 4-6th bits of the ModRM byte. https://www.felixcloutier.com/x86/jmp
		modRM |= 0b00_100_000
	} else if n.instruction == NEGQ {
		prefix |= rexPrefixW
		modRM |= 0b00_011_000
	} else if n.instruction == INCQ {
		prefix |= rexPrefixW
	} else if n.instruction == DECQ {
		prefix |= rexPrefixW
		modRM |= 0b00_001_000
	} else {
		if RegSP <= n.dstReg && n.dstReg <= RegDI {
			// If the destination is a one-byte-length register, we need to have the default prefix.
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers
			prefix |= rexPrefixDefault
		}
	}

	base := buf.Len()
	code := buf.Append(4)[:0]

	if prefix != rexPrefixNone {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Encoding
		code = append(code, prefix)
	}

	switch n.instruction {
	case JMP:
		// https://www.felixcloutier.com/x86/jmp
		code = append(code, 0xff, modRM)
	case SETCC:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x93, modRM)
	case SETCS:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x92, modRM)
	case SETEQ:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x94, modRM)
	case SETGE:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x9d, modRM)
	case SETGT:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x9f, modRM)
	case SETHI:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x97, modRM)
	case SETLE:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x9e, modRM)
	case SETLS:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x96, modRM)
	case SETLT:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x9c, modRM)
	case SETNE:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x95, modRM)
	case SETPC:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x9b, modRM)
	case SETPS:
		// https://www.felixcloutier.com/x86/setcc
		code = append(code, 0x0f, 0x9a, modRM)
	case NEGQ:
		// https://www.felixcloutier.com/x86/neg
		code = append(code, 0xf7, modRM)
	case INCQ:
		// https://www.felixcloutier.com/x86/inc
		code = append(code, 0xff, modRM)
	case DECQ:
		// https://www.felixcloutier.com/x86/dec
		code = append(code, 0xff, modRM)
	default:
		err = errorEncodingUnsupported(n)
	}

	buf.Truncate(base + len(code))
	return
}

func (a *AssemblerImpl) encodeNoneToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
	rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(true)
	if err != nil {
		return err
	}

	var opcode byte
	switch n.instruction {
	case INCQ:
		// https://www.felixcloutier.com/x86/inc
		rexPrefix |= rexPrefixW
		opcode = 0xff
	case DECQ:
		// https://www.felixcloutier.com/x86/dec
		rexPrefix |= rexPrefixW
		modRM |= 0b00_001_000 // DEC needs the "/1" extension in ModRM.
		opcode = 0xff
	case JMP:
		// https://www.felixcloutier.com/x86/jmp
		modRM |= 0b00_100_000 // JMP needs the "/4" extension in ModRM.
		opcode = 0xff
	default:
		return errorEncodingUnsupported(n)
	}

	base := buf.Len()
	code := buf.Append(12)[:0]

	if rexPrefix != rexPrefixNone {
		code = append(code, rexPrefix)
	}

	code = append(code, opcode, modRM)

	if sbiExist {
		code = append(code, sbi)
	}

	if displacementWidth != 0 {
		code = appendConst(code, n.dstConst, displacementWidth)
	}

	buf.Truncate(base + len(code))
	return
}

type relativeJumpOpcode struct{ short, long []byte }

func (o relativeJumpOpcode) instructionLen(short bool) int64 {
	if short {
		return int64(len(o.short)) + 1 // 1 byte = 8-bit offset
	} else {
		return int64(len(o.long)) + 4 // 4 bytes = 32-bit offset
	}
}
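
// For example, a short JMP is 0xeb plus a rel8, 2 bytes in total, while the long
// form is 0xe9 plus a rel32, 5 bytes; conditional jumps such as JEQ are 2 bytes
// in the short form (0x74 rel8) and 6 bytes in the long form (0x0f 0x84 rel32).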

var relativeJumpOpcodes = [...]relativeJumpOpcode{
	// https://www.felixcloutier.com/x86/jcc
	JCC: {short: []byte{0x73}, long: []byte{0x0f, 0x83}},
	JCS: {short: []byte{0x72}, long: []byte{0x0f, 0x82}},
	JEQ: {short: []byte{0x74}, long: []byte{0x0f, 0x84}},
	JGE: {short: []byte{0x7d}, long: []byte{0x0f, 0x8d}},
	JGT: {short: []byte{0x7f}, long: []byte{0x0f, 0x8f}},
	JHI: {short: []byte{0x77}, long: []byte{0x0f, 0x87}},
	JLE: {short: []byte{0x7e}, long: []byte{0x0f, 0x8e}},
	JLS: {short: []byte{0x76}, long: []byte{0x0f, 0x86}},
	JLT: {short: []byte{0x7c}, long: []byte{0x0f, 0x8c}},
	JMI: {short: []byte{0x78}, long: []byte{0x0f, 0x88}},
	JPL: {short: []byte{0x79}, long: []byte{0x0f, 0x89}},
	JNE: {short: []byte{0x75}, long: []byte{0x0f, 0x85}},
	JPC: {short: []byte{0x7b}, long: []byte{0x0f, 0x8b}},
	JPS: {short: []byte{0x7a}, long: []byte{0x0f, 0x8a}},
	// https://www.felixcloutier.com/x86/jmp
	JMP: {short: []byte{0xeb}, long: []byte{0xe9}},
}

func (a *AssemblerImpl) resolveForwardRelativeJumps(buf asm.Buffer, target *nodeImpl) (err error) {
	offsetInBinary := int64(target.OffsetInBinary())
	origin := target.forwardJumpOrigins
	for ; origin != nil; origin = origin.forwardJumpOrigins {
		shortJump := origin.isForwardShortJump()
		op := relativeJumpOpcodes[origin.instruction]
		instructionLen := op.instructionLen(shortJump)

		// Calculate the offset from the EIP (at the time of executing this jump instruction)
		// to the target instruction. This value is always >= 0 as here we only handle forward jumps.
		offset := offsetInBinary - (int64(origin.OffsetInBinary()) + instructionLen)
		if shortJump {
			if offset > math.MaxInt8 {
				// This forces a re-assembly in the outer loop inside AssemblerImpl.Assemble().
				a.forceReAssemble = true
				// From the next re-assembly phase on, this forward jump will be encoded as a long jump
				// and allocate 32-bit offset bytes by default. This means that this `origin` node
				// will always enter the "long jump offset encoding" block below.
				origin.flag ^= nodeFlagShortForwardJump
			} else {
				buf.Bytes()[origin.OffsetInBinary()+uint64(instructionLen)-1] = byte(offset)
			}
		} else { // Long jump offset encoding.
			if offset > math.MaxInt32 {
				return fmt.Errorf("too large jump offset %d for encoding %s", offset, InstructionName(origin.instruction))
			}
			binary.LittleEndian.PutUint32(buf.Bytes()[origin.OffsetInBinary()+uint64(instructionLen)-4:], uint32(offset))
		}
	}
	return nil
}
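
// A worked example: a short JMP encoded at offset 0x10 occupies [0x10, 0x12), so
// a target at offset 0x20 yields offset == 0x20 - (0x10 + 2) == 0x0e, written into
// the displacement byte at 0x11; had the distance exceeded 127 bytes, the flag flip
// above would force a re-assembly with the 5-byte long form instead.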

func (a *AssemblerImpl) encodeRelativeJump(buf asm.Buffer, n *nodeImpl) (err error) {
	if n.jumpTarget == nil {
		err = fmt.Errorf("jump target must not be nil for relative %s", InstructionName(n.instruction))
		return
	}

	op := relativeJumpOpcodes[n.instruction]
	var isShortJump bool
	// offsetOfEIP means the offset of the EIP register at the time of executing this jump instruction.
	// Relative jump instructions can be encoded with signed 8-bit or 32-bit integer offsets from the EIP.
	var offsetOfEIP int64 = 0 // We set zero here and resolve it later, once the target instruction is encoded, for forward jumps.
	if n.isBackwardJump() {
		// If this is a backward jump, we can calculate the exact offset now.
		offsetOfJumpInstruction := int64(n.jumpTarget.OffsetInBinary()) - int64(n.OffsetInBinary())
		// The short encoding is 2 bytes long, so the offset measured from the end of the jump
		// instruction is offsetOfJumpInstruction-2; the jump is short iff that fits in a signed 8-bit integer.
		isShortJump = offsetOfJumpInstruction-2 >= math.MinInt8
		offsetOfEIP = offsetOfJumpInstruction - op.instructionLen(isShortJump)
	} else {
		// For forward jumps, we resolve the offset when we encode the target node. See AssemblerImpl.resolveForwardRelativeJumps.
		isShortJump = n.isForwardShortJump()
	}

	if offsetOfEIP < math.MinInt32 { // offsetOfEIP is always <= 0 as we don't calculate it for forward jumps here.
		return fmt.Errorf("too large jump offset %d for encoding %s", offsetOfEIP, InstructionName(n.instruction))
	}

	base := buf.Len()
	code := buf.Append(6)[:0]

	if isShortJump {
		code = append(code, op.short...)
		code = append(code, byte(offsetOfEIP))
	} else {
		code = append(code, op.long...)
		code = appendUint32(code, uint32(offsetOfEIP))
	}

	buf.Truncate(base + len(code))
	return
}
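
// A worked example for the backward case: a JMP at offset 0x20 targeting offset
// 0x10 has offsetOfJumpInstruction == -0x10; since -0x10-2 >= math.MinInt8 the
// short form is chosen, and offsetOfEIP == -0x10 - 2 == -0x12, i.e. relative to
// the end of the 2-byte jump instruction.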

func (a *AssemblerImpl) encodeRegisterToNone(buf asm.Buffer, n *nodeImpl) (err error) {
	regBits, prefix := register3bits(n.srcReg, registerSpecifierPositionModRMFieldRM)

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
	modRM := 0b11_000_000 | // Specifying that the operand is a register.
		regBits

	var opcode byte
	switch n.instruction {
	case DIVL:
		// https://www.felixcloutier.com/x86/div
		modRM |= 0b00_110_000
		opcode = 0xf7
	case DIVQ:
		// https://www.felixcloutier.com/x86/div
		prefix |= rexPrefixW
		modRM |= 0b00_110_000
		opcode = 0xf7
	case IDIVL:
		// https://www.felixcloutier.com/x86/idiv
		modRM |= 0b00_111_000
		opcode = 0xf7
	case IDIVQ:
		// https://www.felixcloutier.com/x86/idiv
		prefix |= rexPrefixW
		modRM |= 0b00_111_000
		opcode = 0xf7
	case MULL:
		// https://www.felixcloutier.com/x86/mul
		modRM |= 0b00_100_000
		opcode = 0xf7
	case MULQ:
		// https://www.felixcloutier.com/x86/mul
		prefix |= rexPrefixW
		modRM |= 0b00_100_000
		opcode = 0xf7
	default:
		err = errorEncodingUnsupported(n)
	}

	base := buf.Len()
	code := buf.Append(3)[:0]

	if prefix != rexPrefixNone {
		code = append(code, prefix)
	}

	code = append(code, opcode, modRM)

	buf.Truncate(base + len(code))
	return
}

var registerToRegisterOpcode = [instructionEnd]*struct {
	opcode []byte
	rPrefix rexPrefix
	mandatoryPrefix byte
	srcOnModRMReg bool
	isSrc8bit bool
	needArg bool
}{
	// https://www.felixcloutier.com/x86/add
	ADDL: {opcode: []byte{0x1}, srcOnModRMReg: true},
	ADDQ: {opcode: []byte{0x1}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/and
	ANDL: {opcode: []byte{0x21}, srcOnModRMReg: true},
	ANDQ: {opcode: []byte{0x21}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/cmp
	CMPL: {opcode: []byte{0x39}},
	CMPQ: {opcode: []byte{0x39}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/cmovcc
	CMOVQCS: {opcode: []byte{0x0f, 0x42}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/addsd
	ADDSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x58}},
	// https://www.felixcloutier.com/x86/addss
	ADDSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x58}},
	// https://www.felixcloutier.com/x86/andpd
	ANDPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x54}},
	// https://www.felixcloutier.com/x86/andps
	ANDPS: {opcode: []byte{0x0f, 0x54}},
	// https://www.felixcloutier.com/x86/bsr
	BSRL: {opcode: []byte{0xf, 0xbd}},
	BSRQ: {opcode: []byte{0xf, 0xbd}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/comisd
	COMISD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x2f}},
	// https://www.felixcloutier.com/x86/comiss
	COMISS: {opcode: []byte{0x0f, 0x2f}},
	// https://www.felixcloutier.com/x86/cvtsd2ss
	CVTSD2SS: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5a}},
	// https://www.felixcloutier.com/x86/cvtsi2sd
	CVTSL2SD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2a}},
	// https://www.felixcloutier.com/x86/cvtsi2sd
	CVTSQ2SD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2a}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/cvtsi2ss
	CVTSL2SS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2a}},
	// https://www.felixcloutier.com/x86/cvtsi2ss
	CVTSQ2SS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2a}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/cvtss2sd
	CVTSS2SD: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5a}},
	// https://www.felixcloutier.com/x86/cvttsd2si
	CVTTSD2SL: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2c}},
	CVTTSD2SQ: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x2c}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/cvttss2si
	CVTTSS2SL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2c}},
	CVTTSS2SQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x2c}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/divsd
	DIVSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5e}},
	// https://www.felixcloutier.com/x86/divss
	DIVSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5e}},
	// https://www.felixcloutier.com/x86/lzcnt
	LZCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbd}},
	LZCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbd}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/maxsd
	MAXSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5f}},
	// https://www.felixcloutier.com/x86/maxss
	MAXSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5f}},
	// https://www.felixcloutier.com/x86/minsd
	MINSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5d}},
	// https://www.felixcloutier.com/x86/minss
	MINSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5d}},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVBLSX: {opcode: []byte{0x0f, 0xbe}, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movzx
	MOVBLZX: {opcode: []byte{0x0f, 0xb6}, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movzx
	MOVWLZX: {opcode: []byte{0x0f, 0xb7}, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVBQSX: {opcode: []byte{0x0f, 0xbe}, rPrefix: rexPrefixW, isSrc8bit: true},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVLQSX: {opcode: []byte{0x63}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVWQSX: {opcode: []byte{0x0f, 0xbf}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/movsx:movsxd
	MOVWLSX: {opcode: []byte{0x0f, 0xbf}},
	// https://www.felixcloutier.com/x86/imul
	IMULQ: {opcode: []byte{0x0f, 0xaf}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/mulss
	MULSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x59}},
	// https://www.felixcloutier.com/x86/mulsd
	MULSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x59}},
	// https://www.felixcloutier.com/x86/or
	ORL: {opcode: []byte{0x09}, srcOnModRMReg: true},
	ORQ: {opcode: []byte{0x09}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/orpd
	ORPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x56}},
	// https://www.felixcloutier.com/x86/orps
	ORPS: {opcode: []byte{0x0f, 0x56}},
	// https://www.felixcloutier.com/x86/popcnt
	POPCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xb8}},
	POPCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xb8}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/roundss
	ROUNDSS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0a}, needArg: true},
	// https://www.felixcloutier.com/x86/roundsd
	ROUNDSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0b}, needArg: true},
	// https://www.felixcloutier.com/x86/sqrtss
	SQRTSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x51}},
	// https://www.felixcloutier.com/x86/sqrtsd
	SQRTSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x51}},
	// https://www.felixcloutier.com/x86/sub
	SUBL: {opcode: []byte{0x29}, srcOnModRMReg: true},
	SUBQ: {opcode: []byte{0x29}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/subss
	SUBSS: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5c}},
	// https://www.felixcloutier.com/x86/subsd
	SUBSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x5c}},
	// https://www.felixcloutier.com/x86/test
	TESTL: {opcode: []byte{0x85}, srcOnModRMReg: true},
	TESTQ: {opcode: []byte{0x85}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/tzcnt
	TZCNTL: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbc}},
	TZCNTQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xbc}, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/ucomisd
	UCOMISD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x2e}},
	// https://www.felixcloutier.com/x86/ucomiss
	UCOMISS: {opcode: []byte{0x0f, 0x2e}},
	// https://www.felixcloutier.com/x86/xchg
	XCHGQ: {opcode: []byte{0x87}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/xor
	XORL: {opcode: []byte{0x31}, srcOnModRMReg: true},
	XORQ: {opcode: []byte{0x31}, rPrefix: rexPrefixW, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/xorpd
	XORPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x57}},
	XORPS: {opcode: []byte{0x0f, 0x57}},
	// https://www.felixcloutier.com/x86/pinsrb:pinsrd:pinsrq
	PINSRB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x20}, needArg: true},
	// https://www.felixcloutier.com/x86/pinsrw
	PINSRW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc4}, needArg: true},
	// https://www.felixcloutier.com/x86/pinsrb:pinsrd:pinsrq
	PINSRD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x22}, needArg: true},
	// https://www.felixcloutier.com/x86/pinsrb:pinsrd:pinsrq
	PINSRQ: {mandatoryPrefix: 0x66, rPrefix: rexPrefixW, opcode: []byte{0x0f, 0x3a, 0x22}, needArg: true},
	// https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
	MOVDQU: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x6f}},
	// https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
	MOVDQA: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x6f}},
	// https://www.felixcloutier.com/x86/paddb:paddw:paddd:paddq
	PADDB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfc}},
	PADDW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfd}},
	PADDD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfe}},
	PADDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd4}},
	// https://www.felixcloutier.com/x86/psubb:psubw:psubd
	PSUBB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf8}},
	PSUBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf9}},
	PSUBD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfa}},
	// https://www.felixcloutier.com/x86/psubq
	PSUBQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xfb}},
	// https://www.felixcloutier.com/x86/addps
	ADDPS: {opcode: []byte{0x0f, 0x58}},
	// https://www.felixcloutier.com/x86/addpd
	ADDPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x58}},
	// https://www.felixcloutier.com/x86/subps
	SUBPS: {opcode: []byte{0x0f, 0x5c}},
	// https://www.felixcloutier.com/x86/subpd
	SUBPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5c}},
	// https://www.felixcloutier.com/x86/pxor
	PXOR: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xef}},
	// https://www.felixcloutier.com/x86/pand
	PAND: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdb}},
	// https://www.felixcloutier.com/x86/por
	POR: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xeb}},
	// https://www.felixcloutier.com/x86/pandn
	PANDN: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdf}},
	// https://www.felixcloutier.com/x86/pshufb
	PSHUFB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x0}},
	// https://www.felixcloutier.com/x86/pshufd
	PSHUFD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x70}, needArg: true},
	// https://www.felixcloutier.com/x86/pextrb:pextrd:pextrq
	PEXTRB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x14}, needArg: true, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/pextrw
	PEXTRW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc5}, needArg: true},
	// https://www.felixcloutier.com/x86/pextrb:pextrd:pextrq
	PEXTRD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x16}, needArg: true, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/pextrb:pextrd:pextrq
	PEXTRQ: {rPrefix: rexPrefixW, mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x16}, needArg: true, srcOnModRMReg: true},
	// https://www.felixcloutier.com/x86/insertps
	INSERTPS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x21}, needArg: true},
	// https://www.felixcloutier.com/x86/movlhps
	MOVLHPS: {opcode: []byte{0x0f, 0x16}},
	// https://www.felixcloutier.com/x86/ptest
	PTEST: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x17}},
	// https://www.felixcloutier.com/x86/pcmpeqb:pcmpeqw:pcmpeqd
	PCMPEQB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x74}},
	PCMPEQW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x75}},
	PCMPEQD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x76}},
	// https://www.felixcloutier.com/x86/pcmpeqq
	PCMPEQQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x29}},
	// https://www.felixcloutier.com/x86/paddusb:paddusw
	PADDUSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdc}},
	// https://www.felixcloutier.com/x86/movsd
	MOVSD: {mandatoryPrefix: 0xf2, opcode: []byte{0x0f, 0x10}},
	// https://www.felixcloutier.com/x86/packsswb:packssdw
	PACKSSWB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x63}},
	// https://www.felixcloutier.com/x86/pmovmskb
	PMOVMSKB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd7}},
	// https://www.felixcloutier.com/x86/movmskps
	MOVMSKPS: {opcode: []byte{0x0f, 0x50}},
	// https://www.felixcloutier.com/x86/movmskpd
	MOVMSKPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x50}},
	// https://www.felixcloutier.com/x86/psraw:psrad:psraq
	PSRAD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe2}},
	// https://www.felixcloutier.com/x86/psraw:psrad:psraq
	PSRAW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe1}},
	// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
	PSRLQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd3}},
	// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
	PSRLD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd2}},
	// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
	PSRLW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd1}},
	// https://www.felixcloutier.com/x86/psllw:pslld:psllq
	PSLLW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf1}},
	// https://www.felixcloutier.com/x86/psllw:pslld:psllq
	PSLLD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf2}},
	// https://www.felixcloutier.com/x86/psllw:pslld:psllq
	PSLLQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf3}},
	// https://www.felixcloutier.com/x86/punpcklbw:punpcklwd:punpckldq:punpcklqdq
	PUNPCKLBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x60}},
	// https://www.felixcloutier.com/x86/punpckhbw:punpckhwd:punpckhdq:punpckhqdq
	PUNPCKHBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x68}},
	// https://www.felixcloutier.com/x86/cmpps
	CMPPS: {opcode: []byte{0x0f, 0xc2}, needArg: true},
	// https://www.felixcloutier.com/x86/cmppd
	CMPPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc2}, needArg: true},
	// https://www.felixcloutier.com/x86/pcmpgtq
	PCMPGTQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x37}},
	// https://www.felixcloutier.com/x86/pcmpgtb:pcmpgtw:pcmpgtd
	PCMPGTD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x66}},
	// https://www.felixcloutier.com/x86/pcmpgtb:pcmpgtw:pcmpgtd
	PCMPGTW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x65}},
	// https://www.felixcloutier.com/x86/pcmpgtb:pcmpgtw:pcmpgtd
	PCMPGTB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x64}},
	// https://www.felixcloutier.com/x86/pminsd:pminsq
	PMINSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x39}},
	// https://www.felixcloutier.com/x86/pmaxsb:pmaxsw:pmaxsd:pmaxsq
	PMAXSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3d}},
	// https://www.felixcloutier.com/x86/pmaxsb:pmaxsw:pmaxsd:pmaxsq
	PMAXSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xee}},
	// https://www.felixcloutier.com/x86/pmaxsb:pmaxsw:pmaxsd:pmaxsq
	PMAXSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3c}},
	// https://www.felixcloutier.com/x86/pminsb:pminsw
	PMINSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xea}},
	// https://www.felixcloutier.com/x86/pminsb:pminsw
	PMINSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x38}},
	// https://www.felixcloutier.com/x86/pminud:pminuq
	PMINUD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3b}},
	// https://www.felixcloutier.com/x86/pminub:pminuw
	PMINUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3a}},
	// https://www.felixcloutier.com/x86/pminub:pminuw
	PMINUB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xda}},
	// https://www.felixcloutier.com/x86/pmaxud:pmaxuq
	PMAXUD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3f}},
	// https://www.felixcloutier.com/x86/pmaxub:pmaxuw
	PMAXUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3e}},
	// https://www.felixcloutier.com/x86/pmaxub:pmaxuw
	PMAXUB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xde}},
	// https://www.felixcloutier.com/x86/pmullw
	PMULLW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd5}},
	// https://www.felixcloutier.com/x86/pmulld:pmullq
	PMULLD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x40}},
|
|
// https://www.felixcloutier.com/x86/pmuludq
|
|
PMULUDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf4}},
|
|
// https://www.felixcloutier.com/x86/psubsb:psubsw
|
|
PSUBSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe8}},
|
|
// https://www.felixcloutier.com/x86/psubsb:psubsw
|
|
PSUBSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe9}},
|
|
// https://www.felixcloutier.com/x86/psubusb:psubusw
|
|
PSUBUSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd8}},
|
|
// https://www.felixcloutier.com/x86/psubusb:psubusw
|
|
PSUBUSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xd9}},
|
|
// https://www.felixcloutier.com/x86/paddsb:paddsw
|
|
PADDSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xed}},
|
|
// https://www.felixcloutier.com/x86/paddsb:paddsw
|
|
PADDSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xec}},
|
|
// https://www.felixcloutier.com/x86/paddusb:paddusw
|
|
PADDUSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdd}},
|
|
// https://www.felixcloutier.com/x86/pavgb:pavgw
|
|
PAVGB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe0}},
|
|
// https://www.felixcloutier.com/x86/pavgb:pavgw
|
|
PAVGW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe3}},
|
|
// https://www.felixcloutier.com/x86/pabsb:pabsw:pabsd:pabsq
|
|
PABSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x1c}},
|
|
// https://www.felixcloutier.com/x86/pabsb:pabsw:pabsd:pabsq
|
|
PABSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x1d}},
|
|
// https://www.felixcloutier.com/x86/pabsb:pabsw:pabsd:pabsq
|
|
PABSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x1e}},
|
|
// https://www.felixcloutier.com/x86/blendvpd
|
|
BLENDVPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x15}},
|
|
// https://www.felixcloutier.com/x86/maxpd
|
|
MAXPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5f}},
|
|
// https://www.felixcloutier.com/x86/maxps
|
|
MAXPS: {opcode: []byte{0x0f, 0x5f}},
|
|
// https://www.felixcloutier.com/x86/minpd
|
|
MINPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5d}},
|
|
// https://www.felixcloutier.com/x86/minps
|
|
MINPS: {opcode: []byte{0x0f, 0x5d}},
|
|
// https://www.felixcloutier.com/x86/andnpd
|
|
ANDNPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x55}},
|
|
// https://www.felixcloutier.com/x86/andnps
|
|
ANDNPS: {opcode: []byte{0x0f, 0x55}},
|
|
// https://www.felixcloutier.com/x86/mulps
|
|
MULPS: {opcode: []byte{0x0f, 0x59}},
|
|
// https://www.felixcloutier.com/x86/mulpd
|
|
MULPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x59}},
|
|
// https://www.felixcloutier.com/x86/divps
|
|
DIVPS: {opcode: []byte{0x0f, 0x5e}},
|
|
// https://www.felixcloutier.com/x86/divpd
|
|
DIVPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5e}},
|
|
// https://www.felixcloutier.com/x86/sqrtps
|
|
SQRTPS: {opcode: []byte{0x0f, 0x51}},
|
|
// https://www.felixcloutier.com/x86/sqrtpd
|
|
SQRTPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x51}},
|
|
// https://www.felixcloutier.com/x86/roundps
|
|
ROUNDPS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x08}, needArg: true},
|
|
// https://www.felixcloutier.com/x86/roundpd
|
|
ROUNDPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x09}, needArg: true},
|
|
// https://www.felixcloutier.com/x86/palignr
|
|
PALIGNR: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x3a, 0x0f}, needArg: true},
|
|
// https://www.felixcloutier.com/x86/punpcklbw:punpcklwd:punpckldq:punpcklqdq
|
|
PUNPCKLWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x61}},
|
|
// https://www.felixcloutier.com/x86/punpckhbw:punpckhwd:punpckhdq:punpckhqdq
|
|
PUNPCKHWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x69}},
|
|
// https://www.felixcloutier.com/x86/pmulhuw
|
|
PMULHUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe4}},
|
|
// https://www.felixcloutier.com/x86/pmuldq
|
|
PMULDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x28}},
|
|
// https://www.felixcloutier.com/x86/pmulhrsw
|
|
PMULHRSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x0b}},
|
|
// https://www.felixcloutier.com/x86/pmovsx
|
|
PMOVSXBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x20}},
|
|
// https://www.felixcloutier.com/x86/pmovsx
|
|
PMOVSXWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x23}},
|
|
// https://www.felixcloutier.com/x86/pmovsx
|
|
PMOVSXDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x25}},
|
|
// https://www.felixcloutier.com/x86/pmovzx
|
|
PMOVZXBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x30}},
|
|
// https://www.felixcloutier.com/x86/pmovzx
|
|
PMOVZXWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x33}},
|
|
// https://www.felixcloutier.com/x86/pmovzx
|
|
PMOVZXDQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x35}},
|
|
// https://www.felixcloutier.com/x86/pmulhw
|
|
PMULHW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe5}},
|
|
// https://www.felixcloutier.com/x86/cmpps
|
|
CMPEQPS: {opcode: []byte{0x0f, 0xc2}, needArg: true},
|
|
// https://www.felixcloutier.com/x86/cmppd
|
|
CMPEQPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc2}, needArg: true},
|
|
// https://www.felixcloutier.com/x86/cvttps2dq
|
|
CVTTPS2DQ: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x5b}},
|
|
// https://www.felixcloutier.com/x86/cvtdq2ps
|
|
CVTDQ2PS: {opcode: []byte{0x0f, 0x5b}},
|
|
// https://www.felixcloutier.com/x86/cvtdq2pd
|
|
CVTDQ2PD: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0xe6}},
|
|
// https://www.felixcloutier.com/x86/cvtpd2ps
|
|
CVTPD2PS: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x5a}},
|
|
// https://www.felixcloutier.com/x86/cvtps2pd
|
|
CVTPS2PD: {opcode: []byte{0x0f, 0x5a}},
|
|
// https://www.felixcloutier.com/x86/movupd
|
|
MOVUPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x10}},
|
|
// https://www.felixcloutier.com/x86/shufps
|
|
SHUFPS: {opcode: []byte{0x0f, 0xc6}, needArg: true},
|
|
// https://www.felixcloutier.com/x86/pmaddwd
|
|
PMADDWD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xf5}},
|
|
// https://www.felixcloutier.com/x86/unpcklps
|
|
UNPCKLPS: {opcode: []byte{0x0f, 0x14}},
|
|
// https://www.felixcloutier.com/x86/packuswb
|
|
PACKUSWB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x67}},
|
|
// https://www.felixcloutier.com/x86/packsswb:packssdw
|
|
PACKSSDW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x6b}},
|
|
// https://www.felixcloutier.com/x86/packusdw
|
|
PACKUSDW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x2b}},
|
|
// https://www.felixcloutier.com/x86/pmaddubsw
|
|
PMADDUBSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x04}},
|
|
// https://www.felixcloutier.com/x86/cvttpd2dq
|
|
CVTTPD2DQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xe6}},
|
|
}
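
// For example, the PXOR entry above expands to "66 0f ef /r": encoding PXOR with
// src X1 and dst X0 places X0 on ModRM:reg and X1 on ModRM:r/m, yielding the bytes
// 0x66, 0x0f, 0xef, 0xc1.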

var registerToRegisterShiftOpcode = [instructionEnd]*struct {
	opcode         []byte
	rPrefix        rexPrefix
	modRMExtension byte
}{
	// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
	ROLL: {opcode: []byte{0xd3}},
	ROLQ: {opcode: []byte{0xd3}, rPrefix: rexPrefixW},
	RORL: {opcode: []byte{0xd3}, modRMExtension: 0b00_001_000},
	RORQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_001_000, rPrefix: rexPrefixW},
	// https://www.felixcloutier.com/x86/sal:sar:shl:shr
	SARL: {opcode: []byte{0xd3}, modRMExtension: 0b00_111_000},
	SARQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_111_000, rPrefix: rexPrefixW},
	SHLL: {opcode: []byte{0xd3}, modRMExtension: 0b00_100_000},
	SHLQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_100_000, rPrefix: rexPrefixW},
	SHRL: {opcode: []byte{0xd3}, modRMExtension: 0b00_101_000},
	SHRQ: {opcode: []byte{0xd3}, modRMExtension: 0b00_101_000, rPrefix: rexPrefixW},
}
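
// All entries above share the 0xd3 opcode ("shift r/m by CL") and differ only in the
// ModRM reg-field extension. For example, SHLQ of AX by the count in CX encodes as
// 0x48, 0xd3, 0xe0: REX.W, the opcode, then ModRM 0b11_100_000 carrying the "/4"
// extension and AX's register bits.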

func (a *AssemblerImpl) encodeRegisterToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
	// Alias for readability
	inst := n.instruction
	base := buf.Len()
	code := buf.Append(8)[:0]

	switch inst {
	case MOVL, MOVQ:
		var (
			opcode          []byte
			mandatoryPrefix byte
			srcOnModRMReg   bool
			rPrefix         rexPrefix
		)
		srcIsFloat, dstIsFloat := isVectorRegister(n.srcReg), isVectorRegister(n.dstReg)
		f2f := srcIsFloat && dstIsFloat
		if f2f {
			// https://www.felixcloutier.com/x86/movq
			opcode, mandatoryPrefix = []byte{0x0f, 0x7e}, 0xf3
		} else if srcIsFloat && !dstIsFloat {
			// https://www.felixcloutier.com/x86/movd:movq
			opcode, mandatoryPrefix, srcOnModRMReg = []byte{0x0f, 0x7e}, 0x66, true
		} else if !srcIsFloat && dstIsFloat {
			// https://www.felixcloutier.com/x86/movd:movq
			opcode, mandatoryPrefix, srcOnModRMReg = []byte{0x0f, 0x6e}, 0x66, false
		} else {
			// https://www.felixcloutier.com/x86/mov
			opcode, srcOnModRMReg = []byte{0x89}, true
		}

		rexPrefix, modRM, err := n.getRegisterToRegisterModRM(srcOnModRMReg)
		if err != nil {
			return err
		}
		rexPrefix |= rPrefix

		if inst == MOVQ && !f2f {
			rexPrefix |= rexPrefixW
		}
		if mandatoryPrefix != 0 {
			code = append(code, mandatoryPrefix)
		}
		if rexPrefix != rexPrefixNone {
			code = append(code, rexPrefix)
		}
		code = append(code, opcode...)
		code = append(code, modRM)
		buf.Truncate(base + len(code))
		return nil
	}

	if op := registerToRegisterOpcode[inst]; op != nil {
		rexPrefix, modRM, err := n.getRegisterToRegisterModRM(op.srcOnModRMReg)
		if err != nil {
			return err
		}
		rexPrefix |= op.rPrefix

		if op.isSrc8bit && RegSP <= n.srcReg && n.srcReg <= RegDI {
			// If the operand is the 8-bit form of SP, BP, SI, or DI, we need the default REX prefix
			// to select SPL, BPL, SIL, or DIL instead of the legacy high-byte registers.
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers
			rexPrefix |= rexPrefixDefault
		}

		if op.mandatoryPrefix != 0 {
			code = append(code, op.mandatoryPrefix)
		}

		if rexPrefix != rexPrefixNone {
			code = append(code, rexPrefix)
		}
		code = append(code, op.opcode...)
		code = append(code, modRM)

		if op.needArg {
			code = append(code, n.arg)
		}
	} else if op := registerToRegisterShiftOpcode[inst]; op != nil {
		reg3bits, rexPrefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldRM)
		rexPrefix |= op.rPrefix
		if rexPrefix != rexPrefixNone {
			code = append(code, rexPrefix)
		}

		// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
		modRM := 0b11_000_000 |
			(op.modRMExtension) |
			reg3bits
		code = append(code, op.opcode...)
		code = append(code, modRM)
	} else {
		return errorEncodingUnsupported(n)
	}

	buf.Truncate(base + len(code))
	return nil
}
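
// For example, MOVQ between general-purpose registers uses opcode 0x89 with REX.W
// (AX to CX yields 0x48, 0x89, 0xc1), whereas MOVQ from an integer register into a
// vector register takes the 0x66-prefixed 0x0f 0x6e form (AX to X0 yields
// 0x66, 0x48, 0x0f, 0x6e, 0xc0).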

func (a *AssemblerImpl) encodeRegisterToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
	rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(true)
	if err != nil {
		return err
	}

	var opcode []byte
	var mandatoryPrefix byte
	var isShiftInstruction bool
	var needArg bool
	switch n.instruction {
	case CMPL:
		// https://www.felixcloutier.com/x86/cmp
		opcode = []byte{0x3b}
	case CMPQ:
		// https://www.felixcloutier.com/x86/cmp
		rexPrefix |= rexPrefixW
		opcode = []byte{0x3b}
	case MOVB:
		// https://www.felixcloutier.com/x86/mov
		opcode = []byte{0x88}
		// 8-bit register operands on SP, BP, SI, or DI need the default REX prefix.
		if n.srcReg >= RegSP && n.srcReg <= RegDI {
			rexPrefix |= rexPrefixDefault
		}
	case MOVL:
		if isVectorRegister(n.srcReg) {
			// https://www.felixcloutier.com/x86/movd:movq
			opcode = []byte{0x0f, 0x7e}
			mandatoryPrefix = 0x66
		} else {
			// https://www.felixcloutier.com/x86/mov
			opcode = []byte{0x89}
		}
	case MOVQ:
		if isVectorRegister(n.srcReg) {
			// https://www.felixcloutier.com/x86/movq
			opcode = []byte{0x0f, 0xd6}
			mandatoryPrefix = 0x66
		} else {
			// https://www.felixcloutier.com/x86/mov
			rexPrefix |= rexPrefixW
			opcode = []byte{0x89}
		}
	case MOVW:
		// https://www.felixcloutier.com/x86/mov
		// Note: 0x66 is needed to indicate that the operand size is 16-bit.
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Operand-size_and_address-size_override_prefix
		mandatoryPrefix = 0x66
		opcode = []byte{0x89}
	case SARL:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		modRM |= 0b00_111_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SARQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		rexPrefix |= rexPrefixW
		modRM |= 0b00_111_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHLL:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		modRM |= 0b00_100_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHLQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		rexPrefix |= rexPrefixW
		modRM |= 0b00_100_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHRL:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		modRM |= 0b00_101_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case SHRQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		rexPrefix |= rexPrefixW
		modRM |= 0b00_101_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case ROLL:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case ROLQ:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		rexPrefix |= rexPrefixW
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case RORL:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		modRM |= 0b00_001_000
		opcode = []byte{0xd3}
		isShiftInstruction = true
	case RORQ:
		// https://www.felixcloutier.com/x86/rcl:rcr:rol:ror
		rexPrefix |= rexPrefixW
		opcode = []byte{0xd3}
		modRM |= 0b00_001_000
		isShiftInstruction = true
	case MOVDQU:
		// https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
		mandatoryPrefix = 0xf3
		opcode = []byte{0x0f, 0x7f}
	case PEXTRB: // https://www.felixcloutier.com/x86/pextrb:pextrd:pextrq
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x3a, 0x14}
		needArg = true
	case PEXTRW: // https://www.felixcloutier.com/x86/pextrw
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x3a, 0x15}
		needArg = true
	case PEXTRD: // https://www.felixcloutier.com/x86/pextrb:pextrd:pextrq
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x3a, 0x16}
		needArg = true
	case PEXTRQ: // https://www.felixcloutier.com/x86/pextrb:pextrd:pextrq
		mandatoryPrefix = 0x66
		rexPrefix |= rexPrefixW // REX.W
		opcode = []byte{0x0f, 0x3a, 0x16}
		needArg = true
	default:
		return errorEncodingUnsupported(n)
	}

	if !isShiftInstruction {
		srcReg3Bits, prefix := register3bits(n.srcReg, registerSpecifierPositionModRMFieldReg)

		rexPrefix |= prefix
		modRM |= srcReg3Bits << 3 // Place the source register on ModRM:reg
	} else {
		if n.srcReg != RegCX {
			return fmt.Errorf("shift instruction %s requires CX register as src but got %s", InstructionName(n.instruction), RegisterName(n.srcReg))
		}
	}

	base := buf.Len()
	code := buf.Append(16)[:0]

	if mandatoryPrefix != 0 {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Mandatory_prefix
		code = append(code, mandatoryPrefix)
	}

	if rexPrefix != rexPrefixNone {
		code = append(code, rexPrefix)
	}

	code = append(code, opcode...)
	code = append(code, modRM)

	if sbiExist {
		code = append(code, sbi)
	}

	if displacementWidth != 0 {
		code = appendConst(code, n.dstConst, displacementWidth)
	}

	if needArg {
		code = append(code, n.arg)
	}

	buf.Truncate(base + len(code))
	return
}
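
// For example, MOVQ of AX into [CX + 16] encodes as 0x48, 0x89, 0x41, 0x10:
// REX.W, opcode 0x89, ModRM 0b01_000_001 (register base + 8-bit displacement, with
// AX on ModRM:reg), then the displacement byte.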

func (a *AssemblerImpl) encodeRegisterToConst(buf asm.Buffer, n *nodeImpl) (err error) {
	regBits, prefix := register3bits(n.srcReg, registerSpecifierPositionModRMFieldRM)

	base := buf.Len()
	code := buf.Append(10)[:0]

	switch n.instruction {
	case CMPL, CMPQ:
		if n.instruction == CMPQ {
			prefix |= rexPrefixW
		}
		if prefix != rexPrefixNone {
			code = append(code, prefix)
		}
		is8bitConst := fitInSigned8bit(n.dstConst)
		// https://www.felixcloutier.com/x86/cmp
		if n.srcReg == RegAX && !is8bitConst {
			code = append(code, 0x3d)
		} else {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM := 0b11_000_000 | // Specifying that operand is register.
				0b00_111_000 | // CMP with immediate needs "/7" extension.
				regBits
			if is8bitConst {
				code = append(code, 0x83, modRM)
			} else {
				code = append(code, 0x81, modRM)
			}
		}
	default:
		err = errorEncodingUnsupported(n)
	}

	if fitInSigned8bit(n.dstConst) {
		code = append(code, byte(n.dstConst))
	} else {
		code = appendUint32(code, uint32(n.dstConst))
	}

	buf.Truncate(base + len(code))
	return
}
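
// For example, CMPQ of CX against the constant 1 fits in a signed 8-bit immediate and
// encodes as 0x48, 0x83, 0xf9, 0x01: REX.W, opcode 0x83, ModRM 0b11_111_001 (the "/7"
// extension plus CX's register bits), then the immediate.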

func (a *AssemblerImpl) finalizeReadInstructionAddressNode(code []byte, n *nodeImpl) (err error) {
	// Find the target instruction node.
	targetNode := n
	for ; targetNode != nil; targetNode = targetNode.next {
		if targetNode.instruction == n.readInstructionAddressBeforeTargetInstruction {
			targetNode = targetNode.next
			break
		}
	}

	if targetNode == nil {
		return errors.New("BUG: target instruction not found for read instruction address")
	}

	offset := targetNode.OffsetInBinary() - (n.OffsetInBinary() + 7 /* 7 = the length of the LEAQ instruction */)
	if offset >= math.MaxInt32 {
		return errors.New("BUG: too large offset for LEAQ instruction")
	}

	binary.LittleEndian.PutUint32(code[n.OffsetInBinary()+3:], uint32(int32(offset)))
	return nil
}

func (a *AssemblerImpl) encodeReadInstructionAddress(buf asm.Buffer, n *nodeImpl) error {
	dstReg3Bits, rexPrefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldReg)

	a.readInstructionAddressNodes = append(a.readInstructionAddressNodes, n)

	// https://www.felixcloutier.com/x86/lea
	opcode := byte(0x8d)
	rexPrefix |= rexPrefixW

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing
	modRM := 0b00_000_101 | // Indicate "LEAQ [RIP + 32bit displacement], dstReg" encoding.
		(dstReg3Bits << 3) // Place the dstReg on ModRM:reg.

	code := buf.Append(7)
	code[0] = rexPrefix
	code[1] = opcode
	code[2] = modRM
	binary.LittleEndian.PutUint32(code[3:], 0) // Reserve the displacement; filled in by finalizeReadInstructionAddressNode.
	return nil
}
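
// For dstReg=AX this emits 0x48, 0x8d, 0x05, 0x00, 0x00, 0x00, 0x00, i.e.
// "LEAQ [RIP + 0], AX"; the zero displacement is patched once the target
// instruction's offset is known.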

func (a *AssemblerImpl) encodeMemoryToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
	if n.instruction == LEAQ && n.readInstructionAddressBeforeTargetInstruction != NONE {
		return a.encodeReadInstructionAddress(buf, n)
	}

	rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(false)
	if err != nil {
		return err
	}

	dstReg3Bits, prefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldReg)
	rexPrefix |= prefix
	modRM |= dstReg3Bits << 3 // Place the destination register on ModRM:reg

	var mandatoryPrefix byte
	var opcode []byte
	var needArg bool

	switch n.instruction {
	case ADDL:
		// https://www.felixcloutier.com/x86/add
		opcode = []byte{0x03}
	case ADDQ:
		// https://www.felixcloutier.com/x86/add
		rexPrefix |= rexPrefixW
		opcode = []byte{0x03}
	case CMPL:
		// https://www.felixcloutier.com/x86/cmp
		opcode = []byte{0x39}
	case CMPQ:
		// https://www.felixcloutier.com/x86/cmp
		rexPrefix |= rexPrefixW
		opcode = []byte{0x39}
	case LEAQ:
		// https://www.felixcloutier.com/x86/lea
		rexPrefix |= rexPrefixW
		opcode = []byte{0x8d}
	case MOVBLSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		opcode = []byte{0x0f, 0xbe}
	case MOVBLZX:
		// https://www.felixcloutier.com/x86/movzx
		opcode = []byte{0x0f, 0xb6}
	case MOVBQSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		rexPrefix |= rexPrefixW
		opcode = []byte{0x0f, 0xbe}
	case MOVBQZX:
		// https://www.felixcloutier.com/x86/movzx
		rexPrefix |= rexPrefixW
		opcode = []byte{0x0f, 0xb6}
	case MOVLQSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		rexPrefix |= rexPrefixW
		opcode = []byte{0x63}
	case MOVLQZX:
		// https://www.felixcloutier.com/x86/mov
		// Note: MOVLQZX means zero-extending a 32-bit register to a 64-bit register,
		// which is semantically equivalent to a 32-bit to 32-bit MOV.
		opcode = []byte{0x8B}
	case MOVL:
		if isVectorRegister(n.dstReg) {
			// https://www.felixcloutier.com/x86/movd:movq
			opcode = []byte{0x0f, 0x6e}
			mandatoryPrefix = 0x66
		} else {
			// https://www.felixcloutier.com/x86/mov
			opcode = []byte{0x8B}
		}
	case MOVQ:
		if isVectorRegister(n.dstReg) {
			// https://www.felixcloutier.com/x86/movq
			opcode = []byte{0x0f, 0x7e}
			mandatoryPrefix = 0xf3
		} else {
			// https://www.felixcloutier.com/x86/mov
			rexPrefix |= rexPrefixW
			opcode = []byte{0x8B}
		}
	case MOVWLSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		opcode = []byte{0x0f, 0xbf}
	case MOVWLZX:
		// https://www.felixcloutier.com/x86/movzx
		opcode = []byte{0x0f, 0xb7}
	case MOVWQSX:
		// https://www.felixcloutier.com/x86/movsx:movsxd
		rexPrefix |= rexPrefixW
		opcode = []byte{0x0f, 0xbf}
	case MOVWQZX:
		// https://www.felixcloutier.com/x86/movzx
		rexPrefix |= rexPrefixW
		opcode = []byte{0x0f, 0xb7}
	case SUBQ:
		// https://www.felixcloutier.com/x86/sub
		rexPrefix |= rexPrefixW
		opcode = []byte{0x2b}
	case SUBSD:
		// https://www.felixcloutier.com/x86/subsd
		opcode = []byte{0x0f, 0x5c}
		mandatoryPrefix = 0xf2
	case SUBSS:
		// https://www.felixcloutier.com/x86/subss
		opcode = []byte{0x0f, 0x5c}
		mandatoryPrefix = 0xf3
	case UCOMISD:
		// https://www.felixcloutier.com/x86/ucomisd
		opcode = []byte{0x0f, 0x2e}
		mandatoryPrefix = 0x66
	case UCOMISS:
		// https://www.felixcloutier.com/x86/ucomiss
		opcode = []byte{0x0f, 0x2e}
	case MOVDQU:
		// https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
		mandatoryPrefix = 0xf3
		opcode = []byte{0x0f, 0x6f}
	case PMOVSXBW: // https://www.felixcloutier.com/x86/pmovsx
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x38, 0x20}
	case PMOVSXWD: // https://www.felixcloutier.com/x86/pmovsx
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x38, 0x23}
	case PMOVSXDQ: // https://www.felixcloutier.com/x86/pmovsx
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x38, 0x25}
	case PMOVZXBW: // https://www.felixcloutier.com/x86/pmovzx
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x38, 0x30}
	case PMOVZXWD: // https://www.felixcloutier.com/x86/pmovzx
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x38, 0x33}
	case PMOVZXDQ: // https://www.felixcloutier.com/x86/pmovzx
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x38, 0x35}
	case PINSRB: // https://www.felixcloutier.com/x86/pinsrb:pinsrd:pinsrq
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x3a, 0x20}
		needArg = true
	case PINSRW: // https://www.felixcloutier.com/x86/pinsrw
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0xc4}
		needArg = true
	case PINSRD: // https://www.felixcloutier.com/x86/pinsrb:pinsrd:pinsrq
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x3a, 0x22}
		needArg = true
	case PINSRQ: // https://www.felixcloutier.com/x86/pinsrb:pinsrd:pinsrq
		rexPrefix |= rexPrefixW
		mandatoryPrefix = 0x66
		opcode = []byte{0x0f, 0x3a, 0x22}
		needArg = true
	default:
		return errorEncodingUnsupported(n)
	}

	base := buf.Len()
	code := buf.Append(16)[:0]

	if mandatoryPrefix != 0 {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#Mandatory_prefix
		code = append(code, mandatoryPrefix)
	}

	if rexPrefix != rexPrefixNone {
		code = append(code, rexPrefix)
	}

	code = append(code, opcode...)
	code = append(code, modRM)

	if sbiExist {
		code = append(code, sbi)
	}

	if displacementWidth != 0 {
		code = appendConst(code, n.srcConst, displacementWidth)
	}

	if needArg {
		code = append(code, n.arg)
	}

	buf.Truncate(base + len(code))
	return
}
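
// For example, MOVQ from [AX] into CX encodes as 0x48, 0x8b, 0x08: REX.W, opcode 0x8b,
// then ModRM 0b00_001_000 (CX on ModRM:reg, AX on ModRM:r/m, no displacement).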

func (a *AssemblerImpl) encodeConstToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
	regBits, rexPrefix := register3bits(n.dstReg, registerSpecifierPositionModRMFieldRM)

	isFloatReg := isVectorRegister(n.dstReg)
	switch n.instruction {
	case PSLLD, PSLLQ, PSRLD, PSRLQ, PSRAW, PSRLW, PSLLW, PSRAD:
		if !isFloatReg {
			return fmt.Errorf("%s needs float register but got %s", InstructionName(n.instruction), RegisterName(n.dstReg))
		}
	default:
		if isFloatReg {
			return fmt.Errorf("%s needs int register but got %s", InstructionName(n.instruction), RegisterName(n.dstReg))
		}
	}

	if n.instruction != MOVQ && !fitIn32bit(n.srcConst) {
		return fmt.Errorf("constant must fit in 32-bit integer for %s, but got %d", InstructionName(n.instruction), n.srcConst)
	} else if (n.instruction == SHLQ || n.instruction == SHRQ) && (n.srcConst < 0 || n.srcConst > math.MaxUint8) {
		return fmt.Errorf("constant must fit in positive 8-bit integer for %s, but got %d", InstructionName(n.instruction), n.srcConst)
	} else if (n.instruction == PSLLD ||
		n.instruction == PSLLQ ||
		n.instruction == PSRLD ||
		n.instruction == PSRLQ) && (n.srcConst < math.MinInt8 || n.srcConst > math.MaxInt8) {
		return fmt.Errorf("constant must fit in signed 8-bit integer for %s, but got %d", InstructionName(n.instruction), n.srcConst)
	}

	base := buf.Len()
	code := buf.Append(32)[:0]

	isSigned8bitConst := fitInSigned8bit(n.srcConst)
	switch inst := n.instruction; inst {
	case ADDQ:
		// https://www.felixcloutier.com/x86/add
		rexPrefix |= rexPrefixW
		if n.dstReg == RegAX && !isSigned8bitConst {
			code = append(code, rexPrefix, 0x05)
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				regBits
			if isSigned8bitConst {
				code = append(code, rexPrefix, 0x83, modRM)
			} else {
				code = append(code, rexPrefix, 0x81, modRM)
			}
		}
		if isSigned8bitConst {
			code = append(code, byte(n.srcConst))
		} else {
			code = appendUint32(code, uint32(n.srcConst))
		}
	case ANDQ:
		// https://www.felixcloutier.com/x86/and
		rexPrefix |= rexPrefixW
		if n.dstReg == RegAX && !isSigned8bitConst {
			code = append(code, rexPrefix, 0x25)
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				0b00_100_000 | // AND with immediate needs "/4" extension.
				regBits
			if isSigned8bitConst {
				code = append(code, rexPrefix, 0x83, modRM)
			} else {
				code = append(code, rexPrefix, 0x81, modRM)
			}
		}
		if fitInSigned8bit(n.srcConst) {
			code = append(code, byte(n.srcConst))
		} else {
			code = appendUint32(code, uint32(n.srcConst))
		}
	case TESTQ:
		// https://www.felixcloutier.com/x86/test
		rexPrefix |= rexPrefixW
		if n.dstReg == RegAX && !isSigned8bitConst {
			code = append(code, rexPrefix, 0xa9)
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				regBits
			code = append(code, rexPrefix, 0xf7, modRM)
		}
		code = appendUint32(code, uint32(n.srcConst))
	case MOVL:
		// https://www.felixcloutier.com/x86/mov
		if rexPrefix != rexPrefixNone {
			code = append(code, rexPrefix)
		}
		code = append(code, 0xb8|regBits)
		code = appendUint32(code, uint32(n.srcConst))
	case MOVQ:
		// https://www.felixcloutier.com/x86/mov
		if fitIn32bit(n.srcConst) {
			if n.srcConst > math.MaxInt32 {
				if rexPrefix != rexPrefixNone {
					code = append(code, rexPrefix)
				}
				code = append(code, 0xb8|regBits)
			} else {
				rexPrefix |= rexPrefixW
				modRM := 0b11_000_000 | // Specifying that operand is register.
					regBits
				code = append(code, rexPrefix, 0xc7, modRM)
			}
			code = appendUint32(code, uint32(n.srcConst))
		} else {
			rexPrefix |= rexPrefixW
			code = append(code, rexPrefix, 0xb8|regBits)
			code = appendUint64(code, uint64(n.srcConst))
		}
	case SHLQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		rexPrefix |= rexPrefixW
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_100_000 | // SHL with immediate needs "/4" extension.
			regBits
		if n.srcConst == 1 {
			code = append(code, rexPrefix, 0xd1, modRM)
		} else {
			code = append(code, rexPrefix, 0xc1, modRM, byte(n.srcConst))
		}
	case SHRQ:
		// https://www.felixcloutier.com/x86/sal:sar:shl:shr
		rexPrefix |= rexPrefixW
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_101_000 | // SHR with immediate needs "/5" extension.
			regBits
		if n.srcConst == 1 {
			code = append(code, rexPrefix, 0xd1, modRM)
		} else {
			code = append(code, rexPrefix, 0xc1, modRM, byte(n.srcConst))
		}
	case PSLLD:
		// https://www.felixcloutier.com/x86/psllw:pslld:psllq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_110_000 | // PSLL with immediate needs "/6" extension.
			regBits
		if rexPrefix != rexPrefixNone {
			code = append(code, 0x66, rexPrefix, 0x0f, 0x72, modRM, byte(n.srcConst))
		} else {
			code = append(code, 0x66, 0x0f, 0x72, modRM, byte(n.srcConst))
		}
	case PSLLQ:
		// https://www.felixcloutier.com/x86/psllw:pslld:psllq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_110_000 | // PSLL with immediate needs "/6" extension.
			regBits
		if rexPrefix != rexPrefixNone {
			code = append(code, 0x66, rexPrefix, 0x0f, 0x73, modRM, byte(n.srcConst))
		} else {
			code = append(code, 0x66, 0x0f, 0x73, modRM, byte(n.srcConst))
		}
	case PSRLD:
		// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_010_000 | // PSRL with immediate needs "/2" extension.
			regBits
		if rexPrefix != rexPrefixNone {
			code = append(code, 0x66, rexPrefix, 0x0f, 0x72, modRM, byte(n.srcConst))
		} else {
			code = append(code, 0x66, 0x0f, 0x72, modRM, byte(n.srcConst))
		}
	case PSRLQ:
		// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_010_000 | // PSRL with immediate needs "/2" extension.
			regBits
		if rexPrefix != rexPrefixNone {
			code = append(code, 0x66, rexPrefix, 0x0f, 0x73, modRM, byte(n.srcConst))
		} else {
			code = append(code, 0x66, 0x0f, 0x73, modRM, byte(n.srcConst))
		}
	case PSRAW, PSRAD:
		// https://www.felixcloutier.com/x86/psraw:psrad:psraq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_100_000 | // PSRA with immediate needs "/4" extension.
			regBits
		code = append(code, 0x66)
		if rexPrefix != rexPrefixNone {
			code = append(code, rexPrefix)
		}

		var op byte
		if inst == PSRAD {
			op = 0x72
		} else { // PSRAW
			op = 0x71
		}

		code = append(code, 0x0f, op, modRM, byte(n.srcConst))
	case PSRLW:
		// https://www.felixcloutier.com/x86/psrlw:psrld:psrlq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_010_000 | // PSRLW with immediate needs "/2" extension.
			regBits
		code = append(code, 0x66)
		if rexPrefix != rexPrefixNone {
			code = append(code, rexPrefix)
		}
		code = append(code, 0x0f, 0x71, modRM, byte(n.srcConst))
	case PSLLW:
		// https://www.felixcloutier.com/x86/psllw:pslld:psllq
		modRM := 0b11_000_000 | // Specifying that operand is register.
			0b00_110_000 | // PSLLW with immediate needs "/6" extension.
			regBits
		code = append(code, 0x66)
		if rexPrefix != rexPrefixNone {
			code = append(code, rexPrefix)
		}
		code = append(code, 0x0f, 0x71, modRM, byte(n.srcConst))
	case XORL, XORQ:
		// https://www.felixcloutier.com/x86/xor
		if inst == XORQ {
			rexPrefix |= rexPrefixW
		}
		if rexPrefix != rexPrefixNone {
			code = append(code, rexPrefix)
		}
		if n.dstReg == RegAX && !isSigned8bitConst {
			code = append(code, 0x35)
		} else {
			modRM := 0b11_000_000 | // Specifying that operand is register.
				0b00_110_000 | // XOR with immediate needs "/6" extension.
				regBits
			if isSigned8bitConst {
				code = append(code, 0x83, modRM)
			} else {
				code = append(code, 0x81, modRM)
			}
		}
		if fitInSigned8bit(n.srcConst) {
			code = append(code, byte(n.srcConst))
		} else {
			code = appendUint32(code, uint32(n.srcConst))
		}
	default:
		err = errorEncodingUnsupported(n)
	}

	buf.Truncate(base + len(code))
	return
}
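
// For example, SHLQ of AX by the constant 3 encodes as 0x48, 0xc1, 0xe0, 0x03
// (REX.W, opcode 0xc1, ModRM 0b11_100_000 carrying the "/4" extension, then imm8),
// while a shift by exactly 1 takes the shorter 0xd1 form with no immediate.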

func (a *AssemblerImpl) encodeMemoryToConst(buf asm.Buffer, n *nodeImpl) (err error) {
	if !fitIn32bit(n.dstConst) {
		return fmt.Errorf("too large target const %d for %s", n.dstConst, InstructionName(n.instruction))
	}

	rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(false)
	if err != nil {
		return err
	}

	// Alias for readability.
	c := n.dstConst

	var opcode, constWidth byte
	switch n.instruction {
	case CMPL:
		// https://www.felixcloutier.com/x86/cmp
		if fitInSigned8bit(c) {
			opcode = 0x83
			constWidth = 8
		} else {
			opcode = 0x81
			constWidth = 32
		}
		modRM |= 0b00_111_000 // CMP with immediate needs "/7" extension.
	default:
		return errorEncodingUnsupported(n)
	}

	base := buf.Len()
	code := buf.Append(20)[:0]

	if rexPrefix != rexPrefixNone {
		code = append(code, rexPrefix)
	}

	code = append(code, opcode, modRM)

	if sbiExist {
		code = append(code, sbi)
	}

	if displacementWidth != 0 {
		code = appendConst(code, n.srcConst, displacementWidth)
	}

	code = appendConst(code, c, constWidth)
	buf.Truncate(base + len(code))
	return
}
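
// For example, CMPL of [AX] against the constant 1 encodes as 0x83, 0x38, 0x01:
// opcode 0x83 (the imm8 form), ModRM 0b00_111_000 (the "/7" extension with [AX] and
// no displacement), then the immediate byte.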

func (a *AssemblerImpl) encodeConstToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
	rexPrefix, modRM, sbi, sbiExist, displacementWidth, err := n.getMemoryLocation(true)
	if err != nil {
		return err
	}

	// Aliases for readability.
	inst := n.instruction
	c := n.srcConst

	if inst == MOVB && !fitInSigned8bit(c) {
		return fmt.Errorf("too large load target const %d for MOVB", c)
	} else if !fitIn32bit(c) {
		return fmt.Errorf("too large load target const %d for %s", c, InstructionName(n.instruction))
	}

	var constWidth, opcode byte
	switch inst {
	case MOVB:
		opcode = 0xc6
		constWidth = 8
	case MOVL:
		opcode = 0xc7
		constWidth = 32
	case MOVQ:
		rexPrefix |= rexPrefixW
		opcode = 0xc7
		constWidth = 32
	default:
		return errorEncodingUnsupported(n)
	}

	base := buf.Len()
	code := buf.Append(20)[:0]

	if rexPrefix != rexPrefixNone {
		code = append(code, rexPrefix)
	}

	code = append(code, opcode, modRM)

	if sbiExist {
		code = append(code, sbi)
	}

	if displacementWidth != 0 {
		code = appendConst(code, n.dstConst, displacementWidth)
	}

	code = appendConst(code, c, constWidth)

	buf.Truncate(base + len(code))
	return
}
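
// For example, MOVB of the constant 5 into [AX] encodes as 0xc6, 0x00, 0x05:
// opcode 0xc6, ModRM 0b00_000_000 ([AX] with no displacement), then the 8-bit constant.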

func appendUint32(code []byte, v uint32) []byte {
	b := [4]byte{}
	binary.LittleEndian.PutUint32(b[:], v)
	return append(code, b[:]...)
}

func appendUint64(code []byte, v uint64) []byte {
	b := [8]byte{}
	binary.LittleEndian.PutUint64(b[:], v)
	return append(code, b[:]...)
}

func appendConst(code []byte, v int64, length byte) []byte {
	switch length {
	case 8:
		return append(code, byte(v))
	case 32:
		return appendUint32(code, uint32(v))
	default:
		return appendUint64(code, uint64(v))
	}
}

func (n *nodeImpl) getMemoryLocation(dstMem bool) (p rexPrefix, modRM byte, sbi byte, sbiExist bool, displacementWidth byte, err error) {
	var baseReg, indexReg asm.Register
	var offset asm.ConstantValue
	var scale byte
	if dstMem {
		baseReg, offset, indexReg, scale = n.dstReg, n.dstConst, n.dstMemIndex, n.dstMemScale
	} else {
		baseReg, offset, indexReg, scale = n.srcReg, n.srcConst, n.srcMemIndex, n.srcMemScale
	}

	if !fitIn32bit(offset) {
		err = errors.New("offset does not fit in 32-bit integer")
		return
	}

	if baseReg == asm.NilRegister && indexReg != asm.NilRegister {
		// [(index*scale) + displacement] addressing is possible, but we haven't needed it so far.
		err = errors.New("addressing without base register but with index is not implemented")
	} else if baseReg == asm.NilRegister {
		modRM = 0b00_000_100 // Indicate that the memory location is specified by SIB.
		sbi, sbiExist = byte(0b00_100_101), true
		displacementWidth = 32
	} else if indexReg == asm.NilRegister {
		modRM, p = register3bits(baseReg, registerSpecifierPositionModRMFieldRM)

		// Create the ModR/M byte so that this instruction takes the [R/M + displacement] operand if
		// displacement != 0, and the [R/M] operand otherwise.
		withoutDisplacement := offset == 0 &&
			// If the base register is R13 or BP, we have to keep [R/M + displacement] even if the offset
			// is zero, since the plain [R/M] form is not defined for these two registers.
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing
			baseReg != RegR13 && baseReg != RegBP
		if withoutDisplacement {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b00_000_000 // Specifying that operand is memory without displacement
			displacementWidth = 0
		} else if fitInSigned8bit(offset) {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b01_000_000 // Specifying that operand is memory + 8bit displacement.
			displacementWidth = 8
		} else {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b10_000_000 // Specifying that operand is memory + 32bit displacement.
			displacementWidth = 32
		}

		// For the SP and R12 registers, we have [SIB + displacement] if the offset is non-zero, otherwise [SIB].
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing
		//
		// Therefore we emit the SIB byte before the offset so that [SIB + displacement] ends up [register + displacement].
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing_2
		if baseReg == RegSP || baseReg == RegR12 {
			sbi, sbiExist = byte(0b00_100_100), true
		}
	} else {
		if indexReg == RegSP {
			err = errors.New("SP cannot be used for SIB index")
			return
		}

		modRM = 0b00_000_100 // Indicate that the memory location is specified by SIB.

		withoutDisplacement := offset == 0 &&
			// R13 and BP base registers cannot be encoded in the "without displacement" mod (i.e. 0b00 mod).
			baseReg != RegR13 && baseReg != RegBP
		if withoutDisplacement {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b00_000_000 // Specifying that operand is SIB without displacement
			displacementWidth = 0
		} else if fitInSigned8bit(offset) {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b01_000_000 // Specifying that operand is SIB + 8bit displacement.
			displacementWidth = 8
		} else {
			// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
			modRM |= 0b10_000_000 // Specifying that operand is SIB + 32bit displacement.
			displacementWidth = 32
		}

		var baseRegBits byte
		baseRegBits, p = register3bits(baseReg, registerSpecifierPositionModRMFieldRM)

		var indexRegBits byte
		var indexRegPrefix rexPrefix
		indexRegBits, indexRegPrefix = register3bits(indexReg, registerSpecifierPositionSIBIndex)
		p |= indexRegPrefix

		sbi, sbiExist = baseRegBits|(indexRegBits<<3), true
		switch scale {
		case 1:
			sbi |= 0b00_000_000
		case 2:
			sbi |= 0b01_000_000
		case 4:
			sbi |= 0b10_000_000
		case 8:
			sbi |= 0b11_000_000
		default:
			err = fmt.Errorf("scale in SIB must be one of 1, 2, 4, 8 but got %d", scale)
			return
		}
	}
	return
}
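
// For example, the address [AX + BX*4] with a zero offset yields modRM=0b00_000_100
// (SIB follows, no displacement) and sbi=0b10_011_000 (scale=4, index=BX, base=AX);
// once the caller ORs CX into ModRM:reg, MOVQ from that location into CX assembles
// to 0x48, 0x8b, 0x0c, 0x98.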

// getRegisterToRegisterModRM builds the REX prefix and the ModRM byte for a register-to-register
// instruction, placing the source register on ModRM:reg when srcOnModRMReg is true and on
// ModRM:r/m otherwise.
//
// TODO: srcOnModRMReg can be deleted after golang-asm removal. This is necessary to match our implementation
// with golang-asm, but in practice, there are equivalent opcodes to always have src on ModRM:reg without ambiguity.
func (n *nodeImpl) getRegisterToRegisterModRM(srcOnModRMReg bool) (rexPrefix, modRM byte, err error) {
	var reg3bits, rm3bits byte
	if srcOnModRMReg {
		reg3bits, rexPrefix = register3bits(n.srcReg,
			// Indicate that srcReg will be specified by ModRM:reg.
			registerSpecifierPositionModRMFieldReg)

		var dstRexPrefix byte
		rm3bits, dstRexPrefix = register3bits(n.dstReg,
			// Indicate that dstReg will be specified by ModRM:r/m.
			registerSpecifierPositionModRMFieldRM)
		rexPrefix |= dstRexPrefix
	} else {
		rm3bits, rexPrefix = register3bits(n.srcReg,
			// Indicate that srcReg will be specified by ModRM:r/m.
			registerSpecifierPositionModRMFieldRM)

		var dstRexPrefix byte
		reg3bits, dstRexPrefix = register3bits(n.dstReg,
			// Indicate that dstReg will be specified by ModRM:reg.
			registerSpecifierPositionModRMFieldReg)
		rexPrefix |= dstRexPrefix
	}

	// https://wiki.osdev.org/X86-64_Instruction_Encoding#ModR.2FM
	modRM = 0b11_000_000 | // Specifying that dst operand is register.
		(reg3bits << 3) |
		rm3bits

	return
}

// rexPrefix represents a REX prefix. https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix
type rexPrefix = byte

// REX prefix bits are independent of each other and can be combined with bitwise OR.
const (
	rexPrefixNone    rexPrefix = 0b0000_0000 // Indicates that the instruction doesn't need a REX prefix.
	rexPrefixDefault rexPrefix = 0b0100_0000
	rexPrefixW                 = 0b0000_1000 | rexPrefixDefault // REX.W
	rexPrefixR                 = 0b0000_0100 | rexPrefixDefault // REX.R
	rexPrefixX                 = 0b0000_0010 | rexPrefixDefault // REX.X
	rexPrefixB                 = 0b0000_0001 | rexPrefixDefault // REX.B
)
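
// For example, encoding MOVQ of AX into R8 needs REX.W for the 64-bit operand size and
// REX.B for the extended register on ModRM:r/m; they combine to 0x48|0x41 = 0x49,
// giving the bytes 0x49, 0x89, 0xc0.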

// registerSpecifierPosition represents the position in the instruction bytes where an operand register is placed.
type registerSpecifierPosition byte

const (
	registerSpecifierPositionModRMFieldReg registerSpecifierPosition = iota
	registerSpecifierPositionModRMFieldRM
	registerSpecifierPositionSIBIndex
)

var regInfo = [...]struct {
	bits    byte
	needRex bool
}{
	RegAX:  {bits: 0b000},
	RegCX:  {bits: 0b001},
	RegDX:  {bits: 0b010},
	RegBX:  {bits: 0b011},
	RegSP:  {bits: 0b100},
	RegBP:  {bits: 0b101},
	RegSI:  {bits: 0b110},
	RegDI:  {bits: 0b111},
	RegR8:  {bits: 0b000, needRex: true},
	RegR9:  {bits: 0b001, needRex: true},
	RegR10: {bits: 0b010, needRex: true},
	RegR11: {bits: 0b011, needRex: true},
	RegR12: {bits: 0b100, needRex: true},
	RegR13: {bits: 0b101, needRex: true},
	RegR14: {bits: 0b110, needRex: true},
	RegR15: {bits: 0b111, needRex: true},
	RegX0:  {bits: 0b000},
	RegX1:  {bits: 0b001},
	RegX2:  {bits: 0b010},
	RegX3:  {bits: 0b011},
	RegX4:  {bits: 0b100},
	RegX5:  {bits: 0b101},
	RegX6:  {bits: 0b110},
	RegX7:  {bits: 0b111},
	RegX8:  {bits: 0b000, needRex: true},
	RegX9:  {bits: 0b001, needRex: true},
	RegX10: {bits: 0b010, needRex: true},
	RegX11: {bits: 0b011, needRex: true},
	RegX12: {bits: 0b100, needRex: true},
	RegX13: {bits: 0b101, needRex: true},
	RegX14: {bits: 0b110, needRex: true},
	RegX15: {bits: 0b111, needRex: true},
}

// register3bits returns the 3 bits identifying reg in the ModRM or SIB byte, plus the REX
// prefix bit required when reg is one of the extended registers (R8-R15, X8-X15).
func register3bits(
	reg asm.Register,
	registerSpecifierPosition registerSpecifierPosition,
) (bits byte, prefix rexPrefix) {
	info := regInfo[reg]
	bits = info.bits
	if info.needRex {
		// https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix
		switch registerSpecifierPosition {
		case registerSpecifierPositionModRMFieldReg:
			prefix = rexPrefixR
		case registerSpecifierPositionModRMFieldRM:
			prefix = rexPrefixB
		case registerSpecifierPositionSIBIndex:
			prefix = rexPrefixX
		}
	}
	return
}

// fitIn32bit returns true if the value can be encoded in 32 bits, either as a signed
// or as an unsigned 32-bit integer.
func fitIn32bit(v int64) bool {
	return math.MinInt32 <= v && v <= math.MaxUint32
}

// fitInSigned8bit returns true if the value can be encoded as a signed 8-bit integer.
func fitInSigned8bit(v int64) bool {
	return math.MinInt8 <= v && v <= math.MaxInt8
}

// isVectorRegister returns true if r is one of the XMM registers X0-X15.
func isVectorRegister(r asm.Register) bool {
	return RegX0 <= r && r <= RegX15
}