Disassemble command

Implements #368
2016-02-06 07:00:48 +01:00 · 2016-02-06 07:00:48 +01:00 · e7a9a3ea9a
commit e7a9a3ea9a
parent b370e20cd5
26 changed files with 14830 additions and 3 deletions
--- a/Godeps/Godeps.json
+++ b/Godeps/Godeps.json
@ -62,6 +62,10 @@
 		{
 			"ImportPath": "gopkg.in/yaml.v2",
 			"Rev": "53feefa2559fb8dfa8d81baad31be332c97d6c77"
+		},
+		{
+			"ImportPath": "rsc.io/x86/x86asm",
+			"Rev": "01d8f0379593fd888e08a4c4057d69f5765ab2e4"
 		}
 	]
 }
--- a/_fixtures/locationsprog2.go
+++ b/_fixtures/locationsprog2.go
@ -25,5 +25,7 @@ func main() {
 		fmt.Println("inline", s)
 	}
 	runtime.Breakpoint()
+	fn1("test")
+	afunction("test")
 	fmt.Println(fn1, fn2, fn3, o)
 }
--- a/proc/disasm.go
+++ b/proc/disasm.go
@ -0,0 +1,64 @@
+package proc
+
+// AsmInstruction is one entry of a disassembly produced by Thread.Disassemble.
+type AsmInstruction struct {
+	// Loc is the address of the instruction together with the file, line and function it maps to.
+	Loc        Location
+	// DestLoc is the resolved destination of a CALL instruction; nil when it is not a call
+	// or the destination could not be determined.
+	DestLoc    *Location
+	// Bytes holds the raw bytes of the instruction (a single byte when decoding failed).
+	Bytes      []byte
+	// Breakpoint is true when a breakpoint is set at Loc.PC.
+	Breakpoint bool
+	// AtPC is true when the thread is stopped at this instruction.
+	AtPC       bool
+	// Inst is the decoded instruction; nil when the bytes could not be decoded.
+	Inst       *ArchInst
+}
+
+// AssemblyFlavour selects the syntax used when an instruction is formatted as text.
+type AssemblyFlavour int
+
+const (
+	// GNUFlavour selects GNU assembler syntax.
+	GNUFlavour = AssemblyFlavour(iota)
+	// IntelFlavour selects Intel syntax.
+	IntelFlavour
+)
+
+// Disassemble disassembles target memory between startPC and endPC.
+// If currentGoroutine is set and thread is stopped at a CALL instruction, Disassemble
+// will evaluate the argument of the CALL instruction using the thread's registers.
+// Bytes that cannot be decoded are returned one at a time as entries with a nil Inst.
+// Be aware that the Bytes field of each returned instruction is a slice of a larger
+// array of size endPC - startPC.
+// It returns a ProcessExitedError if the process has exited, or the error reported
+// while reading the target memory.
+func (thread *Thread) Disassemble(startPC, endPC uint64, currentGoroutine bool) ([]AsmInstruction, error) {
+	if thread.dbp.exited {
+		return nil, &ProcessExitedError{}
+	}
+	mem, err := thread.readMemory(uintptr(startPC), int(endPC-startPC))
+	if err != nil {
+		return nil, err
+	}
+
+	// len(mem)/15 is only an initial capacity guess; append grows the slice as needed.
+	r := make([]AsmInstruction, 0, len(mem)/15)
+	pc := startPC
+
+	// Registers are best effort: without them no instruction is marked AtPC
+	// (unless pc is 0) and register/memory CALL arguments are left unresolved.
+	regs, _ := thread.Registers()
+	var curpc uint64
+	if regs != nil {
+		curpc = regs.PC()
+	}
+
+	for len(mem) > 0 {
+		bp, atbp := thread.dbp.Breakpoints[pc]
+		if atbp {
+			// Decode the saved original bytes instead of what is currently in memory
+			// at the breakpoint. copy stops at the end of mem, so a breakpoint close
+			// to endPC cannot write past the buffer.
+			copy(mem, bp.OriginalData)
+		}
+		file, line, fn := thread.dbp.PCToLine(pc)
+		loc := Location{PC: pc, File: file, Line: line, Fn: fn}
+		atpc := currentGoroutine && (curpc == pc)
+
+		inst, err := asmDecode(mem, pc)
+		if err != nil {
+			// Undecodable byte: report it on its own and retry from the next byte.
+			r = append(r, AsmInstruction{Loc: loc, Bytes: mem[:1], Breakpoint: atbp, AtPC: atpc, Inst: nil})
+			pc++
+			mem = mem[1:]
+			continue
+		}
+
+		size := inst.Size()
+		destloc := thread.resolveCallArg(inst, atpc, regs)
+		r = append(r, AsmInstruction{Loc: loc, DestLoc: destloc, Bytes: mem[:size], Breakpoint: atbp, AtPC: atpc, Inst: inst})
+
+		pc += uint64(size)
+		mem = mem[size:]
+	}
+	return r, nil
+}
--- a/proc/disasm_amd64.go
+++ b/proc/disasm_amd64.go
@ -0,0 +1,113 @@
+package proc
+
+import (
+	"encoding/binary"
+	"rsc.io/x86/x86asm"
+)
+
+// ArchInst is the decoded instruction type for amd64, defined on top of x86asm.Inst.
+type ArchInst x86asm.Inst
+
+// asmDecode decodes the instruction at the start of mem in 64-bit mode.
+// pc is the address of mem[0]; it is used to turn PC-relative arguments into
+// absolute addresses. The decoder's error is returned unchanged on failure.
+func asmDecode(mem []byte, pc uint64) (*ArchInst, error) {
+	decoded, err := x86asm.Decode(mem, 64)
+	if err != nil {
+		return nil, err
+	}
+	patchPCRel(pc, &decoded)
+	return (*ArchInst)(&decoded), nil
+}
+
+// Size returns the length of the instruction (its Len field).
+func (inst *ArchInst) Size() int {
+	return inst.Len
+}
+
+// patchPCRel replaces every PC-relative argument of inst with an immediate
+// holding the absolute address: pc + length of the instruction + relative offset.
+func patchPCRel(pc uint64, inst *x86asm.Inst) {
+	next := int64(pc) + int64(inst.Len)
+	for i, arg := range inst.Args {
+		if rel, ok := arg.(x86asm.Rel); ok {
+			inst.Args[i] = x86asm.Imm(next + int64(rel))
+		}
+	}
+}
+
+// Text formats the instruction in the requested flavour. GNUFlavour produces GNU
+// syntax; every other value produces Intel syntax. Instructions that could not be
+// decoded are rendered as "?". For CALL instructions whose destination function is
+// known, the function name is appended after a space.
+func (inst *AsmInstruction) Text(flavour AssemblyFlavour) string {
+	if inst.Inst == nil {
+		return "?"
+	}
+
+	decoded := x86asm.Inst(*inst.Inst)
+
+	var text string
+	if flavour == GNUFlavour {
+		text = x86asm.GNUSyntax(decoded)
+	} else {
+		text = x86asm.IntelSyntax(decoded)
+	}
+
+	if inst.IsCall() && inst.DestLoc != nil && inst.DestLoc.Fn != nil {
+		text += " " + inst.DestLoc.Fn.Name
+	}
+
+	return text
+}
+
+// IsCall reports whether the instruction is a CALL or LCALL.
+// Instructions that could not be decoded (nil Inst) are never calls.
+func (inst *AsmInstruction) IsCall() bool {
+	if inst.Inst == nil {
+		return false
+	}
+	return inst.Inst.Op == x86asm.CALL || inst.Inst.Op == x86asm.LCALL
+}
+
+// resolveCallArg returns the location targeted by a CALL/LCALL instruction, or nil
+// when inst is not a call or the target cannot be determined.
+//
+// Immediate targets are always resolved. Register and memory targets are only
+// resolved when currentGoroutine is true and regs is non-nil, because they are
+// evaluated with the register values in regs. Memory operands with a segment
+// override, or that use a register regs.Get does not know, are not resolved.
+// nil is also returned when the computed address does not belong to any function.
+func (thread *Thread) resolveCallArg(inst *ArchInst, currentGoroutine bool, regs Registers) *Location {
+	if inst.Op != x86asm.CALL && inst.Op != x86asm.LCALL {
+		return nil
+	}
+
+	var pc uint64
+	var err error
+
+	switch arg := inst.Args[0].(type) {
+	case x86asm.Imm:
+		pc = uint64(arg)
+	case x86asm.Reg:
+		if !currentGoroutine || regs == nil {
+			return nil
+		}
+		pc, err = regs.Get(int(arg))
+		if err != nil {
+			return nil
+		}
+	case x86asm.Mem:
+		if !currentGoroutine || regs == nil {
+			return nil
+		}
+		if arg.Segment != 0 {
+			return nil
+		}
+		// A zero Base or Index means the operand has no such register (same
+		// convention as the Segment check above); it contributes 0 to the address
+		// instead of failing the lookup.
+		var base, index uint64
+		if arg.Base != 0 {
+			if base, err = regs.Get(int(arg.Base)); err != nil {
+				return nil
+			}
+		}
+		if arg.Index != 0 {
+			if index, err = regs.Get(int(arg.Index)); err != nil {
+				return nil
+			}
+		}
+		addr := uintptr(int64(base) + int64(index*uint64(arg.Scale)) + arg.Disp)
+		//TODO: should this always be 64 bits instead of inst.MemBytes?
+		var pcbytes []byte
+		pcbytes, err = thread.readMemory(addr, inst.MemBytes)
+		if err != nil {
+			return nil
+		}
+		// Uint64 needs 8 bytes; a narrower memory operand cannot hold a 64-bit target.
+		if len(pcbytes) < 8 {
+			return nil
+		}
+		pc = binary.LittleEndian.Uint64(pcbytes)
+	default:
+		return nil
+	}
+
+	file, line, fn := thread.dbp.PCToLine(pc)
+	if fn == nil {
+		return nil
+	}
+	return &Location{PC: pc, File: file, Line: line, Fn: fn}
+}
--- a/proc/registers.go
+++ b/proc/registers.go
@ -1,6 +1,7 @@
 package proc

 import "fmt"
+import "errors"

 // Registers is an interface for a generic register type. The
 // interface encapsulates the generic values / actions
@ -11,10 +12,13 @@ type Registers interface {
 	SP() uint64
 	CX() uint64
 	TLS() uint64
+	Get(int) (uint64, error)
 	SetPC(*Thread, uint64) error
 	String() string
 }

+// UnknownRegisterError is returned by Registers.Get when the requested register is not supported.
+var UnknownRegisterError = errors.New("unknown register")
+
 // Registers obtains register values from the debugged process.
 func (t *Thread) Registers() (Registers, error) {
 	regs, err := registers(t)
--- a/proc/registers_darwin_amd64.go
+++ b/proc/registers_darwin_amd64.go
@ -5,6 +5,7 @@ import "C"
 import (
 	"bytes"
 	"fmt"
+	"rsc.io/x86/x86asm"
 )

 // Regs represents CPU registers on an AMD64 processor.
@ -101,6 +102,163 @@ func (r *Regs) SetPC(thread *Thread, pc uint64) error {
 	return nil
 }

+// Get returns the value of the register identified by n, an x86asm.Reg constant
+// converted to int. 8, 16 and 32-bit registers are read from the low bits of the
+// 64-bit register that contains them; AH, CH, DH and BH are read from bits 8-15.
+// UnknownRegisterError is returned for any register not listed below.
+func (r *Regs) Get(n int) (uint64, error) {
+	reg := x86asm.Reg(n)
+	const (
+		mask8  = 0xff
+		mask16 = 0xffff
+		mask32 = 0xffffffff
+	)
+
+	switch reg {
+	// 8-bit
+	case x86asm.AL:
+		return r.rax & mask8, nil
+	case x86asm.CL:
+		return r.rcx & mask8, nil
+	case x86asm.DL:
+		return r.rdx & mask8, nil
+	case x86asm.BL:
+		return r.rbx & mask8, nil
+	case x86asm.AH:
+		return (r.rax >> 8) & mask8, nil
+	case x86asm.CH:
+		return (r.rcx >> 8) & mask8, nil
+	case x86asm.DH:
+		return (r.rdx >> 8) & mask8, nil
+	case x86asm.BH:
+		return (r.rbx >> 8) & mask8, nil
+	case x86asm.SPB:
+		return r.rsp & mask8, nil
+	case x86asm.BPB:
+		return r.rbp & mask8, nil
+	case x86asm.SIB:
+		return r.rsi & mask8, nil
+	case x86asm.DIB:
+		return r.rdi & mask8, nil
+	case x86asm.R8B:
+		return r.r8 & mask8, nil
+	case x86asm.R9B:
+		return r.r9 & mask8, nil
+	case x86asm.R10B:
+		return r.r10 & mask8, nil
+	case x86asm.R11B:
+		return r.r11 & mask8, nil
+	case x86asm.R12B:
+		return r.r12 & mask8, nil
+	case x86asm.R13B:
+		return r.r13 & mask8, nil
+	case x86asm.R14B:
+		return r.r14 & mask8, nil
+	case x86asm.R15B:
+		return r.r15 & mask8, nil
+
+	// 16-bit
+	case x86asm.AX:
+		return r.rax & mask16, nil
+	case x86asm.CX:
+		return r.rcx & mask16, nil
+	case x86asm.DX:
+		return r.rdx & mask16, nil
+	case x86asm.BX:
+		return r.rbx & mask16, nil
+	case x86asm.SP:
+		return r.rsp & mask16, nil
+	case x86asm.BP:
+		return r.rbp & mask16, nil
+	case x86asm.SI:
+		return r.rsi & mask16, nil
+	case x86asm.DI:
+		return r.rdi & mask16, nil
+	case x86asm.R8W:
+		return r.r8 & mask16, nil
+	case x86asm.R9W:
+		return r.r9 & mask16, nil
+	case x86asm.R10W:
+		return r.r10 & mask16, nil
+	case x86asm.R11W:
+		return r.r11 & mask16, nil
+	case x86asm.R12W:
+		return r.r12 & mask16, nil
+	case x86asm.R13W:
+		return r.r13 & mask16, nil
+	case x86asm.R14W:
+		return r.r14 & mask16, nil
+	case x86asm.R15W:
+		return r.r15 & mask16, nil
+
+	// 32-bit
+	case x86asm.EAX:
+		return r.rax & mask32, nil
+	case x86asm.ECX:
+		return r.rcx & mask32, nil
+	case x86asm.EDX:
+		return r.rdx & mask32, nil
+	case x86asm.EBX:
+		return r.rbx & mask32, nil
+	case x86asm.ESP:
+		return r.rsp & mask32, nil
+	case x86asm.EBP:
+		return r.rbp & mask32, nil
+	case x86asm.ESI:
+		return r.rsi & mask32, nil
+	case x86asm.EDI:
+		return r.rdi & mask32, nil
+	case x86asm.R8L:
+		return r.r8 & mask32, nil
+	case x86asm.R9L:
+		return r.r9 & mask32, nil
+	case x86asm.R10L:
+		return r.r10 & mask32, nil
+	case x86asm.R11L:
+		return r.r11 & mask32, nil
+	case x86asm.R12L:
+		return r.r12 & mask32, nil
+	case x86asm.R13L:
+		return r.r13 & mask32, nil
+	case x86asm.R14L:
+		return r.r14 & mask32, nil
+	case x86asm.R15L:
+		return r.r15 & mask32, nil
+
+	// 64-bit
+	case x86asm.RAX:
+		return r.rax, nil
+	case x86asm.RCX:
+		return r.rcx, nil
+	case x86asm.RDX:
+		return r.rdx, nil
+	case x86asm.RBX:
+		return r.rbx, nil
+	case x86asm.RSP:
+		return r.rsp, nil
+	case x86asm.RBP:
+		return r.rbp, nil
+	case x86asm.RSI:
+		return r.rsi, nil
+	case x86asm.RDI:
+		return r.rdi, nil
+	case x86asm.R8:
+		return r.r8, nil
+	case x86asm.R9:
+		return r.r9, nil
+	case x86asm.R10:
+		return r.r10, nil
+	case x86asm.R11:
+		return r.r11, nil
+	case x86asm.R12:
+		return r.r12, nil
+	case x86asm.R13:
+		return r.r13, nil
+	case x86asm.R14:
+		return r.r14, nil
+	case x86asm.R15:
+		return r.r15, nil
+	}
+
+	return 0, UnknownRegisterError
+}
+
 func registers(thread *Thread) (Registers, error) {
 	var state C.x86_thread_state64_t
 	var identity C.thread_identifier_info_data_t
--- a/proc/registers_linux_amd64.go
+++ b/proc/registers_linux_amd64.go
@ -3,6 +3,7 @@ package proc
 import "fmt"
 import "bytes"
 import sys "golang.org/x/sys/unix"
+import "rsc.io/x86/x86asm"

 // Regs is a wrapper for sys.PtraceRegs.
 type Regs struct {
@ -77,6 +78,163 @@ func (r *Regs) SetPC(thread *Thread, pc uint64) (err error) {
 	return
 }

+// Get returns the value of the register identified by n, an x86asm.Reg constant
+// converted to int. 8, 16 and 32-bit registers are read from the low bits of the
+// 64-bit register that contains them; AH, CH, DH and BH are read from bits 8-15.
+// UnknownRegisterError is returned for any register not listed below.
+func (r *Regs) Get(n int) (uint64, error) {
+	reg := x86asm.Reg(n)
+	const (
+		mask8  = 0xff
+		mask16 = 0xffff
+		mask32 = 0xffffffff
+	)
+
+	switch reg {
+	// 8-bit
+	case x86asm.AL:
+		return r.regs.Rax & mask8, nil
+	case x86asm.CL:
+		return r.regs.Rcx & mask8, nil
+	case x86asm.DL:
+		return r.regs.Rdx & mask8, nil
+	case x86asm.BL:
+		return r.regs.Rbx & mask8, nil
+	case x86asm.AH:
+		return (r.regs.Rax >> 8) & mask8, nil
+	case x86asm.CH:
+		return (r.regs.Rcx >> 8) & mask8, nil
+	case x86asm.DH:
+		return (r.regs.Rdx >> 8) & mask8, nil
+	case x86asm.BH:
+		return (r.regs.Rbx >> 8) & mask8, nil
+	case x86asm.SPB:
+		return r.regs.Rsp & mask8, nil
+	case x86asm.BPB:
+		return r.regs.Rbp & mask8, nil
+	case x86asm.SIB:
+		return r.regs.Rsi & mask8, nil
+	case x86asm.DIB:
+		return r.regs.Rdi & mask8, nil
+	case x86asm.R8B:
+		return r.regs.R8 & mask8, nil
+	case x86asm.R9B:
+		return r.regs.R9 & mask8, nil
+	case x86asm.R10B:
+		return r.regs.R10 & mask8, nil
+	case x86asm.R11B:
+		return r.regs.R11 & mask8, nil
+	case x86asm.R12B:
+		return r.regs.R12 & mask8, nil
+	case x86asm.R13B:
+		return r.regs.R13 & mask8, nil
+	case x86asm.R14B:
+		return r.regs.R14 & mask8, nil
+	case x86asm.R15B:
+		return r.regs.R15 & mask8, nil
+
+	// 16-bit
+	case x86asm.AX:
+		return r.regs.Rax & mask16, nil
+	case x86asm.CX:
+		return r.regs.Rcx & mask16, nil
+	case x86asm.DX:
+		return r.regs.Rdx & mask16, nil
+	case x86asm.BX:
+		return r.regs.Rbx & mask16, nil
+	case x86asm.SP:
+		return r.regs.Rsp & mask16, nil
+	case x86asm.BP:
+		return r.regs.Rbp & mask16, nil
+	case x86asm.SI:
+		return r.regs.Rsi & mask16, nil
+	case x86asm.DI:
+		return r.regs.Rdi & mask16, nil
+	case x86asm.R8W:
+		return r.regs.R8 & mask16, nil
+	case x86asm.R9W:
+		return r.regs.R9 & mask16, nil
+	case x86asm.R10W:
+		return r.regs.R10 & mask16, nil
+	case x86asm.R11W:
+		return r.regs.R11 & mask16, nil
+	case x86asm.R12W:
+		return r.regs.R12 & mask16, nil
+	case x86asm.R13W:
+		return r.regs.R13 & mask16, nil
+	case x86asm.R14W:
+		return r.regs.R14 & mask16, nil
+	case x86asm.R15W:
+		return r.regs.R15 & mask16, nil
+
+	// 32-bit
+	case x86asm.EAX:
+		return r.regs.Rax & mask32, nil
+	case x86asm.ECX:
+		return r.regs.Rcx & mask32, nil
+	case x86asm.EDX:
+		return r.regs.Rdx & mask32, nil
+	case x86asm.EBX:
+		return r.regs.Rbx & mask32, nil
+	case x86asm.ESP:
+		return r.regs.Rsp & mask32, nil
+	case x86asm.EBP:
+		return r.regs.Rbp & mask32, nil
+	case x86asm.ESI:
+		return r.regs.Rsi & mask32, nil
+	case x86asm.EDI:
+		return r.regs.Rdi & mask32, nil
+	case x86asm.R8L:
+		return r.regs.R8 & mask32, nil
+	case x86asm.R9L:
+		return r.regs.R9 & mask32, nil
+	case x86asm.R10L:
+		return r.regs.R10 & mask32, nil
+	case x86asm.R11L:
+		return r.regs.R11 & mask32, nil
+	case x86asm.R12L:
+		return r.regs.R12 & mask32, nil
+	case x86asm.R13L:
+		return r.regs.R13 & mask32, nil
+	case x86asm.R14L:
+		return r.regs.R14 & mask32, nil
+	case x86asm.R15L:
+		return r.regs.R15 & mask32, nil
+
+	// 64-bit
+	case x86asm.RAX:
+		return r.regs.Rax, nil
+	case x86asm.RCX:
+		return r.regs.Rcx, nil
+	case x86asm.RDX:
+		return r.regs.Rdx, nil
+	case x86asm.RBX:
+		return r.regs.Rbx, nil
+	case x86asm.RSP:
+		return r.regs.Rsp, nil
+	case x86asm.RBP:
+		return r.regs.Rbp, nil
+	case x86asm.RSI:
+		return r.regs.Rsi, nil
+	case x86asm.RDI:
+		return r.regs.Rdi, nil
+	case x86asm.R8:
+		return r.regs.R8, nil
+	case x86asm.R9:
+		return r.regs.R9, nil
+	case x86asm.R10:
+		return r.regs.R10, nil
+	case x86asm.R11:
+		return r.regs.R11, nil
+	case x86asm.R12:
+		return r.regs.R12, nil
+	case x86asm.R13:
+		return r.regs.R13, nil
+	case x86asm.R14:
+		return r.regs.R14, nil
+	case x86asm.R15:
+		return r.regs.R15, nil
+	}
+
+	return 0, UnknownRegisterError
+}
+
 func registers(thread *Thread) (Registers, error) {
 	var (
 		regs sys.PtraceRegs
--- a/proc/registers_windows_amd64.go
+++ b/proc/registers_windows_amd64.go
@ -7,6 +7,7 @@ import (
 	"fmt"
 	"syscall"
 	"unsafe"
+	"rsc.io/x86/x86asm"
 )

 // Regs represents CPU registers on an AMD64 processor.
@ -114,6 +115,163 @@ func (r *Regs) SetPC(thread *Thread, pc uint64) error {
 	return nil
 }

+// Get returns the value of the register identified by n, an x86asm.Reg constant
+// converted to int. 8, 16 and 32-bit registers are read from the low bits of the
+// 64-bit register that contains them; AH, CH, DH and BH are read from bits 8-15.
+// UnknownRegisterError is returned for any register not listed below.
+func (r *Regs) Get(n int) (uint64, error) {
+	reg := x86asm.Reg(n)
+	const (
+		mask8  = 0xff
+		mask16 = 0xffff
+		mask32 = 0xffffffff
+	)
+
+	switch reg {
+	// 8-bit
+	case x86asm.AL:
+		return r.rax & mask8, nil
+	case x86asm.CL:
+		return r.rcx & mask8, nil
+	case x86asm.DL:
+		return r.rdx & mask8, nil
+	case x86asm.BL:
+		return r.rbx & mask8, nil
+	case x86asm.AH:
+		return (r.rax >> 8) & mask8, nil
+	case x86asm.CH:
+		return (r.rcx >> 8) & mask8, nil
+	case x86asm.DH:
+		return (r.rdx >> 8) & mask8, nil
+	case x86asm.BH:
+		return (r.rbx >> 8) & mask8, nil
+	case x86asm.SPB:
+		return r.rsp & mask8, nil
+	case x86asm.BPB:
+		return r.rbp & mask8, nil
+	case x86asm.SIB:
+		return r.rsi & mask8, nil
+	case x86asm.DIB:
+		return r.rdi & mask8, nil
+	case x86asm.R8B:
+		return r.r8 & mask8, nil
+	case x86asm.R9B:
+		return r.r9 & mask8, nil
+	case x86asm.R10B:
+		return r.r10 & mask8, nil
+	case x86asm.R11B:
+		return r.r11 & mask8, nil
+	case x86asm.R12B:
+		return r.r12 & mask8, nil
+	case x86asm.R13B:
+		return r.r13 & mask8, nil
+	case x86asm.R14B:
+		return r.r14 & mask8, nil
+	case x86asm.R15B:
+		return r.r15 & mask8, nil
+
+	// 16-bit
+	case x86asm.AX:
+		return r.rax & mask16, nil
+	case x86asm.CX:
+		return r.rcx & mask16, nil
+	case x86asm.DX:
+		return r.rdx & mask16, nil
+	case x86asm.BX:
+		return r.rbx & mask16, nil
+	case x86asm.SP:
+		return r.rsp & mask16, nil
+	case x86asm.BP:
+		return r.rbp & mask16, nil
+	case x86asm.SI:
+		return r.rsi & mask16, nil
+	case x86asm.DI:
+		return r.rdi & mask16, nil
+	case x86asm.R8W:
+		return r.r8 & mask16, nil
+	case x86asm.R9W:
+		return r.r9 & mask16, nil
+	case x86asm.R10W:
+		return r.r10 & mask16, nil
+	case x86asm.R11W:
+		return r.r11 & mask16, nil
+	case x86asm.R12W:
+		return r.r12 & mask16, nil
+	case x86asm.R13W:
+		return r.r13 & mask16, nil
+	case x86asm.R14W:
+		return r.r14 & mask16, nil
+	case x86asm.R15W:
+		return r.r15 & mask16, nil
+
+	// 32-bit
+	case x86asm.EAX:
+		return r.rax & mask32, nil
+	case x86asm.ECX:
+		return r.rcx & mask32, nil
+	case x86asm.EDX:
+		return r.rdx & mask32, nil
+	case x86asm.EBX:
+		return r.rbx & mask32, nil
+	case x86asm.ESP:
+		return r.rsp & mask32, nil
+	case x86asm.EBP:
+		return r.rbp & mask32, nil
+	case x86asm.ESI:
+		return r.rsi & mask32, nil
+	case x86asm.EDI:
+		return r.rdi & mask32, nil
+	case x86asm.R8L:
+		return r.r8 & mask32, nil
+	case x86asm.R9L:
+		return r.r9 & mask32, nil
+	case x86asm.R10L:
+		return r.r10 & mask32, nil
+	case x86asm.R11L:
+		return r.r11 & mask32, nil
+	case x86asm.R12L:
+		return r.r12 & mask32, nil
+	case x86asm.R13L:
+		return r.r13 & mask32, nil
+	case x86asm.R14L:
+		return r.r14 & mask32, nil
+	case x86asm.R15L:
+		return r.r15 & mask32, nil
+
+	// 64-bit
+	case x86asm.RAX:
+		return r.rax, nil
+	case x86asm.RCX:
+		return r.rcx, nil
+	case x86asm.RDX:
+		return r.rdx, nil
+	case x86asm.RBX:
+		return r.rbx, nil
+	case x86asm.RSP:
+		return r.rsp, nil
+	case x86asm.RBP:
+		return r.rbp, nil
+	case x86asm.RSI:
+		return r.rsi, nil
+	case x86asm.RDI:
+		return r.rdi, nil
+	case x86asm.R8:
+		return r.r8, nil
+	case x86asm.R9:
+		return r.r9, nil
+	case x86asm.R10:
+		return r.r10, nil
+	case x86asm.R11:
+		return r.r11, nil
+	case x86asm.R12:
+		return r.r12, nil
+	case x86asm.R13:
+		return r.r13, nil
+	case x86asm.R14:
+		return r.r14, nil
+	case x86asm.R15:
+		return r.r15, nil
+	}
+
+	return 0, UnknownRegisterError
+}
+
 func registers(thread *Thread) (Registers, error) {
 	var context C.CONTEXT

--- a/service/api/conversions.go
+++ b/service/api/conversions.go
@ -193,3 +193,19 @@ func ConvertLocation(loc proc.Location) Location {
 		Function: ConvertFunction(loc.Fn),
 	}
 }
+
+// ConvertAsmInstruction converts a proc.AsmInstruction into its api representation.
+// text is the already formatted instruction; it is stored unchanged in the result.
+// DestLoc is converted only when the source instruction has one, otherwise it stays nil.
+func ConvertAsmInstruction(inst proc.AsmInstruction, text string) AsmInstruction {
+	converted := AsmInstruction{
+		Loc:        ConvertLocation(inst.Loc),
+		Text:       text,
+		Bytes:      inst.Bytes,
+		Breakpoint: inst.Breakpoint,
+		AtPC:       inst.AtPC,
+	}
+	if inst.DestLoc != nil {
+		dest := ConvertLocation(*inst.DestLoc)
+		converted.DestLoc = &dest
+	}
+	return converted
+}
--- a/service/api/types.go
+++ b/service/api/types.go
@ -1,6 +1,9 @@
 package api

-import "reflect"
+import (
+	"github.com/derekparker/delve/proc"
+	"reflect"
+)

 // DebuggerState represents the current context of the debugger.
 type DebuggerState struct {
@ -197,3 +200,28 @@ const (
 	// Halt suspends the process.
 	Halt = "halt"
 )
+
+// AssemblyFlavour selects the assembly syntax of a disassembly request.
+type AssemblyFlavour int
+
+// The flavour values mirror the constants of the same name in package proc.
+const (
+	GNUFlavour   = AssemblyFlavour(proc.GNUFlavour)
+	IntelFlavour = AssemblyFlavour(proc.IntelFlavour)
+)
+
+// AsmInstruction represents one assembly instruction at some address
+type AsmInstruction struct {
+	// Loc is the location of this instruction
+	Loc Location
+	// DestLoc is the destination of CALL instructions, nil when not available
+	DestLoc *Location
+	// Text is the formatted representation of the instruction
+	Text string
+	// Bytes is the instruction as read from memory
+	Bytes []byte
+	// If Breakpoint is true a breakpoint is set at this instruction
+	Breakpoint bool
+	// If AtPC is true this is the instruction the current thread is stopped at
+	AtPC bool
+}
+
+// AsmInstructions is a list of disassembled instructions.
+type AsmInstructions []AsmInstruction
--- a/service/client.go
+++ b/service/client.go
@ -90,4 +90,9 @@ type Client interface {
 	// * *<address> returns the location corresponding to the specified address
 	// NOTE: this function does not actually set breakpoints.
 	FindLocation(scope api.EvalScope, loc string) ([]api.Location, error)
+
+	// DisassembleRange disassembles the code between startPC and endPC using the given flavour.
+	DisassembleRange(scope api.EvalScope, startPC, endPC uint64, flavour api.AssemblyFlavour) (api.AsmInstructions, error)
+	// DisassemblePC disassembles the code of the function containing pc using the given flavour.
+	DisassemblePC(scope api.EvalScope, pc uint64, flavour api.AssemblyFlavour) (api.AsmInstructions, error)
 }
--- a/service/debugger/debugger.go
+++ b/service/debugger/debugger.go
@ -575,3 +575,41 @@ func (d *Debugger) FindLocation(scope api.EvalScope, locStr string) ([]api.Locat
 	}
 	return locs, err
 }
+
+// Disassemble disassembles the code between startPC and endPC.
+// If endPC == 0 the function containing startPC is looked up and disassembled
+// in full; an error is returned when startPC does not belong to any function.
+// CALL arguments are evaluated with the thread's registers only when the scope
+// refers to the goroutine running on the thread (or no goroutine is specified).
+func (d *Debugger) Disassemble(scope api.EvalScope, startPC, endPC uint64, flavour api.AssemblyFlavour) (api.AsmInstructions, error) {
+	if endPC == 0 {
+		_, _, fn := d.process.PCToLine(startPC)
+		if fn == nil {
+			return nil, fmt.Errorf("Address 0x%x does not belong to any function", startPC)
+		}
+		startPC, endPC = fn.Entry, fn.End
+	}
+
+	evalScope, err := d.process.ConvertEvalScope(scope.GoroutineID, scope.Frame)
+	if err != nil {
+		return nil, err
+	}
+
+	// A goroutine id of -1 always counts as the current goroutine; any other id
+	// must match the goroutine found on the scope's thread.
+	onCurrentGoroutine := scope.GoroutineID == -1
+	if !onCurrentGoroutine {
+		g, _ := evalScope.Thread.GetG()
+		onCurrentGoroutine = g != nil && g.ID == scope.GoroutineID
+	}
+
+	insts, err := evalScope.Thread.Disassemble(startPC, endPC, onCurrentGoroutine)
+	if err != nil {
+		return nil, err
+	}
+
+	disass := make(api.AsmInstructions, 0, len(insts))
+	for i := range insts {
+		text := insts[i].Text(proc.AssemblyFlavour(flavour))
+		disass = append(disass, api.ConvertAsmInstruction(insts[i], text))
+	}
+
+	return disass, nil
+}
--- a/service/debugger/debugger_linux.go
+++ b/service/debugger/debugger_linux.go
@ -2,10 +2,10 @@ package debugger

 import (
 	"fmt"
+	sys "golang.org/x/sys/unix"
 	"io/ioutil"
 	"os"
 	"syscall"
-	sys "golang.org/x/sys/unix"
 )

 func attachErrorMessage(pid int, err error) error {
--- a/service/rpc/client.go
+++ b/service/rpc/client.go
@ -246,6 +246,20 @@ func (c *RPCClient) FindLocation(scope api.EvalScope, loc string) ([]api.Locatio
 	return answer, err
 }

+// DisassembleRange asks the server to disassemble the code between startPC and endPC.
+func (c *RPCClient) DisassembleRange(scope api.EvalScope, startPC, endPC uint64, flavour api.AssemblyFlavour) (api.AsmInstructions, error) {
+	request := DisassembleRequest{Scope: scope, StartPC: startPC, EndPC: endPC, Flavour: flavour}
+	var instructions api.AsmInstructions
+	err := c.call("Disassemble", request, &instructions)
+	return instructions, err
+}
+
+// DisassemblePC asks the server to disassemble the function containing pc.
+// The request is sent with an EndPC of 0.
+func (c *RPCClient) DisassemblePC(scope api.EvalScope, pc uint64, flavour api.AssemblyFlavour) (api.AsmInstructions, error) {
+	request := DisassembleRequest{Scope: scope, StartPC: pc, EndPC: 0, Flavour: flavour}
+	var instructions api.AsmInstructions
+	err := c.call("Disassemble", request, &instructions)
+	return instructions, err
+}
+
 func (c *RPCClient) url(path string) string {
 	return fmt.Sprintf("http://%s%s", c.addr, path)
 }
--- a/service/rpc/server.go
+++ b/service/rpc/server.go
@ -322,3 +322,15 @@ func (c *RPCServer) FindLocation(args FindLocationArgs, answer *[]api.Location)
 	*answer, err = c.debugger.FindLocation(args.Scope, args.Loc)
 	return err
 }
+
+// DisassembleRequest is the argument of the Disassemble RPC call.
+type DisassembleRequest struct {
+	// Scope is the evaluation scope the disassembly is requested for.
+	Scope          api.EvalScope
+	// StartPC and EndPC delimit the memory range to disassemble.
+	StartPC, EndPC uint64
+	// Flavour is the assembly syntax of the returned text.
+	Flavour        api.AssemblyFlavour
+}
+
+// Disassemble forwards the request to the debugger and stores the resulting
+// instructions in answer, returning the debugger's error.
+func (c *RPCServer) Disassemble(args DisassembleRequest, answer *api.AsmInstructions) error {
+	instructions, err := c.debugger.Disassemble(args.Scope, args.StartPC, args.EndPC, args.Flavour)
+	*answer = instructions
+	return err
+}
--- a/service/test/integration_test.go
+++ b/service/test/integration_test.go
@ -835,6 +835,122 @@ func TestIssue355(t *testing.T) {
 		assertError(err, t, "Stacktrace()")
 		_, err = c.FindLocation(api.EvalScope{gid, 0}, "+1")
 		assertError(err, t, "FindLocation()")
+		_, err = c.DisassemblePC(api.EvalScope{-1, 0}, 0x40100, api.IntelFlavour)
+		assertError(err, t, "DisassemblePC()")
+	})
+}
+
+// getCurinstr returns a pointer to the first instruction of d3 marked AtPC,
+// or nil when no instruction is marked.
+func getCurinstr(d3 api.AsmInstructions) *api.AsmInstruction {
+	for idx := 0; idx < len(d3); idx++ {
+		if instr := &d3[idx]; instr.AtPC {
+			return instr
+		}
+	}
+	return nil
+}
+
+func TestDisasm(t *testing.T) {
+	// Tests that disassembling by PC, range, and current PC all yield similar results
+	// Tests that disassembly by current PC will return a disassembly containing the instruction at PC
+	// Tests that stepping on a calculated CALL instruction will yield a disassembly that contains the
+	// effective destination of the CALL instruction
+	withTestClient("locationsprog2", t, func(c service.Client) {
+		ch := c.Continue()
+		state := <-ch
+		assertNoError(state.Err, t, "Continue()")
+
+		locs, err := c.FindLocation(api.EvalScope{-1, 0}, "main.main")
+		assertNoError(err, t, "FindLocation()")
+		if len(locs) != 1 {
+			t.Fatalf("wrong number of locations for main.main: %d", len(locs))
+		}
+		d1, err := c.DisassemblePC(api.EvalScope{-1, 0}, locs[0].PC, api.IntelFlavour)
+		assertNoError(err, t, "DisassemblePC()")
+		if len(d1) < 2 {
+			t.Fatalf("wrong size of disassembly: %d", len(d1))
+		}
+
+		// The range covered by d1 ends right after the bytes of its last instruction.
+		pcstart := d1[0].Loc.PC
+		pcend := d1[len(d1)-1].Loc.PC + uint64(len(d1[len(d1)-1].Bytes))
+		d2, err := c.DisassembleRange(api.EvalScope{-1, 0}, pcstart, pcend, api.IntelFlavour)
+		assertNoError(err, t, "DisassembleRange()")
+
+		if len(d1) != len(d2) {
+			t.Logf("d1: %v", d1)
+			t.Logf("d2: %v", d2)
+			t.Fatal("mismatched length between disassemble pc and disassemble range")
+		}
+
+		d3, err := c.DisassemblePC(api.EvalScope{-1, 0}, state.CurrentThread.PC, api.IntelFlavour)
+		assertNoError(err, t, "DisassemblePC() - second call")
+
+		if len(d1) != len(d3) {
+			t.Logf("d1: %v", d1)
+			t.Logf("d3: %v", d3)
+			t.Fatal("mismatched length between the two calls of disassemble pc")
+		}
+
+		// look for static call to afunction() on line 29
+		found := false
+		for i := range d3 {
+			if d3[i].Loc.Line == 29 && strings.HasPrefix(d3[i].Text, "call") && d3[i].DestLoc != nil && d3[i].DestLoc.Function != nil && d3[i].DestLoc.Function.Name == "main.afunction" {
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Fatal("Could not find call to main.afunction on line 29")
+		}
+
+		// The disassembly of the current function must contain the instruction at PC.
+		startinstr := getCurinstr(d3)
+		if startinstr == nil {
+			t.Logf("d3: %v", d3)
+			t.Fatal("PC instruction not found")
+		}
+
+		count := 0
+		for {
+			if count > 20 {
+				t.Fatal("too many step instructions executed without finding a call instruction")
+			}
+			state, err := c.StepInstruction()
+			assertNoError(err, t, fmt.Sprintf("StepInstruction() %d", count))
+
+			d3, err = c.DisassemblePC(api.EvalScope{-1, 0}, state.CurrentThread.PC, api.IntelFlavour)
+			assertNoError(err, t, fmt.Sprintf("DisassemblePC() %d", count))
+
+			curinstr := getCurinstr(d3)
+
+			if curinstr == nil {
+				t.Fatalf("Could not find current instruction %d", count)
+			}
+
+			// Stepping must reach the call before leaving the starting source line.
+			if curinstr.Loc.Line != startinstr.Loc.Line {
+				t.Fatal("Calling StepInstruction() repeatedly did not find the call instruction")
+			}
+
+			if strings.HasPrefix(curinstr.Text, "call") {
+				t.Logf("call: %v", curinstr)
+				if curinstr.DestLoc == nil || curinstr.DestLoc.Function == nil {
+					t.Fatalf("Call instruction does not have destination: %v", curinstr)
+				}
+				if curinstr.DestLoc.Function.Name != "main.afunction" {
+					t.Fatalf("Call instruction destination not main.afunction: %v", curinstr)
+				}
+				break
+			}
+
+			count++
+		}
 	})
 }

--- a/terminal/command.go
+++ b/terminal/command.go
@ -4,6 +4,7 @@ package terminal

 import (
 	"bufio"
+	"errors"
 	"fmt"
 	"go/parser"
 	"go/scanner"
@ -83,6 +84,7 @@ func DebugCommands(client service.Client) *Commands {
 		{aliases: []string{"stack", "bt"}, cmdFn: stackCommand, helpMsg: "stack [<depth>] [-full]. Prints stack."},
 		{aliases: []string{"frame"}, cmdFn: frame, helpMsg: "Sets current stack frame (0 is the top of the stack)"},
 		{aliases: []string{"source"}, cmdFn: c.sourceCommand, helpMsg: "Executes a file containing a list of delve commands"},
+		{aliases: []string{"diassemble", "disass"}, cmdFn: g0f0(disassCommand), helpMsg: "Disassembles memory"},
 	}

 	return c
@ -371,6 +373,8 @@ func scopePrefix(t *Term, cmdstr string) error {
 			return printVar(t, scope, rest)
 		case "set":
 			return setVar(t, scope, rest)
+		case "disassemble", "disasm":
+			return disassCommand(t, scope, rest)
 		default:
 			return fmt.Errorf("unknown command %s", cmd)
 		}
@ -818,6 +822,67 @@ func (c *Commands) sourceCommand(t *Term, args string) error {
 	return c.executeFile(t, args)
 }

+// disasmUsageError is returned when the disassemble command is given malformed arguments.
+var disasmUsageError = errors.New("wrong number of arguments: disassemble [-a <start> <end>] [-l <locspec>]")
+
+// disassCommand implements the disassemble command and prints the result to stdout.
+//
+// Supported forms of args:
+//   - ""                 disassembles the function containing the current location
+//   - "-a <start> <end>" disassembles the address range between start and end
+//   - "-l <locspec>"     disassembles the function containing the given location
+//
+// Any other form returns disasmUsageError. Errors from the client are returned unchanged.
+func disassCommand(t *Term, scope api.EvalScope, args string) error {
+	var cmd, rest string
+
+	if args != "" {
+		argv := strings.SplitN(args, " ", 2)
+		if len(argv) != 2 {
+			return disasmUsageError
+		}
+		cmd = argv[0]
+		rest = argv[1]
+	}
+
+	var disasm api.AsmInstructions
+	var disasmErr error
+
+	switch cmd {
+	case "":
+		locs, err := t.client.FindLocation(scope, "+0")
+		if err != nil {
+			return err
+		}
+		if len(locs) == 0 {
+			return errors.New("could not determine current location")
+		}
+		disasm, disasmErr = t.client.DisassemblePC(scope, locs[0].PC, api.IntelFlavour)
+	case "-a":
+		v := strings.SplitN(rest, " ", 2)
+		if len(v) != 2 {
+			return disasmUsageError
+		}
+		// Addresses are unsigned: ParseUint accepts the whole 64-bit range.
+		startpc, err := strconv.ParseUint(v[0], 0, 64)
+		if err != nil {
+			return fmt.Errorf("wrong argument: %s is not a number", v[0])
+		}
+		endpc, err := strconv.ParseUint(v[1], 0, 64)
+		if err != nil {
+			return fmt.Errorf("wrong argument: %s is not a number", v[1])
+		}
+		disasm, disasmErr = t.client.DisassembleRange(scope, startpc, endpc, api.IntelFlavour)
+	case "-l":
+		locs, err := t.client.FindLocation(scope, rest)
+		if err != nil {
+			return err
+		}
+		if len(locs) == 0 {
+			return errors.New("expression does not specify any location")
+		}
+		if len(locs) > 1 {
+			return errors.New("expression specifies multiple locations")
+		}
+		disasm, disasmErr = t.client.DisassemblePC(scope, locs[0].PC, api.IntelFlavour)
+	default:
+		return disasmUsageError
+	}
+
+	if disasmErr != nil {
+		return disasmErr
+	}
+
+	DisasmPrint(disasm, os.Stdout)
+
+	return nil
+}
+
 func digits(n int) int {
 	if n <= 0 {
 		return 1
--- a/terminal/command_test.go
+++ b/terminal/command_test.go
@ -101,6 +101,6 @@ func TestExecuteFile(t *testing.T) {
 }

 func TestIssue354(t *testing.T) {
-	printStack([]api.Stackframe{ }, "")
+	printStack([]api.Stackframe{}, "")
 	printStack([]api.Stackframe{{api.Location{PC: 0, File: "irrelevant.go", Line: 10, Function: nil}, nil, nil}}, "")
 }
--- a/terminal/disasmprint.go
+++ b/terminal/disasmprint.go
@ -0,0 +1,31 @@
+package terminal
+
+import (
+	"bufio"
+	"fmt"
+	"github.com/derekparker/delve/service/api"
+	"io"
+	"path/filepath"
+	"text/tabwriter"
+)
+
+// DisasmPrint writes the instructions in dv to out as a tab-aligned listing.
+// When the first instruction belongs to a known function, a "TEXT" header line
+// with the function name and file is written first. Each following line shows a
+// "=>" marker for the instruction at PC, the file base name and line, the
+// address (followed by "*" when a breakpoint is set), the raw bytes in hex and
+// the instruction text.
+func DisasmPrint(dv api.AsmInstructions, out io.Writer) {
+	buffered := bufio.NewWriter(out)
+	defer buffered.Flush()
+
+	if len(dv) > 0 {
+		if first := dv[0].Loc; first.Function != nil {
+			fmt.Fprintf(buffered, "TEXT %s(SB) %s\n", first.Function.Name, first.File)
+		}
+	}
+
+	// Deferred after the buffered writer, so the table is flushed into it first.
+	table := tabwriter.NewWriter(buffered, 1, 8, 1, '\t', 0)
+	defer table.Flush()
+
+	for i := range dv {
+		inst := dv[i]
+		var bpMark, pcMark string
+		if inst.Breakpoint {
+			bpMark = "*"
+		}
+		if inst.AtPC {
+			pcMark = "=>"
+		}
+		fmt.Fprintf(table, "%s\t%s:%d\t%#x%s\t%x\t%s\n", pcMark, filepath.Base(inst.Loc.File), inst.Loc.Line, inst.Loc.PC, bpMark, inst.Bytes, inst.Text)
+	}
+}
--- a/vendor/rsc.io/x86/x86asm/Makefile
+++ b/vendor/rsc.io/x86/x86asm/Makefile
@ -0,0 +1,2 @@
+tables.go: ../x86map/map.go ../x86.csv
+	go run ../x86map/map.go -fmt=decoder ../x86.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go
--- a/vendor/rsc.io/x86/x86asm/decode.go
+++ b/vendor/rsc.io/x86/x86asm/decode.go
--- a/vendor/rsc.io/x86/x86asm/gnu.go
+++ b/vendor/rsc.io/x86/x86asm/gnu.go
@ -0,0 +1,926 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils.
+// This general form is often called ``AT&T syntax'' as a reference to AT&T System V Unix.
+//
+// The function works on its own copy of inst. It proceeds in stages: adjust
+// the opcode and implicit arguments, decide which prefixes are displayed,
+// choose the mnemonic and its size suffix, format the arguments (reversed
+// from Intel order for most instructions), build the prefix string, and
+// finally join everything into the returned text.
+func GNUSyntax(inst Inst) string {
+	// Rewrite instruction to mimic GNU peculiarities.
+	// Note that inst has been passed by value and contains
+	// no pointers, so any changes we make here are local
+	// and will not propagate back out to the caller.
+
+	// Adjust opcode [sic].
+	switch inst.Op {
+	case FDIV, FDIVR, FSUB, FSUBR, FDIVP, FDIVRP, FSUBP, FSUBRP:
+		// DC E0, DC F0: libopcodes swaps FSUBR/FSUB and FDIVR/FDIV, at least
+		// if you believe the Intel manual is correct (the encoding is irregular as given;
+		// libopcodes uses the more regular expected encoding).
+		// TODO(rsc): Test to ensure Intel manuals are correct and report to libopcodes maintainers?
+		// NOTE: iant thinks this is deliberate, but we can't find the history.
+		_, reg1 := inst.Args[0].(Reg)
+		_, reg2 := inst.Args[1].(Reg)
+		if reg1 && reg2 && (inst.Opcode>>24 == 0xDC || inst.Opcode>>24 == 0xDE) {
+			switch inst.Op {
+			case FDIV:
+				inst.Op = FDIVR
+			case FDIVR:
+				inst.Op = FDIV
+			case FSUB:
+				inst.Op = FSUBR
+			case FSUBR:
+				inst.Op = FSUB
+			case FDIVP:
+				inst.Op = FDIVRP
+			case FDIVRP:
+				inst.Op = FDIVP
+			case FSUBP:
+				inst.Op = FSUBRP
+			case FSUBRP:
+				inst.Op = FSUBP
+			}
+		}
+
+	case MOVNTSD:
+		// MOVNTSD is F2 0F 2B /r.
+		// MOVNTSS is F3 0F 2B /r (supposedly; not in manuals).
+		// Usually inner prefixes win for display,
+		// so that F3 F2 0F 2B 11 is REP MOVNTSD
+		// and F2 F3 0F 2B 11 is REPN MOVNTSS.
+		// Libopcodes always prefers MOVNTSS regardless of prefix order.
+		if countPrefix(&inst, 0xF3) > 0 {
+			found := false
+			for i := len(inst.Prefix) - 1; i >= 0; i-- {
+				switch inst.Prefix[i] & 0xFF {
+				case 0xF3:
+					if !found {
+						found = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				case 0xF2:
+					inst.Prefix[i] &^= PrefixImplicit
+				}
+			}
+			inst.Op = MOVNTSS
+		}
+	}
+
+	// Add implicit arguments.
+	switch inst.Op {
+	case MONITOR:
+		inst.Args[0] = EDX
+		inst.Args[1] = ECX
+		inst.Args[2] = EAX
+		if inst.AddrSize == 16 {
+			inst.Args[2] = AX
+		}
+
+	case MWAIT:
+		if inst.Mode == 64 {
+			inst.Args[0] = RCX
+			inst.Args[1] = RAX
+		} else {
+			inst.Args[0] = ECX
+			inst.Args[1] = EAX
+		}
+	}
+
+	// Adjust which prefixes will be displayed.
+	// The rule is to display all the prefixes not implied by
+	// the usual instruction display, that is, all the prefixes
+	// except the ones with PrefixImplicit set.
+	// However, of course, there are exceptions to the rule.
+	switch inst.Op {
+	case CRC32:
+		// CRC32 has a mandatory F2 prefix.
+		// If there are multiple F2s and no F3s, the extra F2s do not print.
+		// (And Decode has already marked them implicit.)
+		// However, if there is an F3 anywhere, then the extra F2s do print.
+		// If there are multiple F2 prefixes *and* an (ignored) F3,
+		// then libopcodes prints the extra F2s as REPNs.
+		if countPrefix(&inst, 0xF2) > 1 {
+			unmarkImplicit(&inst, 0xF2)
+			markLastImplicit(&inst, 0xF2)
+		}
+
+		// An unused data size override should probably be shown,
+		// to distinguish DATA16 CRC32B from plain CRC32B,
+		// but libopcodes always treats the final override as implicit
+		// and the others as explicit.
+		unmarkImplicit(&inst, PrefixDataSize)
+		markLastImplicit(&inst, PrefixDataSize)
+
+	case CVTSI2SD, CVTSI2SS:
+		if !isMem(inst.Args[1]) {
+			markLastImplicit(&inst, PrefixDataSize)
+		}
+
+	case CVTSD2SI, CVTSS2SI, CVTTSD2SI, CVTTSS2SI,
+		ENTER, FLDENV, FNSAVE, FNSTENV, FRSTOR, LGDT, LIDT, LRET,
+		POP, PUSH, RET, SGDT, SIDT, SYSRET, XBEGIN:
+		markLastImplicit(&inst, PrefixDataSize)
+
+	case LOOP, LOOPE, LOOPNE, MONITOR:
+		markLastImplicit(&inst, PrefixAddrSize)
+
+	case MOV:
+		// The 16-bit and 32-bit forms of MOV Sreg, dst and MOV src, Sreg
+		// cannot be distinguished when src or dst refers to memory, because
+		// Sreg is always a 16-bit value, even when we're doing a 32-bit
+		// instruction. Because the instruction tables distinguished these two,
+		// any operand size prefix has been marked as used (to decide which
+		// branch to take). Unmark it, so that it will show up in disassembly,
+		// so that the reader can tell the size of memory operand.
+		dst, _ := inst.Args[0].(Reg)
+		src, _ := inst.Args[1].(Reg)
+		if ES <= src && src <= GS && isMem(inst.Args[0]) || ES <= dst && dst <= GS && isMem(inst.Args[1]) {
+			unmarkImplicit(&inst, PrefixDataSize)
+		}
+
+	case MOVDQU:
+		if countPrefix(&inst, 0xF3) > 1 {
+			unmarkImplicit(&inst, 0xF3)
+			markLastImplicit(&inst, 0xF3)
+		}
+
+	case MOVQ2DQ:
+		markLastImplicit(&inst, PrefixDataSize)
+
+	case SLDT, SMSW, STR, FXRSTOR, XRSTOR, XSAVE, XSAVEOPT, CMPXCHG8B:
+		if isMem(inst.Args[0]) {
+			unmarkImplicit(&inst, PrefixDataSize)
+		}
+
+	case SYSEXIT:
+		unmarkImplicit(&inst, PrefixDataSize)
+	}
+
+	// On conditional jumps and loops that carry both a CS and a DS prefix,
+	// strip the branch-hint interpretation so they print as segment prefixes.
+	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
+		if countPrefix(&inst, PrefixCS) > 0 && countPrefix(&inst, PrefixDS) > 0 {
+			for i, p := range inst.Prefix {
+				switch p & 0xFFF {
+				case PrefixPN, PrefixPT:
+					inst.Prefix[i] &= 0xF0FF // cut interpretation bits, producing original segment prefix
+				}
+			}
+		}
+	}
+
+	// XACQUIRE/XRELEASE adjustment.
+	if inst.Op == MOV {
+		// MOV into memory is a candidate for turning REP into XRELEASE.
+		// However, if the REP is followed by a REPN, that REPN blocks the
+		// conversion.
+		haveREPN := false
+		for i := len(inst.Prefix) - 1; i >= 0; i-- {
+			switch inst.Prefix[i] &^ PrefixIgnored {
+			case PrefixREPN:
+				haveREPN = true
+			case PrefixXRELEASE:
+				if haveREPN {
+					inst.Prefix[i] = PrefixREP
+				}
+			}
+		}
+	}
+
+	// We only format the final F2/F3 as XRELEASE/XACQUIRE.
+	// Scanning from the end, any earlier duplicate is demoted to REP/REPN.
+	haveXA := false
+	haveXR := false
+	for i := len(inst.Prefix) - 1; i >= 0; i-- {
+		switch inst.Prefix[i] &^ PrefixIgnored {
+		case PrefixXRELEASE:
+			if !haveXR {
+				haveXR = true
+			} else {
+				inst.Prefix[i] = PrefixREP
+			}
+
+		case PrefixXACQUIRE:
+			if !haveXA {
+				haveXA = true
+			} else {
+				inst.Prefix[i] = PrefixREPN
+			}
+		}
+	}
+
+	// Determine opcode.
+	// The default is the lowercased Op name; gnuOp overrides it where GNU differs.
+	op := strings.ToLower(inst.Op.String())
+	if alt := gnuOp[inst.Op]; alt != "" {
+		op = alt
+	}
+
+	// Determine opcode suffix.
+	// Libopcodes omits the suffix if the width of the operation
+	// can be inferred from a register argument. For example,
+	// add $1, %ebx has no suffix because you can tell from the
+	// 32-bit register destination that it is a 32-bit add,
+	// but in addl $1, (%ebx), the destination is memory, so the
+	// size is not evident without the l suffix.
+	needSuffix := true
+SuffixLoop:
+	for i, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		switch a := a.(type) {
+		case Reg:
+			switch inst.Op {
+			case MOVSX, MOVZX:
+				continue
+
+			case SHL, SHR, RCL, RCR, ROL, ROR, SAR:
+				if i == 1 {
+					// shift count does not tell us operand size
+					continue
+				}
+
+			case CRC32:
+				// The source argument does tell us operand size,
+				// but libopcodes still always puts a suffix on crc32.
+				continue
+
+			case PUSH, POP:
+				// Even though segment registers are 16-bit, push and pop
+				// can save/restore them from 32-bit slots, so they
+				// do not imply operand size.
+				if ES <= a && a <= GS {
+					continue
+				}
+
+			case CVTSI2SD, CVTSI2SS:
+				// The integer register argument takes priority.
+				if X0 <= a && a <= X15 {
+					continue
+				}
+			}
+
+			if AL <= a && a <= R15 || ES <= a && a <= GS || X0 <= a && a <= X15 || M0 <= a && a <= M7 {
+				needSuffix = false
+				break SuffixLoop
+			}
+		}
+	}
+
+	if needSuffix {
+		switch inst.Op {
+		case CMPXCHG8B, FLDCW, FNSTCW, FNSTSW, LDMXCSR, LLDT, LMSW, LTR, PCLMULQDQ,
+			SETA, SETAE, SETB, SETBE, SETE, SETG, SETGE, SETL, SETLE, SETNE, SETNO, SETNP, SETNS, SETO, SETP, SETS,
+			SLDT, SMSW, STMXCSR, STR, VERR, VERW:
+			// For various reasons, libopcodes emits no suffix for these instructions.
+
+		case CRC32:
+			op += byteSizeSuffix(argBytes(&inst, inst.Args[1]))
+
+		case LGDT, LIDT, SGDT, SIDT:
+			op += byteSizeSuffix(inst.DataSize / 8)
+
+		case MOVZX, MOVSX:
+			// Integer size conversions get two suffixes.
+			op = op[:4] + byteSizeSuffix(argBytes(&inst, inst.Args[1])) + byteSizeSuffix(argBytes(&inst, inst.Args[0]))
+
+		case LOOP, LOOPE, LOOPNE:
+			// Add w suffix to indicate use of CX register instead of ECX.
+			if inst.AddrSize == 16 {
+				op += "w"
+			}
+
+		case CALL, ENTER, JMP, LCALL, LEAVE, LJMP, LRET, RET, SYSRET, XBEGIN:
+			// Add w suffix to indicate use of 16-bit target.
+			// Exclude JMP rel8.
+			if inst.Opcode>>24 == 0xEB {
+				break
+			}
+			if inst.DataSize == 16 && inst.Mode != 16 {
+				markLastImplicit(&inst, PrefixDataSize)
+				op += "w"
+			} else if inst.Mode == 64 {
+				op += "q"
+			}
+
+		case FRSTOR, FNSAVE, FNSTENV, FLDENV:
+			// Add s suffix to indicate shortened FPU state (I guess).
+			if inst.DataSize == 16 {
+				op += "s"
+			}
+
+		case PUSH, POP:
+			if markLastImplicit(&inst, PrefixDataSize) {
+				op += byteSizeSuffix(inst.DataSize / 8)
+			} else if inst.Mode == 64 {
+				op += "q"
+			} else {
+				op += byteSizeSuffix(inst.MemBytes)
+			}
+
+		default:
+			if isFloat(inst.Op) {
+				// I can't explain any of this, but it's what libopcodes does.
+				switch inst.MemBytes {
+				default:
+					if (inst.Op == FLD || inst.Op == FSTP) && isMem(inst.Args[0]) {
+						op += "t"
+					}
+				case 4:
+					if isFloatInt(inst.Op) {
+						op += "l"
+					} else {
+						op += "s"
+					}
+				case 8:
+					if isFloatInt(inst.Op) {
+						op += "ll"
+					} else {
+						op += "l"
+					}
+				}
+				break
+			}
+
+			op += byteSizeSuffix(inst.MemBytes)
+		}
+	}
+
+	// Adjust special case opcodes.
+	switch inst.Op {
+	case 0:
+		// No opcode: if there is a first prefix, its lowercased name is the whole result.
+		if inst.Prefix[0] != 0 {
+			return strings.ToLower(inst.Prefix[0].String())
+		}
+
+	case INT:
+		if inst.Opcode>>24 == 0xCC {
+			inst.Args[0] = nil
+			op = "int3"
+		}
+
+	case CMPPS, CMPPD, CMPSD_XMM, CMPSS:
+		// A predicate immediate in 0..7 is folded into the mnemonic.
+		imm, ok := inst.Args[2].(Imm)
+		if ok && 0 <= imm && imm < 8 {
+			inst.Args[2] = nil
+			op = cmppsOps[imm] + op[3:]
+		}
+
+	case PCLMULQDQ:
+		// An immediate using only bits 0 and 4 selects one of the four named forms.
+		imm, ok := inst.Args[2].(Imm)
+		if ok && imm&^0x11 == 0 {
+			inst.Args[2] = nil
+			op = pclmulqOps[(imm&0x10)>>3|(imm&1)]
+		}
+
+	case XLATB:
+		if markLastImplicit(&inst, PrefixAddrSize) {
+			op = "xlat" // not xlatb
+		}
+	}
+
+	// Build list of argument strings.
+	var (
+		usedPrefixes bool     // segment prefixes consumed by Mem formatting
+		args         []string // formatted arguments
+	)
+	for i, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		switch inst.Op {
+		case MOVSB, MOVSW, MOVSD, MOVSQ, OUTSB, OUTSW, OUTSD:
+			if i == 0 {
+				usedPrefixes = true // disable use of prefixes for first argument
+			} else {
+				usedPrefixes = false
+			}
+		}
+		// Skip an immediate 1 when the first opcode byte is 0xD0 or 0xD1.
+		if a == Imm(1) && (inst.Opcode>>24)&^1 == 0xD0 {
+			continue
+		}
+		args = append(args, gnuArg(&inst, a, &usedPrefixes))
+	}
+
+	// The default is to print the arguments in reverse Intel order.
+	// A few instructions inhibit this behavior.
+	switch inst.Op {
+	case BOUND, LCALL, ENTER, LJMP:
+		// no reverse
+	default:
+		// reverse args
+		for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 {
+			args[i], args[j] = args[j], args[i]
+		}
+	}
+
+	// Build prefix string.
+	// Must be after argument formatting, which can turn off segment prefixes.
+	var (
+		prefix       = "" // output string
+		numAddr      = 0
+		numData      = 0
+		implicitData = false
+	)
+	// First pass: note whether any data size prefix has been marked implicit.
+	for _, p := range inst.Prefix {
+		if p&0xFF == PrefixDataSize && p&PrefixImplicit != 0 {
+			implicitData = true
+		}
+	}
+	// Second pass: emit text for every prefix that is not implicit.
+	for _, p := range inst.Prefix {
+		if p == 0 {
+			break
+		}
+		if p&PrefixImplicit != 0 {
+			continue
+		}
+		switch p &^ (PrefixIgnored | PrefixInvalid) {
+		default:
+			if p.IsREX() {
+				if p&0xFF == PrefixREX {
+					prefix += "rex "
+				} else {
+					prefix += "rex." + p.String()[4:] + " "
+				}
+				break
+			}
+			prefix += strings.ToLower(p.String()) + " "
+
+		case PrefixPN:
+			op += ",pn"
+			continue
+
+		case PrefixPT:
+			op += ",pt"
+			continue
+
+		case PrefixAddrSize, PrefixAddr16, PrefixAddr32:
+			// For unknown reasons, if the addr16 prefix is repeated,
+			// libopcodes displays all but the last as addr32, even though
+			// the addressing form used in a memory reference is clearly
+			// still 16-bit.
+			n := 32
+			if inst.Mode == 32 {
+				n = 16
+			}
+			numAddr++
+			if countPrefix(&inst, PrefixAddrSize) > numAddr {
+				n = inst.Mode
+			}
+			prefix += fmt.Sprintf("addr%d ", n)
+			continue
+
+		case PrefixData16, PrefixData32:
+			if implicitData && countPrefix(&inst, PrefixDataSize) > 1 {
+				// Similar to the addr32 logic above, but it only kicks in
+				// when something used the data size prefix (one is implicit).
+				n := 16
+				if inst.Mode == 16 {
+					n = 32
+				}
+				numData++
+				if countPrefix(&inst, PrefixDataSize) > numData {
+					if inst.Mode == 16 {
+						n = 16
+					} else {
+						n = 32
+					}
+				}
+				prefix += fmt.Sprintf("data%d ", n)
+				continue
+			}
+			prefix += strings.ToLower(p.String()) + " "
+		}
+	}
+
+	// Finally! Put it all together.
+	text := prefix + op
+	if args != nil {
+		text += " "
+		// Indirect call/jmp gets a star to distinguish from direct jump address.
+		if (inst.Op == CALL || inst.Op == JMP || inst.Op == LJMP || inst.Op == LCALL) && (isMem(inst.Args[0]) || isReg(inst.Args[0])) {
+			text += "*"
+		}
+		text += strings.Join(args, ",")
+	}
+	return text
+}
+
+// gnuArg formats the single argument x of instruction inst in GNU syntax.
+//
+// For a Mem argument whose instruction accepts segment prefixes, and only
+// while *usedPrefixes is false, the segment-override prefixes found in
+// inst.Prefix are folded into the operand text, marked PrefixImplicit on
+// inst, and *usedPrefixes is set to true.
+func gnuArg(inst *Inst, x Arg, usedPrefixes *bool) string {
+	if x == nil {
+		return "<nil>"
+	}
+
+	switch arg := x.(type) {
+	case Reg:
+		switch inst.Op {
+		case IN, INSB, INSW, INSD, OUT, OUTSB, OUTSW, OUTSD:
+			// DX is the port, but libopcodes prints it as if it were a memory reference.
+			if arg == DX {
+				return "(%dx)"
+			}
+
+		case CVTSI2SS, CVTSI2SD, CVTSS2SI, CVTSD2SI, CVTTSD2SI, CVTTSS2SI:
+			// With a 16-bit data size, shift a 32-bit register name down
+			// to the 16-bit register at the same position.
+			if inst.DataSize == 16 && EAX <= arg && arg <= R15L {
+				arg -= EAX - AX
+			}
+		}
+		return gccRegName[arg]
+
+	case Mem:
+		// Segment registers in the order their overrides are printed,
+		// paired with the matching prefix and the text to emit.
+		segments := [...]struct {
+			reg    Reg
+			prefix Prefix
+			text   string
+		}{
+			{CS, PrefixCS, "%cs:"},
+			{DS, PrefixDS, "%ds:"},
+			{SS, PrefixSS, "%ss:"},
+			{ES, PrefixES, "%es:"},
+			{FS, PrefixFS, "%fs:"},
+			{GS, PrefixGS, "%gs:"},
+		}
+		var have [len(segments)]bool
+		for k := range segments {
+			have[k] = arg.Segment == segments[k].reg
+		}
+
+		takesSegPrefix := true
+		switch inst.Op {
+		case INSB, INSW, INSD, STOSB, STOSW, STOSD, STOSQ, SCASB, SCASW, SCASD, SCASQ:
+			// These do not accept segment prefixes, at least in the GNU rendering.
+			takesSegPrefix = false
+		}
+		if takesSegPrefix && !*usedPrefixes {
+			// Walk the prefixes last to first; the first occurrence seen of
+			// each segment override is absorbed into this operand.
+			for i := len(inst.Prefix) - 1; i >= 0; i-- {
+				p := inst.Prefix[i] &^ PrefixIgnored
+				for k := range segments {
+					if p == segments[k].prefix && !have[k] {
+						have[k] = true
+						inst.Prefix[i] |= PrefixImplicit
+					}
+				}
+			}
+			*usedPrefixes = true
+		}
+
+		seg := ""
+		for k := range segments {
+			if have[k] {
+				seg += segments[k].text
+			}
+		}
+
+		var disp string
+		if arg.Disp != 0 {
+			disp = fmt.Sprintf("%#x", arg.Disp)
+		}
+
+		// No index component is printed when there is no scale, or when the
+		// index is empty with scale 1 and the base is ESP, RSP, or absent in
+		// 64-bit mode.
+		baseOnly := arg.Scale == 0
+		if !baseOnly && arg.Index == 0 && arg.Scale == 1 {
+			baseOnly = arg.Base == ESP || arg.Base == RSP || arg.Base == 0 && inst.Mode == 64
+		}
+		if baseOnly {
+			if arg.Base == 0 {
+				return seg + disp
+			}
+			return seg + disp + "(" + gccRegName[arg.Base] + ")"
+		}
+
+		base := ""
+		if arg.Base != 0 {
+			base = gccRegName[arg.Base]
+		}
+
+		var index string
+		switch {
+		case arg.Index != 0:
+			index = gccRegName[arg.Index]
+		case inst.AddrSize == 64:
+			index = "%riz"
+		default:
+			index = "%eiz"
+		}
+
+		if AX <= arg.Base && arg.Base <= DI {
+			// 16-bit addressing - no scale
+			return fmt.Sprintf("%s%s(%s,%s)", seg, disp, base, index)
+		}
+		return fmt.Sprintf("%s%s(%s,%s,%d)", seg, disp, base, index, arg.Scale)
+
+	case Rel:
+		return fmt.Sprintf(".%+#x", int32(arg))
+
+	case Imm:
+		if inst.Mode == 32 {
+			return fmt.Sprintf("$%#x", uint32(arg))
+		}
+		return fmt.Sprintf("$%#x", int64(arg))
+	}
+
+	// Any other Arg implementation falls back to its own String form.
+	return x.String()
+}
+
+// gccRegName gives the GNU-syntax name of each register, indexed by Reg.
+// Index 0 (the zero Reg) is the placeholder "REG0". The table has no entries
+// for DR8 through DR15, so those indexes hold the empty string.
+var gccRegName = [...]string{
+	0:    "REG0",
+	AL:   "%al",
+	CL:   "%cl",
+	BL:   "%bl",
+	DL:   "%dl",
+	AH:   "%ah",
+	CH:   "%ch",
+	BH:   "%bh",
+	DH:   "%dh",
+	SPB:  "%spl",
+	BPB:  "%bpl",
+	SIB:  "%sil",
+	DIB:  "%dil",
+	R8B:  "%r8b",
+	R9B:  "%r9b",
+	R10B: "%r10b",
+	R11B: "%r11b",
+	R12B: "%r12b",
+	R13B: "%r13b",
+	R14B: "%r14b",
+	R15B: "%r15b",
+	AX:   "%ax",
+	CX:   "%cx",
+	BX:   "%bx",
+	DX:   "%dx",
+	SP:   "%sp",
+	BP:   "%bp",
+	SI:   "%si",
+	DI:   "%di",
+	R8W:  "%r8w",
+	R9W:  "%r9w",
+	R10W: "%r10w",
+	R11W: "%r11w",
+	R12W: "%r12w",
+	R13W: "%r13w",
+	R14W: "%r14w",
+	R15W: "%r15w",
+	EAX:  "%eax",
+	ECX:  "%ecx",
+	EDX:  "%edx",
+	EBX:  "%ebx",
+	ESP:  "%esp",
+	EBP:  "%ebp",
+	ESI:  "%esi",
+	EDI:  "%edi",
+	R8L:  "%r8d",
+	R9L:  "%r9d",
+	R10L: "%r10d",
+	R11L: "%r11d",
+	R12L: "%r12d",
+	R13L: "%r13d",
+	R14L: "%r14d",
+	R15L: "%r15d",
+	RAX:  "%rax",
+	RCX:  "%rcx",
+	RDX:  "%rdx",
+	RBX:  "%rbx",
+	RSP:  "%rsp",
+	RBP:  "%rbp",
+	RSI:  "%rsi",
+	RDI:  "%rdi",
+	R8:   "%r8",
+	R9:   "%r9",
+	R10:  "%r10",
+	R11:  "%r11",
+	R12:  "%r12",
+	R13:  "%r13",
+	R14:  "%r14",
+	R15:  "%r15",
+	IP:   "%ip",
+	EIP:  "%eip",
+	RIP:  "%rip",
+	F0:   "%st",
+	F1:   "%st(1)",
+	F2:   "%st(2)",
+	F3:   "%st(3)",
+	F4:   "%st(4)",
+	F5:   "%st(5)",
+	F6:   "%st(6)",
+	F7:   "%st(7)",
+	M0:   "%mm0",
+	M1:   "%mm1",
+	M2:   "%mm2",
+	M3:   "%mm3",
+	M4:   "%mm4",
+	M5:   "%mm5",
+	M6:   "%mm6",
+	M7:   "%mm7",
+	X0:   "%xmm0",
+	X1:   "%xmm1",
+	X2:   "%xmm2",
+	X3:   "%xmm3",
+	X4:   "%xmm4",
+	X5:   "%xmm5",
+	X6:   "%xmm6",
+	X7:   "%xmm7",
+	X8:   "%xmm8",
+	X9:   "%xmm9",
+	X10:  "%xmm10",
+	X11:  "%xmm11",
+	X12:  "%xmm12",
+	X13:  "%xmm13",
+	X14:  "%xmm14",
+	X15:  "%xmm15",
+	CS:   "%cs",
+	SS:   "%ss",
+	DS:   "%ds",
+	ES:   "%es",
+	FS:   "%fs",
+	GS:   "%gs",
+	GDTR: "%gdtr",
+	IDTR: "%idtr",
+	LDTR: "%ldtr",
+	MSW:  "%msw",
+	TASK: "%task",
+	CR0:  "%cr0",
+	CR1:  "%cr1",
+	CR2:  "%cr2",
+	CR3:  "%cr3",
+	CR4:  "%cr4",
+	CR5:  "%cr5",
+	CR6:  "%cr6",
+	CR7:  "%cr7",
+	CR8:  "%cr8",
+	CR9:  "%cr9",
+	CR10: "%cr10",
+	CR11: "%cr11",
+	CR12: "%cr12",
+	CR13: "%cr13",
+	CR14: "%cr14",
+	CR15: "%cr15",
+	DR0:  "%db0",
+	DR1:  "%db1",
+	DR2:  "%db2",
+	DR3:  "%db3",
+	DR4:  "%db4",
+	DR5:  "%db5",
+	DR6:  "%db6",
+	DR7:  "%db7",
+	TR0:  "%tr0",
+	TR1:  "%tr1",
+	TR2:  "%tr2",
+	TR3:  "%tr3",
+	TR4:  "%tr4",
+	TR5:  "%tr5",
+	TR6:  "%tr6",
+	TR7:  "%tr7",
+}
+
+// gnuOp lists the opcodes whose GNU mnemonic is not simply the lowercased
+// Op name. GNUSyntax consults it after lowercasing and uses the entry when
+// one is present.
+var gnuOp = map[Op]string{
+	CBW:       "cbtw",
+	CDQ:       "cltd",
+	CMPSD:     "cmpsl",
+	CMPSD_XMM: "cmpsd",
+	CWD:       "cwtd",
+	CWDE:      "cwtl",
+	CQO:       "cqto",
+	INSD:      "insl",
+	IRET:      "iretw",
+	IRETD:     "iret",
+	IRETQ:     "iretq",
+	LODSB:     "lods",
+	LODSD:     "lods",
+	LODSQ:     "lods",
+	LODSW:     "lods",
+	MOVSD:     "movsl",
+	MOVSD_XMM: "movsd",
+	OUTSD:     "outsl",
+	POPA:      "popaw",
+	POPAD:     "popa",
+	POPF:      "popfw",
+	POPFD:     "popf",
+	PUSHA:     "pushaw",
+	PUSHAD:    "pusha",
+	PUSHF:     "pushfw",
+	PUSHFD:    "pushf",
+	SCASB:     "scas",
+	SCASD:     "scas",
+	SCASQ:     "scas",
+	SCASW:     "scas",
+	STOSB:     "stos",
+	STOSD:     "stos",
+	STOSQ:     "stos",
+	STOSW:     "stos",
+	XLATB:     "xlat",
+}
+
+// cmppsOps holds the mnemonic stems that replace "cmp" in CMPPS, CMPPD,
+// CMPSD_XMM and CMPSS, indexed by the instruction's immediate (0 through 7).
+var cmppsOps = []string{
+	"cmpeq",
+	"cmplt",
+	"cmple",
+	"cmpunord",
+	"cmpneq",
+	"cmpnlt",
+	"cmpnle",
+	"cmpord",
+}
+
+// pclmulqOps holds the mnemonics GNUSyntax substitutes for PCLMULQDQ.
+// The index is built from the immediate: bit 4 becomes index bit 1 and
+// bit 0 stays index bit 0.
+var pclmulqOps = []string{
+	"pclmullqlqdq",
+	"pclmulhqlqdq",
+	"pclmullqhqdq",
+	"pclmulhqhqdq",
+}
+
+// countPrefix reports how many entries of inst.Prefix have the same low byte
+// as target. Only the low 8 bits are compared, on both sides.
+func countPrefix(inst *Inst, target Prefix) int {
+	want := target & 0xFF
+	count := 0
+	for i := range inst.Prefix {
+		if inst.Prefix[i]&0xFF == want {
+			count++
+		}
+	}
+	return count
+}
+
+// markLastImplicit sets PrefixImplicit on the last entry of inst.Prefix whose
+// low byte equals prefix, and reports whether such an entry was found.
+func markLastImplicit(inst *Inst, prefix Prefix) bool {
+	for i := len(inst.Prefix); i > 0; i-- {
+		slot := &inst.Prefix[i-1]
+		if *slot&0xFF != prefix {
+			continue
+		}
+		*slot |= PrefixImplicit
+		return true
+	}
+	return false
+}
+
+// unmarkImplicit clears PrefixImplicit on every entry of inst.Prefix whose
+// low byte equals prefix.
+func unmarkImplicit(inst *Inst, prefix Prefix) {
+	for i := range inst.Prefix {
+		if inst.Prefix[i]&0xFF == prefix {
+			inst.Prefix[i] &^= PrefixImplicit
+		}
+	}
+}
+
+// byteSizeSuffix returns the GNU operand-size suffix for an operand of b
+// bytes: "b", "w", "l" or "q" for 1, 2, 4 or 8, and "" for any other size.
+func byteSizeSuffix(b int) string {
+	// Indexed by byte count; sizes without a suffix are left empty.
+	suffixes := [...]string{1: "b", 2: "w", 4: "l", 8: "q"}
+	if b < 0 || b >= len(suffixes) {
+		return ""
+	}
+	return suffixes[b]
+}
+
+// argBytes returns the size in bytes of arg: inst.MemBytes for a memory
+// operand, otherwise whatever regBytes reports for it.
+func argBytes(inst *Inst, arg Arg) int {
+	if !isMem(arg) {
+		return regBytes(arg)
+	}
+	return inst.MemBytes
+}
+
+// isFloat reports whether op is one of the x87 opcodes for which GNUSyntax
+// applies its floating-point suffix rules.
+func isFloat(op Op) bool {
+	switch op {
+	case FADD, FCOM, FCOMP, FDIV, FDIVR,
+		FIADD, FICOM, FICOMP, FIDIV, FIDIVR, FILD, FIMUL,
+		FIST, FISTP, FISTTP, FISUB, FISUBR,
+		FLD, FMUL, FST, FSTP, FSUB, FSUBR:
+		return true
+	default:
+		return false
+	}
+}
+
+// isFloatInt reports whether op is one of the FI* opcodes listed below, the
+// subset of isFloat's set that gets the "l"/"ll" suffixes in GNUSyntax.
+func isFloatInt(op Op) bool {
+	switch op {
+	case FIADD, FICOM, FICOMP, FIDIV, FIDIVR, FILD, FIMUL,
+		FIST, FISTP, FISTTP, FISUB, FISUBR:
+		return true
+	default:
+		return false
+	}
+}
--- a/vendor/rsc.io/x86/x86asm/inst.go
+++ b/vendor/rsc.io/x86/x86asm/inst.go
@ -0,0 +1,643 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package x86asm implements decoding of x86 machine code.
+package x86asm
+
+import (
+	"bytes"
+	"fmt"
+)
+
+// An Inst is a single instruction.
+type Inst struct {
+	Prefix   Prefixes // Prefixes applied to the instruction.
+	Op       Op       // Opcode mnemonic
+	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
+	Args     Args     // Instruction arguments, in Intel order
+	Mode     int      // processor mode in bits: 16, 32, or 64
+	AddrSize int      // address size in bits: 16, 32, or 64
+	DataSize int      // operand size in bits: 16, 32, or 64
+	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
+	Len      int      // length of encoded instruction in bytes
+	PCRel    int      // length of PC-relative address in instruction encoding
+	PCRelOff int      // index of start of PC-relative address in instruction encoding
+}
+
+// Prefixes is an array of prefixes associated with a single instruction.
+// The prefixes are listed in the same order as found in the instruction:
+// each prefix byte corresponds to one slot in the array. The first zero
+// in the array marks the end of the prefixes.
+type Prefixes [14]Prefix
+
+// A Prefix represents an Intel instruction prefix.
+// The low 8 bits are the actual prefix byte encoding,
+// and the top 8 bits contain distinguishing bits and metadata.
+type Prefix uint16
+
+// Prefix values. Constants that share a low byte (for example PrefixREPN,
+// PrefixXACQUIRE and PrefixBND, all 0xF2) are told apart by the bits above
+// the low byte.
+const (
+	// Metadata about the role of a prefix in an instruction.
+	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
+	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
+	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)
+
+	// Memory segment overrides.
+	PrefixES Prefix = 0x26 // ES segment override
+	PrefixCS Prefix = 0x2E // CS segment override
+	PrefixSS Prefix = 0x36 // SS segment override
+	PrefixDS Prefix = 0x3E // DS segment override
+	PrefixFS Prefix = 0x64 // FS segment override
+	PrefixGS Prefix = 0x65 // GS segment override
+
+	// Branch prediction.
+	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
+	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)
+
+	// Size attributes.
+	PrefixDataSize Prefix = 0x66 // operand size override
+	PrefixData16   Prefix = 0x166
+	PrefixData32   Prefix = 0x266
+	PrefixAddrSize Prefix = 0x67 // address size override
+	PrefixAddr16   Prefix = 0x167
+	PrefixAddr32   Prefix = 0x267
+
+	// One of a kind.
+	PrefixLOCK     Prefix = 0xF0 // lock
+	PrefixREPN     Prefix = 0xF2 // repeat not zero
+	PrefixXACQUIRE Prefix = 0x1F2
+	PrefixBND      Prefix = 0x2F2
+	PrefixREP      Prefix = 0xF3 // repeat
+	PrefixXRELEASE Prefix = 0x1F3
+
+	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
+	// the other bits are set or not according to the intended use.
+	PrefixREX  Prefix = 0x40 // REX 64-bit extension prefix
+	PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width)
+	PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm)
+	PrefixREXX Prefix = 0x02 // extension bit X (index field in sib)
+	PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
+)
+
+// IsREX reports whether p is a REX prefix byte, that is, whether bits 4-7
+// of p equal PrefixREX.
+func (p Prefix) IsREX() bool {
+	const highNibble Prefix = 0xF0
+	return p&highNibble == PrefixREX
+}
+
+// String returns the name of p with the metadata bits (implicit, ignored,
+// invalid) masked off. Named prefixes come from prefixNames, other REX
+// prefixes are rendered as "REX." followed by the letters of the W, R, X and
+// B bits that are set, and anything else is printed as "Prefix(0x..)".
+func (p Prefix) String() string {
+	p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
+
+	if name := prefixNames[p]; name != "" {
+		return name
+	}
+	if !p.IsREX() {
+		return fmt.Sprintf("Prefix(%#x)", int(p))
+	}
+
+	rexBits := [...]struct {
+		bit    Prefix
+		letter string
+	}{
+		{PrefixREXW, "W"},
+		{PrefixREXR, "R"},
+		{PrefixREXX, "X"},
+		{PrefixREXB, "B"},
+	}
+	name := "REX."
+	for _, rb := range rexBits {
+		if p&rb.bit != 0 {
+			name += rb.letter
+		}
+	}
+	return name
+}
+
+// An Op is an x86 opcode.
+type Op uint32
+
+// String returns the entry of opNames for op, or "Op(n)" when op is out of
+// range or its entry is empty.
+func (op Op) String() string {
+	if idx := int(op); idx >= 0 && idx < len(opNames) {
+		if name := opNames[idx]; name != "" {
+			return name
+		}
+	}
+	return fmt.Sprintf("Op(%d)", int(op))
+}
+
+// An Args holds the instruction arguments.
+// If an instruction has fewer than 4 arguments,
+// the final elements in the array are nil.
+type Args [4]Arg
+
+// An Arg is a single instruction argument,
+// one of these types: Reg, Mem, Imm, Rel.
+// The unexported isArg method keeps the set of implementations
+// closed to this package.
+type Arg interface {
+	String() string
+	isArg()
+}
+
+// Note that the implementations of Arg that follow are all sized
+// so that on a 64-bit machine the data can be inlined in
+// the interface value instead of requiring an allocation.
+
+// A Reg is a single register.
+// The zero Reg value has no name but indicates ``no register.''
+type Reg uint8
+
+// Register numbers. Within each of the 8-, 16-, 32- and 64-bit general
+// purpose groups the registers appear in the same relative order; code in
+// this package relies on that when it converts between widths by
+// subtracting group bases (for example EAX - AX).
+const (
+	_ Reg = iota
+
+	// 8-bit
+	AL
+	CL
+	DL
+	BL
+	AH
+	CH
+	DH
+	BH
+	SPB
+	BPB
+	SIB
+	DIB
+	R8B
+	R9B
+	R10B
+	R11B
+	R12B
+	R13B
+	R14B
+	R15B
+
+	// 16-bit
+	AX
+	CX
+	DX
+	BX
+	SP
+	BP
+	SI
+	DI
+	R8W
+	R9W
+	R10W
+	R11W
+	R12W
+	R13W
+	R14W
+	R15W
+
+	// 32-bit
+	EAX
+	ECX
+	EDX
+	EBX
+	ESP
+	EBP
+	ESI
+	EDI
+	R8L
+	R9L
+	R10L
+	R11L
+	R12L
+	R13L
+	R14L
+	R15L
+
+	// 64-bit
+	RAX
+	RCX
+	RDX
+	RBX
+	RSP
+	RBP
+	RSI
+	RDI
+	R8
+	R9
+	R10
+	R11
+	R12
+	R13
+	R14
+	R15
+
+	// Instruction pointer.
+	IP  // 16-bit
+	EIP // 32-bit
+	RIP // 64-bit
+
+	// 387 floating point registers.
+	F0
+	F1
+	F2
+	F3
+	F4
+	F5
+	F6
+	F7
+
+	// MMX registers.
+	M0
+	M1
+	M2
+	M3
+	M4
+	M5
+	M6
+	M7
+
+	// XMM registers.
+	X0
+	X1
+	X2
+	X3
+	X4
+	X5
+	X6
+	X7
+	X8
+	X9
+	X10
+	X11
+	X12
+	X13
+	X14
+	X15
+
+	// Segment registers.
+	ES
+	CS
+	SS
+	DS
+	FS
+	GS
+
+	// System registers.
+	GDTR
+	IDTR
+	LDTR
+	MSW
+	TASK
+
+	// Control registers.
+	CR0
+	CR1
+	CR2
+	CR3
+	CR4
+	CR5
+	CR6
+	CR7
+	CR8
+	CR9
+	CR10
+	CR11
+	CR12
+	CR13
+	CR14
+	CR15
+
+	// Debug registers.
+	DR0
+	DR1
+	DR2
+	DR3
+	DR4
+	DR5
+	DR6
+	DR7
+	DR8
+	DR9
+	DR10
+	DR11
+	DR12
+	DR13
+	DR14
+	DR15
+
+	// Task registers.
+	TR0
+	TR1
+	TR2
+	TR3
+	TR4
+	TR5
+	TR6
+	TR7
+)
+
+// regMax is the largest defined Reg value.
+const regMax = TR7
+
+// isArg marks Reg as an implementation of Arg.
+func (Reg) isArg() {}
+
+// String returns the entry of regNames for r, or "Reg(n)" when r is out of
+// range or its entry is empty.
+func (r Reg) String() string {
+	if idx := int(r); idx >= 0 && idx < len(regNames) {
+		if name := regNames[idx]; name != "" {
+			return name
+		}
+	}
+	return fmt.Sprintf("Reg(%d)", int(r))
+}
+
+// A Mem is a memory reference.
+// The general form is Segment:[Base+Scale*Index+Disp].
+type Mem struct {
+	Segment Reg
+	Base    Reg
+	Scale   uint8
+	Index   Reg
+	Disp    int64
+}
+
+// isArg marks Mem as an implementation of Arg.
+func (Mem) isArg() {}
+
+// String renders m as "[base+scale*index+disp]". Absent parts are omitted:
+// the base when it is zero, the index part when Scale is zero, the "scale*"
+// factor when Scale is 1, and the displacement when it is zero unless both
+// Base and Scale are zero. Segment is not printed.
+func (m Mem) String() string {
+	var out bytes.Buffer
+	out.WriteByte('[')
+
+	hasBase := m.Base != 0
+	if hasBase {
+		out.WriteString(m.Base.String())
+	}
+
+	if m.Scale != 0 {
+		if hasBase {
+			out.WriteByte('+')
+		}
+		if m.Scale > 1 {
+			fmt.Fprintf(&out, "%d*", m.Scale)
+		}
+		out.WriteString(m.Index.String())
+	}
+
+	if m.Disp != 0 || !hasBase && m.Scale == 0 {
+		fmt.Fprintf(&out, "%+#x", m.Disp)
+	}
+
+	out.WriteByte(']')
+	return out.String()
+}
+
+// A Rel is an offset relative to the current instruction pointer.
+type Rel int32
+
+// isArg marks Rel as an implementation of Arg.
+func (Rel) isArg() {}
+
+// String renders the offset as a dot followed by the signed decimal value,
+// always with an explicit sign (for example ".+5" or ".-12").
+func (r Rel) String() string {
+	offset := int32(r)
+	return fmt.Sprintf(".%+d", offset)
+}
+
+// An Imm is an integer constant.
+type Imm int64
+
+// isArg marks Imm as an implementation of Arg.
+func (Imm) isArg() {}
+
+// String renders the constant as signed hexadecimal with a 0x prefix.
+func (i Imm) String() string {
+	value := int64(i)
+	return fmt.Sprintf("%#x", value)
+}
+
+// String returns a generic rendering of the instruction: every prefix that
+// is not marked PrefixImplicit, each followed by a space, then the opcode,
+// then the arguments separated by ", " (the first preceded by one space).
+// Both the prefix list and the argument list end at their first zero/nil
+// entry.
+func (i Inst) String() string {
+	var out bytes.Buffer
+
+	for _, p := range i.Prefix {
+		if p == 0 {
+			break
+		}
+		if p&PrefixImplicit == 0 {
+			fmt.Fprintf(&out, "%v ", p)
+		}
+	}
+
+	fmt.Fprintf(&out, "%v", i.Op)
+
+	for n, arg := range i.Args {
+		if arg == nil {
+			break
+		}
+		if n == 0 {
+			out.WriteString(" ")
+		} else {
+			out.WriteString(", ")
+		}
+		fmt.Fprintf(&out, "%v", arg)
+	}
+	return out.String()
+}
+
+// isReg reports whether a holds a Reg.
+func isReg(a Arg) bool {
+	switch a.(type) {
+	case Reg:
+		return true
+	}
+	return false
+}
+
+// isSegReg reports whether a holds a Reg in the segment register range
+// ES through GS.
+func isSegReg(a Arg) bool {
+	if r, ok := a.(Reg); ok {
+		return ES <= r && r <= GS
+	}
+	return false
+}
+
+// isMem reports whether a holds a Mem.
+func isMem(a Arg) bool {
+	switch a.(type) {
+	case Mem:
+		return true
+	}
+	return false
+}
+
+// isImm reports whether a holds an Imm.
+func isImm(a Arg) bool {
+	switch a.(type) {
+	case Imm:
+		return true
+	}
+	return false
+}
+
+// regBytes returns the width in bytes (1, 2, 4 or 8) of the general purpose
+// register held in a. It returns 0 when a is not a Reg or is a register
+// outside the AL..R15 ranges.
+func regBytes(a Arg) int {
+	r, ok := a.(Reg)
+	if !ok {
+		return 0
+	}
+	switch {
+	case AL <= r && r <= R15B:
+		return 1
+	case AX <= r && r <= R15W:
+		return 2
+	case EAX <= r && r <= R15L:
+		return 4
+	case RAX <= r && r <= R15:
+		return 8
+	}
+	return 0
+}
+
+// isSegment reports whether p is exactly one of the six segment override
+// prefixes. No bits are masked first, so a value carrying metadata bits
+// does not match.
+func isSegment(p Prefix) bool {
+	return p == PrefixCS || p == PrefixDS || p == PrefixES ||
+		p == PrefixFS || p == PrefixGS || p == PrefixSS
+}
+
+// The Op definitions and string list are in tables.go.
+
+// prefixNames maps prefix values to their display names. Prefix.String
+// looks a prefix up here after clearing its implicit, ignored and invalid
+// metadata bits.
+var prefixNames = map[Prefix]string{
+	PrefixCS:       "CS",
+	PrefixDS:       "DS",
+	PrefixES:       "ES",
+	PrefixFS:       "FS",
+	PrefixGS:       "GS",
+	PrefixSS:       "SS",
+	PrefixLOCK:     "LOCK",
+	PrefixREP:      "REP",
+	PrefixREPN:     "REPN",
+	PrefixAddrSize: "ADDRSIZE",
+	PrefixDataSize: "DATASIZE",
+	PrefixAddr16:   "ADDR16",
+	PrefixData16:   "DATA16",
+	PrefixAddr32:   "ADDR32",
+	PrefixData32:   "DATA32",
+	PrefixBND:      "BND",
+	PrefixXACQUIRE: "XACQUIRE",
+	PrefixXRELEASE: "XRELEASE",
+	PrefixREX:      "REX",
+	PrefixPT:       "PT",
+	PrefixPN:       "PN",
+}
+
+// regNames gives the generic (upper-case) name of each register, indexed by
+// Reg. There is no entry for the zero Reg, so Reg.String falls back to
+// "Reg(0)" for it.
+var regNames = [...]string{
+	AL:   "AL",
+	CL:   "CL",
+	BL:   "BL",
+	DL:   "DL",
+	AH:   "AH",
+	CH:   "CH",
+	BH:   "BH",
+	DH:   "DH",
+	SPB:  "SPB",
+	BPB:  "BPB",
+	SIB:  "SIB",
+	DIB:  "DIB",
+	R8B:  "R8B",
+	R9B:  "R9B",
+	R10B: "R10B",
+	R11B: "R11B",
+	R12B: "R12B",
+	R13B: "R13B",
+	R14B: "R14B",
+	R15B: "R15B",
+	AX:   "AX",
+	CX:   "CX",
+	BX:   "BX",
+	DX:   "DX",
+	SP:   "SP",
+	BP:   "BP",
+	SI:   "SI",
+	DI:   "DI",
+	R8W:  "R8W",
+	R9W:  "R9W",
+	R10W: "R10W",
+	R11W: "R11W",
+	R12W: "R12W",
+	R13W: "R13W",
+	R14W: "R14W",
+	R15W: "R15W",
+	EAX:  "EAX",
+	ECX:  "ECX",
+	EDX:  "EDX",
+	EBX:  "EBX",
+	ESP:  "ESP",
+	EBP:  "EBP",
+	ESI:  "ESI",
+	EDI:  "EDI",
+	R8L:  "R8L",
+	R9L:  "R9L",
+	R10L: "R10L",
+	R11L: "R11L",
+	R12L: "R12L",
+	R13L: "R13L",
+	R14L: "R14L",
+	R15L: "R15L",
+	RAX:  "RAX",
+	RCX:  "RCX",
+	RDX:  "RDX",
+	RBX:  "RBX",
+	RSP:  "RSP",
+	RBP:  "RBP",
+	RSI:  "RSI",
+	RDI:  "RDI",
+	R8:   "R8",
+	R9:   "R9",
+	R10:  "R10",
+	R11:  "R11",
+	R12:  "R12",
+	R13:  "R13",
+	R14:  "R14",
+	R15:  "R15",
+	IP:   "IP",
+	EIP:  "EIP",
+	RIP:  "RIP",
+	F0:   "F0",
+	F1:   "F1",
+	F2:   "F2",
+	F3:   "F3",
+	F4:   "F4",
+	F5:   "F5",
+	F6:   "F6",
+	F7:   "F7",
+	M0:   "M0",
+	M1:   "M1",
+	M2:   "M2",
+	M3:   "M3",
+	M4:   "M4",
+	M5:   "M5",
+	M6:   "M6",
+	M7:   "M7",
+	X0:   "X0",
+	X1:   "X1",
+	X2:   "X2",
+	X3:   "X3",
+	X4:   "X4",
+	X5:   "X5",
+	X6:   "X6",
+	X7:   "X7",
+	X8:   "X8",
+	X9:   "X9",
+	X10:  "X10",
+	X11:  "X11",
+	X12:  "X12",
+	X13:  "X13",
+	X14:  "X14",
+	X15:  "X15",
+	CS:   "CS",
+	SS:   "SS",
+	DS:   "DS",
+	ES:   "ES",
+	FS:   "FS",
+	GS:   "GS",
+	GDTR: "GDTR",
+	IDTR: "IDTR",
+	LDTR: "LDTR",
+	MSW:  "MSW",
+	TASK: "TASK",
+	CR0:  "CR0",
+	CR1:  "CR1",
+	CR2:  "CR2",
+	CR3:  "CR3",
+	CR4:  "CR4",
+	CR5:  "CR5",
+	CR6:  "CR6",
+	CR7:  "CR7",
+	CR8:  "CR8",
+	CR9:  "CR9",
+	CR10: "CR10",
+	CR11: "CR11",
+	CR12: "CR12",
+	CR13: "CR13",
+	CR14: "CR14",
+	CR15: "CR15",
+	DR0:  "DR0",
+	DR1:  "DR1",
+	DR2:  "DR2",
+	DR3:  "DR3",
+	DR4:  "DR4",
+	DR5:  "DR5",
+	DR6:  "DR6",
+	DR7:  "DR7",
+	DR8:  "DR8",
+	DR9:  "DR9",
+	DR10: "DR10",
+	DR11: "DR11",
+	DR12: "DR12",
+	DR13: "DR13",
+	DR14: "DR14",
+	DR15: "DR15",
+	TR0:  "TR0",
+	TR1:  "TR1",
+	TR2:  "TR2",
+	TR3:  "TR3",
+	TR4:  "TR4",
+	TR5:  "TR5",
+	TR6:  "TR6",
+	TR7:  "TR7",
+}
--- a/vendor/rsc.io/x86/x86asm/intel.go
+++ b/vendor/rsc.io/x86/x86asm/intel.go
@ -0,0 +1,518 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// IntelSyntax returns the Intel assembler syntax for the instruction, as defined by Intel's XED tool.
+//
+// It first adjusts the PrefixImplicit and PrefixIgnored flag bits in
+// inst.Prefix to decide which prefixes are displayed, then renders the
+// displayed prefixes followed by the mnemonic and its operands. When inst.Op
+// is 0 only the prefixes are returned, or "<no instruction>" if none is
+// displayed.
+func IntelSyntax(inst Inst) string {
+	// Gather the arguments up to the first nil entry.
+	var iargs []Arg
+	for _, a := range inst.Args {
+		if a == nil {
+			break
+		}
+		iargs = append(iargs, a)
+	}
+
+	// For these ops (MOV only when (inst.Opcode>>16)&0xFFFC == 0x0F20), clear
+	// the PrefixImplicit flag on every address-size prefix.
+	switch inst.Op {
+	case INSB, INSD, INSW, OUTSB, OUTSD, OUTSW, LOOPNE, JCXZ, JECXZ, JRCXZ, LOOP, LOOPE, MOV, XLATB:
+		if inst.Op == MOV && (inst.Opcode>>16)&0xFFFC != 0x0F20 {
+			break
+		}
+		for i, p := range inst.Prefix {
+			if p&0xFF == PrefixAddrSize {
+				inst.Prefix[i] &^= PrefixImplicit
+			}
+		}
+	}
+
+	switch inst.Op {
+	case MOV:
+		// When the destination is a segment register (ES..GS) and the source is
+		// a register in EAX..R15L or RAX..R15, shift the source down by EAX-AX
+		// or RAX-AX so that it is displayed from the AX.. range instead.
+		dst, _ := inst.Args[0].(Reg)
+		src, _ := inst.Args[1].(Reg)
+		if ES <= dst && dst <= GS && EAX <= src && src <= R15L {
+			src -= EAX - AX
+			iargs[1] = src
+		}
+		if ES <= dst && dst <= GS && RAX <= src && src <= R15 {
+			src -= RAX - AX
+			iargs[1] = src
+		}
+
+		// If inst.Opcode>>24 with its low two bits cleared equals 0xA0, flag
+		// every address-size prefix as implicit.
+		if inst.Opcode>>24&^3 == 0xA0 {
+			for i, p := range inst.Prefix {
+				if p&0xFF == PrefixAddrSize {
+					inst.Prefix[i] |= PrefixImplicit
+				}
+			}
+		}
+	}
+
+	switch inst.Op {
+	case AAM, AAD:
+		// Reinterpret the immediate as a signed byte, then widen it to the data size.
+		if imm, ok := iargs[0].(Imm); ok {
+			if inst.DataSize == 32 {
+				iargs[0] = Imm(uint32(int8(imm)))
+			} else if inst.DataSize == 16 {
+				iargs[0] = Imm(uint16(int8(imm)))
+			}
+		}
+
+	case PUSH:
+		// Display the immediate truncated to 32 bits.
+		if imm, ok := iargs[0].(Imm); ok {
+			iargs[0] = Imm(uint32(imm))
+		}
+	}
+
+	// Whenever a prefix is flagged implicit, flag every prefix with the same
+	// low byte as implicit too.
+	for _, p := range inst.Prefix {
+		if p&PrefixImplicit != 0 {
+			for j, pj := range inst.Prefix {
+				if pj&0xFF == p&0xFF {
+					inst.Prefix[j] |= PrefixImplicit
+				}
+			}
+		}
+	}
+
+	// For a decoded instruction, the Data16/Data32 prefixes, the CS, DS, ES and
+	// SS segment prefixes and REX prefixes are flagged implicit.
+	if inst.Op != 0 {
+		for i, p := range inst.Prefix {
+			switch p &^ PrefixIgnored {
+			case PrefixData16, PrefixData32, PrefixCS, PrefixDS, PrefixES, PrefixSS:
+				inst.Prefix[i] |= PrefixImplicit
+			}
+			if p.IsREX() {
+				inst.Prefix[i] |= PrefixImplicit
+			}
+		}
+	}
+
+	// On ops marked in isLoop and on JCXZ/JECXZ/JRCXZ, the PT and PN prefixes
+	// are flagged implicit.
+	if isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
+		for i, p := range inst.Prefix {
+			if p == PrefixPT || p == PrefixPN {
+				inst.Prefix[i] |= PrefixImplicit
+			}
+		}
+	}
+
+	// For the ops listed here, clear the implicit and ignored flags on every
+	// data-size prefix. NOP, RET, INT and LRET only qualify with opcode bytes
+	// 0x90, 0xC3, 0xCC and 0xCB respectively.
+	switch inst.Op {
+	case AAA, AAS, CBW, CDQE, CLC, CLD, CLI, CLTS, CMC, CPUID, CQO, CWD, DAA, DAS,
+		FDECSTP, FINCSTP, FNCLEX, FNINIT, FNOP, FWAIT, HLT,
+		ICEBP, INSB, INSD, INSW, INT, INTO, INVD, IRET, IRETQ,
+		LAHF, LEAVE, LRET, MONITOR, MWAIT, NOP, OUTSB, OUTSD, OUTSW,
+		PAUSE, POPA, POPF, POPFQ, PUSHA, PUSHF, PUSHFQ,
+		RDMSR, RDPMC, RDTSC, RDTSCP, RET, RSM,
+		SAHF, STC, STD, STI, SYSENTER, SYSEXIT, SYSRET,
+		UD2, WBINVD, WRMSR, XEND, XLATB, XTEST:
+
+		if inst.Op == NOP && inst.Opcode>>24 != 0x90 {
+			break
+		}
+		if inst.Op == RET && inst.Opcode>>24 != 0xC3 {
+			break
+		}
+		if inst.Op == INT && inst.Opcode>>24 != 0xCC {
+			break
+		}
+		if inst.Op == LRET && inst.Opcode>>24 != 0xcb {
+			break
+		}
+		for i, p := range inst.Prefix {
+			if p&0xFF == PrefixDataSize {
+				inst.Prefix[i] &^= PrefixImplicit | PrefixIgnored
+			}
+		}
+
+	case 0:
+		// ok
+	}
+
+	// Adjust the displayed operand list: none for the first group, only the
+	// first operand for STOS*, everything but the first for LODS* and SCAS*.
+	switch inst.Op {
+	case INSB, INSD, INSW, OUTSB, OUTSD, OUTSW, MONITOR, MWAIT, XLATB:
+		iargs = nil
+
+	case STOSB, STOSW, STOSD, STOSQ:
+		iargs = iargs[:1]
+
+	case LODSB, LODSW, LODSD, LODSQ, SCASB, SCASW, SCASD, SCASQ:
+		iargs = iargs[1:]
+	}
+
+	// Flags for prefixes that are not printed immediately but emitted in a
+	// fixed order after the loop below.
+	const (
+		haveData16 = 1 << iota
+		haveData32
+		haveAddr16
+		haveAddr32
+		haveXacquire
+		haveXrelease
+		haveLock
+		haveHintTaken
+		haveHintNotTaken
+		haveBnd
+	)
+	var prefixBits uint32
+	prefix := ""
+	for _, p := range inst.Prefix {
+		// A zero entry ends the prefix list.
+		if p == 0 {
+			break
+		}
+		// A prefix with low byte 0xF3 cancels a bnd recorded earlier.
+		if p&0xFF == 0xF3 {
+			prefixBits &^= haveBnd
+		}
+		// Implicit and ignored prefixes are never displayed.
+		if p&(PrefixImplicit|PrefixIgnored) != 0 {
+			continue
+		}
+		switch p {
+		default:
+			// Any other prefix is printed right away, lower-cased.
+			prefix += strings.ToLower(p.String()) + " "
+		case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
+			// Segment prefixes are only printed when there is no instruction.
+			if inst.Op == 0 {
+				prefix += strings.ToLower(p.String()) + " "
+			}
+		case PrefixREPN:
+			prefix += "repne "
+		case PrefixLOCK:
+			prefixBits |= haveLock
+		case PrefixData16, PrefixDataSize:
+			prefixBits |= haveData16
+		case PrefixData32:
+			prefixBits |= haveData32
+		case PrefixAddrSize, PrefixAddr16:
+			prefixBits |= haveAddr16
+		case PrefixAddr32:
+			prefixBits |= haveAddr32
+		case PrefixXACQUIRE:
+			prefixBits |= haveXacquire
+		case PrefixXRELEASE:
+			prefixBits |= haveXrelease
+		case PrefixPT:
+			prefixBits |= haveHintTaken
+		case PrefixPN:
+			prefixBits |= haveHintNotTaken
+		case PrefixBND:
+			prefixBits |= haveBnd
+		}
+	}
+	// A JMP with opcode byte 0xEB never shows bnd; RET and LRET never show
+	// data16 or data32.
+	switch inst.Op {
+	case JMP:
+		if inst.Opcode>>24 == 0xEB {
+			prefixBits &^= haveBnd
+		}
+	case RET, LRET:
+		prefixBits &^= haveData16 | haveData32
+	}
+
+	// Emit the recorded prefixes in this fixed order.
+	if prefixBits&haveXacquire != 0 {
+		prefix += "xacquire "
+	}
+	if prefixBits&haveXrelease != 0 {
+		prefix += "xrelease "
+	}
+	if prefixBits&haveLock != 0 {
+		prefix += "lock "
+	}
+	if prefixBits&haveBnd != 0 {
+		prefix += "bnd "
+	}
+	if prefixBits&haveHintTaken != 0 {
+		prefix += "hint-taken "
+	}
+	if prefixBits&haveHintNotTaken != 0 {
+		prefix += "hint-not-taken "
+	}
+	if prefixBits&haveAddr16 != 0 {
+		prefix += "addr16 "
+	}
+	if prefixBits&haveAddr32 != 0 {
+		prefix += "addr32 "
+	}
+	if prefixBits&haveData16 != 0 {
+		prefix += "data16 "
+	}
+	if prefixBits&haveData32 != 0 {
+		prefix += "data32 "
+	}
+
+	// No instruction: return just the prefixes, without the trailing space.
+	if inst.Op == 0 {
+		if prefix == "" {
+			return "<no instruction>"
+		}
+		return prefix[:len(prefix)-1]
+	}
+
+	// Format each remaining operand.
+	var args []string
+	for _, a := range iargs {
+		if a == nil {
+			break
+		}
+		args = append(args, intelArg(&inst, a))
+	}
+
+	// Per-op adjustments of the formatted operands and of the mnemonic.
+	var op string
+	switch inst.Op {
+	case NOP:
+		// NOP with opcode byte 0x0F gets an ax or eax operand appended,
+		// depending on the data size.
+		if inst.Opcode>>24 == 0x0F {
+			if inst.DataSize == 16 {
+				args = append(args, "ax")
+			} else {
+				args = append(args, "eax")
+			}
+		}
+
+	case BLENDVPD, BLENDVPS, PBLENDVB:
+		// Only the first two operands are displayed.
+		args = args[:2]
+
+	case INT:
+		// INT with opcode byte 0xCC is displayed as int3 without operands.
+		if inst.Opcode>>24 == 0xCC {
+			args = nil
+			op = "int3"
+		}
+
+	case LCALL, LJMP:
+		// Two operands are displayed in swapped order.
+		if len(args) == 2 {
+			args[0], args[1] = args[1], args[0]
+		}
+
+	case FCHS, FABS, FTST, FLDPI, FLDL2E, FLDLG2, F2XM1, FXAM, FLD1, FLDL2T, FSQRT, FRNDINT, FCOS, FSIN:
+		// With no explicit operand, st0 is displayed.
+		if len(args) == 0 {
+			args = append(args, "st0")
+		}
+
+	case FPTAN, FSINCOS, FUCOMPP, FCOMPP, FYL2X, FPATAN, FXTRACT, FPREM1, FPREM, FYL2XP1, FSCALE:
+		// With no explicit operand, st0 and st1 are displayed.
+		if len(args) == 0 {
+			args = []string{"st0", "st1"}
+		}
+
+	case FST, FSTP, FISTTP, FIST, FISTP, FBSTP:
+		// A single operand is followed by st0.
+		if len(args) == 1 {
+			args = append(args, "st0")
+		}
+
+	case FLD, FXCH, FCOM, FCOMP, FIADD, FIMUL, FICOM, FICOMP, FISUBR, FIDIV, FUCOM, FUCOMP, FILD, FBLD, FADD, FMUL, FSUB, FSUBR, FISUB, FDIV, FDIVR, FIDIVR:
+		// A single operand is preceded by st0.
+		if len(args) == 1 {
+			args = []string{"st0", args[0]}
+		}
+
+	case MASKMOVDQU, MASKMOVQ, XLATB, OUTSB, OUTSW, OUTSD:
+		// Scan the prefixes from last to first for a segment prefix. CS, ES
+		// and SS (outside 64-bit mode) and FS and GS (in any mode) are appended
+		// as an extra operand and end the scan; DS outside 64-bit mode ends the
+		// scan without adding anything.
+	FixSegment:
+		for i := len(inst.Prefix) - 1; i >= 0; i-- {
+			p := inst.Prefix[i] & 0xFF
+			switch p {
+			case PrefixCS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
+				if inst.Mode != 64 || p == PrefixFS || p == PrefixGS {
+					args = append(args, strings.ToLower((inst.Prefix[i] & 0xFF).String()))
+					break FixSegment
+				}
+			case PrefixDS:
+				if inst.Mode != 64 {
+					break FixSegment
+				}
+			}
+		}
+	}
+
+	// Mnemonic: an intelOp override if there is one, otherwise the lower-cased
+	// Op name.
+	if op == "" {
+		op = intelOp[inst.Op]
+	}
+	if op == "" {
+		op = strings.ToLower(inst.Op.String())
+	}
+	if args != nil {
+		op += " " + strings.Join(args, ", ")
+	}
+	return prefix + op
+}
+
+// intelArg formats a single operand of inst in Intel syntax.
+//
+// Registers use the name from intelReg when one is set. Relative targets are
+// printed as a signed hex offset after a dot. Immediates are printed in hex,
+// truncated to 32 bits in 32-bit mode. Memory references are printed as
+// "<size> ptr <seg>:[base+index*scale+disp]", where the size keyword and the
+// segment are omitted as decided below. Any operand not handled here is
+// printed as its lower-cased String form.
+func intelArg(inst *Inst, arg Arg) string {
+	switch v := arg.(type) {
+	case Reg:
+		if int(v) < len(intelReg) {
+			if name := intelReg[v]; name != "" {
+				return name
+			}
+		}
+
+	case Rel:
+		return fmt.Sprintf(".%+#x", int64(v))
+
+	case Imm:
+		switch {
+		case inst.Mode == 32:
+			return fmt.Sprintf("%#x", uint32(v))
+		case Imm(int32(v)) == v:
+			// The value fits in a signed 32-bit integer: print it signed.
+			return fmt.Sprintf("%#x", int64(v))
+		}
+		return fmt.Sprintf("%#x", uint64(v))
+
+	case Mem:
+		// An EIP base is always displayed as RIP.
+		if v.Base == EIP {
+			v.Base = RIP
+		}
+
+		// Size keyword: fixed by the op for the cases below, otherwise
+		// derived from the number of bytes accessed (empty if unlisted).
+		var size string
+		switch inst.Op {
+		case INVLPG, STOSB, MOVSB, CMPSB, LODSB, SCASB:
+			size = "byte "
+		case LAR, STOSW, MOVSW, CMPSW, LODSW, SCASW:
+			size = "word "
+		case STOSD, MOVSD, CMPSD, LODSD, SCASD:
+			size = "dword "
+		case STOSQ, MOVSQ, CMPSQ, LODSQ, SCASQ:
+			size = "qword "
+		case BOUND:
+			size = "dword "
+			if inst.Mode == 32 {
+				size = "qword "
+			}
+		case PREFETCHW, PREFETCHNTA, PREFETCHT0, PREFETCHT1, PREFETCHT2, CLFLUSH:
+			size = "zmmword "
+		default:
+			switch inst.MemBytes {
+			case 1:
+				size = "byte "
+			case 2:
+				size = "word "
+			case 4:
+				size = "dword "
+			case 8:
+				size = "qword "
+			case 16:
+				size = "xmmword "
+			}
+		}
+
+		// Decide whether the segment is left out of the output.
+		hideSeg := false
+		switch inst.Op {
+		case MOVSB, MOVSW, MOVSD, MOVSQ, CMPSB, CMPSW, CMPSD, CMPSQ, STOSB, STOSW, STOSD, STOSQ, SCASB, SCASW, SCASD, SCASQ, LODSB, LODSW, LODSD, LODSQ:
+			// String ops: ES is hidden with a DI-family base and DS with an
+			// SI-family base; with any other base the segment is kept.
+			switch v.Base {
+			case DI, EDI, RDI:
+				hideSeg = v.Segment == ES
+			case SI, ESI, RSI:
+				hideSeg = v.Segment == DS
+			}
+		case LEA:
+			hideSeg = true
+		default:
+			// SS is hidden with a stack or frame pointer base, DS with any other base.
+			switch v.Base {
+			case SP, ESP, RSP, BP, EBP, RBP:
+				hideSeg = v.Segment == SS
+			default:
+				hideSeg = v.Segment == DS
+			}
+		}
+		// In 64-bit mode only FS and GS are ever displayed.
+		if inst.Mode == 64 && !(v.Segment == FS || v.Segment == GS) {
+			hideSeg = true
+		}
+		if hideSeg {
+			v.Segment = 0
+		}
+
+		out := size + "ptr "
+		if v.Segment != 0 {
+			out += strings.ToLower(v.Segment.String()) + ":"
+		}
+		out += "["
+		if v.Base != 0 {
+			out += intelArg(inst, v.Base)
+		}
+		if v.Index != 0 && v.Scale != 0 {
+			if v.Base != 0 {
+				out += "+"
+			}
+			out += fmt.Sprintf("%s*%d", intelArg(inst, v.Index), v.Scale)
+		}
+		if v.Disp != 0 {
+			// A displacement that opens the bracket is printed unsigned when it
+			// is non-negative or does not fit in 32 bits; every other
+			// displacement carries an explicit sign.
+			opensBracket := strings.HasSuffix(out, "[")
+			if opensBracket && (v.Disp >= 0 || int64(int32(v.Disp)) != v.Disp) {
+				out += fmt.Sprintf("%#x", uint64(v.Disp))
+			} else {
+				out += fmt.Sprintf("%+#x", v.Disp)
+			}
+		}
+		return out + "]"
+	}
+	return strings.ToLower(arg.String())
+}
+
+// intelOp lists the ops whose Intel-syntax mnemonic is not simply the
+// lower-cased Op name. IntelSyntax looks an op up here first and falls back
+// to strings.ToLower(inst.Op.String()) when there is no entry.
+var intelOp = map[Op]string{
+	JAE:       "jnb",
+	JA:        "jnbe",
+	JGE:       "jnl",
+	JNE:       "jnz",
+	JG:        "jnle",
+	JE:        "jz",
+	SETAE:     "setnb",
+	SETA:      "setnbe",
+	SETGE:     "setnl",
+	SETNE:     "setnz",
+	SETG:      "setnle",
+	SETE:      "setz",
+	CMOVAE:    "cmovnb",
+	CMOVA:     "cmovnbe",
+	CMOVGE:    "cmovnl",
+	CMOVNE:    "cmovnz",
+	CMOVG:     "cmovnle",
+	CMOVE:     "cmovz",
+	LCALL:     "call far",
+	LJMP:      "jmp far",
+	LRET:      "ret far",
+	ICEBP:     "int1",
+	MOVSD_XMM: "movsd",
+	XLATB:     "xlat",
+}
+
+// intelReg holds Intel-syntax register names, indexed by Reg, for the
+// registers that intelArg does not print as the lower-cased Reg name.
+// Entries that are left empty make intelArg fall back to
+// strings.ToLower(arg.String()).
+var intelReg = [...]string{
+	F0:  "st0",
+	F1:  "st1",
+	F2:  "st2",
+	F3:  "st3",
+	F4:  "st4",
+	F5:  "st5",
+	F6:  "st6",
+	F7:  "st7",
+	M0:  "mmx0",
+	M1:  "mmx1",
+	M2:  "mmx2",
+	M3:  "mmx3",
+	M4:  "mmx4",
+	M5:  "mmx5",
+	M6:  "mmx6",
+	M7:  "mmx7",
+	X0:  "xmm0",
+	X1:  "xmm1",
+	X2:  "xmm2",
+	X3:  "xmm3",
+	X4:  "xmm4",
+	X5:  "xmm5",
+	X6:  "xmm6",
+	X7:  "xmm7",
+	X8:  "xmm8",
+	X9:  "xmm9",
+	X10: "xmm10",
+	X11: "xmm11",
+	X12: "xmm12",
+	X13: "xmm13",
+	X14: "xmm14",
+	X15: "xmm15",
+
+	// TODO: Maybe the constants are named wrong.
+	SPB: "spl",
+	BPB: "bpl",
+	SIB: "sil",
+	DIB: "dil",
+
+	R8L:  "r8d",
+	R9L:  "r9d",
+	R10L: "r10d",
+	R11L: "r11d",
+	R12L: "r12d",
+	R13L: "r13d",
+	R14L: "r14d",
+	R15L: "r15d",
+}
--- a/vendor/rsc.io/x86/x86asm/plan9x.go
+++ b/vendor/rsc.io/x86/x86asm/plan9x.go
@ -0,0 +1,346 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// plan9Syntax formats inst in Go assembler syntax, which was originally
+// defined by Plan 9.
+//
+// pc is the program counter of the instruction; it is handed to plan9Arg,
+// which uses it to expand PC-relative operands into absolute addresses.
+// symname queries the symbol table of the program being disassembled: given
+// an address it returns the name and base address of the symbol containing
+// it, or "", 0 if there is none. A nil symname behaves like a lookup that
+// never finds a symbol.
+func plan9Syntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) string {
+	if symname == nil {
+		symname = func(uint64) (string, uint64) { return "", 0 }
+	}
+
+	// Operands are emitted in the reverse of their decoded order; nil slots
+	// are skipped.
+	var operands []string
+	for n := len(inst.Args); n > 0; n-- {
+		if arg := inst.Args[n-1]; arg != nil {
+			operands = append(operands, plan9Arg(&inst, pc, symname, arg))
+		}
+	}
+
+	// Pick the last prefix that comes before the end of the prefix list or
+	// the first REX prefix, whichever is reached first.
+	var shown Prefix
+	for i := 0; i < len(inst.Prefix); i++ {
+		cur := inst.Prefix[i]
+		if cur == 0 || cur.IsREX() {
+			break
+		}
+		shown = cur
+	}
+
+	// Only that prefix is printed, and not at all when its low byte is 0,
+	// 0x66 or 0x67.
+	var text string
+	switch shown & 0xFF {
+	case PrefixREPN:
+		text = "REPNE "
+	case 0, 0x66, 0x67:
+		// nothing to print
+	default:
+		text = shown.String() + " "
+	}
+
+	// Mnemonic, with a size suffix for the ops marked in plan9Suffix.
+	text += inst.Op.String()
+	if plan9Suffix[inst.Op] {
+		switch inst.DataSize {
+		case 8:
+			text += "B"
+		case 16:
+			text += "W"
+		case 32:
+			text += "L"
+		case 64:
+			text += "Q"
+		}
+	}
+
+	if len(operands) > 0 {
+		text += " " + strings.Join(operands, ", ")
+	}
+	return text
+}
+
+// plan9Arg formats one operand of inst in Go assembler syntax.
+//
+// Registers are looked up in plan9Reg. A relative target is resolved against
+// pc and the instruction length when pc is non-zero. Immediates, and memory
+// references made of nothing but a displacement, are printed symbolically
+// when symname finds a symbol for the address. Operands not handled here are
+// printed with arg.String().
+func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string {
+	// symRef renders addr as name(SB), or name+off(SB) when addr is not the
+	// base of its symbol. ok is false when symname has no symbol for addr.
+	symRef := func(addr uint64) (ref string, ok bool) {
+		name, base := symname(addr)
+		if name == "" {
+			return "", false
+		}
+		if addr == base {
+			return name + "(SB)", true
+		}
+		return fmt.Sprintf("%s%+d(SB)", name, addr-base), true
+	}
+
+	switch v := arg.(type) {
+	case Reg:
+		return plan9Reg[v]
+
+	case Rel:
+		if pc != 0 {
+			// Use the symbol name only when the target is exactly the start
+			// of a symbol. Otherwise print the raw address, so that relative
+			// jumps show up as JMP 0x123 rather than JMP f+10(SB): searching
+			// for 0x123 is usually easier than working out f+10 by hand.
+			target := pc + uint64(inst.Len) + uint64(v)
+			if name, base := symname(target); name != "" && base == target {
+				return name + "(SB)"
+			}
+			return fmt.Sprintf("%#x", target)
+		}
+
+	case Imm:
+		if ref, ok := symRef(uint64(v)); ok {
+			return "$" + ref
+		}
+		switch {
+		case inst.Mode == 32:
+			return fmt.Sprintf("$%#x", uint32(v))
+		case Imm(int32(v)) == v:
+			// The value fits in a signed 32-bit integer: print it signed.
+			return fmt.Sprintf("$%#x", int64(v))
+		}
+		return fmt.Sprintf("$%#x", uint64(v))
+
+	case Mem:
+		// A reference made of a displacement only is looked up as a symbol.
+		dispOnly := v.Segment == 0 && v.Disp != 0 && v.Base == 0 && (v.Index == 0 || v.Scale == 0)
+		if dispOnly {
+			if ref, ok := symRef(uint64(v.Disp)); ok {
+				return ref
+			}
+		}
+
+		// Otherwise print seg:disp(base)(index*scale), omitting absent parts
+		// and writing a zero displacement as "0".
+		var out string
+		if v.Segment != 0 {
+			out = plan9Reg[v.Segment] + ":"
+		}
+		if v.Disp == 0 {
+			out += "0"
+		} else {
+			out += fmt.Sprintf("%#x", v.Disp)
+		}
+		if v.Base != 0 {
+			out += "(" + plan9Reg[v.Base] + ")"
+		}
+		if v.Index != 0 && v.Scale != 0 {
+			out += fmt.Sprintf("(%s*%d)", plan9Reg[v.Index], v.Scale)
+		}
+		return out
+	}
+	return arg.String()
+}
+
+// plan9Suffix marks the ops for which plan9Syntax appends a size suffix to
+// the mnemonic: B, W, L or Q for an inst.DataSize of 8, 16, 32 or 64.
+var plan9Suffix = [maxOp + 1]bool{
+	ADC:       true,
+	ADD:       true,
+	AND:       true,
+	BSF:       true,
+	BSR:       true,
+	BT:        true,
+	BTC:       true,
+	BTR:       true,
+	BTS:       true,
+	CMP:       true,
+	CMPXCHG:   true,
+	CVTSI2SD:  true,
+	CVTSI2SS:  true,
+	CVTSD2SI:  true,
+	CVTSS2SI:  true,
+	CVTTSD2SI: true,
+	CVTTSS2SI: true,
+	DEC:       true,
+	DIV:       true,
+	FLDENV:    true,
+	FRSTOR:    true,
+	IDIV:      true,
+	IMUL:      true,
+	IN:        true,
+	INC:       true,
+	LEA:       true,
+	MOV:       true,
+	MOVNTI:    true,
+	MUL:       true,
+	NEG:       true,
+	NOP:       true,
+	NOT:       true,
+	OR:        true,
+	OUT:       true,
+	POP:       true,
+	POPA:      true,
+	PUSH:      true,
+	PUSHA:     true,
+	RCL:       true,
+	RCR:       true,
+	ROL:       true,
+	ROR:       true,
+	SAR:       true,
+	SBB:       true,
+	SHL:       true,
+	SHLD:      true,
+	SHR:       true,
+	SHRD:      true,
+	SUB:       true,
+	TEST:      true,
+	XADD:      true,
+	XCHG:      true,
+	XOR:       true,
+}
+
+// plan9Reg gives the name plan9Arg prints for each Reg, indexed by Reg.
+// Most width variants of a register share one name: AX, EAX and RAX are all
+// "AX", and R8B, R8W, R8L and R8 are all "R8". The byte registers AL..DH keep
+// their own names.
+var plan9Reg = [...]string{
+	AL:   "AL",
+	CL:   "CL",
+	BL:   "BL",
+	DL:   "DL",
+	AH:   "AH",
+	CH:   "CH",
+	BH:   "BH",
+	DH:   "DH",
+	SPB:  "SP",
+	BPB:  "BP",
+	SIB:  "SI",
+	DIB:  "DI",
+	R8B:  "R8",
+	R9B:  "R9",
+	R10B: "R10",
+	R11B: "R11",
+	R12B: "R12",
+	R13B: "R13",
+	R14B: "R14",
+	R15B: "R15",
+	AX:   "AX",
+	CX:   "CX",
+	BX:   "BX",
+	DX:   "DX",
+	SP:   "SP",
+	BP:   "BP",
+	SI:   "SI",
+	DI:   "DI",
+	R8W:  "R8",
+	R9W:  "R9",
+	R10W: "R10",
+	R11W: "R11",
+	R12W: "R12",
+	R13W: "R13",
+	R14W: "R14",
+	R15W: "R15",
+	EAX:  "AX",
+	ECX:  "CX",
+	EDX:  "DX",
+	EBX:  "BX",
+	ESP:  "SP",
+	EBP:  "BP",
+	ESI:  "SI",
+	EDI:  "DI",
+	R8L:  "R8",
+	R9L:  "R9",
+	R10L: "R10",
+	R11L: "R11",
+	R12L: "R12",
+	R13L: "R13",
+	R14L: "R14",
+	R15L: "R15",
+	RAX:  "AX",
+	RCX:  "CX",
+	RDX:  "DX",
+	RBX:  "BX",
+	RSP:  "SP",
+	RBP:  "BP",
+	RSI:  "SI",
+	RDI:  "DI",
+	R8:   "R8",
+	R9:   "R9",
+	R10:  "R10",
+	R11:  "R11",
+	R12:  "R12",
+	R13:  "R13",
+	R14:  "R14",
+	R15:  "R15",
+	IP:   "IP",
+	EIP:  "IP",
+	RIP:  "IP",
+	F0:   "F0",
+	F1:   "F1",
+	F2:   "F2",
+	F3:   "F3",
+	F4:   "F4",
+	F5:   "F5",
+	F6:   "F6",
+	F7:   "F7",
+	M0:   "M0",
+	M1:   "M1",
+	M2:   "M2",
+	M3:   "M3",
+	M4:   "M4",
+	M5:   "M5",
+	M6:   "M6",
+	M7:   "M7",
+	X0:   "X0",
+	X1:   "X1",
+	X2:   "X2",
+	X3:   "X3",
+	X4:   "X4",
+	X5:   "X5",
+	X6:   "X6",
+	X7:   "X7",
+	X8:   "X8",
+	X9:   "X9",
+	X10:  "X10",
+	X11:  "X11",
+	X12:  "X12",
+	X13:  "X13",
+	X14:  "X14",
+	X15:  "X15",
+	CS:   "CS",
+	SS:   "SS",
+	DS:   "DS",
+	ES:   "ES",
+	FS:   "FS",
+	GS:   "GS",
+	GDTR: "GDTR",
+	IDTR: "IDTR",
+	LDTR: "LDTR",
+	MSW:  "MSW",
+	TASK: "TASK",
+	CR0:  "CR0",
+	CR1:  "CR1",
+	CR2:  "CR2",
+	CR3:  "CR3",
+	CR4:  "CR4",
+	CR5:  "CR5",
+	CR6:  "CR6",
+	CR7:  "CR7",
+	CR8:  "CR8",
+	CR9:  "CR9",
+	CR10: "CR10",
+	CR11: "CR11",
+	CR12: "CR12",
+	CR13: "CR13",
+	CR14: "CR14",
+	CR15: "CR15",
+	DR0:  "DR0",
+	DR1:  "DR1",
+	DR2:  "DR2",
+	DR3:  "DR3",
+	DR4:  "DR4",
+	DR5:  "DR5",
+	DR6:  "DR6",
+	DR7:  "DR7",
+	DR8:  "DR8",
+	DR9:  "DR9",
+	DR10: "DR10",
+	DR11: "DR11",
+	DR12: "DR12",
+	DR13: "DR13",
+	DR14: "DR14",
+	DR15: "DR15",
+	TR0:  "TR0",
+	TR1:  "TR1",
+	TR2:  "TR2",
+	TR3:  "TR3",
+	TR4:  "TR4",
+	TR5:  "TR5",
+	TR6:  "TR6",
+	TR7:  "TR7",
+}
--- a/vendor/rsc.io/x86/x86asm/tables.go
+++ b/vendor/rsc.io/x86/x86asm/tables.go