1// Copyright 2014 The Go Authors.  All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package x86asm implements decoding of x86 machine code.
6package x86asm
7
8import (
9	"bytes"
10	"fmt"
11)
12
13// An Inst is a single instruction.
14type Inst struct {
15	Prefix   Prefixes // Prefixes applied to the instruction.
16	Op       Op       // Opcode mnemonic
17	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
18	Args     Args     // Instruction arguments, in Intel order
19	Mode     int      // processor mode in bits: 16, 32, or 64
20	AddrSize int      // address size in bits: 16, 32, or 64
21	DataSize int      // operand size in bits: 16, 32, or 64
22	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
23	Len      int      // length of encoded instruction in bytes
24	PCRel    int      // length of PC-relative address in instruction encoding
25	PCRelOff int      // index of start of PC-relative address in instruction encoding
26}
27
28// Prefixes is an array of prefixes associated with a single instruction.
29// The prefixes are listed in the same order as found in the instruction:
30// each prefix byte corresponds to one slot in the array. The first zero
31// in the array marks the end of the prefixes.
32type Prefixes [14]Prefix
33
// A Prefix represents an Intel instruction prefix.
// The low 8 bits hold the prefix byte as it is encoded in the instruction.
// The high 8 bits hold the metadata flags (PrefixImplicit, PrefixIgnored,
// PrefixInvalid) and the bits that tell apart prefixes sharing one byte
// value, such as PrefixREPN (0xF2), PrefixXACQUIRE (0x1F2), and
// PrefixBND (0x2F2).
type Prefix uint16
38
39const (
40	// Metadata about the role of a prefix in an instruction.
41	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
42	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
43	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)
44
45	// Memory segment overrides.
46	PrefixES Prefix = 0x26 // ES segment override
47	PrefixCS Prefix = 0x2E // CS segment override
48	PrefixSS Prefix = 0x36 // SS segment override
49	PrefixDS Prefix = 0x3E // DS segment override
50	PrefixFS Prefix = 0x64 // FS segment override
51	PrefixGS Prefix = 0x65 // GS segment override
52
53	// Branch prediction.
54	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
55	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)
56
57	// Size attributes.
58	PrefixDataSize Prefix = 0x66 // operand size override
59	PrefixData16   Prefix = 0x166
60	PrefixData32   Prefix = 0x266
61	PrefixAddrSize Prefix = 0x67 // address size override
62	PrefixAddr16   Prefix = 0x167
63	PrefixAddr32   Prefix = 0x267
64
65	// One of a kind.
66	PrefixLOCK     Prefix = 0xF0 // lock
67	PrefixREPN     Prefix = 0xF2 // repeat not zero
68	PrefixXACQUIRE Prefix = 0x1F2
69	PrefixBND      Prefix = 0x2F2
70	PrefixREP      Prefix = 0xF3 // repeat
71	PrefixXRELEASE Prefix = 0x1F3
72
73	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
74	// the other bits are set or not according to the intended use.
75	PrefixREX       Prefix = 0x40 // REX 64-bit extension prefix
76	PrefixREXW      Prefix = 0x08 // extension bit W (64-bit instruction width)
77	PrefixREXR      Prefix = 0x04 // extension bit R (r field in modrm)
78	PrefixREXX      Prefix = 0x02 // extension bit X (index field in sib)
79	PrefixREXB      Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
80	PrefixVEX2Bytes Prefix = 0xC5 // Short form of vex prefix
81	PrefixVEX3Bytes Prefix = 0xC4 // Long form of vex prefix
82)
83
84// IsREX reports whether p is a REX prefix byte.
85func (p Prefix) IsREX() bool {
86	return p&0xF0 == PrefixREX
87}
88
89func (p Prefix) IsVEX() bool {
90	return p&0xFF == PrefixVEX2Bytes || p&0xFF == PrefixVEX3Bytes
91}
92
93func (p Prefix) String() string {
94	p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
95	if s := prefixNames[p]; s != "" {
96		return s
97	}
98
99	if p.IsREX() {
100		s := "REX."
101		if p&PrefixREXW != 0 {
102			s += "W"
103		}
104		if p&PrefixREXR != 0 {
105			s += "R"
106		}
107		if p&PrefixREXX != 0 {
108			s += "X"
109		}
110		if p&PrefixREXB != 0 {
111			s += "B"
112		}
113		return s
114	}
115
116	return fmt.Sprintf("Prefix(%#x)", int(p))
117}
118
119// An Op is an x86 opcode.
120type Op uint32
121
122func (op Op) String() string {
123	i := int(op)
124	if i < 0 || i >= len(opNames) || opNames[i] == "" {
125		return fmt.Sprintf("Op(%d)", i)
126	}
127	return opNames[i]
128}
129
130// An Args holds the instruction arguments.
131// If an instruction has fewer than 4 arguments,
132// the final elements in the array are nil.
133type Args [4]Arg
134
// An Arg is a single instruction argument.
// Its dynamic type is one of Reg, Mem, Imm, or Rel; the unexported
// isArg method marks the types that qualify.
type Arg interface {
	String() string
	isArg()
}
141
// Note that the implementations of Arg that follow are all sized
// so that on a 64-bit machine the data can be inlined in
// the interface value instead of requiring an allocation.
145
// A Reg is a single register.
// The zero Reg value has no name; it indicates "no register".
type Reg uint8
149
150const (
151	_ Reg = iota
152
153	// 8-bit
154	AL
155	CL
156	DL
157	BL
158	AH
159	CH
160	DH
161	BH
162	SPB
163	BPB
164	SIB
165	DIB
166	R8B
167	R9B
168	R10B
169	R11B
170	R12B
171	R13B
172	R14B
173	R15B
174
175	// 16-bit
176	AX
177	CX
178	DX
179	BX
180	SP
181	BP
182	SI
183	DI
184	R8W
185	R9W
186	R10W
187	R11W
188	R12W
189	R13W
190	R14W
191	R15W
192
193	// 32-bit
194	EAX
195	ECX
196	EDX
197	EBX
198	ESP
199	EBP
200	ESI
201	EDI
202	R8L
203	R9L
204	R10L
205	R11L
206	R12L
207	R13L
208	R14L
209	R15L
210
211	// 64-bit
212	RAX
213	RCX
214	RDX
215	RBX
216	RSP
217	RBP
218	RSI
219	RDI
220	R8
221	R9
222	R10
223	R11
224	R12
225	R13
226	R14
227	R15
228
229	// Instruction pointer.
230	IP  // 16-bit
231	EIP // 32-bit
232	RIP // 64-bit
233
234	// 387 floating point registers.
235	F0
236	F1
237	F2
238	F3
239	F4
240	F5
241	F6
242	F7
243
244	// MMX registers.
245	M0
246	M1
247	M2
248	M3
249	M4
250	M5
251	M6
252	M7
253
254	// XMM registers.
255	X0
256	X1
257	X2
258	X3
259	X4
260	X5
261	X6
262	X7
263	X8
264	X9
265	X10
266	X11
267	X12
268	X13
269	X14
270	X15
271
272	// Segment registers.
273	ES
274	CS
275	SS
276	DS
277	FS
278	GS
279
280	// System registers.
281	GDTR
282	IDTR
283	LDTR
284	MSW
285	TASK
286
287	// Control registers.
288	CR0
289	CR1
290	CR2
291	CR3
292	CR4
293	CR5
294	CR6
295	CR7
296	CR8
297	CR9
298	CR10
299	CR11
300	CR12
301	CR13
302	CR14
303	CR15
304
305	// Debug registers.
306	DR0
307	DR1
308	DR2
309	DR3
310	DR4
311	DR5
312	DR6
313	DR7
314	DR8
315	DR9
316	DR10
317	DR11
318	DR12
319	DR13
320	DR14
321	DR15
322
323	// Task registers.
324	TR0
325	TR1
326	TR2
327	TR3
328	TR4
329	TR5
330	TR6
331	TR7
332)
333
334const regMax = TR7
335
336func (Reg) isArg() {}
337
338func (r Reg) String() string {
339	i := int(r)
340	if i < 0 || i >= len(regNames) || regNames[i] == "" {
341		return fmt.Sprintf("Reg(%d)", i)
342	}
343	return regNames[i]
344}
345
346// A Mem is a memory reference.
347// The general form is Segment:[Base+Scale*Index+Disp].
348type Mem struct {
349	Segment Reg
350	Base    Reg
351	Scale   uint8
352	Index   Reg
353	Disp    int64
354}
355
356func (Mem) isArg() {}
357
358func (m Mem) String() string {
359	var base, plus, scale, index, disp string
360
361	if m.Base != 0 {
362		base = m.Base.String()
363	}
364	if m.Scale != 0 {
365		if m.Base != 0 {
366			plus = "+"
367		}
368		if m.Scale > 1 {
369			scale = fmt.Sprintf("%d*", m.Scale)
370		}
371		index = m.Index.String()
372	}
373	if m.Disp != 0 || m.Base == 0 && m.Scale == 0 {
374		disp = fmt.Sprintf("%+#x", m.Disp)
375	}
376	return "[" + base + plus + scale + index + disp + "]"
377}
378
// A Rel is an offset relative to the current instruction pointer.
type Rel int32

// isArg marks Rel as an implementation of Arg.
func (Rel) isArg() {}

// String formats r as a dot followed by the signed decimal offset,
// for example ".+5" or ".-3".
func (r Rel) String() string {
	return "." + fmt.Sprintf("%+d", int32(r))
}
387
// An Imm is an integer constant.
type Imm int64

// isArg marks Imm as an implementation of Arg.
func (Imm) isArg() {}

// String formats i in hexadecimal with a "0x" prefix; negative values
// carry a leading minus sign, as in "-0x1".
func (i Imm) String() string {
	// Convert to a plain int64 so fmt formats the number itself
	// instead of calling back into this method.
	value := int64(i)
	return fmt.Sprintf("%#x", value)
}
396
397func (i Inst) String() string {
398	var buf bytes.Buffer
399	for _, p := range i.Prefix {
400		if p == 0 {
401			break
402		}
403		if p&PrefixImplicit != 0 {
404			continue
405		}
406		fmt.Fprintf(&buf, "%v ", p)
407	}
408	fmt.Fprintf(&buf, "%v", i.Op)
409	sep := " "
410	for _, v := range i.Args {
411		if v == nil {
412			break
413		}
414		fmt.Fprintf(&buf, "%s%v", sep, v)
415		sep = ", "
416	}
417	return buf.String()
418}
419
420func isReg(a Arg) bool {
421	_, ok := a.(Reg)
422	return ok
423}
424
425func isSegReg(a Arg) bool {
426	r, ok := a.(Reg)
427	return ok && ES <= r && r <= GS
428}
429
430func isMem(a Arg) bool {
431	_, ok := a.(Mem)
432	return ok
433}
434
435func isImm(a Arg) bool {
436	_, ok := a.(Imm)
437	return ok
438}
439
440func regBytes(a Arg) int {
441	r, ok := a.(Reg)
442	if !ok {
443		return 0
444	}
445	if AL <= r && r <= R15B {
446		return 1
447	}
448	if AX <= r && r <= R15W {
449		return 2
450	}
451	if EAX <= r && r <= R15L {
452		return 4
453	}
454	if RAX <= r && r <= R15 {
455		return 8
456	}
457	return 0
458}
459
460func isSegment(p Prefix) bool {
461	switch p {
462	case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
463		return true
464	}
465	return false
466}
467
468// The Op definitions and string list are in tables.go.
469
470var prefixNames = map[Prefix]string{
471	PrefixCS:       "CS",
472	PrefixDS:       "DS",
473	PrefixES:       "ES",
474	PrefixFS:       "FS",
475	PrefixGS:       "GS",
476	PrefixSS:       "SS",
477	PrefixLOCK:     "LOCK",
478	PrefixREP:      "REP",
479	PrefixREPN:     "REPN",
480	PrefixAddrSize: "ADDRSIZE",
481	PrefixDataSize: "DATASIZE",
482	PrefixAddr16:   "ADDR16",
483	PrefixData16:   "DATA16",
484	PrefixAddr32:   "ADDR32",
485	PrefixData32:   "DATA32",
486	PrefixBND:      "BND",
487	PrefixXACQUIRE: "XACQUIRE",
488	PrefixXRELEASE: "XRELEASE",
489	PrefixREX:      "REX",
490	PrefixPT:       "PT",
491	PrefixPN:       "PN",
492}
493
494var regNames = [...]string{
495	AL:   "AL",
496	CL:   "CL",
497	BL:   "BL",
498	DL:   "DL",
499	AH:   "AH",
500	CH:   "CH",
501	BH:   "BH",
502	DH:   "DH",
503	SPB:  "SPB",
504	BPB:  "BPB",
505	SIB:  "SIB",
506	DIB:  "DIB",
507	R8B:  "R8B",
508	R9B:  "R9B",
509	R10B: "R10B",
510	R11B: "R11B",
511	R12B: "R12B",
512	R13B: "R13B",
513	R14B: "R14B",
514	R15B: "R15B",
515	AX:   "AX",
516	CX:   "CX",
517	BX:   "BX",
518	DX:   "DX",
519	SP:   "SP",
520	BP:   "BP",
521	SI:   "SI",
522	DI:   "DI",
523	R8W:  "R8W",
524	R9W:  "R9W",
525	R10W: "R10W",
526	R11W: "R11W",
527	R12W: "R12W",
528	R13W: "R13W",
529	R14W: "R14W",
530	R15W: "R15W",
531	EAX:  "EAX",
532	ECX:  "ECX",
533	EDX:  "EDX",
534	EBX:  "EBX",
535	ESP:  "ESP",
536	EBP:  "EBP",
537	ESI:  "ESI",
538	EDI:  "EDI",
539	R8L:  "R8L",
540	R9L:  "R9L",
541	R10L: "R10L",
542	R11L: "R11L",
543	R12L: "R12L",
544	R13L: "R13L",
545	R14L: "R14L",
546	R15L: "R15L",
547	RAX:  "RAX",
548	RCX:  "RCX",
549	RDX:  "RDX",
550	RBX:  "RBX",
551	RSP:  "RSP",
552	RBP:  "RBP",
553	RSI:  "RSI",
554	RDI:  "RDI",
555	R8:   "R8",
556	R9:   "R9",
557	R10:  "R10",
558	R11:  "R11",
559	R12:  "R12",
560	R13:  "R13",
561	R14:  "R14",
562	R15:  "R15",
563	IP:   "IP",
564	EIP:  "EIP",
565	RIP:  "RIP",
566	F0:   "F0",
567	F1:   "F1",
568	F2:   "F2",
569	F3:   "F3",
570	F4:   "F4",
571	F5:   "F5",
572	F6:   "F6",
573	F7:   "F7",
574	M0:   "M0",
575	M1:   "M1",
576	M2:   "M2",
577	M3:   "M3",
578	M4:   "M4",
579	M5:   "M5",
580	M6:   "M6",
581	M7:   "M7",
582	X0:   "X0",
583	X1:   "X1",
584	X2:   "X2",
585	X3:   "X3",
586	X4:   "X4",
587	X5:   "X5",
588	X6:   "X6",
589	X7:   "X7",
590	X8:   "X8",
591	X9:   "X9",
592	X10:  "X10",
593	X11:  "X11",
594	X12:  "X12",
595	X13:  "X13",
596	X14:  "X14",
597	X15:  "X15",
598	CS:   "CS",
599	SS:   "SS",
600	DS:   "DS",
601	ES:   "ES",
602	FS:   "FS",
603	GS:   "GS",
604	GDTR: "GDTR",
605	IDTR: "IDTR",
606	LDTR: "LDTR",
607	MSW:  "MSW",
608	TASK: "TASK",
609	CR0:  "CR0",
610	CR1:  "CR1",
611	CR2:  "CR2",
612	CR3:  "CR3",
613	CR4:  "CR4",
614	CR5:  "CR5",
615	CR6:  "CR6",
616	CR7:  "CR7",
617	CR8:  "CR8",
618	CR9:  "CR9",
619	CR10: "CR10",
620	CR11: "CR11",
621	CR12: "CR12",
622	CR13: "CR13",
623	CR14: "CR14",
624	CR15: "CR15",
625	DR0:  "DR0",
626	DR1:  "DR1",
627	DR2:  "DR2",
628	DR3:  "DR3",
629	DR4:  "DR4",
630	DR5:  "DR5",
631	DR6:  "DR6",
632	DR7:  "DR7",
633	DR8:  "DR8",
634	DR9:  "DR9",
635	DR10: "DR10",
636	DR11: "DR11",
637	DR12: "DR12",
638	DR13: "DR13",
639	DR14: "DR14",
640	DR15: "DR15",
641	TR0:  "TR0",
642	TR1:  "TR1",
643	TR2:  "TR2",
644	TR3:  "TR3",
645	TR4:  "TR4",
646	TR5:  "TR5",
647	TR6:  "TR6",
648	TR7:  "TR7",
649}
650