1 /* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26 #include "udint.h"
27 #include "types.h"
28 #include "decode.h"
29
30 #ifndef __UD_STANDALONE__
31 # include <string.h>
32 #endif /* __UD_STANDALONE__ */
33
/* The max number of prefixes to an instruction */
#define MAX_PREFIXES    15

/*
 * REX prefix bits. Each accessor isolates one flag of the low nibble
 * of a REX byte and yields 0 or 1.
 */
#define REX_W(r)        ( ( ( r ) & 0x8 ) >> 3 )
#define REX_R(r)        ( ( ( r ) & 0x4 ) >> 2 )
#define REX_X(r)        ( ( ( r ) & 0x2 ) >> 1 )
#define REX_B(r)        ( ( ( r ) & 0x1 ) >> 0 )
/* W/R/X/B mask assembled from the P_REX* flags of prefix word n. */
#define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \
                          ( P_REXR(n) << 2 ) | \
                          ( P_REXX(n) << 1 ) | \
                          ( P_REXB(n) << 0 ) )

/* scale-index-base bits */
#define SIB_S(b)        ( ( b ) >> 6 )
#define SIB_I(b)        ( ( ( b ) >> 3 ) & 7 )
#define SIB_B(b)        ( ( b ) & 7 )

/* modrm bits (MODRM_NNN is another name for the reg field) */
#define MODRM_REG(b)    ( ( ( b ) >> 3 ) & 7 )
#define MODRM_NNN(b)    MODRM_REG(b)
#define MODRM_MOD(b)    ( ( ( b ) >> 6 ) & 3 )
#define MODRM_RM(b)     ( ( b ) & 7 )
57
58 static int decode_ext(struct ud *u, uint16_t ptr);
59
/* Register classes accepted by decode_reg(). */
enum reg_class {
  REGCLASS_GPR = 0,   /* general purpose, sized via decode_gpr() */
  REGCLASS_MMX = 1,   /* based at UD_R_MM0  */
  REGCLASS_CR  = 2,   /* based at UD_R_CR0  */
  REGCLASS_DB  = 3,   /* based at UD_R_DR0  */
  REGCLASS_SEG = 4,   /* based at UD_R_ES   */
  REGCLASS_XMM = 5    /* based at UD_R_XMM0 */
};
68
69 /*
70 * inp_start
71 * Should be called before each de-code operation.
72 */
73 static void
inp_start(struct ud * u)74 inp_start(struct ud *u)
75 {
76 u->inp_ctr = 0;
77 }
78
79
80 static uint8_t
inp_next(struct ud * u)81 inp_next(struct ud *u)
82 {
83 if (u->inp_end == 0) {
84 if (u->inp_buf != NULL) {
85 if (u->inp_buf_index < u->inp_buf_size) {
86 u->inp_ctr++;
87 return (u->inp_curr = u->inp_buf[u->inp_buf_index++]);
88 }
89 } else {
90 int c;
91 if ((c = u->inp_hook(u)) != UD_EOI) {
92 u->inp_curr = c;
93 u->inp_sess[u->inp_ctr++] = u->inp_curr;
94 return u->inp_curr;
95 }
96 }
97 }
98 u->inp_end = 1;
99 UDERR(u, "byte expected, eoi received\n");
100 return 0;
101 }
102
103 static uint8_t
inp_curr(struct ud * u)104 inp_curr(struct ud *u)
105 {
106 return u->inp_curr;
107 }
108
109
/*
 * inp_uint8
 * inp_uint16
 * inp_uint32
 * inp_uint64
 *    Load little-endian values from input
 */
/* Reads a single byte from the input. */
static uint8_t
inp_uint8(struct ud *u)
{
  const uint8_t byte = inp_next(u);
  return byte;
}
122
/* Reads two bytes from the input, least significant byte first. */
static uint16_t
inp_uint16(struct ud *u)
{
  uint16_t lo = inp_next(u);
  uint16_t hi = inp_next(u);

  return lo | (hi << 8);
}
132
/* Reads four bytes from the input, least significant byte first. */
static uint32_t
inp_uint32(struct ud *u)
{
  uint32_t value = 0;
  unsigned int shift;

  for (shift = 0; shift < 32; shift += 8) {
    uint32_t byte = inp_next(u);
    value |= byte << shift;
  }
  return value;
}
146
/* Reads eight bytes from the input, least significant byte first. */
static uint64_t
inp_uint64(struct ud *u)
{
  uint64_t value = 0;
  unsigned int shift;

  for (shift = 0; shift < 64; shift += 8) {
    uint64_t byte = inp_next(u);
    value |= byte << shift;
  }
  return value;
}
168
169
/*
 * eff_opr_mode
 *    Effective operand size (16, 32 or 64) for the given disassembly
 *    mode, rex.w flag and operand-size prefix flag.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
  case 64:
    if (rex_w) {
      return 64;
    }
    return pfx_opr ? 16 : 32;
  case 32:
    return pfx_opr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_opr ? 32 : 16;
  }
}
182
183
/*
 * eff_adr_mode
 *    Effective address size (16, 32 or 64) for the given disassembly
 *    mode and address-size prefix flag.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
  case 64:
    return pfx_adr ? 32 : 64;
  case 32:
    return pfx_adr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_adr ? 32 : 16;
  }
}
196
197
198 /*
199 * decode_prefixes
200 *
201 * Extracts instruction prefixes.
202 */
203 static int
decode_prefixes(struct ud * u)204 decode_prefixes(struct ud *u)
205 {
206 int done = 0;
207 uint8_t curr, last = 0;
208 UD_RETURN_ON_ERROR(u);
209
210 do {
211 last = curr;
212 curr = inp_next(u);
213 UD_RETURN_ON_ERROR(u);
214 if (u->inp_ctr == MAX_INSN_LENGTH) {
215 UD_RETURN_WITH_ERROR(u, "max instruction length");
216 }
217
218 switch (curr)
219 {
220 case 0x2E:
221 u->pfx_seg = UD_R_CS;
222 break;
223 case 0x36:
224 u->pfx_seg = UD_R_SS;
225 break;
226 case 0x3E:
227 u->pfx_seg = UD_R_DS;
228 break;
229 case 0x26:
230 u->pfx_seg = UD_R_ES;
231 break;
232 case 0x64:
233 u->pfx_seg = UD_R_FS;
234 break;
235 case 0x65:
236 u->pfx_seg = UD_R_GS;
237 break;
238 case 0x67: /* adress-size override prefix */
239 u->pfx_adr = 0x67;
240 break;
241 case 0xF0:
242 u->pfx_lock = 0xF0;
243 break;
244 case 0x66:
245 u->pfx_opr = 0x66;
246 break;
247 case 0xF2:
248 u->pfx_str = 0xf2;
249 break;
250 case 0xF3:
251 u->pfx_str = 0xf3;
252 break;
253 default:
254 /* consume if rex */
255 done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1;
256 break;
257 }
258 } while (!done);
259 /* rex prefixes in 64bit mode, must be the last prefix */
260 if (u->dis_mode == 64 && (last & 0xF0) == 0x40) {
261 u->pfx_rex = last;
262 }
263 return 0;
264 }
265
266
modrm(struct ud * u)267 static inline unsigned int modrm( struct ud * u )
268 {
269 if ( !u->have_modrm ) {
270 u->modrm = inp_next( u );
271 u->have_modrm = 1;
272 }
273 return u->modrm;
274 }
275
276
277 static unsigned int
resolve_operand_size(const struct ud * u,unsigned int s)278 resolve_operand_size( const struct ud * u, unsigned int s )
279 {
280 switch ( s )
281 {
282 case SZ_V:
283 return ( u->opr_mode );
284 case SZ_Z:
285 return ( u->opr_mode == 16 ) ? 16 : 32;
286 case SZ_Y:
287 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
288 case SZ_RDQ:
289 return ( u->dis_mode == 64 ) ? 64 : 32;
290 default:
291 return s;
292 }
293 }
294
295
resolve_mnemonic(struct ud * u)296 static int resolve_mnemonic( struct ud* u )
297 {
298 /* resolve 3dnow weirdness. */
299 if ( u->mnemonic == UD_I3dnow ) {
300 u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic;
301 }
302 /* SWAPGS is only valid in 64bits mode */
303 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
304 UDERR(u, "swapgs invalid in 64bits mode\n");
305 return -1;
306 }
307
308 if (u->mnemonic == UD_Ixchg) {
309 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX &&
310 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
311 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
312 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
313 u->operand[0].type = UD_NONE;
314 u->operand[1].type = UD_NONE;
315 u->mnemonic = UD_Inop;
316 }
317 }
318
319 if (u->mnemonic == UD_Inop && u->pfx_repe) {
320 u->pfx_repe = 0;
321 u->mnemonic = UD_Ipause;
322 }
323 return 0;
324 }
325
326
327 /* -----------------------------------------------------------------------------
328 * decode_a()- Decodes operands of the type seg:offset
329 * -----------------------------------------------------------------------------
330 */
331 static void
decode_a(struct ud * u,struct ud_operand * op)332 decode_a(struct ud* u, struct ud_operand *op)
333 {
334 if (u->opr_mode == 16) {
335 /* seg16:off16 */
336 op->type = UD_OP_PTR;
337 op->size = 32;
338 op->lval.ptr.off = inp_uint16(u);
339 op->lval.ptr.seg = inp_uint16(u);
340 } else {
341 /* seg16:off32 */
342 op->type = UD_OP_PTR;
343 op->size = 48;
344 op->lval.ptr.off = inp_uint32(u);
345 op->lval.ptr.seg = inp_uint16(u);
346 }
347 }
348
349 /* -----------------------------------------------------------------------------
350 * decode_gpr() - Returns decoded General Purpose Register
351 * -----------------------------------------------------------------------------
352 */
353 static enum ud_type
decode_gpr(register struct ud * u,unsigned int s,unsigned char rm)354 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
355 {
356 switch (s) {
357 case 64:
358 return UD_R_RAX + rm;
359 case 32:
360 return UD_R_EAX + rm;
361 case 16:
362 return UD_R_AX + rm;
363 case 8:
364 if (u->dis_mode == 64 && u->pfx_rex) {
365 if (rm >= 4)
366 return UD_R_SPL + (rm-4);
367 return UD_R_AL + rm;
368 } else return UD_R_AL + rm;
369 case 0:
370 /* invalid size in case of a decode error */
371 UD_ASSERT(u->error);
372 return UD_NONE;
373 default:
374 UD_ASSERT(!"invalid operand size");
375 return UD_NONE;
376 }
377 }
378
379 static void
decode_reg(struct ud * u,struct ud_operand * opr,int type,int num,int size)380 decode_reg(struct ud *u,
381 struct ud_operand *opr,
382 int type,
383 int num,
384 int size)
385 {
386 int reg;
387 size = resolve_operand_size(u, size);
388 switch (type) {
389 case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
390 case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break;
391 case REGCLASS_XMM : reg = UD_R_XMM0 + num; break;
392 case REGCLASS_CR : reg = UD_R_CR0 + num; break;
393 case REGCLASS_DB : reg = UD_R_DR0 + num; break;
394 case REGCLASS_SEG : {
395 /*
396 * Only 6 segment registers, anything else is an error.
397 */
398 if ((num & 7) > 5) {
399 UDERR(u, "invalid segment register value\n");
400 return;
401 } else {
402 reg = UD_R_ES + (num & 7);
403 }
404 break;
405 }
406 default:
407 UD_ASSERT(!"invalid register type");
408 return;
409 }
410 opr->type = UD_OP_REG;
411 opr->base = reg;
412 opr->size = size;
413 }
414
415
416 /*
417 * decode_imm
418 *
419 * Decode Immediate values.
420 */
421 static void
decode_imm(struct ud * u,unsigned int size,struct ud_operand * op)422 decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
423 {
424 op->size = resolve_operand_size(u, size);
425 op->type = UD_OP_IMM;
426
427 switch (op->size) {
428 case 8: op->lval.sbyte = inp_uint8(u); break;
429 case 16: op->lval.uword = inp_uint16(u); break;
430 case 32: op->lval.udword = inp_uint32(u); break;
431 case 64: op->lval.uqword = inp_uint64(u); break;
432 default: return;
433 }
434 }
435
436
437 /*
438 * decode_mem_disp
439 *
440 * Decode mem address displacement.
441 */
442 static void
decode_mem_disp(struct ud * u,unsigned int size,struct ud_operand * op)443 decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
444 {
445 switch (size) {
446 case 8:
447 op->offset = 8;
448 op->lval.ubyte = inp_uint8(u);
449 break;
450 case 16:
451 op->offset = 16;
452 op->lval.uword = inp_uint16(u);
453 break;
454 case 32:
455 op->offset = 32;
456 op->lval.udword = inp_uint32(u);
457 break;
458 case 64:
459 op->offset = 64;
460 op->lval.uqword = inp_uint64(u);
461 break;
462 default:
463 return;
464 }
465 }
466
467
468 /*
469 * decode_modrm_reg
470 *
471 * Decodes reg field of mod/rm byte
472 *
473 */
474 static inline void
decode_modrm_reg(struct ud * u,struct ud_operand * operand,unsigned int type,unsigned int size)475 decode_modrm_reg(struct ud *u,
476 struct ud_operand *operand,
477 unsigned int type,
478 unsigned int size)
479 {
480 uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
481 decode_reg(u, operand, type, reg, size);
482 }
483
484
485 /*
486 * decode_modrm_rm
487 *
488 * Decodes rm field of mod/rm byte
489 *
490 */
491 static void
decode_modrm_rm(struct ud * u,struct ud_operand * op,unsigned char type,unsigned int size)492 decode_modrm_rm(struct ud *u,
493 struct ud_operand *op,
494 unsigned char type, /* register type */
495 unsigned int size) /* operand size */
496
497 {
498 size_t offset = 0;
499 unsigned char mod, rm;
500
501 /* get mod, r/m and reg fields */
502 mod = MODRM_MOD(modrm(u));
503 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
504
505 /*
506 * If mod is 11b, then the modrm.rm specifies a register.
507 *
508 */
509 if (mod == 3) {
510 decode_reg(u, op, type, rm, size);
511 return;
512 }
513
514 /*
515 * !11b => Memory Address
516 */
517 op->type = UD_OP_MEM;
518 op->size = resolve_operand_size(u, size);
519
520 if (u->adr_mode == 64) {
521 op->base = UD_R_RAX + rm;
522 if (mod == 1) {
523 offset = 8;
524 } else if (mod == 2) {
525 offset = 32;
526 } else if (mod == 0 && (rm & 7) == 5) {
527 op->base = UD_R_RIP;
528 offset = 32;
529 } else {
530 offset = 0;
531 }
532 /*
533 * Scale-Index-Base (SIB)
534 */
535 if ((rm & 7) == 4) {
536 inp_next(u);
537
538 op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
539 op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
540 op->base = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
541
542 /* special conditions for base reference */
543 if (op->index == UD_R_RSP) {
544 op->index = UD_NONE;
545 op->scale = UD_NONE;
546 }
547
548 if (op->base == UD_R_RBP || op->base == UD_R_R13) {
549 if (mod == 0) {
550 op->base = UD_NONE;
551 }
552 if (mod == 1) {
553 offset = 8;
554 } else {
555 offset = 32;
556 }
557 }
558 }
559 } else if (u->adr_mode == 32) {
560 op->base = UD_R_EAX + rm;
561 if (mod == 1) {
562 offset = 8;
563 } else if (mod == 2) {
564 offset = 32;
565 } else if (mod == 0 && rm == 5) {
566 op->base = UD_NONE;
567 offset = 32;
568 } else {
569 offset = 0;
570 }
571
572 /* Scale-Index-Base (SIB) */
573 if ((rm & 7) == 4) {
574 inp_next(u);
575
576 op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
577 op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
578 op->base = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
579
580 if (op->index == UD_R_ESP) {
581 op->index = UD_NONE;
582 op->scale = UD_NONE;
583 }
584
585 /* special condition for base reference */
586 if (op->base == UD_R_EBP) {
587 if (mod == 0) {
588 op->base = UD_NONE;
589 }
590 if (mod == 1) {
591 offset = 8;
592 } else {
593 offset = 32;
594 }
595 }
596 }
597 } else {
598 const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
599 UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
600 const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
601 UD_NONE, UD_NONE, UD_NONE, UD_NONE };
602 op->base = bases[rm & 7];
603 op->index = indices[rm & 7];
604 if (mod == 0 && rm == 6) {
605 offset = 16;
606 op->base = UD_NONE;
607 } else if (mod == 1) {
608 offset = 8;
609 } else if (mod == 2) {
610 offset = 16;
611 }
612 }
613
614 if (offset) {
615 decode_mem_disp(u, offset, op);
616 }
617 }
618
619
620 /*
621 * decode_moffset
622 * Decode offset-only memory operand
623 */
624 static void
decode_moffset(struct ud * u,unsigned int size,struct ud_operand * opr)625 decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
626 {
627 opr->type = UD_OP_MEM;
628 opr->size = resolve_operand_size(u, size);
629 decode_mem_disp(u, u->adr_mode, opr);
630 }
631
632
633 /* -----------------------------------------------------------------------------
634 * decode_operands() - Disassembles Operands.
635 * -----------------------------------------------------------------------------
636 */
637 static int
decode_operand(struct ud * u,struct ud_operand * operand,enum ud_operand_code type,unsigned int size)638 decode_operand(struct ud *u,
639 struct ud_operand *operand,
640 enum ud_operand_code type,
641 unsigned int size)
642 {
643 operand->_oprcode = type;
644
645 switch (type) {
646 case OP_A :
647 decode_a(u, operand);
648 break;
649 case OP_MR:
650 decode_modrm_rm(u, operand, REGCLASS_GPR,
651 MODRM_MOD(modrm(u)) == 3 ?
652 Mx_reg_size(size) : Mx_mem_size(size));
653 break;
654 case OP_F:
655 u->br_far = 1;
656 /* intended fall through */
657 case OP_M:
658 if (MODRM_MOD(modrm(u)) == 3) {
659 UDERR(u, "expected modrm.mod != 3\n");
660 }
661 /* intended fall through */
662 case OP_E:
663 decode_modrm_rm(u, operand, REGCLASS_GPR, size);
664 break;
665 case OP_G:
666 decode_modrm_reg(u, operand, REGCLASS_GPR, size);
667 break;
668 case OP_sI:
669 case OP_I:
670 decode_imm(u, size, operand);
671 break;
672 case OP_I1:
673 operand->type = UD_OP_CONST;
674 operand->lval.udword = 1;
675 break;
676 case OP_N:
677 if (MODRM_MOD(modrm(u)) != 3) {
678 UDERR(u, "expected modrm.mod == 3\n");
679 }
680 /* intended fall through */
681 case OP_Q:
682 decode_modrm_rm(u, operand, REGCLASS_MMX, size);
683 break;
684 case OP_P:
685 decode_modrm_reg(u, operand, REGCLASS_MMX, size);
686 break;
687 case OP_U:
688 if (MODRM_MOD(modrm(u)) != 3) {
689 UDERR(u, "expected modrm.mod == 3\n");
690 }
691 /* intended fall through */
692 case OP_W:
693 decode_modrm_rm(u, operand, REGCLASS_XMM, size);
694 break;
695 case OP_V:
696 decode_modrm_reg(u, operand, REGCLASS_XMM, size);
697 break;
698 case OP_MU:
699 decode_modrm_rm(u, operand, REGCLASS_XMM,
700 MODRM_MOD(modrm(u)) == 3 ?
701 Mx_reg_size(size) : Mx_mem_size(size));
702 break;
703 case OP_S:
704 decode_modrm_reg(u, operand, REGCLASS_SEG, size);
705 break;
706 case OP_O:
707 decode_moffset(u, size, operand);
708 break;
709 case OP_R0:
710 case OP_R1:
711 case OP_R2:
712 case OP_R3:
713 case OP_R4:
714 case OP_R5:
715 case OP_R6:
716 case OP_R7:
717 decode_reg(u, operand, REGCLASS_GPR,
718 (REX_B(u->pfx_rex) << 3) | (type - OP_R0), size);
719 break;
720 case OP_AL:
721 case OP_AX:
722 case OP_eAX:
723 case OP_rAX:
724 decode_reg(u, operand, REGCLASS_GPR, 0, size);
725 break;
726 case OP_CL:
727 case OP_CX:
728 case OP_eCX:
729 decode_reg(u, operand, REGCLASS_GPR, 1, size);
730 break;
731 case OP_DL:
732 case OP_DX:
733 case OP_eDX:
734 decode_reg(u, operand, REGCLASS_GPR, 2, size);
735 break;
736 case OP_ES:
737 case OP_CS:
738 case OP_DS:
739 case OP_SS:
740 case OP_FS:
741 case OP_GS:
742 /* in 64bits mode, only fs and gs are allowed */
743 if (u->dis_mode == 64) {
744 if (type != OP_FS && type != OP_GS) {
745 UDERR(u, "invalid segment register in 64bits\n");
746 }
747 }
748 operand->type = UD_OP_REG;
749 operand->base = (type - OP_ES) + UD_R_ES;
750 operand->size = 16;
751 break;
752 case OP_J :
753 decode_imm(u, size, operand);
754 operand->type = UD_OP_JIMM;
755 break ;
756 case OP_R :
757 if (MODRM_MOD(modrm(u)) != 3) {
758 UDERR(u, "expected modrm.mod == 3\n");
759 }
760 decode_modrm_rm(u, operand, REGCLASS_GPR, size);
761 break;
762 case OP_C:
763 decode_modrm_reg(u, operand, REGCLASS_CR, size);
764 break;
765 case OP_D:
766 decode_modrm_reg(u, operand, REGCLASS_DB, size);
767 break;
768 case OP_I3 :
769 operand->type = UD_OP_CONST;
770 operand->lval.sbyte = 3;
771 break;
772 case OP_ST0:
773 case OP_ST1:
774 case OP_ST2:
775 case OP_ST3:
776 case OP_ST4:
777 case OP_ST5:
778 case OP_ST6:
779 case OP_ST7:
780 operand->type = UD_OP_REG;
781 operand->base = (type - OP_ST0) + UD_R_ST0;
782 operand->size = 80;
783 break;
784 default :
785 break;
786 }
787 return 0;
788 }
789
790
791 /*
792 * decode_operands
793 *
794 * Disassemble upto 3 operands of the current instruction being
795 * disassembled. By the end of the function, the operand fields
796 * of the ud structure will have been filled.
797 */
798 static int
decode_operands(struct ud * u)799 decode_operands(struct ud* u)
800 {
801 decode_operand(u, &u->operand[0],
802 u->itab_entry->operand1.type,
803 u->itab_entry->operand1.size);
804 decode_operand(u, &u->operand[1],
805 u->itab_entry->operand2.type,
806 u->itab_entry->operand2.size);
807 decode_operand(u, &u->operand[2],
808 u->itab_entry->operand3.type,
809 u->itab_entry->operand3.size);
810 return 0;
811 }
812
813 /* -----------------------------------------------------------------------------
814 * clear_insn() - clear instruction structure
815 * -----------------------------------------------------------------------------
816 */
817 static void
clear_insn(register struct ud * u)818 clear_insn(register struct ud* u)
819 {
820 u->error = 0;
821 u->pfx_seg = 0;
822 u->pfx_opr = 0;
823 u->pfx_adr = 0;
824 u->pfx_lock = 0;
825 u->pfx_repne = 0;
826 u->pfx_rep = 0;
827 u->pfx_repe = 0;
828 u->pfx_rex = 0;
829 u->pfx_str = 0;
830 u->mnemonic = UD_Inone;
831 u->itab_entry = NULL;
832 u->have_modrm = 0;
833 u->br_far = 0;
834
835 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
836 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
837 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
838 }
839
840
841 static inline int
resolve_pfx_str(struct ud * u)842 resolve_pfx_str(struct ud* u)
843 {
844 if (u->pfx_str == 0xf3) {
845 if (P_STR(u->itab_entry->prefix)) {
846 u->pfx_rep = 0xf3;
847 } else {
848 u->pfx_repe = 0xf3;
849 }
850 } else if (u->pfx_str == 0xf2) {
851 u->pfx_repne = 0xf3;
852 }
853 return 0;
854 }
855
856
857 static int
resolve_mode(struct ud * u)858 resolve_mode( struct ud* u )
859 {
860 int default64;
861 /* if in error state, bail out */
862 if ( u->error ) return -1;
863
864 /* propagate prefix effects */
865 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */
866
867 /* Check validity of instruction m64 */
868 if ( P_INV64( u->itab_entry->prefix ) ) {
869 UDERR(u, "instruction invalid in 64bits\n");
870 return -1;
871 }
872
873 /* effective rex prefix is the effective mask for the
874 * instruction hard-coded in the opcode map.
875 */
876 u->pfx_rex = ( u->pfx_rex & 0x40 ) |
877 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
878
879 /* whether this instruction has a default operand size of
880 * 64bit, also hardcoded into the opcode map.
881 */
882 default64 = P_DEF64( u->itab_entry->prefix );
883 /* calculate effective operand size */
884 if ( REX_W( u->pfx_rex ) ) {
885 u->opr_mode = 64;
886 } else if ( u->pfx_opr ) {
887 u->opr_mode = 16;
888 } else {
889 /* unless the default opr size of instruction is 64,
890 * the effective operand size in the absence of rex.w
891 * prefix is 32.
892 */
893 u->opr_mode = default64 ? 64 : 32;
894 }
895
896 /* calculate effective address size */
897 u->adr_mode = (u->pfx_adr) ? 32 : 64;
898 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
899 u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
900 u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
901 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
902 u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
903 u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
904 }
905
906 return 0;
907 }
908
909
910 static inline int
decode_insn(struct ud * u,uint16_t ptr)911 decode_insn(struct ud *u, uint16_t ptr)
912 {
913 UD_ASSERT((ptr & 0x8000) == 0);
914 u->itab_entry = &ud_itab[ ptr ];
915 u->mnemonic = u->itab_entry->mnemonic;
916 return (resolve_pfx_str(u) == 0 &&
917 resolve_mode(u) == 0 &&
918 decode_operands(u) == 0 &&
919 resolve_mnemonic(u) == 0) ? 0 : -1;
920 }
921
922
923 /*
924 * decode_3dnow()
925 *
926 * Decoding 3dnow is a little tricky because of its strange opcode
927 * structure. The final opcode disambiguation depends on the last
928 * byte that comes after the operands have been decoded. Fortunately,
929 * all 3dnow instructions have the same set of operand types. So we
930 * go ahead and decode the instruction by picking an arbitrarily chosen
931 * valid entry in the table, decode the operands, and read the final
932 * byte to resolve the menmonic.
933 */
934 static inline int
decode_3dnow(struct ud * u)935 decode_3dnow(struct ud* u)
936 {
937 uint16_t ptr;
938 UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
939 UD_ASSERT(u->le->table[0xc] != 0);
940 decode_insn(u, u->le->table[0xc]);
941 inp_next(u);
942 if (u->error) {
943 return -1;
944 }
945 ptr = u->le->table[inp_curr(u)];
946 UD_ASSERT((ptr & 0x8000) == 0);
947 u->mnemonic = ud_itab[ptr].mnemonic;
948 return 0;
949 }
950
951
952 static int
decode_ssepfx(struct ud * u)953 decode_ssepfx(struct ud *u)
954 {
955 uint8_t idx;
956 uint8_t pfx;
957
958 /*
959 * String prefixes (f2, f3) take precedence over operand
960 * size prefix (66).
961 */
962 pfx = u->pfx_str;
963 if (pfx == 0) {
964 pfx = u->pfx_opr;
965 }
966 idx = ((pfx & 0xf) + 1) / 2;
967 if (u->le->table[idx] == 0) {
968 idx = 0;
969 }
970 if (idx && u->le->table[idx] != 0) {
971 /*
972 * "Consume" the prefix as a part of the opcode, so it is no
973 * longer exported as an instruction prefix.
974 */
975 u->pfx_str = 0;
976 if (pfx == 0x66) {
977 /*
978 * consume "66" only if it was used for decoding, leaving
979 * it to be used as an operands size override for some
980 * simd instructions.
981 */
982 u->pfx_opr = 0;
983 }
984 }
985 return decode_ext(u, u->le->table[idx]);
986 }
987
988
989 /*
990 * decode_ext()
991 *
992 * Decode opcode extensions (if any)
993 */
994 static int
decode_ext(struct ud * u,uint16_t ptr)995 decode_ext(struct ud *u, uint16_t ptr)
996 {
997 uint8_t idx = 0;
998 if ((ptr & 0x8000) == 0) {
999 return decode_insn(u, ptr);
1000 }
1001 u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
1002 if (u->le->type == UD_TAB__OPC_3DNOW) {
1003 return decode_3dnow(u);
1004 }
1005
1006 switch (u->le->type) {
1007 case UD_TAB__OPC_MOD:
1008 /* !11 = 0, 11 = 1 */
1009 idx = (MODRM_MOD(modrm(u)) + 1) / 4;
1010 break;
1011 /* disassembly mode/operand size/address size based tables.
1012 * 16 = 0,, 32 = 1, 64 = 2
1013 */
1014 case UD_TAB__OPC_MODE:
1015 idx = u->dis_mode != 64 ? 0 : 1;
1016 break;
1017 case UD_TAB__OPC_OSIZE:
1018 idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
1019 break;
1020 case UD_TAB__OPC_ASIZE:
1021 idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1022 break;
1023 case UD_TAB__OPC_X87:
1024 idx = modrm(u) - 0xC0;
1025 break;
1026 case UD_TAB__OPC_VENDOR:
1027 if (u->vendor == UD_VENDOR_ANY) {
1028 /* choose a valid entry */
1029 idx = (u->le->table[idx] != 0) ? 0 : 1;
1030 } else if (u->vendor == UD_VENDOR_AMD) {
1031 idx = 0;
1032 } else {
1033 idx = 1;
1034 }
1035 break;
1036 case UD_TAB__OPC_RM:
1037 idx = MODRM_RM(modrm(u));
1038 break;
1039 case UD_TAB__OPC_REG:
1040 idx = MODRM_REG(modrm(u));
1041 break;
1042 case UD_TAB__OPC_SSE:
1043 return decode_ssepfx(u);
1044 default:
1045 UD_ASSERT(!"not reached");
1046 break;
1047 }
1048
1049 return decode_ext(u, u->le->table[idx]);
1050 }
1051
1052
1053 static int
decode_opcode(struct ud * u)1054 decode_opcode(struct ud *u)
1055 {
1056 uint16_t ptr;
1057 UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1058 UD_RETURN_ON_ERROR(u);
1059 u->primary_opcode = inp_curr(u);
1060 ptr = u->le->table[inp_curr(u)];
1061 if (ptr & 0x8000) {
1062 u->le = &ud_lookup_table_list[ptr & ~0x8000];
1063 if (u->le->type == UD_TAB__OPC_TABLE) {
1064 inp_next(u);
1065 return decode_opcode(u);
1066 }
1067 }
1068 return decode_ext(u, ptr);
1069 }
1070
1071
1072 /* =============================================================================
1073 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1074 * =============================================================================
1075 */
1076 unsigned int
ud_decode(struct ud * u)1077 ud_decode(struct ud *u)
1078 {
1079 inp_start(u);
1080 clear_insn(u);
1081 u->le = &ud_lookup_table_list[0];
1082 u->error = decode_prefixes(u) == -1 ||
1083 decode_opcode(u) == -1 ||
1084 u->error;
1085 /* Handle decode error. */
1086 if (u->error) {
1087 /* clear out the decode data. */
1088 clear_insn(u);
1089 /* mark the sequence of bytes as invalid. */
1090 u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
1091 u->mnemonic = u->itab_entry->mnemonic;
1092 }
1093
1094 /* maybe this stray segment override byte
1095 * should be spewed out?
1096 */
1097 if ( !P_SEG( u->itab_entry->prefix ) &&
1098 u->operand[0].type != UD_OP_MEM &&
1099 u->operand[1].type != UD_OP_MEM )
1100 u->pfx_seg = 0;
1101
1102 u->insn_offset = u->pc; /* set offset of instruction */
1103 u->asm_buf_fill = 0; /* set translation buffer index to 0 */
1104 u->pc += u->inp_ctr; /* move program counter by bytes decoded */
1105
1106 /* return number of bytes disassembled. */
1107 return u->inp_ctr;
1108 }
1109
1110 /*
1111 vim: set ts=2 sw=2 expandtab
1112 */
1113