1 /* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28
29 #if USE(UDIS86)
30
31 #include "udis86_extern.h"
32 #include "udis86_types.h"
33 #include "udis86_input.h"
34 #include "udis86_decode.h"
35 #include <wtf/Assertions.h>
36
37 #ifndef _MSC_VER
38 #define dbg(x, n...)
39 /* #define dbg printf */
40 #endif // _MSC_VER
41
42 #ifndef __UD_STANDALONE__
43 # include <string.h>
44 #endif /* __UD_STANDALONE__ */
45
46 /* The max number of prefixes to an instruction */
47 #define MAX_PREFIXES 15
48
49 /* instruction aliases and special cases */
50 static struct ud_itab_entry s_ie__invalid =
51 { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none };
52
53 static int
54 decode_ext(struct ud *u, uint16_t ptr);
55
56
/*
 * eff_opr_mode
 *
 * Returns the effective operand size in bits (16, 32 or 64) for the given
 * disassembly mode. rex_w is non-zero when REX.W is set; pfx_opr is
 * non-zero when an operand-size override prefix is present. REX.W is only
 * consulted in 64-bit mode, where it takes precedence over the override.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
  case 64:
    if (rex_w) {
      return 64;
    }
    return pfx_opr ? 16 : 32;
  case 32:
    return pfx_opr ? 16 : 32;
  default:
    /* the only remaining legal mode is 16-bit */
    ASSERT(dis_mode == 16);
    return pfx_opr ? 32 : 16;
  }
}
69
70
/*
 * eff_adr_mode
 *
 * Returns the effective address size in bits (16, 32 or 64) for the given
 * disassembly mode. pfx_adr is non-zero when an address-size override
 * prefix is present.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
  case 64:
    return pfx_adr ? 32 : 64;
  case 32:
    return pfx_adr ? 16 : 32;
  default:
    /* the only remaining legal mode is 16-bit */
    ASSERT(dis_mode == 16);
    return pfx_adr ? 32 : 16;
  }
}
83
84
85 /* Looks up mnemonic code in the mnemonic string table
86 * Returns NULL if the mnemonic code is invalid
87 */
ud_lookup_mnemonic(enum ud_mnemonic_code c)88 const char * ud_lookup_mnemonic( enum ud_mnemonic_code c )
89 {
90 return ud_mnemonics_str[ c ];
91 }
92
93
94 /*
95 * decode_prefixes
96 *
97 * Extracts instruction prefixes.
98 */
99 static int
decode_prefixes(struct ud * u)100 decode_prefixes(struct ud *u)
101 {
102 unsigned int have_pfx = 1;
103 unsigned int i;
104 uint8_t curr;
105
106 /* if in error state, bail out */
107 if ( u->error )
108 return -1;
109
110 /* keep going as long as there are prefixes available */
111 for ( i = 0; have_pfx ; ++i ) {
112
113 /* Get next byte. */
114 ud_inp_next(u);
115 if ( u->error )
116 return -1;
117 curr = ud_inp_curr( u );
118
119 /* rex prefixes in 64bit mode */
120 if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) {
121 u->pfx_rex = curr;
122 } else {
123 switch ( curr )
124 {
125 case 0x2E :
126 u->pfx_seg = UD_R_CS;
127 u->pfx_rex = 0;
128 break;
129 case 0x36 :
130 u->pfx_seg = UD_R_SS;
131 u->pfx_rex = 0;
132 break;
133 case 0x3E :
134 u->pfx_seg = UD_R_DS;
135 u->pfx_rex = 0;
136 break;
137 case 0x26 :
138 u->pfx_seg = UD_R_ES;
139 u->pfx_rex = 0;
140 break;
141 case 0x64 :
142 u->pfx_seg = UD_R_FS;
143 u->pfx_rex = 0;
144 break;
145 case 0x65 :
146 u->pfx_seg = UD_R_GS;
147 u->pfx_rex = 0;
148 break;
149 case 0x67 : /* adress-size override prefix */
150 u->pfx_adr = 0x67;
151 u->pfx_rex = 0;
152 break;
153 case 0xF0 :
154 u->pfx_lock = 0xF0;
155 u->pfx_rex = 0;
156 break;
157 case 0x66:
158 /* the 0x66 sse prefix is only effective if no other sse prefix
159 * has already been specified.
160 */
161 if ( !u->pfx_insn ) u->pfx_insn = 0x66;
162 u->pfx_opr = 0x66;
163 u->pfx_rex = 0;
164 break;
165 case 0xF2:
166 u->pfx_insn = 0xF2;
167 u->pfx_repne = 0xF2;
168 u->pfx_rex = 0;
169 break;
170 case 0xF3:
171 u->pfx_insn = 0xF3;
172 u->pfx_rep = 0xF3;
173 u->pfx_repe = 0xF3;
174 u->pfx_rex = 0;
175 break;
176 default :
177 /* No more prefixes */
178 have_pfx = 0;
179 break;
180 }
181 }
182
183 /* check if we reached max instruction length */
184 if ( i + 1 == MAX_INSN_LENGTH ) {
185 u->error = 1;
186 break;
187 }
188 }
189
190 /* return status */
191 if ( u->error )
192 return -1;
193
194 /* rewind back one byte in stream, since the above loop
195 * stops with a non-prefix byte.
196 */
197 ud_inp_back(u);
198 return 0;
199 }
200
201
modrm(struct ud * u)202 static inline unsigned int modrm( struct ud * u )
203 {
204 if ( !u->have_modrm ) {
205 u->modrm = ud_inp_next( u );
206 u->have_modrm = 1;
207 }
208 return u->modrm;
209 }
210
211
resolve_operand_size(const struct ud * u,unsigned int s)212 static unsigned int resolve_operand_size( const struct ud * u, unsigned int s )
213 {
214 switch ( s )
215 {
216 case SZ_V:
217 return ( u->opr_mode );
218 case SZ_Z:
219 return ( u->opr_mode == 16 ) ? 16 : 32;
220 case SZ_P:
221 return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP;
222 case SZ_MDQ:
223 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
224 case SZ_RDQ:
225 return ( u->dis_mode == 64 ) ? 64 : 32;
226 default:
227 return s;
228 }
229 }
230
231
resolve_mnemonic(struct ud * u)232 static int resolve_mnemonic( struct ud* u )
233 {
234 /* far/near flags */
235 u->br_far = 0;
236 u->br_near = 0;
237 /* readjust operand sizes for call/jmp instrcutions */
238 if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) {
239 /* WP: 16:16 pointer */
240 if ( u->operand[ 0 ].size == SZ_WP ) {
241 u->operand[ 0 ].size = 16;
242 u->br_far = 1;
243 u->br_near= 0;
244 /* DP: 32:32 pointer */
245 } else if ( u->operand[ 0 ].size == SZ_DP ) {
246 u->operand[ 0 ].size = 32;
247 u->br_far = 1;
248 u->br_near= 0;
249 } else {
250 u->br_far = 0;
251 u->br_near= 1;
252 }
253 /* resolve 3dnow weirdness. */
254 } else if ( u->mnemonic == UD_I3dnow ) {
255 u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u ) ] ].mnemonic;
256 }
257 /* SWAPGS is only valid in 64bits mode */
258 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
259 u->error = 1;
260 return -1;
261 }
262
263 if (u->mnemonic == UD_Ixchg) {
264 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX &&
265 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
266 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
267 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
268 u->operand[0].type = UD_NONE;
269 u->operand[1].type = UD_NONE;
270 u->mnemonic = UD_Inop;
271 }
272 }
273
274 if (u->mnemonic == UD_Inop && u->pfx_rep) {
275 u->pfx_rep = 0;
276 u->mnemonic = UD_Ipause;
277 }
278 return 0;
279 }
280
281
282 /* -----------------------------------------------------------------------------
283 * decode_a()- Decodes operands of the type seg:offset
284 * -----------------------------------------------------------------------------
285 */
286 static void
decode_a(struct ud * u,struct ud_operand * op)287 decode_a(struct ud* u, struct ud_operand *op)
288 {
289 if (u->opr_mode == 16) {
290 /* seg16:off16 */
291 op->type = UD_OP_PTR;
292 op->size = 32;
293 op->lval.ptr.off = ud_inp_uint16(u);
294 op->lval.ptr.seg = ud_inp_uint16(u);
295 } else {
296 /* seg16:off32 */
297 op->type = UD_OP_PTR;
298 op->size = 48;
299 op->lval.ptr.off = ud_inp_uint32(u);
300 op->lval.ptr.seg = ud_inp_uint16(u);
301 }
302 }
303
304 /* -----------------------------------------------------------------------------
305 * decode_gpr() - Returns decoded General Purpose Register
306 * -----------------------------------------------------------------------------
307 */
308 static enum ud_type
decode_gpr(register struct ud * u,unsigned int s,unsigned char rm)309 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
310 {
311 s = resolve_operand_size(u, s);
312
313 switch (s) {
314 case 64:
315 return UD_R_RAX + rm;
316 case SZ_DP:
317 case 32:
318 return UD_R_EAX + rm;
319 case SZ_WP:
320 case 16:
321 return UD_R_AX + rm;
322 case 8:
323 if (u->dis_mode == 64 && u->pfx_rex) {
324 if (rm >= 4)
325 return UD_R_SPL + (rm-4);
326 return UD_R_AL + rm;
327 } else return UD_R_AL + rm;
328 default:
329 return 0;
330 }
331 }
332
333 /* -----------------------------------------------------------------------------
334 * resolve_gpr64() - 64bit General Purpose Register-Selection.
335 * -----------------------------------------------------------------------------
336 */
337 static enum ud_type
resolve_gpr64(struct ud * u,enum ud_operand_code gpr_op,enum ud_operand_size * size)338 resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size)
339 {
340 if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15)
341 gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3);
342 else gpr_op = (gpr_op - OP_rAX);
343
344 if (u->opr_mode == 16) {
345 *size = 16;
346 return gpr_op + UD_R_AX;
347 }
348 if (u->dis_mode == 32 ||
349 (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) {
350 *size = 32;
351 return gpr_op + UD_R_EAX;
352 }
353
354 *size = 64;
355 return gpr_op + UD_R_RAX;
356 }
357
358 /* -----------------------------------------------------------------------------
359 * resolve_gpr32 () - 32bit General Purpose Register-Selection.
360 * -----------------------------------------------------------------------------
361 */
362 static enum ud_type
resolve_gpr32(struct ud * u,enum ud_operand_code gpr_op)363 resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op)
364 {
365 gpr_op = gpr_op - OP_eAX;
366
367 if (u->opr_mode == 16)
368 return gpr_op + UD_R_AX;
369
370 return gpr_op + UD_R_EAX;
371 }
372
373 /* -----------------------------------------------------------------------------
374 * resolve_reg() - Resolves the register type
375 * -----------------------------------------------------------------------------
376 */
377 static enum ud_type
resolve_reg(struct ud * u,unsigned int type,unsigned char i)378 resolve_reg(struct ud* u, unsigned int type, unsigned char i)
379 {
380 switch (type) {
381 case T_MMX : return UD_R_MM0 + (i & 7);
382 case T_XMM : return UD_R_XMM0 + i;
383 case T_CRG : return UD_R_CR0 + i;
384 case T_DBG : return UD_R_DR0 + i;
385 case T_SEG : {
386 /*
387 * Only 6 segment registers, anything else is an error.
388 */
389 if ((i & 7) > 5) {
390 u->error = 1;
391 } else {
392 return UD_R_ES + (i & 7);
393 }
394 }
395 case T_NONE:
396 default: return UD_NONE;
397 }
398 }
399
400 /* -----------------------------------------------------------------------------
401 * decode_imm() - Decodes Immediate values.
402 * -----------------------------------------------------------------------------
403 */
404 static void
decode_imm(struct ud * u,unsigned int s,struct ud_operand * op)405 decode_imm(struct ud* u, unsigned int s, struct ud_operand *op)
406 {
407 op->size = resolve_operand_size(u, s);
408 op->type = UD_OP_IMM;
409
410 switch (op->size) {
411 case 8: op->lval.sbyte = ud_inp_uint8(u); break;
412 case 16: op->lval.uword = ud_inp_uint16(u); break;
413 case 32: op->lval.udword = ud_inp_uint32(u); break;
414 case 64: op->lval.uqword = ud_inp_uint64(u); break;
415 default: return;
416 }
417 }
418
419
420 /*
421 * decode_modrm_reg
422 *
423 * Decodes reg field of mod/rm byte
424 *
425 */
426 static void
decode_modrm_reg(struct ud * u,struct ud_operand * operand,unsigned int type,unsigned int size)427 decode_modrm_reg(struct ud *u,
428 struct ud_operand *operand,
429 unsigned int type,
430 unsigned int size)
431 {
432 uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
433 operand->type = UD_OP_REG;
434 operand->size = resolve_operand_size(u, size);
435
436 if (type == T_GPR) {
437 operand->base = decode_gpr(u, operand->size, reg);
438 } else {
439 operand->base = resolve_reg(u, type, reg);
440 }
441 }
442
443
444 /*
445 * decode_modrm_rm
446 *
447 * Decodes rm field of mod/rm byte
448 *
449 */
450 static void
decode_modrm_rm(struct ud * u,struct ud_operand * op,unsigned char type,unsigned int size)451 decode_modrm_rm(struct ud *u,
452 struct ud_operand *op,
453 unsigned char type,
454 unsigned int size)
455
456 {
457 unsigned char mod, rm, reg;
458
459 /* get mod, r/m and reg fields */
460 mod = MODRM_MOD(modrm(u));
461 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
462 reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
463
464 op->size = resolve_operand_size(u, size);
465
466 /*
467 * If mod is 11b, then the modrm.rm specifies a register.
468 *
469 */
470 if (mod == 3) {
471 op->type = UD_OP_REG;
472 if (type == T_GPR) {
473 op->base = decode_gpr(u, op->size, rm);
474 } else {
475 op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7));
476 }
477 return;
478 }
479
480
481 /*
482 * !11 => Memory Address
483 */
484 op->type = UD_OP_MEM;
485
486 if (u->adr_mode == 64) {
487 op->base = UD_R_RAX + rm;
488 if (mod == 1) {
489 op->offset = 8;
490 } else if (mod == 2) {
491 op->offset = 32;
492 } else if (mod == 0 && (rm & 7) == 5) {
493 op->base = UD_R_RIP;
494 op->offset = 32;
495 } else {
496 op->offset = 0;
497 }
498 /*
499 * Scale-Index-Base (SIB)
500 */
501 if ((rm & 7) == 4) {
502 ud_inp_next(u);
503
504 op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
505 op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
506 op->base = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
507
508 /* special conditions for base reference */
509 if (op->index == UD_R_RSP) {
510 op->index = UD_NONE;
511 op->scale = UD_NONE;
512 }
513
514 if (op->base == UD_R_RBP || op->base == UD_R_R13) {
515 if (mod == 0) {
516 op->base = UD_NONE;
517 }
518 if (mod == 1) {
519 op->offset = 8;
520 } else {
521 op->offset = 32;
522 }
523 }
524 }
525 } else if (u->adr_mode == 32) {
526 op->base = UD_R_EAX + rm;
527 if (mod == 1) {
528 op->offset = 8;
529 } else if (mod == 2) {
530 op->offset = 32;
531 } else if (mod == 0 && rm == 5) {
532 op->base = UD_NONE;
533 op->offset = 32;
534 } else {
535 op->offset = 0;
536 }
537
538 /* Scale-Index-Base (SIB) */
539 if ((rm & 7) == 4) {
540 ud_inp_next(u);
541
542 op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
543 op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
544 op->base = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
545
546 if (op->index == UD_R_ESP) {
547 op->index = UD_NONE;
548 op->scale = UD_NONE;
549 }
550
551 /* special condition for base reference */
552 if (op->base == UD_R_EBP) {
553 if (mod == 0) {
554 op->base = UD_NONE;
555 }
556 if (mod == 1) {
557 op->offset = 8;
558 } else {
559 op->offset = 32;
560 }
561 }
562 }
563 } else {
564 const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
565 UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
566 const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
567 UD_NONE, UD_NONE, UD_NONE, UD_NONE };
568 op->base = bases[rm & 7];
569 op->index = indices[rm & 7];
570 if (mod == 0 && rm == 6) {
571 op->offset= 16;
572 op->base = UD_NONE;
573 } else if (mod == 1) {
574 op->offset = 8;
575 } else if (mod == 2) {
576 op->offset = 16;
577 }
578 }
579
580 /*
581 * extract offset, if any
582 */
583 switch (op->offset) {
584 case 8 : op->lval.ubyte = ud_inp_uint8(u); break;
585 case 16: op->lval.uword = ud_inp_uint16(u); break;
586 case 32: op->lval.udword = ud_inp_uint32(u); break;
587 case 64: op->lval.uqword = ud_inp_uint64(u); break;
588 default: break;
589 }
590 }
591
592 /* -----------------------------------------------------------------------------
593 * decode_o() - Decodes offset
594 * -----------------------------------------------------------------------------
595 */
596 static void
decode_o(struct ud * u,unsigned int s,struct ud_operand * op)597 decode_o(struct ud* u, unsigned int s, struct ud_operand *op)
598 {
599 switch (u->adr_mode) {
600 case 64:
601 op->offset = 64;
602 op->lval.uqword = ud_inp_uint64(u);
603 break;
604 case 32:
605 op->offset = 32;
606 op->lval.udword = ud_inp_uint32(u);
607 break;
608 case 16:
609 op->offset = 16;
610 op->lval.uword = ud_inp_uint16(u);
611 break;
612 default:
613 return;
614 }
615 op->type = UD_OP_MEM;
616 op->size = resolve_operand_size(u, s);
617 }
618
619 /* -----------------------------------------------------------------------------
620 * decode_operands() - Disassembles Operands.
621 * -----------------------------------------------------------------------------
622 */
623 static int
decode_operand(struct ud * u,struct ud_operand * operand,enum ud_operand_code type,unsigned int size)624 decode_operand(struct ud *u,
625 struct ud_operand *operand,
626 enum ud_operand_code type,
627 unsigned int size)
628 {
629 switch (type) {
630 case OP_A :
631 decode_a(u, operand);
632 break;
633 case OP_MR:
634 if (MODRM_MOD(modrm(u)) == 3) {
635 decode_modrm_rm(u, operand, T_GPR,
636 size == SZ_DY ? SZ_MDQ : SZ_V);
637 } else if (size == SZ_WV) {
638 decode_modrm_rm( u, operand, T_GPR, SZ_W);
639 } else if (size == SZ_BV) {
640 decode_modrm_rm( u, operand, T_GPR, SZ_B);
641 } else if (size == SZ_DY) {
642 decode_modrm_rm( u, operand, T_GPR, SZ_D);
643 } else {
644 ASSERT(!"unexpected size");
645 }
646 break;
647 case OP_M:
648 if (MODRM_MOD(modrm(u)) == 3) {
649 u->error = 1;
650 }
651 /* intended fall through */
652 case OP_E:
653 decode_modrm_rm(u, operand, T_GPR, size);
654 break;
655 break;
656 case OP_G:
657 decode_modrm_reg(u, operand, T_GPR, size);
658 break;
659 case OP_I:
660 decode_imm(u, size, operand);
661 break;
662 case OP_I1:
663 operand->type = UD_OP_CONST;
664 operand->lval.udword = 1;
665 break;
666 case OP_PR:
667 if (MODRM_MOD(modrm(u)) != 3) {
668 u->error = 1;
669 }
670 decode_modrm_rm(u, operand, T_MMX, size);
671 break;
672 case OP_P:
673 decode_modrm_reg(u, operand, T_MMX, size);
674 break;
675 case OP_VR:
676 if (MODRM_MOD(modrm(u)) != 3) {
677 u->error = 1;
678 }
679 /* intended fall through */
680 case OP_W:
681 decode_modrm_rm(u, operand, T_XMM, size);
682 break;
683 case OP_V:
684 decode_modrm_reg(u, operand, T_XMM, size);
685 break;
686 case OP_S:
687 decode_modrm_reg(u, operand, T_SEG, size);
688 break;
689 case OP_AL:
690 case OP_CL:
691 case OP_DL:
692 case OP_BL:
693 case OP_AH:
694 case OP_CH:
695 case OP_DH:
696 case OP_BH:
697 operand->type = UD_OP_REG;
698 operand->base = UD_R_AL + (type - OP_AL);
699 operand->size = 8;
700 break;
701 case OP_DX:
702 operand->type = UD_OP_REG;
703 operand->base = UD_R_DX;
704 operand->size = 16;
705 break;
706 case OP_O:
707 decode_o(u, size, operand);
708 break;
709 case OP_rAXr8:
710 case OP_rCXr9:
711 case OP_rDXr10:
712 case OP_rBXr11:
713 case OP_rSPr12:
714 case OP_rBPr13:
715 case OP_rSIr14:
716 case OP_rDIr15:
717 case OP_rAX:
718 case OP_rCX:
719 case OP_rDX:
720 case OP_rBX:
721 case OP_rSP:
722 case OP_rBP:
723 case OP_rSI:
724 case OP_rDI:
725 operand->type = UD_OP_REG;
726 operand->base = resolve_gpr64(u, type, (enum ud_operand_size*)(&operand->size));
727 break;
728 case OP_ALr8b:
729 case OP_CLr9b:
730 case OP_DLr10b:
731 case OP_BLr11b:
732 case OP_AHr12b:
733 case OP_CHr13b:
734 case OP_DHr14b:
735 case OP_BHr15b: {
736 ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL
737 + (REX_B(u->pfx_rex) << 3);
738 if (UD_R_AH <= gpr && u->pfx_rex) {
739 gpr = gpr + 4;
740 }
741 operand->type = UD_OP_REG;
742 operand->base = gpr;
743 break;
744 }
745 case OP_eAX:
746 case OP_eCX:
747 case OP_eDX:
748 case OP_eBX:
749 case OP_eSP:
750 case OP_eBP:
751 case OP_eSI:
752 case OP_eDI:
753 operand->type = UD_OP_REG;
754 operand->base = resolve_gpr32(u, type);
755 operand->size = u->opr_mode == 16 ? 16 : 32;
756 break;
757 case OP_ES:
758 case OP_CS:
759 case OP_DS:
760 case OP_SS:
761 case OP_FS:
762 case OP_GS:
763 /* in 64bits mode, only fs and gs are allowed */
764 if (u->dis_mode == 64) {
765 if (type != OP_FS && type != OP_GS) {
766 u->error= 1;
767 }
768 }
769 operand->type = UD_OP_REG;
770 operand->base = (type - OP_ES) + UD_R_ES;
771 operand->size = 16;
772 break;
773 case OP_J :
774 decode_imm(u, size, operand);
775 operand->type = UD_OP_JIMM;
776 break ;
777 case OP_Q:
778 decode_modrm_rm(u, operand, T_MMX, size);
779 break;
780 case OP_R :
781 decode_modrm_rm(u, operand, T_GPR, size);
782 break;
783 case OP_C:
784 decode_modrm_reg(u, operand, T_CRG, size);
785 break;
786 case OP_D:
787 decode_modrm_reg(u, operand, T_DBG, size);
788 break;
789 case OP_I3 :
790 operand->type = UD_OP_CONST;
791 operand->lval.sbyte = 3;
792 break;
793 case OP_ST0:
794 case OP_ST1:
795 case OP_ST2:
796 case OP_ST3:
797 case OP_ST4:
798 case OP_ST5:
799 case OP_ST6:
800 case OP_ST7:
801 operand->type = UD_OP_REG;
802 operand->base = (type - OP_ST0) + UD_R_ST0;
803 operand->size = 0;
804 break;
805 case OP_AX:
806 operand->type = UD_OP_REG;
807 operand->base = UD_R_AX;
808 operand->size = 16;
809 break;
810 default :
811 operand->type = UD_NONE;
812 break;
813 }
814 return 0;
815 }
816
817
818 /*
819 * decode_operands
820 *
821 * Disassemble upto 3 operands of the current instruction being
822 * disassembled. By the end of the function, the operand fields
823 * of the ud structure will have been filled.
824 */
825 static int
decode_operands(struct ud * u)826 decode_operands(struct ud* u)
827 {
828 decode_operand(u, &u->operand[0],
829 u->itab_entry->operand1.type,
830 u->itab_entry->operand1.size);
831 decode_operand(u, &u->operand[1],
832 u->itab_entry->operand2.type,
833 u->itab_entry->operand2.size);
834 decode_operand(u, &u->operand[2],
835 u->itab_entry->operand3.type,
836 u->itab_entry->operand3.size);
837 return 0;
838 }
839
840 /* -----------------------------------------------------------------------------
841 * clear_insn() - clear instruction structure
842 * -----------------------------------------------------------------------------
843 */
844 static void
clear_insn(register struct ud * u)845 clear_insn(register struct ud* u)
846 {
847 u->error = 0;
848 u->pfx_seg = 0;
849 u->pfx_opr = 0;
850 u->pfx_adr = 0;
851 u->pfx_lock = 0;
852 u->pfx_repne = 0;
853 u->pfx_rep = 0;
854 u->pfx_repe = 0;
855 u->pfx_rex = 0;
856 u->pfx_insn = 0;
857 u->mnemonic = UD_Inone;
858 u->itab_entry = NULL;
859 u->have_modrm = 0;
860
861 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
862 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
863 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
864 }
865
866 static int
resolve_mode(struct ud * u)867 resolve_mode( struct ud* u )
868 {
869 /* if in error state, bail out */
870 if ( u->error ) return -1;
871
872 /* propagate prefix effects */
873 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */
874
875 /* Check validity of instruction m64 */
876 if ( P_INV64( u->itab_entry->prefix ) ) {
877 u->error = 1;
878 return -1;
879 }
880
881 /* effective rex prefix is the effective mask for the
882 * instruction hard-coded in the opcode map.
883 */
884 u->pfx_rex = ( u->pfx_rex & 0x40 ) |
885 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
886
887 /* whether this instruction has a default operand size of
888 * 64bit, also hardcoded into the opcode map.
889 */
890 u->default64 = P_DEF64( u->itab_entry->prefix );
891 /* calculate effective operand size */
892 if ( REX_W( u->pfx_rex ) ) {
893 u->opr_mode = 64;
894 } else if ( u->pfx_opr ) {
895 u->opr_mode = 16;
896 } else {
897 /* unless the default opr size of instruction is 64,
898 * the effective operand size in the absence of rex.w
899 * prefix is 32.
900 */
901 u->opr_mode = ( u->default64 ) ? 64 : 32;
902 }
903
904 /* calculate effective address size */
905 u->adr_mode = (u->pfx_adr) ? 32 : 64;
906 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
907 u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
908 u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
909 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
910 u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
911 u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
912 }
913
914 /* These flags determine which operand to apply the operand size
915 * cast to.
916 */
917 u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0;
918 u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0;
919 u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0;
920
921 /* set flags for implicit addressing */
922 u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
923
924 return 0;
925 }
926
gen_hex(struct ud * u)927 static int gen_hex( struct ud *u )
928 {
929 unsigned int i;
930 unsigned char *src_ptr = ud_inp_sess( u );
931 char* src_hex;
932
933 /* bail out if in error stat. */
934 if ( u->error ) return -1;
935 /* output buffer pointe */
936 src_hex = ( char* ) u->insn_hexcode;
937 /* for each byte used to decode instruction */
938 for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) {
939 sprintf( src_hex, "%02x", *src_ptr & 0xFF );
940 src_hex += 2;
941 }
942 return 0;
943 }
944
945
946 static inline int
decode_insn(struct ud * u,uint16_t ptr)947 decode_insn(struct ud *u, uint16_t ptr)
948 {
949 ASSERT((ptr & 0x8000) == 0);
950 u->itab_entry = &ud_itab[ ptr ];
951 u->mnemonic = u->itab_entry->mnemonic;
952 return (resolve_mode(u) == 0 &&
953 decode_operands(u) == 0 &&
954 resolve_mnemonic(u) == 0) ? 0 : -1;
955 }
956
957
958 /*
959 * decode_3dnow()
960 *
961 * Decoding 3dnow is a little tricky because of its strange opcode
962 * structure. The final opcode disambiguation depends on the last
963 * byte that comes after the operands have been decoded. Fortunately,
964 * all 3dnow instructions have the same set of operand types. So we
965 * go ahead and decode the instruction by picking an arbitrarily chosen
966 * valid entry in the table, decode the operands, and read the final
967 * byte to resolve the menmonic.
968 */
969 static inline int
decode_3dnow(struct ud * u)970 decode_3dnow(struct ud* u)
971 {
972 uint16_t ptr;
973 ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
974 ASSERT(u->le->table[0xc] != 0);
975 decode_insn(u, u->le->table[0xc]);
976 ud_inp_next(u);
977 if (u->error) {
978 return -1;
979 }
980 ptr = u->le->table[ud_inp_curr(u)];
981 ASSERT((ptr & 0x8000) == 0);
982 u->mnemonic = ud_itab[ptr].mnemonic;
983 return 0;
984 }
985
986
987 static int
decode_ssepfx(struct ud * u)988 decode_ssepfx(struct ud *u)
989 {
990 uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2;
991 if (u->le->table[idx] == 0) {
992 idx = 0;
993 }
994 if (idx && u->le->table[idx] != 0) {
995 /*
996 * "Consume" the prefix as a part of the opcode, so it is no
997 * longer exported as an instruction prefix.
998 */
999 switch (u->pfx_insn) {
1000 case 0xf2:
1001 u->pfx_repne = 0;
1002 break;
1003 case 0xf3:
1004 u->pfx_rep = 0;
1005 u->pfx_repe = 0;
1006 break;
1007 case 0x66:
1008 u->pfx_opr = 0;
1009 break;
1010 }
1011 }
1012 return decode_ext(u, u->le->table[idx]);
1013 }
1014
1015
1016 /*
1017 * decode_ext()
1018 *
1019 * Decode opcode extensions (if any)
1020 */
1021 static int
decode_ext(struct ud * u,uint16_t ptr)1022 decode_ext(struct ud *u, uint16_t ptr)
1023 {
1024 uint8_t idx = 0;
1025 if ((ptr & 0x8000) == 0) {
1026 return decode_insn(u, ptr);
1027 }
1028 u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
1029 if (u->le->type == UD_TAB__OPC_3DNOW) {
1030 return decode_3dnow(u);
1031 }
1032
1033 switch (u->le->type) {
1034 case UD_TAB__OPC_MOD:
1035 /* !11 = 0, 11 = 1 */
1036 idx = (MODRM_MOD(modrm(u)) + 1) / 4;
1037 break;
1038 /* disassembly mode/operand size/address size based tables.
1039 * 16 = 0,, 32 = 1, 64 = 2
1040 */
1041 case UD_TAB__OPC_MODE:
1042 idx = u->dis_mode / 32;
1043 break;
1044 case UD_TAB__OPC_OSIZE:
1045 idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
1046 break;
1047 case UD_TAB__OPC_ASIZE:
1048 idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1049 break;
1050 case UD_TAB__OPC_X87:
1051 idx = modrm(u) - 0xC0;
1052 break;
1053 case UD_TAB__OPC_VENDOR:
1054 if (u->vendor == UD_VENDOR_ANY) {
1055 /* choose a valid entry */
1056 idx = (u->le->table[idx] != 0) ? 0 : 1;
1057 } else if (u->vendor == UD_VENDOR_AMD) {
1058 idx = 0;
1059 } else {
1060 idx = 1;
1061 }
1062 break;
1063 case UD_TAB__OPC_RM:
1064 idx = MODRM_RM(modrm(u));
1065 break;
1066 case UD_TAB__OPC_REG:
1067 idx = MODRM_REG(modrm(u));
1068 break;
1069 case UD_TAB__OPC_SSE:
1070 return decode_ssepfx(u);
1071 default:
1072 ASSERT(!"not reached");
1073 break;
1074 }
1075
1076 return decode_ext(u, u->le->table[idx]);
1077 }
1078
1079
1080 static inline int
decode_opcode(struct ud * u)1081 decode_opcode(struct ud *u)
1082 {
1083 uint16_t ptr;
1084 ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1085 ud_inp_next(u);
1086 if (u->error) {
1087 return -1;
1088 }
1089 ptr = u->le->table[ud_inp_curr(u)];
1090 if (ptr & 0x8000) {
1091 u->le = &ud_lookup_table_list[ptr & ~0x8000];
1092 if (u->le->type == UD_TAB__OPC_TABLE) {
1093 return decode_opcode(u);
1094 }
1095 }
1096 return decode_ext(u, ptr);
1097 }
1098
1099
1100 /* =============================================================================
1101 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1102 * =============================================================================
1103 */
1104 unsigned int
ud_decode(struct ud * u)1105 ud_decode(struct ud *u)
1106 {
1107 ud_inp_start(u);
1108 clear_insn(u);
1109 u->le = &ud_lookup_table_list[0];
1110 u->error = decode_prefixes(u) == -1 ||
1111 decode_opcode(u) == -1 ||
1112 u->error;
1113 /* Handle decode error. */
1114 if (u->error) {
1115 /* clear out the decode data. */
1116 clear_insn(u);
1117 /* mark the sequence of bytes as invalid. */
1118 u->itab_entry = & s_ie__invalid;
1119 u->mnemonic = u->itab_entry->mnemonic;
1120 }
1121
1122 /* maybe this stray segment override byte
1123 * should be spewed out?
1124 */
1125 if ( !P_SEG( u->itab_entry->prefix ) &&
1126 u->operand[0].type != UD_OP_MEM &&
1127 u->operand[1].type != UD_OP_MEM )
1128 u->pfx_seg = 0;
1129
1130 u->insn_offset = u->pc; /* set offset of instruction */
1131 u->insn_fill = 0; /* set translation buffer index to 0 */
1132 u->pc += u->inp_ctr; /* move program counter by bytes decoded */
1133 gen_hex( u ); /* generate hex code */
1134
1135 /* return number of bytes disassembled. */
1136 return u->inp_ctr;
1137 }
1138
1139 /*
1140 vim: set ts=2 sw=2 expandtab
1141 */
1142
1143 #endif // USE(UDIS86)
1144