1 /* udis86 - libudis86/decode.c
2  *
3  * Copyright (c) 2002-2009 Vivek Thampi
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification,
7  * are permitted provided that the following conditions are met:
8  *
9  *     * Redistributions of source code must retain the above copyright notice,
10  *       this list of conditions and the following disclaimer.
11  *     * Redistributions in binary form must reproduce the above copyright notice,
12  *       this list of conditions and the following disclaimer in the documentation
13  *       and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 
29 #if USE(UDIS86)
30 
31 #include "udis86_extern.h"
32 #include "udis86_types.h"
33 #include "udis86_input.h"
34 #include "udis86_decode.h"
35 #include <wtf/Assertions.h>
36 
/* Debug trace hook. As defined here dbg() expands to nothing, so any
 * dbg(...) call compiles away; the commented-out alternative below maps
 * it to printf instead.
 * NOTE(review): the "n..." named-variadic form is a GNU extension, which
 * is presumably why the definition is skipped for MSVC -- confirm.
 */
#ifndef _MSC_VER
#define dbg(x, n...)
/* #define dbg printf */
#endif // _MSC_VER
41 
42 #ifndef __UD_STANDALONE__
43 # include <string.h>
44 #endif /* __UD_STANDALONE__ */
45 
/* The max number of prefixes to an instruction.
 * NOTE(review): this macro is not referenced anywhere else in this file;
 * decode_prefixes() bounds its loop with MAX_INSN_LENGTH instead.
 */
#define MAX_PREFIXES    15

/* instruction aliases and special cases */

/* Table entry that ud_decode() installs as u->itab_entry when a byte
 * sequence fails to decode; its mnemonic is UD_Iinvalid and it has no
 * operands.
 */
static struct ud_itab_entry s_ie__invalid =
    { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none };

/* Forward declaration: decode_ssepfx() is defined before decode_ext()
 * but calls it (and decode_ext() calls decode_ssepfx() in turn).
 */
static int
decode_ext(struct ud *u, uint16_t ptr);
55 
56 
/*
 * eff_opr_mode
 *
 *    Computes the effective operand size (16, 32 or 64) from the
 *    disassembly mode, the REX.W flag and the presence of an
 *    operand-size prefix.
 *
 *    dis_mode  disassembly mode; must be 16, 32 or 64
 *    rex_w     non-zero if REX.W is set (only consulted in 64-bit mode)
 *    pfx_opr   non-zero if an operand-size prefix was seen
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
    case 64:
      /* REX.W wins over the operand-size prefix. */
      if (rex_w) {
        return 64;
      }
      return pfx_opr ? 16 : 32;
    case 32:
      return pfx_opr ? 16 : 32;
    default:
      /* Anything that is not 64 or 32 is expected to be 16-bit mode,
       * where the prefix selects the larger size.
       */
      ASSERT(dis_mode == 16);
      return pfx_opr ? 32 : 16;
  }
}
69 
70 
/*
 * eff_adr_mode
 *
 *    Computes the effective address size (16, 32 or 64) from the
 *    disassembly mode and the presence of an address-size prefix.
 *
 *    dis_mode  disassembly mode; must be 16, 32 or 64
 *    pfx_adr   non-zero if an address-size prefix was seen
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
    case 64:
      return pfx_adr ? 32 : 64;
    case 32:
      return pfx_adr ? 16 : 32;
    default:
      /* Anything that is not 64 or 32 is expected to be 16-bit mode. */
      ASSERT(dis_mode == 16);
      return pfx_adr ? 32 : 16;
  }
}
83 
84 
85 /* Looks up mnemonic code in the mnemonic string table
86  * Returns NULL if the mnemonic code is invalid
87  */
ud_lookup_mnemonic(enum ud_mnemonic_code c)88 const char * ud_lookup_mnemonic( enum ud_mnemonic_code c )
89 {
90     return ud_mnemonics_str[ c ];
91 }
92 
93 
94 /*
95  * decode_prefixes
96  *
97  *  Extracts instruction prefixes.
98  */
99 static int
decode_prefixes(struct ud * u)100 decode_prefixes(struct ud *u)
101 {
102     unsigned int have_pfx = 1;
103     unsigned int i;
104     uint8_t curr;
105 
106     /* if in error state, bail out */
107     if ( u->error )
108         return -1;
109 
110     /* keep going as long as there are prefixes available */
111     for ( i = 0; have_pfx ; ++i ) {
112 
113         /* Get next byte. */
114         ud_inp_next(u);
115         if ( u->error )
116             return -1;
117         curr = ud_inp_curr( u );
118 
119         /* rex prefixes in 64bit mode */
120         if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) {
121             u->pfx_rex = curr;
122         } else {
123             switch ( curr )
124             {
125             case 0x2E :
126                 u->pfx_seg = UD_R_CS;
127                 u->pfx_rex = 0;
128                 break;
129             case 0x36 :
130                 u->pfx_seg = UD_R_SS;
131                 u->pfx_rex = 0;
132                 break;
133             case 0x3E :
134                 u->pfx_seg = UD_R_DS;
135                 u->pfx_rex = 0;
136                 break;
137             case 0x26 :
138                 u->pfx_seg = UD_R_ES;
139                 u->pfx_rex = 0;
140                 break;
141             case 0x64 :
142                 u->pfx_seg = UD_R_FS;
143                 u->pfx_rex = 0;
144                 break;
145             case 0x65 :
146                 u->pfx_seg = UD_R_GS;
147                 u->pfx_rex = 0;
148                 break;
149             case 0x67 : /* adress-size override prefix */
150                 u->pfx_adr = 0x67;
151                 u->pfx_rex = 0;
152                 break;
153             case 0xF0 :
154                 u->pfx_lock = 0xF0;
155                 u->pfx_rex  = 0;
156                 break;
157             case 0x66:
158                 /* the 0x66 sse prefix is only effective if no other sse prefix
159                  * has already been specified.
160                  */
161                 if ( !u->pfx_insn ) u->pfx_insn = 0x66;
162                 u->pfx_opr = 0x66;
163                 u->pfx_rex = 0;
164                 break;
165             case 0xF2:
166                 u->pfx_insn  = 0xF2;
167                 u->pfx_repne = 0xF2;
168                 u->pfx_rex   = 0;
169                 break;
170             case 0xF3:
171                 u->pfx_insn = 0xF3;
172                 u->pfx_rep  = 0xF3;
173                 u->pfx_repe = 0xF3;
174                 u->pfx_rex  = 0;
175                 break;
176             default :
177                 /* No more prefixes */
178                 have_pfx = 0;
179                 break;
180             }
181         }
182 
183         /* check if we reached max instruction length */
184         if ( i + 1 == MAX_INSN_LENGTH ) {
185             u->error = 1;
186             break;
187         }
188     }
189 
190     /* return status */
191     if ( u->error )
192         return -1;
193 
194     /* rewind back one byte in stream, since the above loop
195      * stops with a non-prefix byte.
196      */
197     ud_inp_back(u);
198     return 0;
199 }
200 
201 
modrm(struct ud * u)202 static inline unsigned int modrm( struct ud * u )
203 {
204     if ( !u->have_modrm ) {
205         u->modrm = ud_inp_next( u );
206         u->have_modrm = 1;
207     }
208     return u->modrm;
209 }
210 
211 
resolve_operand_size(const struct ud * u,unsigned int s)212 static unsigned int resolve_operand_size( const struct ud * u, unsigned int s )
213 {
214     switch ( s )
215     {
216     case SZ_V:
217         return ( u->opr_mode );
218     case SZ_Z:
219         return ( u->opr_mode == 16 ) ? 16 : 32;
220     case SZ_P:
221         return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP;
222     case SZ_MDQ:
223         return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
224     case SZ_RDQ:
225         return ( u->dis_mode == 64 ) ? 64 : 32;
226     default:
227         return s;
228     }
229 }
230 
231 
resolve_mnemonic(struct ud * u)232 static int resolve_mnemonic( struct ud* u )
233 {
234   /* far/near flags */
235   u->br_far = 0;
236   u->br_near = 0;
237   /* readjust operand sizes for call/jmp instrcutions */
238   if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) {
239     /* WP: 16:16 pointer */
240     if ( u->operand[ 0 ].size == SZ_WP ) {
241         u->operand[ 0 ].size = 16;
242         u->br_far = 1;
243         u->br_near= 0;
244     /* DP: 32:32 pointer */
245     } else if ( u->operand[ 0 ].size == SZ_DP ) {
246         u->operand[ 0 ].size = 32;
247         u->br_far = 1;
248         u->br_near= 0;
249     } else {
250         u->br_far = 0;
251         u->br_near= 1;
252     }
253   /* resolve 3dnow weirdness. */
254   } else if ( u->mnemonic == UD_I3dnow ) {
255     u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u )  ] ].mnemonic;
256   }
257   /* SWAPGS is only valid in 64bits mode */
258   if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
259     u->error = 1;
260     return -1;
261   }
262 
263   if (u->mnemonic == UD_Ixchg) {
264     if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX  &&
265          u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
266         (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
267          u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
268       u->operand[0].type = UD_NONE;
269       u->operand[1].type = UD_NONE;
270       u->mnemonic = UD_Inop;
271     }
272   }
273 
274   if (u->mnemonic == UD_Inop && u->pfx_rep) {
275     u->pfx_rep = 0;
276     u->mnemonic = UD_Ipause;
277   }
278   return 0;
279 }
280 
281 
282 /* -----------------------------------------------------------------------------
283  * decode_a()- Decodes operands of the type seg:offset
284  * -----------------------------------------------------------------------------
285  */
286 static void
decode_a(struct ud * u,struct ud_operand * op)287 decode_a(struct ud* u, struct ud_operand *op)
288 {
289   if (u->opr_mode == 16) {
290     /* seg16:off16 */
291     op->type = UD_OP_PTR;
292     op->size = 32;
293     op->lval.ptr.off = ud_inp_uint16(u);
294     op->lval.ptr.seg = ud_inp_uint16(u);
295   } else {
296     /* seg16:off32 */
297     op->type = UD_OP_PTR;
298     op->size = 48;
299     op->lval.ptr.off = ud_inp_uint32(u);
300     op->lval.ptr.seg = ud_inp_uint16(u);
301   }
302 }
303 
304 /* -----------------------------------------------------------------------------
305  * decode_gpr() - Returns decoded General Purpose Register
306  * -----------------------------------------------------------------------------
307  */
308 static enum ud_type
decode_gpr(register struct ud * u,unsigned int s,unsigned char rm)309 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
310 {
311   s = resolve_operand_size(u, s);
312 
313   switch (s) {
314     case 64:
315         return UD_R_RAX + rm;
316     case SZ_DP:
317     case 32:
318         return UD_R_EAX + rm;
319     case SZ_WP:
320     case 16:
321         return UD_R_AX  + rm;
322     case  8:
323         if (u->dis_mode == 64 && u->pfx_rex) {
324             if (rm >= 4)
325                 return UD_R_SPL + (rm-4);
326             return UD_R_AL + rm;
327         } else return UD_R_AL + rm;
328     default:
329         return 0;
330   }
331 }
332 
333 /* -----------------------------------------------------------------------------
334  * resolve_gpr64() - 64bit General Purpose Register-Selection.
335  * -----------------------------------------------------------------------------
336  */
337 static enum ud_type
resolve_gpr64(struct ud * u,enum ud_operand_code gpr_op,enum ud_operand_size * size)338 resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size)
339 {
340   if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15)
341     gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3);
342   else  gpr_op = (gpr_op - OP_rAX);
343 
344   if (u->opr_mode == 16) {
345     *size = 16;
346     return gpr_op + UD_R_AX;
347   }
348   if (u->dis_mode == 32 ||
349     (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) {
350     *size = 32;
351     return gpr_op + UD_R_EAX;
352   }
353 
354   *size = 64;
355   return gpr_op + UD_R_RAX;
356 }
357 
358 /* -----------------------------------------------------------------------------
359  * resolve_gpr32 () - 32bit General Purpose Register-Selection.
360  * -----------------------------------------------------------------------------
361  */
362 static enum ud_type
resolve_gpr32(struct ud * u,enum ud_operand_code gpr_op)363 resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op)
364 {
365   gpr_op = gpr_op - OP_eAX;
366 
367   if (u->opr_mode == 16)
368     return gpr_op + UD_R_AX;
369 
370   return gpr_op +  UD_R_EAX;
371 }
372 
373 /* -----------------------------------------------------------------------------
374  * resolve_reg() - Resolves the register type
375  * -----------------------------------------------------------------------------
376  */
377 static enum ud_type
resolve_reg(struct ud * u,unsigned int type,unsigned char i)378 resolve_reg(struct ud* u, unsigned int type, unsigned char i)
379 {
380   switch (type) {
381     case T_MMX :    return UD_R_MM0  + (i & 7);
382     case T_XMM :    return UD_R_XMM0 + i;
383     case T_CRG :    return UD_R_CR0  + i;
384     case T_DBG :    return UD_R_DR0  + i;
385     case T_SEG : {
386       /*
387        * Only 6 segment registers, anything else is an error.
388        */
389       if ((i & 7) > 5) {
390         u->error = 1;
391       } else {
392         return UD_R_ES + (i & 7);
393       }
394     }
395     case T_NONE:
396     default:    return UD_NONE;
397   }
398 }
399 
400 /* -----------------------------------------------------------------------------
401  * decode_imm() - Decodes Immediate values.
402  * -----------------------------------------------------------------------------
403  */
404 static void
decode_imm(struct ud * u,unsigned int s,struct ud_operand * op)405 decode_imm(struct ud* u, unsigned int s, struct ud_operand *op)
406 {
407   op->size = resolve_operand_size(u, s);
408   op->type = UD_OP_IMM;
409 
410   switch (op->size) {
411     case  8: op->lval.sbyte = ud_inp_uint8(u);   break;
412     case 16: op->lval.uword = ud_inp_uint16(u);  break;
413     case 32: op->lval.udword = ud_inp_uint32(u); break;
414     case 64: op->lval.uqword = ud_inp_uint64(u); break;
415     default: return;
416   }
417 }
418 
419 
420 /*
421  * decode_modrm_reg
422  *
423  *    Decodes reg field of mod/rm byte
424  *
425  */
426 static void
decode_modrm_reg(struct ud * u,struct ud_operand * operand,unsigned int type,unsigned int size)427 decode_modrm_reg(struct ud         *u,
428                  struct ud_operand *operand,
429                  unsigned int       type,
430                  unsigned int       size)
431 {
432   uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
433   operand->type = UD_OP_REG;
434   operand->size = resolve_operand_size(u, size);
435 
436   if (type == T_GPR) {
437     operand->base = decode_gpr(u, operand->size, reg);
438   } else {
439     operand->base = resolve_reg(u, type, reg);
440   }
441 }
442 
443 
444 /*
445  * decode_modrm_rm
446  *
447  *    Decodes rm field of mod/rm byte
448  *
449  */
450 static void
decode_modrm_rm(struct ud * u,struct ud_operand * op,unsigned char type,unsigned int size)451 decode_modrm_rm(struct ud         *u,
452                 struct ud_operand *op,
453                 unsigned char      type,
454                 unsigned int       size)
455 
456 {
457   unsigned char mod, rm, reg;
458 
459   /* get mod, r/m and reg fields */
460   mod = MODRM_MOD(modrm(u));
461   rm  = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
462   reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
463 
464   op->size = resolve_operand_size(u, size);
465 
466   /*
467    * If mod is 11b, then the modrm.rm specifies a register.
468    *
469    */
470   if (mod == 3) {
471     op->type = UD_OP_REG;
472     if (type ==  T_GPR) {
473       op->base = decode_gpr(u, op->size, rm);
474     } else {
475       op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7));
476     }
477     return;
478   }
479 
480 
481   /*
482    * !11 => Memory Address
483    */
484   op->type = UD_OP_MEM;
485 
486   if (u->adr_mode == 64) {
487     op->base = UD_R_RAX + rm;
488     if (mod == 1) {
489       op->offset = 8;
490     } else if (mod == 2) {
491       op->offset = 32;
492     } else if (mod == 0 && (rm & 7) == 5) {
493       op->base = UD_R_RIP;
494       op->offset = 32;
495     } else {
496       op->offset = 0;
497     }
498     /*
499      * Scale-Index-Base (SIB)
500      */
501     if ((rm & 7) == 4) {
502       ud_inp_next(u);
503 
504       op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
505       op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
506       op->base  = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
507 
508       /* special conditions for base reference */
509       if (op->index == UD_R_RSP) {
510         op->index = UD_NONE;
511         op->scale = UD_NONE;
512       }
513 
514       if (op->base == UD_R_RBP || op->base == UD_R_R13) {
515         if (mod == 0) {
516           op->base = UD_NONE;
517         }
518         if (mod == 1) {
519           op->offset = 8;
520         } else {
521           op->offset = 32;
522         }
523       }
524     }
525   } else if (u->adr_mode == 32) {
526     op->base = UD_R_EAX + rm;
527     if (mod == 1) {
528       op->offset = 8;
529     } else if (mod == 2) {
530       op->offset = 32;
531     } else if (mod == 0 && rm == 5) {
532       op->base = UD_NONE;
533       op->offset = 32;
534     } else {
535       op->offset = 0;
536     }
537 
538     /* Scale-Index-Base (SIB) */
539     if ((rm & 7) == 4) {
540       ud_inp_next(u);
541 
542       op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
543       op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
544       op->base  = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
545 
546       if (op->index == UD_R_ESP) {
547         op->index = UD_NONE;
548         op->scale = UD_NONE;
549       }
550 
551       /* special condition for base reference */
552       if (op->base == UD_R_EBP) {
553         if (mod == 0) {
554           op->base = UD_NONE;
555         }
556         if (mod == 1) {
557           op->offset = 8;
558         } else {
559           op->offset = 32;
560         }
561       }
562     }
563   } else {
564     const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
565                                      UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
566     const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
567                                      UD_NONE, UD_NONE, UD_NONE, UD_NONE };
568     op->base  = bases[rm & 7];
569     op->index = indices[rm & 7];
570     if (mod == 0 && rm == 6) {
571       op->offset= 16;
572       op->base = UD_NONE;
573     } else if (mod == 1) {
574       op->offset = 8;
575     } else if (mod == 2) {
576       op->offset = 16;
577     }
578   }
579 
580   /*
581    * extract offset, if any
582    */
583   switch (op->offset) {
584     case 8 : op->lval.ubyte  = ud_inp_uint8(u);  break;
585     case 16: op->lval.uword  = ud_inp_uint16(u); break;
586     case 32: op->lval.udword = ud_inp_uint32(u); break;
587     case 64: op->lval.uqword = ud_inp_uint64(u); break;
588     default: break;
589   }
590 }
591 
592 /* -----------------------------------------------------------------------------
593  * decode_o() - Decodes offset
594  * -----------------------------------------------------------------------------
595  */
596 static void
decode_o(struct ud * u,unsigned int s,struct ud_operand * op)597 decode_o(struct ud* u, unsigned int s, struct ud_operand *op)
598 {
599   switch (u->adr_mode) {
600     case 64:
601         op->offset = 64;
602         op->lval.uqword = ud_inp_uint64(u);
603         break;
604     case 32:
605         op->offset = 32;
606         op->lval.udword = ud_inp_uint32(u);
607         break;
608     case 16:
609         op->offset = 16;
610         op->lval.uword  = ud_inp_uint16(u);
611         break;
612     default:
613         return;
614   }
615   op->type = UD_OP_MEM;
616   op->size = resolve_operand_size(u, s);
617 }
618 
619 /* -----------------------------------------------------------------------------
620  * decode_operands() - Disassembles Operands.
621  * -----------------------------------------------------------------------------
622  */
623 static int
decode_operand(struct ud * u,struct ud_operand * operand,enum ud_operand_code type,unsigned int size)624 decode_operand(struct ud           *u,
625                struct ud_operand   *operand,
626                enum ud_operand_code type,
627                unsigned int         size)
628 {
629   switch (type) {
630     case OP_A :
631       decode_a(u, operand);
632       break;
633     case OP_MR:
634       if (MODRM_MOD(modrm(u)) == 3) {
635         decode_modrm_rm(u, operand, T_GPR,
636                         size == SZ_DY ? SZ_MDQ : SZ_V);
637       } else if (size == SZ_WV) {
638         decode_modrm_rm( u, operand, T_GPR, SZ_W);
639       } else if (size == SZ_BV) {
640         decode_modrm_rm( u, operand, T_GPR, SZ_B);
641       } else if (size == SZ_DY) {
642         decode_modrm_rm( u, operand, T_GPR, SZ_D);
643       } else {
644         ASSERT(!"unexpected size");
645       }
646       break;
647     case OP_M:
648       if (MODRM_MOD(modrm(u)) == 3) {
649           u->error = 1;
650       }
651       /* intended fall through */
652     case OP_E:
653       decode_modrm_rm(u, operand, T_GPR, size);
654       break;
655       break;
656     case OP_G:
657       decode_modrm_reg(u, operand, T_GPR, size);
658       break;
659     case OP_I:
660       decode_imm(u, size, operand);
661       break;
662     case OP_I1:
663       operand->type = UD_OP_CONST;
664       operand->lval.udword = 1;
665       break;
666     case OP_PR:
667       if (MODRM_MOD(modrm(u)) != 3) {
668           u->error = 1;
669       }
670       decode_modrm_rm(u, operand, T_MMX, size);
671       break;
672     case OP_P:
673       decode_modrm_reg(u, operand, T_MMX, size);
674       break;
675     case OP_VR:
676       if (MODRM_MOD(modrm(u)) != 3) {
677           u->error = 1;
678       }
679       /* intended fall through */
680     case OP_W:
681       decode_modrm_rm(u, operand, T_XMM, size);
682       break;
683     case OP_V:
684       decode_modrm_reg(u, operand, T_XMM, size);
685       break;
686     case OP_S:
687       decode_modrm_reg(u, operand, T_SEG, size);
688       break;
689     case OP_AL:
690     case OP_CL:
691     case OP_DL:
692     case OP_BL:
693     case OP_AH:
694     case OP_CH:
695     case OP_DH:
696     case OP_BH:
697       operand->type = UD_OP_REG;
698       operand->base = UD_R_AL + (type - OP_AL);
699       operand->size = 8;
700       break;
701     case OP_DX:
702       operand->type = UD_OP_REG;
703       operand->base = UD_R_DX;
704       operand->size = 16;
705       break;
706     case OP_O:
707       decode_o(u, size, operand);
708       break;
709     case OP_rAXr8:
710     case OP_rCXr9:
711     case OP_rDXr10:
712     case OP_rBXr11:
713     case OP_rSPr12:
714     case OP_rBPr13:
715     case OP_rSIr14:
716     case OP_rDIr15:
717     case OP_rAX:
718     case OP_rCX:
719     case OP_rDX:
720     case OP_rBX:
721     case OP_rSP:
722     case OP_rBP:
723     case OP_rSI:
724     case OP_rDI:
725       operand->type = UD_OP_REG;
726       operand->base = resolve_gpr64(u, type, (enum ud_operand_size*)(&operand->size));
727       break;
728     case OP_ALr8b:
729     case OP_CLr9b:
730     case OP_DLr10b:
731     case OP_BLr11b:
732     case OP_AHr12b:
733     case OP_CHr13b:
734     case OP_DHr14b:
735     case OP_BHr15b: {
736       ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL
737                         + (REX_B(u->pfx_rex) << 3);
738       if (UD_R_AH <= gpr && u->pfx_rex) {
739         gpr = gpr + 4;
740       }
741       operand->type = UD_OP_REG;
742       operand->base = gpr;
743       break;
744     }
745     case OP_eAX:
746     case OP_eCX:
747     case OP_eDX:
748     case OP_eBX:
749     case OP_eSP:
750     case OP_eBP:
751     case OP_eSI:
752     case OP_eDI:
753       operand->type = UD_OP_REG;
754       operand->base = resolve_gpr32(u, type);
755       operand->size = u->opr_mode == 16 ? 16 : 32;
756       break;
757     case OP_ES:
758     case OP_CS:
759     case OP_DS:
760     case OP_SS:
761     case OP_FS:
762     case OP_GS:
763       /* in 64bits mode, only fs and gs are allowed */
764       if (u->dis_mode == 64) {
765         if (type != OP_FS && type != OP_GS) {
766           u->error= 1;
767         }
768       }
769       operand->type = UD_OP_REG;
770       operand->base = (type - OP_ES) + UD_R_ES;
771       operand->size = 16;
772       break;
773     case OP_J :
774       decode_imm(u, size, operand);
775       operand->type = UD_OP_JIMM;
776       break ;
777     case OP_Q:
778       decode_modrm_rm(u, operand, T_MMX, size);
779       break;
780     case OP_R :
781       decode_modrm_rm(u, operand, T_GPR, size);
782       break;
783     case OP_C:
784       decode_modrm_reg(u, operand, T_CRG, size);
785       break;
786     case OP_D:
787       decode_modrm_reg(u, operand, T_DBG, size);
788       break;
789     case OP_I3 :
790       operand->type = UD_OP_CONST;
791       operand->lval.sbyte = 3;
792       break;
793     case OP_ST0:
794     case OP_ST1:
795     case OP_ST2:
796     case OP_ST3:
797     case OP_ST4:
798     case OP_ST5:
799     case OP_ST6:
800     case OP_ST7:
801       operand->type = UD_OP_REG;
802       operand->base = (type - OP_ST0) + UD_R_ST0;
803       operand->size = 0;
804       break;
805     case OP_AX:
806       operand->type = UD_OP_REG;
807       operand->base = UD_R_AX;
808       operand->size = 16;
809       break;
810     default :
811       operand->type = UD_NONE;
812       break;
813   }
814   return 0;
815 }
816 
817 
818 /*
819  * decode_operands
820  *
821  *    Disassemble upto 3 operands of the current instruction being
822  *    disassembled. By the end of the function, the operand fields
823  *    of the ud structure will have been filled.
824  */
825 static int
decode_operands(struct ud * u)826 decode_operands(struct ud* u)
827 {
828   decode_operand(u, &u->operand[0],
829                     u->itab_entry->operand1.type,
830                     u->itab_entry->operand1.size);
831   decode_operand(u, &u->operand[1],
832                     u->itab_entry->operand2.type,
833                     u->itab_entry->operand2.size);
834   decode_operand(u, &u->operand[2],
835                     u->itab_entry->operand3.type,
836                     u->itab_entry->operand3.size);
837   return 0;
838 }
839 
840 /* -----------------------------------------------------------------------------
841  * clear_insn() - clear instruction structure
842  * -----------------------------------------------------------------------------
843  */
844 static void
clear_insn(register struct ud * u)845 clear_insn(register struct ud* u)
846 {
847   u->error     = 0;
848   u->pfx_seg   = 0;
849   u->pfx_opr   = 0;
850   u->pfx_adr   = 0;
851   u->pfx_lock  = 0;
852   u->pfx_repne = 0;
853   u->pfx_rep   = 0;
854   u->pfx_repe  = 0;
855   u->pfx_rex   = 0;
856   u->pfx_insn  = 0;
857   u->mnemonic  = UD_Inone;
858   u->itab_entry = NULL;
859   u->have_modrm = 0;
860 
861   memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
862   memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
863   memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
864 }
865 
866 static int
resolve_mode(struct ud * u)867 resolve_mode( struct ud* u )
868 {
869   /* if in error state, bail out */
870   if ( u->error ) return -1;
871 
872   /* propagate prefix effects */
873   if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */
874 
875     /* Check validity of  instruction m64 */
876     if ( P_INV64( u->itab_entry->prefix ) ) {
877         u->error = 1;
878         return -1;
879     }
880 
881     /* effective rex prefix is the  effective mask for the
882      * instruction hard-coded in the opcode map.
883      */
884     u->pfx_rex = ( u->pfx_rex & 0x40 ) |
885                  ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
886 
887     /* whether this instruction has a default operand size of
888      * 64bit, also hardcoded into the opcode map.
889      */
890     u->default64 = P_DEF64( u->itab_entry->prefix );
891     /* calculate effective operand size */
892     if ( REX_W( u->pfx_rex ) ) {
893         u->opr_mode = 64;
894     } else if ( u->pfx_opr ) {
895         u->opr_mode = 16;
896     } else {
897         /* unless the default opr size of instruction is 64,
898          * the effective operand size in the absence of rex.w
899          * prefix is 32.
900          */
901         u->opr_mode = ( u->default64 ) ? 64 : 32;
902     }
903 
904     /* calculate effective address size */
905     u->adr_mode = (u->pfx_adr) ? 32 : 64;
906   } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
907     u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
908     u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
909   } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
910     u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
911     u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
912   }
913 
914   /* These flags determine which operand to apply the operand size
915    * cast to.
916    */
917   u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0;
918   u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0;
919   u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0;
920 
921   /* set flags for implicit addressing */
922   u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
923 
924   return 0;
925 }
926 
gen_hex(struct ud * u)927 static int gen_hex( struct ud *u )
928 {
929   unsigned int i;
930   unsigned char *src_ptr = ud_inp_sess( u );
931   char* src_hex;
932 
933   /* bail out if in error stat. */
934   if ( u->error ) return -1;
935   /* output buffer pointe */
936   src_hex = ( char* ) u->insn_hexcode;
937   /* for each byte used to decode instruction */
938   for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) {
939     sprintf( src_hex, "%02x", *src_ptr & 0xFF );
940     src_hex += 2;
941   }
942   return 0;
943 }
944 
945 
946 static inline int
decode_insn(struct ud * u,uint16_t ptr)947 decode_insn(struct ud *u, uint16_t ptr)
948 {
949   ASSERT((ptr & 0x8000) == 0);
950   u->itab_entry = &ud_itab[ ptr ];
951   u->mnemonic = u->itab_entry->mnemonic;
952   return (resolve_mode(u)     == 0 &&
953           decode_operands(u)  == 0 &&
954           resolve_mnemonic(u) == 0) ? 0 : -1;
955 }
956 
957 
958 /*
959  * decode_3dnow()
960  *
961  *    Decoding 3dnow is a little tricky because of its strange opcode
962  *    structure. The final opcode disambiguation depends on the last
963  *    byte that comes after the operands have been decoded. Fortunately,
964  *    all 3dnow instructions have the same set of operand types. So we
965  *    go ahead and decode the instruction by picking an arbitrarily chosen
966  *    valid entry in the table, decode the operands, and read the final
967  *    byte to resolve the menmonic.
968  */
969 static inline int
decode_3dnow(struct ud * u)970 decode_3dnow(struct ud* u)
971 {
972   uint16_t ptr;
973   ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
974   ASSERT(u->le->table[0xc] != 0);
975   decode_insn(u, u->le->table[0xc]);
976   ud_inp_next(u);
977   if (u->error) {
978     return -1;
979   }
980   ptr = u->le->table[ud_inp_curr(u)];
981   ASSERT((ptr & 0x8000) == 0);
982   u->mnemonic = ud_itab[ptr].mnemonic;
983   return 0;
984 }
985 
986 
987 static int
decode_ssepfx(struct ud * u)988 decode_ssepfx(struct ud *u)
989 {
990   uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2;
991   if (u->le->table[idx] == 0) {
992     idx = 0;
993   }
994   if (idx && u->le->table[idx] != 0) {
995     /*
996      * "Consume" the prefix as a part of the opcode, so it is no
997      * longer exported as an instruction prefix.
998      */
999     switch (u->pfx_insn) {
1000       case 0xf2:
1001         u->pfx_repne = 0;
1002         break;
1003       case 0xf3:
1004         u->pfx_rep = 0;
1005         u->pfx_repe = 0;
1006         break;
1007       case 0x66:
1008         u->pfx_opr = 0;
1009         break;
1010     }
1011   }
1012   return decode_ext(u, u->le->table[idx]);
1013 }
1014 
1015 
1016 /*
1017  * decode_ext()
1018  *
1019  *    Decode opcode extensions (if any)
1020  */
1021 static int
decode_ext(struct ud * u,uint16_t ptr)1022 decode_ext(struct ud *u, uint16_t ptr)
1023 {
1024   uint8_t idx = 0;
1025   if ((ptr & 0x8000) == 0) {
1026     return decode_insn(u, ptr);
1027   }
1028   u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
1029   if (u->le->type == UD_TAB__OPC_3DNOW) {
1030     return decode_3dnow(u);
1031   }
1032 
1033   switch (u->le->type) {
1034     case UD_TAB__OPC_MOD:
1035       /* !11 = 0, 11 = 1 */
1036       idx = (MODRM_MOD(modrm(u)) + 1) / 4;
1037       break;
1038       /* disassembly mode/operand size/address size based tables.
1039        * 16 = 0,, 32 = 1, 64 = 2
1040        */
1041     case UD_TAB__OPC_MODE:
1042       idx = u->dis_mode / 32;
1043       break;
1044     case UD_TAB__OPC_OSIZE:
1045       idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
1046       break;
1047     case UD_TAB__OPC_ASIZE:
1048       idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1049       break;
1050     case UD_TAB__OPC_X87:
1051       idx = modrm(u) - 0xC0;
1052       break;
1053     case UD_TAB__OPC_VENDOR:
1054       if (u->vendor == UD_VENDOR_ANY) {
1055         /* choose a valid entry */
1056         idx = (u->le->table[idx] != 0) ? 0 : 1;
1057       } else if (u->vendor == UD_VENDOR_AMD) {
1058         idx = 0;
1059       } else {
1060         idx = 1;
1061       }
1062       break;
1063     case UD_TAB__OPC_RM:
1064       idx = MODRM_RM(modrm(u));
1065       break;
1066     case UD_TAB__OPC_REG:
1067       idx = MODRM_REG(modrm(u));
1068       break;
1069     case UD_TAB__OPC_SSE:
1070       return decode_ssepfx(u);
1071     default:
1072       ASSERT(!"not reached");
1073       break;
1074   }
1075 
1076   return decode_ext(u, u->le->table[idx]);
1077 }
1078 
1079 
1080 static inline int
decode_opcode(struct ud * u)1081 decode_opcode(struct ud *u)
1082 {
1083   uint16_t ptr;
1084   ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1085   ud_inp_next(u);
1086   if (u->error) {
1087     return -1;
1088   }
1089   ptr = u->le->table[ud_inp_curr(u)];
1090   if (ptr & 0x8000) {
1091     u->le = &ud_lookup_table_list[ptr & ~0x8000];
1092     if (u->le->type == UD_TAB__OPC_TABLE) {
1093       return decode_opcode(u);
1094     }
1095   }
1096   return decode_ext(u, ptr);
1097 }
1098 
1099 
1100 /* =============================================================================
1101  * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1102  * =============================================================================
1103  */
1104 unsigned int
ud_decode(struct ud * u)1105 ud_decode(struct ud *u)
1106 {
1107   ud_inp_start(u);
1108   clear_insn(u);
1109   u->le = &ud_lookup_table_list[0];
1110   u->error = decode_prefixes(u) == -1 ||
1111              decode_opcode(u)   == -1 ||
1112              u->error;
1113   /* Handle decode error. */
1114   if (u->error) {
1115     /* clear out the decode data. */
1116     clear_insn(u);
1117     /* mark the sequence of bytes as invalid. */
1118     u->itab_entry = & s_ie__invalid;
1119     u->mnemonic = u->itab_entry->mnemonic;
1120   }
1121 
1122     /* maybe this stray segment override byte
1123      * should be spewed out?
1124      */
1125     if ( !P_SEG( u->itab_entry->prefix ) &&
1126             u->operand[0].type != UD_OP_MEM &&
1127             u->operand[1].type != UD_OP_MEM )
1128         u->pfx_seg = 0;
1129 
1130   u->insn_offset = u->pc; /* set offset of instruction */
1131   u->insn_fill = 0;   /* set translation buffer index to 0 */
1132   u->pc += u->inp_ctr;    /* move program counter by bytes decoded */
1133   gen_hex( u );       /* generate hex code */
1134 
1135   /* return number of bytes disassembled. */
1136   return u->inp_ctr;
1137 }
1138 
1139 /*
1140 vim: set ts=2 sw=2 expandtab
1141 */
1142 
1143 #endif // USE(UDIS86)
1144