1 /****************************  disasm1.cpp   ********************************
2 * Author:        Agner Fog
3 * Date created:  2007-02-25
4 * Last modified: 2016-11-09
5 * Project:       objconv
6 * Module:        disasm1.cpp
7 * Description:
8 * Module for disassembler.
9 *
10 * Most of the disassembler code is in this file.
11 * Instruction tables are in opcodes.cpp.
12 * All functions relating to file output are in disasm2.cpp
13 *
14 * Copyright 2007-2016 GNU General Public License http://www.gnu.org/licenses
15 *****************************************************************************/
16 #include "stdafx.h"
17 
18 
19 /**************************  class CSymbolTable   *****************************
20 
21 class CSymbolTable is a container class for a sorted list of symbols. The list
22 of symbols is kept sorted by address at all times. Named symbols from the
23 original file are added to the list with AddSymbol(). New symbols for jump
24 targets and code blocks that do not have a name are added during pass 1 by
25 NewSymbol(). AssignNames() assigns names to these unnamed symbols.
26 
27 A symbol in the list can be found in three different ways: By its address,
28 by its old index, and by its new index. The new index is monotonic, so that
29 consecutive new indices correspond to consecutive addresses. Unfortunately,
30 the new index of a symbol will change whenever another symbol with a lower
31 address is added to the list. Therefore, we need to use the old index rather
32 than the new index for identifying a symbol, e.g. in the relocation table.
33 The old index is a permanent, unique identifier, but in random order.
34 The old index of a symbol is usually the same as the index used in the
35 original file and in the relocation table. New symbols added during pass 1
36 will get assigned an old index which is higher than the highest value that
37 occurred in the original file. Do not make a pointer or reference to a symbol.
38 It may become invalid when new symbols are added.
39 
40 To access a symbol by its old index, you have to translate it with Old2NewIndex
41 To access a symbol by its new index, use operator [].
42 To find a symbol by its address, use FindByAddress().
43 
44 ******************************************************************************/
45 
CSymbolTable()46 CSymbolTable::CSymbolTable() {
47     // Constructor
48     OldNum = 1;
49     NewNum = 0;                                   // Initialize
50     UnnamedNum = 0;                               // Number of unnamed symbols
51     UnnamedSymFormat = 0;                         // Format string for giving names to unnamed symbols
52     UnnamedSymbolsPrefix = cmd.SubType == SUBTYPE_GASM ? "$_" : "?_";// Prefix to add to unnamed symbols
53     ImportTablePrefix = "imp_";                   // Prefix for pointers in import table
54 
55     // Make dummy symbol number 0
56     SASymbol sym0;
57     sym0.Reset();
58     sym0.Section = 0x80000000;                    // Lowest possible address
59     List.PushSort(sym0);                          // Put into Symbols list
60 
61     SymbolNameBuffer.Push(0, 1);                  // Make string 0 empty
62 }
63 
AddSymbol(int32_t Section,uint32_t Offset,uint32_t Size,uint32_t Type,uint32_t Scope,uint32_t OldIndex,const char * Name,const char * DLLName)64 uint32_t CSymbolTable::AddSymbol(int32_t Section, uint32_t Offset, uint32_t Size,
65 uint32_t Type, uint32_t Scope, uint32_t OldIndex, const char * Name, const char * DLLName) {
66     // Add symbol from original file to symbol table.
67     // If name is not known then set Name = 0. A name will then be assigned
68     // OldIndex is the identifier used in relocation records. If the symbol is known
69     // by address rather than by index, then set OldIndex = 0. The return value will
70     // be the assigned value of OldIndex to use in relocation records. The returned value
71     // of OldIndex will be equal to the OldIndex of any previous symbols with same address.
72 
73     // Symbol record
74     SASymbol NewSym;                              // New symbol table entry
75 
76     NewSym.Section  = Section;
77     NewSym.Offset   = Offset;
78     NewSym.Size     = Size;
79     NewSym.Type     = Type;
80     NewSym.Scope    = Scope;
81     NewSym.OldIndex = OldIndex;
82 
83     // Store symbol name in NameBuffer
84     if (Name && *Name) {
85         NewSym.Name = SymbolNameBuffer.GetDataSize();
86         if (DLLName) {
87             // Imported from DLL. Prefix name with "imp_"
88             SymbolNameBuffer.Push(ImportTablePrefix, (uint32_t)strlen(ImportTablePrefix));
89         }
90         // Store name
91         SymbolNameBuffer.PushString(Name);
92     }
93     else {
94         NewSym.Name = 0;                           // Will get a name later
95     }
96     // Store DLL name in NameBuffer
97     if (DLLName && *DLLName) {
98         NewSym.DLLName = SymbolNameBuffer.PushString(DLLName);
99     }
100     else {
101         NewSym.DLLName = 0;
102     }
103 
104     if (OldIndex == 0) {
105         // Make non-unique entry
106         uint32_t NewIndex = NewSymbol(NewSym);
107         // Get old index
108         OldIndex = List[NewIndex].OldIndex;
109     }
110     else {
111         // Make unique entry
112         List.PushSort(NewSym);
113     }
114 
115     // Set OldNum to 1 + maximum OldIndex
116     if (OldIndex >= OldNum) OldNum = OldIndex + 1;
117 
118     return OldIndex;
119 }
120 
NewSymbol(SASymbol & sym)121 uint32_t CSymbolTable::NewSymbol(SASymbol & sym) {
122     // Add symbol to symbol table.
123     // Will not add a new symbol if one already exists at this address and
124     // either the new symbol or the existing symbol has no name.
125     // The return value is the new index to a new or existing symbol.
126     // The type or scope of any existing symbol will be modified if
127     // the type or scope of the new symbol is higher.
128     // The name will be applied to the existing symbol if the existing symbol
129     // has no name.
130 
131     // Find new index of any existing symbol with same address
132     int32_t SIndex = FindByAddress(sym.Section, sym.Offset);
133 
134     if (SIndex > 0 && !(List[SIndex].Type & 0x80000000)
135         && !(sym.Name && List[SIndex].Name)) {
136             // Existing symbol found. Update it with type and scope
137 
138             // Choose between Type of existing symbol and new Type information.
139             // The highest Type value takes precedence, except near indirect jump/call,
140             // which has highest precedence
141             if (((sym.Type & 0xFF) > (List[SIndex].Type & 0xFF)
142                 && ((List[SIndex].Type+1) & 0xFE) != 0x0C) || ((sym.Type+1) & 0xFE) == 0x0C) {
143                     // New symbol has higher type
144                     List[SIndex].Type = sym.Type;
145             }
146             if ((sym.Scope & 0xFF) > (List[SIndex].Scope & 0xFF)) {
147                 // New symbol has higher Scope
148                 List[SIndex].Scope = sym.Scope;
149             }
150             if (sym.Name && !List[SIndex].Name) {
151                 // New symbol has name, old symbol has no name
152                 List[SIndex].Name = sym.Name;
153             }
154     }
155     else {
156         // No existing symbol. Make new one
157         // Give it an old index
158         if (sym.OldIndex == 0) sym.OldIndex = OldNum++;
159 
160         SIndex = List.PushSort(sym);
161     }
162 
163     // Return new index
164     return SIndex;
165 }
166 
167 
NewSymbol(int32_t Section,uint32_t Offset,uint32_t Scope)168 uint32_t CSymbolTable::NewSymbol(int32_t Section, uint32_t Offset, uint32_t Scope) {
169     // Add symbol to jump target or code block that doesn't have a name.
170     // Will not add a new symbol if one already exists at this address.
171     // The return value is the new index to a new or existing symbol.
172     // The symbol will get a name later.
173 
174     // Symbol record
175     SASymbol NewSym;                              // New symbol table entry
176     NewSym.Reset();
177 
178     NewSym.Section  = Section;
179     NewSym.Offset   = Offset;
180     NewSym.Scope    = Scope;
181 
182     // Store new symbol record if no symbol with this address already exists
183     return NewSymbol(NewSym);
184 }
185 
AssignNames()186 void CSymbolTable::AssignNames() {
187     // Assign names to symbols that do not have a name
188 
189     uint32_t i;                                     // New symbol index
190     uint32_t NumDigits;                             // Number of digits in new symbol names
191     char name[64];                                // Buffer for making symbol name
192     static char Format[64];
193 
194     // Find necessary number of digits
195     NumDigits = 3; i = NewNum;
196     while (i >= 1000) {
197         i /= 10;
198         NumDigits++;
199     }
200 
201     // Format string for symbol names
202     sprintf(Format, "%s%c0%i%c", UnnamedSymbolsPrefix, '%', NumDigits, 'i');
203     UnnamedSymFormat = Format;
204 
205     // Update TranslateOldIndex
206     UpdateIndex();
207 
208     // Loop through symbols
209     for (i = 1; i < List.GetNumEntries(); i++) {
210         if (List[i].Name == 0 && List[i].Scope != 0) {
211             // Symbol has no name. Make one
212             sprintf(name, UnnamedSymFormat, ++UnnamedNum);
213             // Store new name
214             List[i].Name = SymbolNameBuffer.PushString(name);
215         }
216     }
217     // Round up the value of UnnamedNum in case more names are assigned later
218     if (NewNum < 1000) {
219         UnnamedNum = (UnnamedNum + 199) / 100 * 100;
220     }
221     else {
222         UnnamedNum = (UnnamedNum + 1999) / 1000 * 1000;
223     }
224 
225 #if 0 //
226     // For debugging: list all symbols
227     printf("\n\nSymbols:");
228     for (i = 0; i < List.GetNumEntries(); i++) {
229 
230         //   if (List[i].Offset > 0x0 && List[i].Offset < 0x8)
231 
232         printf("\n%3X %3X %s Sect %i Offset %X Type %X Size %i Scope %i",
233             i, List[i].OldIndex, GetName(i),
234             List[i].Section, List[i].Offset, List[i].Type, List[i].Size, List[i].Scope);
235     }
236 #endif
237 }
238 
FindByAddress(int32_t Section,uint32_t Offset,uint32_t * Last,uint32_t * NextAfter)239 uint32_t CSymbolTable::FindByAddress(int32_t Section, uint32_t Offset, uint32_t * Last, uint32_t * NextAfter) {
240     // Find symbols by address
241     // The return value will be the new index to the first symbol at the
242     // specified address. The return value will be zero if no symbol found.
243     // If more than one symbol is found with the same address then Last
244     // will receive the new index of the last symbol with this address.
245     // NextAfter will receive the new index of the first symbol with an
246     // address higher than the specified address in the same section, or
247     // zero if none.
248 
249     uint32_t i1;                                    // New index of first symbol
250     uint32_t i2;                                    // New index of last symbol
251     uint32_t i3;                                    // New index of first symbol after address
252 
253     // Make dummy symbol record for searching
254     SASymbol sym;
255     sym.Section = Section;
256     sym.Offset  = Offset;
257 
258     // Search List by address
259     i1 = List.FindFirst(sym);
260 
261     if (i1 == 0 || i1 >= List.GetNumEntries()) {
262         // No symbol found at this address or later. Return 0
263         if (NextAfter) *NextAfter = 0;
264         return 0;
265     }
266     if (sym < List[i1]) {
267         // No symbol found at this address, but one found at higher address
268         // Check if same section
269         if (List[i1].Section != Section) i1 = 0;
270         // Return symbol at later address
271         if (NextAfter) *NextAfter = i1;
272         return 0;
273     }
274 
275     // A symbol was found at this address.
276     // Search for more symbols at same address
277     i2 = i1;
278     while (i2+1 < List.GetNumEntries() && !(sym < List[i2+1])) i2++;
279 
280     // Search for first symbol after this address in same section
281     if (i2+1 < List.GetNumEntries() && List[i2+1].Section == Section) {
282         i3 = i2 + 1;                               // Found
283     }
284     else {
285         i3 = 0;                                    // Not found
286     }
287 
288     // Return last symbol at same address
289     if (Last) *Last = i2;
290 
291     // Return first symbol at higher address
292     if (NextAfter) *NextAfter = i3;
293 
294     // Return first symbol at address
295     return i1;
296 }
297 
FindByAddress(int32_t Section,uint32_t Offset)298 uint32_t CSymbolTable::FindByAddress(int32_t Section, uint32_t Offset) {
299     // Find symbols by address
300     // The return value will be the new index to a first symbol at the
301     // specified address. If more than one symbol is found at the same
302     // address then the one with the highest scope (and which is not
303     // a section record) is returned;
304     uint32_t s0, s1, s2 = 0;
305     uint32_t MaxScope = 0;
306     // Find all symbols at this address
307     s0 = s1 = FindByAddress(Section, Offset, &s2);
308     // Check if any symbols found
309     if (s0 == 0) return 0;
310 
311     // Loop through symbols at this address
312     for (; s1 <= s2; s1++) {
313         // Look for highest scope (and not section)
314         if ((*this)[s1].Scope >= MaxScope && !((*this)[s1].Type & 0x80000000)) {
315             s0 = s1;  MaxScope = (*this)[s1].Scope;
316         }
317     }
318     // Return index to symbol with highest scope
319     return s0;
320 }
321 
Old2NewIndex(uint32_t OldIndex)322 uint32_t CSymbolTable::Old2NewIndex(uint32_t OldIndex) {
323     // Translate old symbol index to new symbol index
324 
325     // Check if TranslateOldIndex is up to date
326     if (NewNum != List.GetNumEntries()) {
327         // New entries have been added since last update. Update TranslateOldIndex
328         UpdateIndex();
329     }
330     // Check if valid
331     if (OldIndex >= OldNum) OldIndex = 0;
332 
333     // Translate old index to new index
334     uint32_t NewIndex = TranslateOldIndex[OldIndex];
335 
336     // Check limit
337     if (NewIndex >= NewNum) NewIndex = 0;
338 
339     // Return new index
340     return NewIndex;
341 }
342 
HasName(uint32_t symo)343 const char * CSymbolTable::HasName(uint32_t symo) {
344     // Ask if symbol has a name, input = old index, output = name or 0
345     // Returns 0 if symbol has no name yet.
346     // Use HasName rather than GetName or GetNameO during pass 1 to avoid
347     // naming symbols in random order.
348 
349     // Get new index
350     uint32_t symi = Old2NewIndex(symo);
351     // Check if valid
352     if (symi == 0 || symi >= NewNum) return 0;
353     // Check if symbol has a name
354     if ((*this)[symi].Name == 0) return 0;
355     // Symbol has a name
356     return GetName(symi);
357 }
358 
GetName(uint32_t symi)359 const char * CSymbolTable::GetName(uint32_t symi) {
360     // Get symbol name from new index.
361     // A name will be assigned to the symbol if it doesn't have one
362 
363     // Get name index from symbol record
364     uint32_t NameIndex = (*this)[symi].Name;
365     if (NameIndex == 0) {
366         // Symbol has no name
367         // Search for other symbol with same address
368         uint32_t Alias = FindByAddress((*this)[symi].Section,(*this)[symi].Offset);
369         if ((*this)[Alias].Name) {
370             // A named symbol with same address found
371             NameIndex = (*this)[Alias].Name;
372         }
373         else {
374             // Give symbol a name
375             // This should occur only if new symbols are made during pass 2
376             char name[64];                             // Buffer for making symbol name
377             sprintf(name, "Unnamed_%X_%X", (*this)[symi].Section, (*this)[symi].Offset);
378             // sprintf(name, UnnamedSymFormat, ++UnnamedNum);
379             // Store new name
380             NameIndex = (*this)[symi].Name = SymbolNameBuffer.PushString(name);
381         }
382     }
383     // Check if valid
384     if (NameIndex == 0 || NameIndex >= SymbolNameBuffer.GetDataSize()) {
385         // NameIndex is invalid
386         return "ErrorNoName";
387     }
388     // Return name
389     return (char*)SymbolNameBuffer.Buf() + NameIndex;
390 }
391 
GetNameO(uint32_t symo)392 const char * CSymbolTable::GetNameO(uint32_t symo) {
393     // Get symbol name by old index.
394     // A name will be assigned to the symbol if it doesn't have one
395     return GetName(Old2NewIndex(symo));
396 }
397 
GetDLLName(uint32_t symi)398 const char * CSymbolTable::GetDLLName(uint32_t symi) {
399     // Get import DLL name from old index
400     if ((*this)[symi].DLLName == 0) {
401         // No name
402         return "ErrorNoName";
403     }
404     // Get name DLL index from symbol record
405     uint32_t NameIndex = (*this)[symi].DLLName;
406     // Check if valid
407     if (NameIndex == 0 || NameIndex >= SymbolNameBuffer.GetDataSize()) {
408         // NameIndex is invalid
409         return "ErrorNoName";
410     }
411     // Return name
412     return (char*)SymbolNameBuffer.Buf() + NameIndex;
413 }
414 
AssignName(uint32_t symi,const char * name)415 void CSymbolTable::AssignName(uint32_t symi, const char *name) {
416     // Give symbol a specific name
417     (*this)[symi].Name = SymbolNameBuffer.PushString(name);
418 }
419 
UpdateIndex()420 void CSymbolTable::UpdateIndex() {
421     // Update TranslateOldIndex
422     uint32_t i;                                     // New index
423 
424     // Allocate array with sufficient size
425     TranslateOldIndex.SetNum(OldNum);
426 
427     // Initialize to zeroes
428     memset(&TranslateOldIndex[0], 0, TranslateOldIndex.GetNumEntries() * sizeof(uint32_t));
429 
430     for (i = 0; i < List.GetNumEntries(); i++) {
431         if (List[i].OldIndex < OldNum) {
432             TranslateOldIndex[List[i].OldIndex] = i;
433         }
434         else {
435             // symbol index out of range
436             err.submit(2031);                       // Report error
437             List[i].OldIndex = 0;                   // Reset index that was out of range
438         }
439     }
440     NewNum = List.GetNumEntries();
441 }
442 
443 
444 /**************************  class CDisassembler  *****************************
445 Members of class CDisassembler
446 Members that relate to file output are in disasm2.cpp
447 ******************************************************************************/
448 
CDisassembler()449 CDisassembler::CDisassembler() {
450     // Constructor
451     Sections.PushZero();                          // Make first section entry zero
452     Relocations.PushZero();                       // Make first relocation entry zero
453     NameBuffer.Push(0, 1);                        // Make first string entry zero
454     FunctionList.PushZero();                      // Make first function entry zero
455     // Initialize variables
456     Buffer = 0;
457     InstructionSetMax = InstructionSetAMDMAX = 0;
458     InstructionSetOR = FlagPrevious = NamesChanged = 0;
459     WordSize = MasmOptions = RelocationsInSource = ExeType = 0;
460     ImageBase = 0;
461     Syntax = cmd.SubType;                         // Assembly syntax dialect
462     if (Syntax == SUBTYPE_GASM) {
463         CommentSeparator = "# ";                   // Symbol for indicating comment
464         HereOperator = ".";                        // Symbol for current address
465     }
466     else {
467         CommentSeparator = "; ";                   // Symbol for indicating comment
468         HereOperator = "$";                        // Symbol for current address
469     }
470 };
471 
Init(uint32_t ExeType,int64_t ImageBase)472 void CDisassembler::Init(uint32_t ExeType, int64_t ImageBase) {
473     // Define file type and imagebase if executable file
474     this->ExeType = ExeType;
475     this->ImageBase = ImageBase;
476 }
477 
AddSection(uint8_t * Buffer,uint32_t InitSize,uint32_t TotalSize,uint32_t SectionAddress,uint32_t Type,uint32_t Align,uint32_t WordSize,const char * Name,uint32_t NameLength)478 void CDisassembler::AddSection(
479 uint8_t * Buffer,                               // Buffer containing raw data
480 uint32_t  InitSize,                             // Size of initialized data in section
481 uint32_t  TotalSize,                            // Size of initialized and uninitialized data in section
482 uint32_t  SectionAddress,                       // Start address to be added to offset in listing
483 uint32_t  Type,                                 // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data
484 uint32_t  Align,                                // Alignment = 1 << Align
485 uint32_t  WordSize,                             // Segment word size: 16, 32 or 64
486 const char * Name,                            // Name of section
487 uint32_t  NameLength) {                         // Length of name if not zero terminated
488 
489     // Check values
490     if (Buffer == 0) Type = 3;
491     if (Name == 0) Name = "?";
492     if (NameLength == 0) NameLength = (uint32_t)strlen(Name);
493     if (TotalSize < InitSize) TotalSize = InitSize;
494 
495     // Define section to be disassembled
496     SASection SecRec;                             // New section record
497 
498     SecRec.Start = Buffer;
499     SecRec.SectionAddress = SectionAddress;
500     SecRec.InitSize = InitSize;
501     SecRec.TotalSize = TotalSize;
502     SecRec.Type = Type;
503     SecRec.Align = Align;
504     SecRec.WordSize = WordSize;
505     // Save name in NameBuffer
506     SecRec.Name = NameBuffer.Push(Name, NameLength);
507     // Terminate with zero
508     NameBuffer.Push(0, 1);
509     // Default group is 'flat' except in 16 bit mode
510     if (WordSize == 16 || (MasmOptions & 0x100)) {
511         // 16-bit or mixed segment size. Group is unknown
512         SecRec.Group = 0;
513     }
514     else {
515         // Pure 32 or 64 bit mode. Group = flat
516         SecRec.Group = ASM_SEGMENT_FLAT;
517     }
518 
519     // Save section record
520     Sections.Push(SecRec);
521 
522     // Remember WordSize
523     switch (WordSize) {
524     case 16:
525         MasmOptions |= 0x100;  break;
526     case 32:
527         MasmOptions |= 0x200;  break;
528     case 64:
529         MasmOptions |= 0x400;  break;
530     }
531 }
532 
AddSectionGroup(const char * Name,int32_t MemberSegment)533 int32_t CDisassembler::AddSectionGroup(const char * Name, int32_t MemberSegment) {
534     // Define section group (from OMF file).
535     // Must be called after all segments have been defined.
536     // To define a group with multiple members, you must call AddSectionGroup
537     // multiple times. You must finish adding members to one group before
538     // starting the definition of another group.
539     // You can define a group without defining its members by calling
540     // AddSectionGroup with MemberSegment = 0.
541 
542     // Check values
543     if (Name == 0) Name = "?";
544 
545     // Find preceding segment or group definition
546     int32_t LastIndex = Sections.GetNumEntries() - 1;
547     // Index of group record
548     int32_t GroupIndex = LastIndex;
549 
550     const char * LastName = "?";
551     if (Sections[LastIndex].Name < NameBuffer.GetDataSize()) {
552         // Last name valid
553         LastName = (char*)NameBuffer.Buf() + Sections[LastIndex].Name;
554     }
555     // Check if group name already defined
556     if (strcmp(Name, LastName) != 0) {
557         // Not define. Make group record in Sections list
558         SASection SecRec;                             // New section record
559         memset(&SecRec, 0, sizeof(SecRec));           // Initialize
560 
561         // Set type = group
562         SecRec.Type = 0x800;
563 
564         // Save name in NameBuffer
565         SecRec.Name = NameBuffer.PushString(Name);
566 
567         // Save group index = my own index
568         SecRec.Group = ++GroupIndex;
569 
570         // Save section record
571         Sections.Push(SecRec);
572     }
573     // Find MemberSegment record
574     if (MemberSegment && MemberSegment < GroupIndex) {
575         // Register group index in segment record
576         Sections[MemberSegment].Group = GroupIndex;
577     }
578     // Return value is group index
579     return GroupIndex;
580 }
581 
AddSymbol(int32_t Section,uint32_t Offset,uint32_t Size,uint32_t Type,uint32_t Scope,uint32_t OldIndex,const char * Name,const char * DLLName)582 uint32_t CDisassembler::AddSymbol(
583 int32_t  Section,                            // Section number (1-based). ASM_SEGMENT_UNKNOWN = external, ASM_SEGMENT_ABSOLUTE = absolute, ASM_SEGMENT_IMGREL = image-relative
584 uint32_t Offset,                             // Offset into section. (Value for absolute symbol)
585 uint32_t Size,                               // Number of bytes used by symbol or function. 0 = unknown
586 uint32_t Type,                               // Symbol type. Use values listed above for SOpcodeDef operands. 0 = unknown type
587 uint32_t Scope,                              // 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external
588 uint32_t OldIndex,                           // Unique identifier used in relocation entries. Value must be > 0 and limited because an array is created with this as index.
589 const char * Name,                         // Name of symbol. Zero-terminated
590 const char * DLLName) {                    // Name of DLL if imported dynamically
591 
592     // Add symbol form original file.
593     // Multiple symbols at same address are allowed.
594     // If section is not known then set Section = ASM_SEGMENT_IMGREL and Offset = image-relative address
595     // If name is not known then set Name = 0. A name will then be assigned
596     // OldIndex is the identifier used in relocation records. It must be nonzero.
597     // If the original file uses 0-based symbol indices then add 1 to OldIndex
598     // and remember to also add 1 when referring to the symbol in a relocation record.
599     // If the symbol is known by address rather than by index, then set OldIndex = 0.
600     // The return value will be the assigned value of OldIndex to use in relocation records.
601     // The returned value of OldIndex will be equal to the OldIndex of any previous symbols
602     // with same address. All symbols that have an identifier (OldIndex) must be defined
603     // before any symbol identified by address only in order to avoid using the same OldIndex.
604 
605     // Check if image-relative
606     if (Section == ASM_SEGMENT_IMGREL) {
607         // Translate absolute virtual address to section and offset
608         TranslateAbsAddress(ImageBase + (int32_t)Offset, Section, Offset);
609     }
610 
611     // Define symbol for disassembler
612     return Symbols.AddSymbol(Section, Offset, Size, Type, Scope, OldIndex, Name, DLLName);
613 }
614 
AddRelocation(int32_t Section,uint32_t Offset,int32_t Addend,uint32_t Type,uint32_t Size,uint32_t TargetIndex,uint32_t ReferenceIndex)615 void CDisassembler::AddRelocation(
616 int32_t  Section,                               // Section of relocation source
617 uint32_t Offset,                                // Offset of relocation source into section
618 int32_t  Addend,                                // Addend to add to target address,
619 // including distance from source to instruction pointer in self-relative addresses,
620 // not including inline addend.
621 uint32_t Type,                                  // Relocation type. See SARelocation in disasm.h for definition of values
622 uint32_t Size,                                  // 1 = byte, 2 = word, 4 = dword, 8 = qword
623 uint32_t TargetIndex,                           // Symbol index of target
624 uint32_t ReferenceIndex) {                      // Symbol index of reference point if Type = 8 or 0x10
625 
626     // Check if image-relative
627     if (Section == ASM_SEGMENT_IMGREL) {
628         // Translate absolute virtual address to section and offset
629         if (!TranslateAbsAddress(ImageBase + (int32_t)Offset, Section, Offset)) {
630             err.submit(1304);
631         }
632     }
633 
634     if (Type != 0x41) {
635         // Define relocation or cross-reference for disassembler
636         SARelocation RelRec;                          // New relocation record
637 
638         RelRec.Section = Section;
639         RelRec.Offset = Offset;
640         RelRec.Type = Type;
641         RelRec.Size = Size;
642         RelRec.Addend = Addend;
643         RelRec.TargetOldIndex = TargetIndex;
644         RelRec.RefOldIndex = ReferenceIndex;
645 
646         // Save relocation record
647         Relocations.PushSort(RelRec);
648     }
649     else {
650         // Make entry in procedure linkage table
651         uint32_t targetsym = Symbols.Old2NewIndex(TargetIndex);
652         if (targetsym && Symbols[targetsym].DLLName) {
653             // Put label on entry in procedure linkage table (import table)
654             // Copy Name and DLLName from target symbol
655             SASymbol ImportSym = Symbols[targetsym];
656             ImportSym.Section = Section;
657             ImportSym.Offset = Offset;
658             ImportSym.Type = 0x0C;
659             ImportSym.OldIndex = 0;
660             ImportSym.Scope = 2;
661             Symbols.NewSymbol(ImportSym);
662         }
663     }
664 }
665 
Go()666 void CDisassembler::Go() {
667     // Do the disassembly
668 
669     // Check for illegal entries in relocations table
670     InitialErrorCheck();
671 
672     // Find missing relocation target addresses
673     FixRelocationTargetAddresses();
674 
675     // Pass 1: Find symbols types and unnamed symbols
676     Pass = 1;
677     Pass1();
678     Pass = 2;
679     Pass1();
680 
681     if (Pass & 0x100) {
682         // Repetition of pass 1 requested
683         Pass = 3;
684         Pass1();
685         Pass = 4;
686         Pass1();
687     }
688 
689     // Put names on unnamed symbols
690     Symbols.AssignNames();
691 
692     // Fix invalid characters in symbol and section names
693     CheckNamesValid();
694 
695 #if 0 //
696     // Show function list. For debugging only
697     printf("\n\nFunctionList:");
698     for (uint32_t i = 0; i < FunctionList.GetNumEntries(); i++) {
699         printf("\nsect %i, start %X, end %X, scope %i, name %s",
700             FunctionList[i].Section, FunctionList[i].Start, FunctionList[i].End,
701             FunctionList[i].Scope, Symbols.GetNameO(FunctionList[i].OldSymbolIndex));
702     }
703 #endif
704 #if 0
705     // For debugging: list all relocations
706     printf("\n\nRelocations:");
707     for (uint32_t i = 0; i < Relocations.GetNumEntries(); i++) {
708         printf("\nsect %i, os %X, type %X, size %i, add %X, target %X",
709             Relocations[i].Section, Relocations[i].Offset, Relocations[i].Type,
710             Relocations[i].Size, Relocations[i].Addend, Relocations[i].TargetOldIndex);
711     }
712 #endif
713 #if 0
714     // For debugging: list all sections
715     printf("\n\nSections:");
716     for (uint32_t s = 1; s < Sections.GetNumEntries(); s++) {
717         printf("\n%2i, %s", s, NameBuffer.Buf() + Sections[s].Name);
718     }
719 #endif
720 
721     // Begin writing output file
722     WriteFileBegin();
723 
724     // Pass 2: Write all sections to output file
725     Pass = 0x10;
726     Pass2();
727 
728     // Check for illegal entries in symbol table and relocations table
729     FinalErrorCheck();
730 
731     // Finish writing output file
732     WriteFileEnd();
733 };
734 
Pass1()735 void CDisassembler::Pass1() {
736 
737     /*             Pass 1: does the following jobs:
738     --------------------------------
739 
740     * Scans all code sections, instruction by instruction. Checks code syntax.
741 
742     * Tries to identify where each function begins and ends.
743 
744     * Follows all references to data in order to determine data type for
745     each data symbol.
746 
747     * Assigns symbol table entries for all jump and call targets that do not
748     allready have a name.
749 
750     * Follows all jump instructions to identify code blocks that are connected.
751     Code blocks in same section that are connected through jumps (not calls)
752     are joined together into the same function.
753 
754     * Identifies and analyzes tables of jump addresses and call addresses,
755     e.g. switch/case tables and virtual function tables.
756 
757     * Tries to identify any data in the code section. If erroneous code or
758     sequences of zeroes are found then the nearest preceding label is marked
759     as dubious and the analysis of code is skipped until the next code label.
760     Pass 1 will be repeated in this case in order to follow backwards jumps
761     from subsequent code. Dubious code will be shown as both code and data
762     in the output of pass 2.
763     */
764 
765     // Loop through sections, pass 1
766     for (Section = 1; Section < Sections.GetNumEntries(); Section++) {
767 
768         // Get section type
769         SectionType = Sections[Section].Type;
770         if (SectionType & 0x800) continue;         // This is a group
771 
772         // Code or data
773         CodeMode = (SectionType & 1) ? 1 : 4;
774         LabelBegin = FlagPrevious = CountErrors = 0;
775 
776         if ((Sections[Section].Type & 0xFF) == 1) {
777             // This is a code section
778 
779             // Initialize code parser
780             Buffer     = Sections[Section].Start;
781             SectionEnd = FunctionEnd = LabelInaccessible = Sections[Section].TotalSize;
782             WordSize   = Sections[Section].WordSize;
783             SectionAddress = Sections[Section].SectionAddress;
784             if (Buffer == 0) continue;
785 
786             IBegin = IEnd = LabelEnd = 0;
787             IFunction = 0;
788 
789             // Loop through instructions
790             while (NextInstruction1()) {
791 
792                 // check if function beings here
793                 CheckForFunctionBegin();
794 
795                 // Find any label here
796                 FindLabels();
797 
798                 // Check if code
799                 if (CodeMode < 4) {
800                     // This is code
801 
802                     // Parse instruction
803                     ParseInstruction();
804                 }
805                 else {
806                     // This is data. Skip to next label
807                     IEnd = LabelEnd;
808                 }
809                 // check if function ends here
810                 CheckForFunctionEnd();
811             }
812         }
813         else {
814             // This is a data section
815             // Make a single entry in FunctionList covering the whole section
816             SFunctionRecord fun = {(int)Section, 0, Sections[Section].TotalSize, 0, 0};
817             FunctionList.PushUnique(fun);
818         }
819     }
820 }
821 
FindLabels()822 void CDisassembler::FindLabels() {
823     // Find any labels at current position and next during pass 1
824     uint32_t sym1, sym2 = 0, sym3 = 0;              // Symbol indices
825 
826     // Search for labels from IBegin
827     sym1 = Symbols.FindByAddress(Section, IBegin, &sym2, &sym3);
828 
829     if (sym1 && sym2) {
830         // Set LabelBegin to address of last label at current address
831         LabelBegin = Symbols[sym2].Offset;
832         CountErrors = 0;
833 
834         // Get code mode from label
835         if ((Symbols[sym2].Type & 0xF0) == 0x80) {
836             // This is known to be code
837             CodeMode = 1;
838         }
839         else if ((Symbols[sym2].Type & 0xFF) == 0) {
840             // Type is unknown
841             if ((Symbols[sym2].Scope & 4) && SectionType == 1) {
842                 // Public label in code segment. Consider this code
843                 CodeMode = 1;
844             }
845             // Otherwise: Assume same type as previous
846         }
847         else {
848             // This is known to be data
849             CodeMode = 4;
850         }
851         // Reset tracer
852         t.Reset();
853     }
854     if (sym3) {
855         // Set LabelEnd to address of next symbol
856         LabelEnd = Symbols[sym3].Offset;
857         if (LabelEnd > SectionEnd) LabelEnd = SectionEnd;
858     }
859     else {
860         // No next label
861         LabelEnd = SectionEnd;
862     }
863 }
864 
CheckForMisplacedLabel()865 void CDisassembler::CheckForMisplacedLabel() {
866     // Remove any label placed inside function
867     // This is called if there appears to be a function end inside an instruction
868     if (FunctionEnd && FunctionEnd < SectionEnd) {
869         FunctionEnd = IEnd;
870         FunctionList[IFunction].Scope |= 0x10000;
871     }
872     else {
873         s.Errors |= 0x10;
874     }
875 }
876 
NextLabel()877 int CDisassembler::NextLabel() {
878     // Loop through labels from IEnd. Pass 2
879     uint32_t sym, sym1, sym2 = 0, sym3 = 0;         // Symbol indices
880 
881     // Make ready for next instruction
882     IBegin = IEnd;
883 
884     // Reset tracer
885     t.Reset();
886 
887     // Check if end of function/section
888     if (IEnd >= FunctionEnd || IEnd >= SectionEnd) {
889         // No more labels in this function or section
890         return 0;
891     }
892 
893     // Search for labels from IEnd
894     sym1 = Symbols.FindByAddress(Section, IEnd, &sym2, &sym3);
895 
896     if (sym1) {
897         // Symbol found
898         for (sym = sym1; sym <= sym2; sym++) {
899             // Remember symbol address
900             LabelBegin = Symbols[sym].Offset;
901             CountErrors = 0;
902 
903             if ((SectionType & 0xFF) == 1) {
904                 // Code section. Get CodeMode
905                 if ((Symbols[sym].Type >> 24) & 0xF) {
906                     // Get CodeMode from last label. 1 = code, 2 = dubiuos, 4 = data
907                     CodeMode = (Symbols[sym].Type >> 24) & 0xF;
908                 }
909                 else if (Symbols[sym].Type & 0x80) {
910                     // Type defined as jump/call. This is known to be code
911                     CodeMode = 1;
912                 }
913                 else if (Symbols[sym].Type == 0) {
914                     // Type is unknown. (Assume same type as previous) changed to:
915                     // Type is unknown. Assume code
916                     CodeMode = 1;
917                 }
918                 else {
919                     // This has been accessed as data
920                     CodeMode = 4;
921                 }
922             }
923             else {
924                 // This is a data segment
925                 CodeMode = 4;
926             }
927             // Get symbol type and size, except for section type
928             if (!(Symbols[sym].Type & 0x80000000)) {
929                 DataType = Symbols[sym].Type;
930                 DataSize = GetDataItemSize(DataType);
931                 if (((DataType+1) & 0xFE) == 0x0C && Symbols[sym].Size) {
932                     // Jump table can have different sizes for direct or image relative
933                     DataSize = Symbols[sym].Size;
934                 }
935             }
936         }
937     }
938     if (sym3) {
939         // Next label found
940         LabelEnd = Symbols[sym3].Offset;
941         return 1;
942     }
943     // No new label found. Continue to FunctionEnd
944     LabelEnd = FunctionEnd;
945     return 1;
946 }
947 
NextFunction2()948 int CDisassembler::NextFunction2() {
949     // Loop through function blocks in pass 2. Return 0 if finished
950 
951     SFunctionRecord Fun;                          // Dummy function record for search and compare
952 
953     if (IFunction == 0) {
954         // Begin of section. Find first function block
955         Fun.Section = Section;
956         Fun.Start   = IBegin;
957         IFunction   = FunctionList.FindFirst(Fun);
958     }
959     else {
960         // Try next function block
961         IFunction++;
962     }
963     // Check if IFunction is valid
964     if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) {
965         // Not valid
966         IFunction = 0;
967         return 0;
968     }
969     // Check if IFunction is within current section
970     Fun.Section = Section;
971     Fun.Start   = SectionEnd;
972     if (Fun < FunctionList[IFunction]) {
973         // Past end of current section
974         IFunction = 0;
975         return 0;
976     }
977     // IFunction is within current section
978     // End of function
979     FunctionEnd = FunctionList[IFunction].End;
980 
981     // Check if function has a defined size
982     if (FunctionEnd <= FunctionList[IFunction].Start) {
983         // Size unknown. Continue until begin of next function
984         if (IFunction+1 < FunctionList.GetNumEntries()
985             && FunctionList[IFunction+1] < Fun
986             && FunctionList[IFunction] < FunctionList[IFunction+1]) {
987                 FunctionEnd = FunctionList[IFunction+1].Start;
988         }
989         else {
990             // No next function. Continue until end of section
991             FunctionEnd = SectionEnd;
992         }
993     }
994 
995     // return IFunction for success
996     return 1;
997 }
998 
CheckForFunctionBegin()999 void CDisassembler::CheckForFunctionBegin() {
1000     // Check if function begins at current position
1001     uint32_t sym1, sym2 = 0, sym3 = 0;              // Symbol indices
1002     SFunctionRecord fun;                          // New function record
1003     IBegin = IEnd;
1004 
1005     if (IFunction == 0) {
1006         // No function defined. Begin new function here
1007 
1008         // Search for nearest labels
1009         sym1 = Symbols.FindByAddress(Section, IEnd, &sym2, &sym3);
1010 
1011         if (sym1 == 0) {
1012             // There is no label here. Make one with Scope = 0
1013             sym1 = Symbols.NewSymbol(Section, IEnd, 0);
1014             // Update labels
1015             LabelBegin = LabelEnd = CountErrors = 0;
1016             FindLabels();
1017         }
1018         // Check that sym1 is valid
1019         if (sym1 == 0 || sym1 >= Symbols.GetNumEntries()) {
1020             err.submit(9000);  return;
1021         }
1022 
1023         // Make function record for FunctionList
1024         fun.Section        = Section;
1025         fun.Start          = IBegin;
1026         fun.End            = IBegin;
1027         fun.Scope          = Symbols[sym1].Scope;
1028         fun.OldSymbolIndex = Symbols[sym1].OldIndex;
1029 
1030         // Add to function list
1031         IFunction = FunctionList.PushUnique(fun);
1032 
1033         // End of function not known yet
1034         FunctionEnd = SectionEnd;  LabelEnd = 0;
1035     }
1036 }
1037 
CheckForFunctionEnd()1038 void CDisassembler::CheckForFunctionEnd() {
1039     // Check if function ends at current position
1040     if (IFunction >= FunctionList.GetNumEntries()) {
1041         // Should not occur
1042         err.submit(9000);  IFunction = 0;  return;
1043     }
1044 
1045     // Function ends if section ends here
1046     if (IEnd >= SectionEnd) {
1047         // Current function must end because section ends here
1048         FunctionList[IFunction].End = SectionEnd;
1049         FunctionList[IFunction].Scope &= ~0x10000;
1050         IFunction = 0;
1051 
1052         // Check if return instruction
1053         if (s.OpcodeDef && !(s.OpcodeDef->Options & 0x10) && (Pass & 0x10)) {
1054             // No return or unconditional jump. Write error message
1055             s.Errors |= 0x10000;
1056             WriteErrorsAndWarnings();
1057         }
1058         return;
1059     }
1060 
1061     // Function ends after ret or unconditional jump and preceding code had no
1062     // jumps beyond this position:
1063     if (s.OpcodeDef && s.OpcodeDef->Options & 0x10) {
1064         // A return or unconditional jump instruction was found.
1065         FlagPrevious |= 2;
1066 
1067         // Mark this position as inaccessible if there is no reference to this place
1068         Symbols.NewSymbol(Section, IEnd, 0);
1069         // Update labels
1070         LabelBegin = LabelEnd = CountErrors = 0;
1071         FindLabels();
1072 
1073         if (IEnd >= FunctionList[IFunction].End) {
1074             // Indicate current function ends here
1075             FunctionList[IFunction].End = IEnd;
1076             FunctionList[IFunction].Scope &= ~0x10000;
1077             IFunction = 0;
1078             return;
1079         }
1080     }
1081 
1082     // Function ends at next label if preceding label is inaccessible and later end not known
1083     if (IFunction && FunctionList[IFunction].Scope == 0 && IEnd >= FunctionList[IFunction].End) {
1084         if (Symbols.FindByAddress(Section, IEnd)) {
1085             // Previous label was inaccessible. There is a new label here. Begin new function here
1086             IFunction = 0;
1087             return;
1088         }
1089     }
1090 
1091     // Function does not end here
1092     return;
1093 }
1094 
1095 
CheckRelocationTarget(uint32_t IRel,uint32_t TargetType,uint32_t TargetSize)1096 void CDisassembler::CheckRelocationTarget(uint32_t IRel, uint32_t TargetType, uint32_t TargetSize) {
1097     // Update relocation record and its target.
1098     // This function updates the symbol type and size of a relocation target.
1099     // If the relocation target is a section:offset address then a new
1100     // symbol record is made
1101     uint32_t SymOldI;                               // Old index of target symbol
1102     uint32_t SymNewI;                               // New index of target symbol
1103     int32_t  TargetSection;                         // Section of target symbol
1104     uint32_t TargetOffset;                          // Offset of target symbol
1105 
1106     // Check if relocation valid
1107     if (!IRel || IRel >= Relocations.GetNumEntries() || !Relocations[IRel].TargetOldIndex
1108         || Relocations[IRel].Section <= 0 || uint32_t(Relocations[IRel].Section) >= Sections.GetNumEntries()) {
1109             return;
1110     }
1111 
1112     // Find target symbol
1113     SymOldI = Relocations[IRel].TargetOldIndex;
1114 
1115     // Look up in symbol table
1116     SymNewI = Symbols.Old2NewIndex(SymOldI);
1117 
1118     // Check if valid
1119     if (!Symbols[SymNewI].OldIndex) return;
1120 
1121     if (Symbols[SymNewI].Type & 0x80000000) {
1122         // Symbol is a section record. Relocation refers to a section-relative address
1123         // Make a new symbol for this data item. The symbol will get a name later
1124 
1125         // Get address of new symbol
1126         TargetSection = Symbols[SymNewI].Section;
1127         TargetOffset  = Symbols[SymNewI].Offset + Relocations[IRel].Addend;
1128 
1129         // Pointer to relocation source address
1130         uint8_t * RelSource = Sections[Relocations[IRel].Section].Start + Relocations[IRel].Offset;
1131 
1132         // Inline Addend;
1133         int32_t InlineA = 0;
1134         switch (Relocations[IRel].Size) {
1135         case 1:
1136             InlineA = *(int8_t*)RelSource;  break;
1137         case 2:
1138             InlineA = *(int16_t*)RelSource;  break;
1139         case 4:  case 8:
1140             InlineA = *(int32_t*)RelSource;  break;
1141         }
1142         // Add inline addend to target address
1143         TargetOffset += InlineA;
1144 
1145         if (Relocations[IRel].Type & 2) {
1146             // Address is self-relative
1147             if ((s.AddressFieldSize && (s.MFlags & 0x100)) || s.ImmediateFieldSize) {
1148                 // Relative jump or rip-relative address
1149                 TargetOffset += IEnd - s.AddressField;
1150                 InlineA      += IEnd - s.AddressField;
1151             }
1152             else {
1153                 // Self-relative address in data segment or unknown
1154                 // This may occur in position-independent code
1155                 // We can't calculate the intended target
1156                 // Make sure there is a symbol, but don't change existing symbol if there is one
1157                 SymNewI = Symbols.NewSymbol(TargetSection, 0, 2);
1158                 return;
1159             }
1160         }
1161         // Make new symbol in symbol table if none exists
1162         SymNewI = Symbols.NewSymbol(TargetSection, TargetOffset, 2);
1163 
1164         if (SymNewI) {
1165             // Get old index
1166             SymOldI = Symbols[SymNewI].OldIndex;
1167 
1168             // Change relocation record to point to new symbol
1169             Relocations[IRel].TargetOldIndex = SymOldI;
1170 
1171             // Compensate for inline addend and rip-relative address
1172             Relocations[IRel].Addend = -InlineA;
1173         }
1174     }
1175 
1176     // Check if symbol has a scope assigned
1177     if (Symbols[SymNewI].Scope == 0) Symbols[SymNewI].Scope = 2;
1178 
1179     // Choose between Symbols[SymNewI].Type and TargetType the one that has the highest priority
1180     if ((TargetType & 0xFF) > (Symbols[SymNewI].Type & 0xFF)
1181         || (((TargetType+1) & 0xFE) == 0x0C && (Symbols[SymNewI].Type & 0xFF) > 0x0C)) {
1182 
1183             // No type assigned yet, or new type overrides old type
1184             Symbols[SymNewI].Type = TargetType;
1185 
1186             // Choose biggest size. Size for code pointer takes precedence
1187             if (TargetSize > Symbols[SymNewI].Size || ((TargetType+1) & 0xFE) == 0x0C) {
1188                 Symbols[SymNewI].Size = TargetSize;
1189             }
1190     }
1191 }
1192 
1193 
CheckJumpTarget(uint32_t symi)1194 void CDisassembler::CheckJumpTarget(uint32_t symi) {
1195     // Extend range of current function to jump target, if needed
1196 
1197     // Check if current section is valid
1198     if (Section == 0 || Section >= Sections.GetNumEntries()) return;
1199 
1200     // Check if current function is valid
1201     if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) return;
1202 
1203     // Check if target is in same section
1204     if (Symbols[symi].Section != (int32_t)Section) return;
1205 
1206     // Check if target extends current function
1207     if (Symbols[symi].Offset > FunctionList[IFunction].End && Symbols[symi].Offset <= Sections[Section].InitSize) {
1208         // Target is after tentative end of current function but within section
1209 
1210         // Check if it is a known function
1211         if ((Symbols[symi].Type & 0xFF) == 0x83 || (Symbols[symi].Type & 0xFF) == 0x85
1212             || (Symbols[symi].Scope & 0x1C)) {
1213                 // Target is known as public or a function. No need to extend current function
1214                 return;
1215         }
1216         // Extend current function forward to include target offset
1217         FunctionList[IFunction].End = Symbols[symi].Offset;
1218         FunctionList[IFunction].Scope |= 0x10000;
1219     }
1220     else if (Symbols[symi].Offset < FunctionList[IFunction].Start) {
1221         // Target is before tentative begin of current function but within section
1222 
1223         // Check if target is already in function table
1224         SFunctionRecord fun;
1225         fun.Section = Symbols[symi].Section;
1226         fun.Start   = Symbols[symi].Offset;
1227         uint32_t IFun = FunctionList.Exists(fun);
1228         if (IFun > 0 && IFun < FunctionList.GetNumEntries()) {
1229             // Target is the beginning of a known function. No need to extend current function
1230             return;
1231         }
1232 
1233         /* Removed: This is a mess. Looks better when functions are separate
1234         // Target points inside a previously defined function. Join the two functions into one
1235         IFun = FunctionList.FindFirst(fun) - 1;
1236         if (IFun > 0 && IFun < FunctionList.GetNumEntries() && FunctionList[IFun].Section == Section) {
1237 
1238         // Get maximum scope of the two functions
1239         if (FunctionList[IFun].Scope < FunctionList[IFunction].Scope) {
1240         FunctionList[IFun].Scope = FunctionList[IFunction].Scope;
1241         }
1242 
1243         // Get maximum end of the two functions
1244         if (FunctionList[IFun].End < FunctionList[IFunction].End) {
1245         FunctionList[IFun].End = FunctionList[IFunction].End;
1246         }
1247 
1248         // Remove entry IFunction from FunctionList
1249         FunctionList.Remove(IFunction);
1250 
1251         // Set current function to IFun
1252         IFunction = IFun;
1253         }
1254         */
1255     }
1256 }
1257 
1258 
Pass2()1259 void CDisassembler::Pass2() {
1260 
1261     /*             Pass 2: does the following jobs:
1262     --------------------------------
1263 
1264     * Scans through all sections, code and data.
1265 
1266     * Code is analyzed, instruction by instruction. Checks code syntax.
1267 
1268     * Outputs warnings for suboptimal instruction codes and error messages
1269     for erroneous code and erroneous relocations.
1270 
1271     * Outputs disassembly of all instructions, operands and relocations,
1272     followed by the binary code listing as comment.
1273 
1274     * Outputs disassembly of all data, followed by alternative representations
1275     as comment.
1276 
1277     * Outputs dubious code as both code and data in order to allow a re-assembly
1278     to produce identical code.
1279     */
1280 
1281     // Loop through sections, pass 2
1282     for (Section = 1; Section < Sections.GetNumEntries(); Section++) {
1283 
1284         // Get section type
1285         SectionType = Sections[Section].Type;
1286         if (SectionType & 0x800) continue;         // This is a group
1287 
1288         if (((SectionType & 0xFF) == 0x10) && cmd.DebugInfo == CMDL_DEBUG_STRIP) {
1289             // Skip debug section
1290             cmd.CountDebugRemoved();
1291             continue;
1292         }
1293         if (((SectionType & 0xFF) == 0x11) && cmd.ExeptionInfo == CMDL_EXCEPTION_STRIP) {
1294             // Skip exception section
1295             cmd.CountExceptionRemoved();
1296             continue;
1297         }
1298         // Is this code or data?
1299         CodeMode = ((SectionType & 0xFF) == 1) ? 1 : 4;
1300 
1301         // Initialize
1302         LabelBegin = FlagPrevious = CountErrors = 0;
1303         Buffer = Sections[Section].Start;
1304         SectionEnd = Sections[Section].TotalSize;
1305         LabelInaccessible = Sections[Section].InitSize;
1306         WordSize = Sections[Section].WordSize;
1307         SectionAddress = Sections[Section].SectionAddress;
1308 
1309         // Write segment directive
1310         WriteSegmentBegin();
1311 
1312         IBegin = IEnd = LabelEnd = IFunction = DataType = DataSize = 0;
1313 
1314         // Loop through function blocks in this section
1315         while (NextFunction2()) {
1316 
1317             // Check CodeMode from label
1318             NextLabel();
1319 
1320             // Write begin function
1321             if (CodeMode & 3) WriteFunctionBegin();
1322 
1323             // Loop through labels
1324             while (NextLabel()) {
1325 
1326                 // Loop through code
1327                 while (NextInstruction2()) {
1328 
1329                     if (CodeMode & 3) {
1330                         // Interpret this as code
1331 
1332                         // Write label if any
1333                         CheckLabel();
1334 
1335                         // Parse instruction
1336                         ParseInstruction();
1337 
1338                         // Check for filling space
1339                         if (((s.Warnings1 & 0x10000000) || s.Warnings1 == 0x1000000) && WriteFillers()) {
1340                             // Code is inaccessible fillers. Has been written by CheckForFillers()
1341                             continue;
1342                         }
1343 
1344                         // Write any error and warning messages to OutFile
1345                         WriteErrorsAndWarnings();
1346 
1347                         // Write instruction to OutFile
1348                         WriteInstruction();
1349 
1350                         // Write hex code as comment after instruction
1351                         WriteCodeComment();
1352                     }
1353                     if (CodeMode & 6) {
1354 
1355                         // Interpret this as data
1356                         WriteDataItems();
1357                     }
1358                     if (IEnd <= IBegin) {
1359 
1360                         // Prevent infinite loop
1361                         IEnd++;
1362                         break;
1363                     }
1364                 }
1365             }
1366             // Write end of function, if any
1367             if (CodeMode & 3) WriteFunctionEnd();         // End function
1368         }
1369         // Write end of segment
1370         WriteSegmentEnd();
1371     }
1372 }
1373 
1374 /********************  Explanation of tracer:  ***************************
1375 
1376 This is a machine which can trace the contents of each register in certain
1377 situations. It is currently used for recognizing certain instruction patterns
1378 that are used by various 64 bit compilers for accessing jump tables and
1379 virtual function tables. The trace machine can be extended for other purposes.
1380 
1381 A switch/case statement is typically implemented as follows by the 64 bit MS
1382 C++ compiler:
1383 
1384 .code
1385 lea     rbx, [__ImageBase]
1386 mov     eax, [SwitchIndex]
1387 add     eax, - LowerLimit
1388 cmp     eax, Range
1389 ja      LabelDefault
1390 cdqe
1391 mov     ecx, [imagerel(SwitchTable) + rbx + rax*4]
1392 add     rcx, rbx
1393 jmp     rcx
1394 
1395 .data
1396 SwitchTable label dword
1397 dd      imagerel(Label1)
1398 dd      imagerel(Label2)
1399 dd      imagerel(Label3)
1400 
1401 Some other compilers use the beginning of the switch table or the beginning of
1402 the code section as reference point for 32-bit jump addresses. Other
1403 compilers use 64-bit addresses in the switch table. We want to recognize
1404 all these patterns in order to disassemble a switch table in a comprehensible
1405 way and find the case label targets.
1406 
1407 In order to recognize a switch table in the above example, the tracer must
1408 do the following tasks:
1409 
1410 1.  Calculate the rip-relative address in the lea instruction and detect
1411 that it is equal to the image base.
1412 
1413 2.  Remember that rbx contains the image base.
1414 
1415 3.  When interpreting the mov ecx instruction it recognizes that the base
1416 pointer contains the image base, therefore the displacement must be
1417 interpreted as an image-relative address. Calculate this address and
1418 give it a name.
1419 
4.  Remember that ecx contains an element from the array SwitchTable.
1421 It is not yet known that SwitchTable is a switch table.
1422 
1423 5.  After add rcx,rbx remember that rcx contains an element from the array
1424 SwitchTable plus the image base.
1425 
1426 6.  When interpreting the jmp rcx instruction, the information about the
1427 contents of rcx is used for concluding that SwitchTable contains jump
1428 addresses, and that these addresses are image-relative. If there had
1429 been no add rcx,rbx, we would conclude that SwitchTable contains
1430 absolute virtual addresses.
1431 
1432 7.  Go through all elements of SwitchTable. Calculate the address that each
1433 element points to, give it a name, and extend the scope of the current
1434 function to include this target.
1435 
1436 8.  It would be possible to determine the length of the switch table from
1437 the cmp instruction, but the tracer does not currently use this
1438 information. Instead, it stops parsing the switch table at the first
1439 known label or the first invalid address.
1440 
1441 This is quite a long way to go for acquiring this information, but it is
1442 necessary in order to tell what is code and what is data and to find out
1443 where the function ends. Unfortunately, the MS compiler puts switch tables
1444 in the code segment rather than in the data segment which would give better
1445 caching and code prefetching. If the switch table was not identified as such,
1446 it would be impossible to tell what is code and what is data.
1447 
1448 The tracer is also used for identifying virtual function tables.
1449 
Values of SATracer::Regist[i] tell what kind of information register i contains:
1451 0     Unknown contents
1452 1     Contains image base
1453 4     Contains a constant = Value[i]
1454 8     Contains a value < Value[i]. (Not implemented yet)
1455 0x10  Contains the value of a symbol. Value[i] contains the old index of the symbol
1456 0x11  Contains the value of an array element. Value[i] contains the symbol old index of the array
1457 0x12  Contains the value of an array element + image base. Value[i] contains the symbol old index of the array. (array may contain image-relative jump addresses)
1458 0x13  Contains the value of an array element + array base. Value[i] contains the symbol old index of the array. (array may contain jump addresses relative to array base)
1459 0x18  Contains the address of a symbol. Value[i] contains the symbol old index
1460 0x19  Contains the address of an array element. Value[i] contains the symbol old index of the array
1461 */
1462 
UpdateTracer()1463 void CDisassembler::UpdateTracer() {
1464     // Trace register values. See explanation above
1465     uint32_t reg;                                   // Destination register number
1466     uint32_t srcreg;                                // Source register number
1467 
1468     if (s.Operands[0] & 0xFF) {
1469         // There is a destination operand
1470         if ((s.Operands[0] & 0xFF) < 5 && (s.Operands[0] & 0x1000)) {
1471             // Destination operand is a general purpose register
1472             switch (s.Operands[0] & 0xF0000) {
1473             case 0x20000:
1474                 // Register indicated by last bits of opcode byte
1475                 reg = Get<uint8_t>(s.OpcodeStart2) & 7;
1476                 // Check REX.B prefix
1477                 if (s.Prefixes[7] & 1) reg |= 8;     // Add 8 if REX.B prefix
1478                 break;
1479             case 0x30000:
1480                 // Register indicated by rm bits of mod/reg/rm byte
1481                 reg = s.RM;
1482                 break;
1483             case 0x40000:
1484                 // Register indicated by reg bits of mod/reg/rm byte
1485                 reg = s.Reg;
1486                 break;
1487             default:
1488                 // Error. Don't know where to find destination register
1489                 t.Reset();  return;
1490             }
1491         }
1492         else if ((s.Operands[0] & 0xFF) >= 0xA0 && (s.Operands[0] & 0xFF) <= 0xA9) {
1493             // Destination is al, ax, eax, or rax
1494             reg = 0;
1495         }
1496         else {
1497             // Destination is not a general purpose register
1498             return;
1499         }
1500     }
1501     else {
1502         // There is no destination operand
1503         return;
1504     }
1505 
1506     // Destination operand is a general purpose register
1507     if (OpcodeOptions & 4) {
1508         // Destination register is not changed
1509         return;
1510     }
1511 
1512     // Check the opcode to find out what has happened to this register
1513     switch (Opcodei) {
1514     case 0xB0: case 0xB1: case 0xB2: case 0xB3:
1515     case 0xB4: case 0xB5: case 0xB6: case 0xB7:
1516     case 0xB8: case 0xB9: case 0xBA: case 0xBB:
1517     case 0xBC: case 0xBD: case 0xBE: case 0xBF:
1518         // MOV register, constant
1519         t.Regist[reg] = 0;
1520         if (s.OperandSize < 32) {
1521             // Only part of register is changed
1522             return;
1523         }
1524         if (s.ImmediateRelocation) {
1525             if (s.OperandSize < WordSize || !(Relocations[s.ImmediateRelocation].Type & 0x21)) {
1526                 // Wrong size or type of relocation
1527                 return;
1528             }
1529             // Register contains the address of a symbol
1530             t.Regist[reg] = 0x18;
1531             t.Value [reg] = Relocations[s.ImmediateRelocation].TargetOldIndex;
1532             return;
1533         }
1534 
1535         // Register value is a known constant
1536         t.Regist[reg] = 4;
1537         // Save value
1538         switch (s.ImmediateFieldSize) {
1539         case 1:
1540             t.Value[reg] = Get<uint8_t>(s.ImmediateField);
1541             break;
1542         case 2:
1543             t.Value[reg] = Get<uint16_t>(s.ImmediateField);
1544             break;
1545         case 4:
1546         case 8: // 64-bit value truncated to 32 bits
1547             t.Value[reg] = Get<uint32_t>(s.ImmediateField);
1548             break;
1549         default:
1550             // Error. Should not occur
1551             t.Regist[reg] = 0;
1552         }
1553         return;
1554         /* This part is currently unused:
1555         case 0x31: case 0x33: case 0x29: case 0x2B:
1556         // XOR or SUB. Check if source and destination is same register
1557         if ((s.Operands[0] & 0xFFFF) == (s.Operands[1] & 0xFFFF) && s.Reg == s.RM && s.OperandSize >= 32) {
1558         // XOR OR SUB with same source and destination produces zero
1559         t.Regist[reg] = 4;
1560         t.Value [reg] = 0;
1561         return;
1562         }
1563         break;
1564         */
1565 
1566     case 0x8D:
1567         // LEA
1568         if (s.AddressFieldSize == 4 && s.AddressRelocation && s.OperandSize >= 32) {
1569             // Register contains the address of a symbol
1570             if (!(Relocations[s.AddressRelocation].Type & 1) && WordSize < 64) {
1571                 // Cannot follow position-independent code in 32 bit mode
1572                 t.Regist[reg] = 0;  return;
1573             }
1574             t.Regist[reg] = 0x18;
1575             t.Value [reg] = Relocations[s.AddressRelocation].TargetOldIndex;
1576             // Check if symbol has name
1577             const char * SymName = Symbols.HasName(t.Value[reg]);
1578             if (SymName && strcmp(SymName, "__ImageBase") == 0) {
1579                 // Symbol is imagebase
1580                 t.Regist[reg] = 1;
1581             }
1582             // Check if base or index register
1583             if (s.BaseReg || s.IndexReg) t.Regist[reg]++;
1584             return;
1585         }
1586         if (!s.AddressRelocation && s.BaseReg && s.IndexReg && s.Scale == 0) {
1587             // LEA used as ADD
1588 
1589             if (t.Regist[s.BaseReg-1] == 1 && (t.Regist[s.IndexReg-1] & 0xFE) == 0x10) {
1590                 // Adding imagebase to the value of a symbol or array element
1591                 t.Regist[reg] = 0x12;
1592                 t.Value [reg] = t.Value[s.IndexReg-1];
1593                 return;
1594             }
1595             if (t.Regist[s.IndexReg-1] == 1 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x10) {
1596                 // Adding the value of a symbol or array element to the imagebase
1597                 t.Regist[reg] = 0x12;
1598                 t.Value [reg] = t.Value[s.BaseReg-1];
1599                 return;
1600             }
1601             if ((((t.Regist[s.IndexReg-1] & 0xFE) == 0x18 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x10)
1602                 ||   ((t.Regist[s.IndexReg-1] & 0xFE) == 0x10 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18))
1603                 &&     t.Value [s.IndexReg-1] == t.Value[s.BaseReg-1]) {
1604                     // Adding the value of an array element to the base address of same array.
1605                     // This is a computed jump address if array contains self-relative addresses
1606                     t.Regist[reg] = 0x13;
1607                     t.Value [reg] = t.Value[s.BaseReg-1];
1608                     return;
1609             }
1610         }
1611         break;
1612 
1613     case 0x89: case 0x8B: case 0x3B02:
1614         // MOV and MOVSXD instruction
1615         if (s.OperandSize < 32) break;          // Only part of register is changed
1616         if (!(s.MFlags & 1)) {
1617             // MOV reg,reg. Copy register contents
1618             if (Opcodei == 0x8B || Opcodei == 0x3B02) {
1619                 // Source register indicated by rm bits
1620                 srcreg = s.RM;
1621             }
1622             else {
1623                 // Source register indicated by reg bits
1624                 srcreg = s.Reg;
1625             }
1626             t.Regist[reg] = t.Regist[srcreg];
1627             t.Value [reg] = t.Value [srcreg];
1628             return;
1629         }
1630         // MOV reg,mem
1631         if (s.AddressFieldSize == 4 && s.AddressRelocation) {
1632             // Register contains the value of a symbol
1633             if (!(Relocations[s.AddressRelocation].Type & 1) && WordSize < 64) {
1634                 // Cannot follow position-independent code in 32 bit mode
1635                 t.Regist[reg] = 0;  return;
1636             }
1637             t.Regist[reg] = 0x10;
1638             t.Value [reg] = Relocations[s.AddressRelocation].TargetOldIndex;
1639 
1640             // Check if base or index register
1641             if (s.BaseReg || s.IndexReg) t.Regist[reg]++;
1642             return;
1643         }
1644         if (s.BaseReg && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18) {
1645             // Memory operand has a base register which contains the address of a symbol
1646             // Destination register will contain value of same symbol
1647             t.Regist[reg] = 0x10;
1648             t.Value [reg] = t.Value[s.BaseReg-1];
1649             if (s.IndexReg || s.AddressFieldSize || (t.Regist[s.BaseReg-1] & 1)) {
1650                 // There is an offset
1651                 t.Regist[reg] |= 1;
1652             }
1653             return;
1654         }
1655         if (s.IndexReg && (t.Regist[s.IndexReg-1] & 0xFE) == 0x18 && s.BaseReg && s.Scale == 0) {
1656             // Same as above, base and index registers swapped, scale factor = 1
1657             t.Regist[reg] = 0x10;
1658             t.Value [reg] = t.Value[s.IndexReg-1];
1659             if (s.AddressFieldSize || (t.Regist[s.IndexReg-1] & 1)) {
1660                 // There is an offset
1661                 t.Regist[reg] |= 1;
1662             }
1663             return;
1664         }
1665         break;
1666 
1667     case 0x01: case 0x03:
1668         // ADD instruction
1669         if (s.OperandSize < 32) break;          // Only part of register is changed
1670         if (Opcodei == 0x03) {
1671             // Source register indicated by rm bits
1672             srcreg = s.RM;
1673         }
1674         else {
1675             // Source register indicated by reg bits
1676             srcreg = s.Reg;
1677         }
1678         if (t.Regist[srcreg] == 1 && (t.Regist[reg] & 0xFE) == 0x10) {
1679             // Adding imagebase to the value of a symbol or array element
1680             t.Regist[reg] = 0x12;
1681             return;
1682         }
1683         if (t.Regist[reg] == 1 && (t.Regist[srcreg] & 0xFE) == 0x10) {
1684             // Adding the value of a symbol or array element to the imagebase
1685             t.Regist[reg] = 0x12;
1686             t.Value [reg] = t.Value[srcreg];
1687             return;
1688         }
1689         if ((((t.Regist[srcreg] & 0xFE) == 0x18 && (t.Regist[reg] & 0xFE) == 0x10)
1690             ||   ((t.Regist[srcreg] & 0xFE) == 0x10 && (t.Regist[reg] & 0xFE) == 0x18))
1691             && t.Value [reg] == t.Value[srcreg]) {
1692                 // Adding the value of an array element to the base address of same array.
1693                 // This is a computed jump address if array contains self-relative addresses
1694                 t.Regist[reg] = 0x13;
1695                 return;
1696         }
1697         break;
1698 
1699     case 0x3902:
1700         // CDQE. eax sign extended to rax. Ignore
1701         return;
1702     case 0x3900: case 0x3901:
1703         // CBW, CWDE. rax changed
1704         t.Regist[0] = 0;
1705         return;
1706     case 0x3A00: case 0x3A01: case 0x3A02:
1707         // CWD, CDQ, CQO. rdx changed
1708         t.Regist[2] = 0;
1709         return;
1710     }
1711     // Anything else: Remember that this register is changed
1712     t.Regist[reg] = 0;
1713 
1714     if (OpcodeOptions & 8) {
1715         // Registers other than destination register may be changed
1716         t.Reset();
1717     }
1718 }
1719 
1720 
UpdateSymbols()1721 void CDisassembler::UpdateSymbols() {
1722     // Find unnamed symbols, determine symbol types,
1723     // update symbol list, call CheckJumpTarget if jump/call.
1724     // This function is called during pass 1 for every instruction
1725 
1726     uint32_t OpI;                                   // Operand index
1727     uint32_t OperandType;                           // Type of operand
1728     uint32_t SymOldI;                               // Symbol table old index
1729     uint32_t SymNewI;                               // Symbol table new index
1730 
1731     // Loop through all operands for one instruction
1732     for (OpI = 0; OpI < 4; OpI++) {
1733         if (s.Operands[OpI]) {
1734             SymNewI = 0;                            // Reset symbol index
1735             OperandType = s.Operands[OpI];          // Operand type
1736 
1737             // Check if indirect jump/call
1738             if (OpI == 0 && ((s.OpcodeDef->Destination + 1) & 0xFE) == 0x0C) {
1739                 OperandType = s.OpcodeDef->Destination;
1740             }
1741 
1742             // Check operand type
1743             if ((OperandType & 0xF0) == 0x80) {
1744                 // This is a jump/call destination
1745 
1746                 if (!s.ImmediateRelocation) {
1747                     // Has no reference to other symbol. Make one
1748 
1749                     // Relocation type
1750                     uint32_t RelocationType = 2;        // Self relative
1751                     if ((OperandType & 0xFE) == 0x84) RelocationType = 8; // Far
1752 
1753                     // Scope
1754                     uint32_t TargetScope = 1;           // Function local
1755                     if ((OperandType & 0xFF) >= 0x83) TargetScope = 2;  // Call or far. File scope
1756 
1757                     // Make relocation and target symbol
1758                     SymNewI = MakeMissingRelocation(Section, s.ImmediateField, RelocationType, OperandType, TargetScope);
1759 
1760                     // Update labels
1761                     LabelBegin = 0;
1762                     FindLabels();
1763 
1764                     if (TargetScope == 1 && SymNewI) {
1765                         // Short or near jump (not call). Update range of current function
1766                         CheckJumpTarget(SymNewI);
1767                     }
1768                 }
1769                 else {
1770                     // Jump or call to relocated symbol
1771                     // Look up in Relocations table
1772                     SymOldI = Relocations[s.ImmediateRelocation].TargetOldIndex;
1773 
1774                     // Look up in symbol table
1775                     SymNewI = Symbols.Old2NewIndex(SymOldI);
1776                     if (Symbols[SymNewI].OldIndex) {
1777                         // Found
1778                         // Check if symbol already has a scope assigned
1779                         if (Symbols[SymNewI].Scope == 0) Symbols[SymNewI].Scope = 2;
1780 
1781                         // Check if symbol already has a type assigned
1782                         if ((OperandType & 0xFF) > (Symbols[SymNewI].Type & 0xFF)) {
1783 
1784                             // No type assigned yet, or new type overrides old type
1785                             Symbols[SymNewI].Type = (Symbols[SymNewI].Type & ~0xFF) | OperandType;
1786                         }
1787                         // Check if jump target is in data segment
1788                         if (Symbols[SymNewI].Section > 0 && (uint16_t)(Symbols[SymNewI].Section) < Sections.GetNumEntries()
1789                             && (Sections[Symbols[SymNewI].Section].Type & 0xFF) > 1) {
1790                                 s.Warnings1 |= 0x80000;
1791                         }
1792                     }
1793                 }
1794             }
1795             else {
1796                 // Check if reference to data symbol
1797                 if ((s.Operands[OpI] & 0x2000) && (s.Operands[OpI] & 0xD0000) == 0x10000) {
1798                     // Memory operand
1799 
1800                     if (s.AddressRelocation) {
1801                         // There is a reference to a data symbol
1802 
1803                         // Make exception for LEA: Target type is unknown
1804                         if (Opcodei == 0x8D) OperandType = 0;
1805 
1806                         // Check and update relocation target
1807                         CheckRelocationTarget(s.AddressRelocation, OperandType, GetDataItemSize(OperandType));
1808                     }
1809                     else if (s.AddressFieldSize >= 4) {
1810                         // Relocation missing. Make one if possible
1811                         uint32_t TargetType = OperandType;
1812                         if (Opcodei == 0x8D) {
1813                             // Source of LEA instruction has no type
1814                             TargetType = 0;
1815                         }
1816                         // Check addressing mode
1817                         if (s.MFlags & 0x100) {
1818                             // There is a rip-relative reference
1819                             // Make relocation record and target record
1820                             MakeMissingRelocation(Section, s.AddressField, 2, TargetType, 2);
1821                             FindRelocations();
1822                         }
1823                         else if (s.BaseReg && t.Regist[s.BaseReg-1] == 1 && s.AddressFieldSize == 4) {
1824                             // Memory operand has a base register which has been traced
1825                             // to contain the image base. Make image-relative relocation
1826                             MakeMissingRelocation(Section, s.AddressField, 4, TargetType, 2);
1827                             FindRelocations();
1828                         }
1829                         else if (ImageBase && !(RelocationsInSource & 0x20) && s.AddressFieldSize >= 4) {
1830                             // No base relocations in source. Make direct relocation
1831                             MakeMissingRelocation(Section, s.AddressField, 1, TargetType, 2, s.AddressFieldSize);
1832                             FindRelocations();
1833                         }
1834                     }
1835                 }
1836                 if ((s.Operands[OpI] & 0xF0) >= 0x10 && (s.Operands[OpI] & 0xF0) < 0x40) {
1837                     // Immediate operand
1838 
1839                     if (!s.ImmediateRelocation && s.ImmediateFieldSize >= 4
1840                         && ImageBase && !(RelocationsInSource & 0x20)
1841                         && (Opcodei == 0x3000 || Opcodei == 0x68 || (Opcodei & 0xFFF8) == 0xB8)) {
1842                             // instruction = MOV or PUSH, immediate operand may be an address
1843                             // Make a relocation if immediate value is valid address
1844                             MakeMissingRelocation(Section, s.ImmediateField, 1, 0, 2, s.ImmediateFieldSize);
1845                             FindRelocations();
1846                     }
1847                     if (s.ImmediateRelocation) {
1848                         // There is a reference to the offset of a data symbol
1849                         // Check and update relocation target
1850                         CheckRelocationTarget(s.ImmediateRelocation, 0, 0);
1851                     }
1852                 }
1853             }
1854             if (((OperandType + 1) & 0xFE) == 0x0C) {
1855                 // Indirect jump or call. Find jump table or virtual table
1856 
1857                 // Default relocation type for jump table is direct
1858                 uint32_t RelocationType = 1;
1859 
1860                 // Find symbol table entry for jump pointer or call pointer
1861                 if (s.AddressRelocation && Relocations[s.AddressRelocation].TargetOldIndex) {
1862                     // Look up in symbol table
1863                     SymNewI = Symbols.Old2NewIndex(Relocations[s.AddressRelocation].TargetOldIndex);
1864                 }
1865                 else SymNewI = 0;
1866 
1867                 if (SymNewI == 0 || Symbols[SymNewI].OldIndex == 0) {
1868                     // Symbol for jump table not found yet
1869                     if (s.Operands[OpI] & 0x2000) {
1870                         // There is a memory operand
1871                         if (s.BaseReg && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18) {
1872                             // Memory operand has a base register which has been traced to
1873                             // point to a known symbol
1874                             SymNewI = Symbols.Old2NewIndex(t.Value[s.BaseReg-1]);
1875                         }
1876                         else if (((s.BaseReg != 0) ^ (s.IndexReg != 0)) && s.AddressFieldSize == 4 && ExeType) {
1877                             // Here is a jump table with an absolute address
1878                             SymNewI = MakeMissingRelocation(Section, s.AddressField, 1, 0x0B, 2, s.AddressFieldSize);
1879                         }
1880                     }
1881                     else {
1882                         // Jump or call to a register operand
1883                         // Check if the register value has been traced
1884                         if ((t.Regist[s.RM] & 0x1C) == 0x10) {
1885                             // Register contains an array element. Get symbol for this array
1886                             SymNewI = Symbols.Old2NewIndex(t.Value[s.RM]);
1887                             // Check relocation type
1888                             if (t.Regist[s.RM] == 0x12) {
1889                                 // Register contains array element plus imagebase.
1890                                 RelocationType = 4;         // Array elements must have image-relative relocations
1891                             }
1892                             if (t.Regist[s.RM] == 0x13) {
1893                                 // Register contains array element plus base address of same array
1894                                 RelocationType = 0x10;         // Array elements must have self-relative relocations
1895                             }
1896                         }
1897                     }
1898                 }
1899                 // Check if valid symbol for jump/call table
1900                 if (SymNewI && Symbols[SymNewI].OldIndex) {
1901                     // Jump/call table found
1902 
1903                     if ((s.Operands[OpI] & 0x2000) && !s.BaseReg && !s.IndexReg && Opcodei == 0x2704) {
1904                         // Simple memory operand
1905                         // Assign name if symbol is import table entry
1906                         CheckImportSymbol(SymNewI);
1907                     }
1908 
1909                     // Check relocation type if memory operand
1910                     if ((s.Operands[OpI] & 0x2000) && s.BaseReg && t.Regist[s.BaseReg-1] == 1) {
1911                         // Memory operand has a base register which has been traced to contain the imagebase
1912                         RelocationType = 4;               // Array elements must have image-relative relocations
1913                     }
1914 
1915                     // Check symbol type
1916                     if ((Symbols[SymNewI].Type & 0xFF) < (OperandType & 0xFF) /*|| (Symbols[SymNewI].Type & 0xF0)*/) {
1917                         // No type assigned yet, or new type overrides old type
1918                         Symbols[SymNewI].Type = OperandType;
1919                     }
1920 
1921                     // Check symbol size
1922                     if (RelocationType == 4 && WordSize > 16) {
1923                         Symbols[SymNewI].Size = 4;     // Image relative
1924                     }
1925                     if (RelocationType == 0x10 && WordSize > 16) {
1926                         Symbols[SymNewI].Size = 4;     // Relative to table base
1927                     }
1928                     else {
1929                         Symbols[SymNewI].Size = WordSize / 8; // Direct
1930                     }
1931 
1932                     // Follow what the jump/call table points to
1933                     FollowJumpTable(SymNewI, RelocationType);
1934                 }
1935             }
1936         }
1937     }
1938 }
1939 
1940 
FollowJumpTable(uint32_t symi,uint32_t RelType)1941 void CDisassembler::FollowJumpTable(uint32_t symi, uint32_t RelType) {
1942     // Check jump/call table and its targets
1943     uint32_t sym1, sym2, sym3 = 0;                  // Symbol indices
1944     uint32_t NextLabel;                             // Offset of next label
1945     uint32_t Pos;                                   // Current position
1946     SARelocation rel;                             // Relocation record for searching
1947     int32_t  Reli;                                  // Index to relocation
1948     uint32_t NewType = 0;                           // Type to assign to symbol
1949     int32_t  SourceSection;                         // Section of relocation source
1950     uint32_t SourceOffset;                          // Offset of relocation source
1951     uint32_t SourceSize;                            // Size of relocation source
1952     uint32_t TargetType;                            // Type for relocation target
1953     uint32_t RefPoint = 0;                          // Reference point if relocationtype = 0x10
1954     int32_t  Addend = 0;                            // Inline addend
1955 
1956     // Check if sym is  valid
1957     if (Symbols[symi].OldIndex == 0) return;
1958 
1959     // Get type of target
1960     switch (s.OpcodeDef->Destination & 0xFF) {
1961     case 0x0B:  // Near indirect jump. Target type = jump destination
1962         NewType = 0x82;  break;
1963     case 0x0C:  // Near indirect call. Target type = call destination
1964         NewType = 0x83;  break;
1965     default:    // Should not occur
1966         return;
1967     }
1968 
1969     // Check symbol size
1970     if ((RelType & 4) && WordSize >= 32) {
1971         // Image relative relocation
1972         Symbols[symi].Size = 4;
1973     }
1974     else if ((RelType & 0x10) && WordSize >= 32) {
1975         // Relative to table base
1976         Symbols[symi].Size = 4;
1977         RefPoint = Symbols[symi].OldIndex; // Reference point = table base
1978     }
1979     else if ((RelType & 0x21) || Symbols[symi].Size == 0) {
1980         // Direct near relocation
1981         Symbols[symi].Size = WordSize / 8;
1982     }
1983 
1984     // Check symbol type
1985     if (uint32_t(s.OpcodeDef->Destination & 0xFF) > (Symbols[symi].Type & 0xFF)) {
1986         // No type assigned yet, or new type overrides old type
1987         Symbols[symi].Type = s.OpcodeDef->Destination | 0x4000000;
1988     }
1989     // Make sure symbol is marked as data
1990     Symbols[symi].Type |= 0x4000000;
1991 
1992     // Check if symbol has a scope assigned
1993     if (Symbols[symi].Scope == 0) Symbols[symi].Scope = 2;
1994 
1995     // Save symbol properties
1996     // (The reference to sym will become invalid when new symbols are created)
1997     SourceSection = Symbols[symi].Section;
1998     SourceOffset  = Symbols[symi].Offset;
1999     SourceSize    = Symbols[symi].Size;
2000     TargetType    = 0x82;
2001 
2002     // Target type = jump label
2003     if ((Symbols[symi].Type & 0xFF) == 0x0C) TargetType++;  // Target type = call label
2004 
2005     // Find next label
2006     sym1 = Symbols.FindByAddress(SourceSection, SourceOffset, &sym2, &sym3);
2007     if (sym1 && sym3) {
2008         // Assume that table ends at next label
2009         NextLabel = Symbols[sym3].Offset;
2010     }
2011     else {
2012         // No next label. End at source section end
2013         NextLabel = Sections[SourceSection].InitSize;
2014     }
2015 
2016     // Loop through table of jump/call addresses
2017     for (Pos = SourceOffset; Pos < NextLabel; Pos += SourceSize) {
2018 
2019         // Search for relocation source at table entry
2020         rel.Section = SourceSection;
2021         rel.Offset  = Pos;
2022         Reli = Relocations.Exists(rel);
2023 
2024         if (Reli > 0) {
2025             // Relocation found. Check target
2026             CheckRelocationTarget(Reli, TargetType, 0);
2027         }
2028         else {
2029             // No relocation here. Make one if possible
2030 
2031             uint32_t symi = MakeMissingRelocation(rel.Section, rel.Offset, RelType, TargetType, 2, 0, RefPoint);
2032             if (!symi) {
2033                 // Failed to make a meaningful relocation. End jump table
2034                 break;
2035             }
2036             int32_t TargetSection = Symbols[symi].Section;
2037             if (!TargetSection || (Sections[TargetSection].Type & 0xFF) != 1) {
2038                 // Target is not in code section. End jump table
2039                 break;
2040             }
2041             // Find the newly made relocation
2042             Reli = Relocations.Exists(rel);
2043             if (Reli <= 0) break;
2044         }
2045         // Relocation found. Check if valid
2046         if (!(Relocations[Reli].Type & 0x37) || !Relocations[Reli].TargetOldIndex) {
2047             // Wrong relocation type or invalid. Stop searching
2048             break;
2049         }
2050         // Find relocation target
2051         uint32_t TargetSymI = Symbols.Old2NewIndex(Relocations[Reli].TargetOldIndex);
2052         if (!TargetSymI) {
2053             // Target invalid
2054             break;
2055         }
2056 
2057         // Calculate target address
2058         Addend = Relocations[Reli].Addend;
2059         // Check inline addend if target is section-relative and this is an object file
2060         if (!ExeType && Symbols[TargetSymI].Offset == 0) {
2061 
2062             switch (SourceSize) {
2063             case 2:
2064                 Addend += *(int16_t*)(Sections[SourceSection].Start + Pos);
2065                 break;
2066             case 4: case 8:
2067                 Addend += *(int32_t*)(Sections[SourceSection].Start + Pos);
2068                 break;
2069             default:
2070                 Addend += 0;
2071             }
2072             if (Addend) {
2073                 // Make new symbol at target address
2074                 uint32_t NewSymOffset = Addend;
2075                 if (Relocations[Reli].Type & 2) {  // relative
2076                     if (RelType == 0x10) {  // arbitrary reference point
2077                         NewSymOffset -= (Relocations[Reli].Offset - SourceOffset);
2078                     }
2079                 }
2080                 uint32_t NewSym = Symbols.NewSymbol(Symbols[TargetSymI].Section, NewSymOffset, 2);
2081                 if (NewSym) TargetSymI = NewSym;
2082             }
2083         }
2084 
2085         // Update target symbol type
2086         if ((Symbols[TargetSymI].Type & 0xFF) < NewType) {
2087             Symbols[TargetSymI].Type = (Symbols[TargetSymI].Type & ~0xFF) | NewType;
2088         }
2089         // Extend current function to include target
2090         CheckJumpTarget(TargetSymI);
2091 
2092         // Update NextLabel in case new target is between Pos and NextLabel
2093         if (Symbols[TargetSymI].Section == SourceSection && Symbols[TargetSymI].Offset > Pos && Symbols[TargetSymI].Offset < NextLabel) {
2094             NextLabel = Symbols[TargetSymI].Offset;
2095         }
2096     }
2097 
2098     if (Pos < NextLabel) {
2099         // There is no label after jump table. Make one with zero scope
2100         SASymbol SymAfter;
2101         SymAfter.Reset();
2102         SymAfter.Section = SourceSection;
2103         SymAfter.Offset  = Pos;
2104         SymAfter.Type    = (Sections[SourceSection].Type & 0xFF) == 1 ? 0x82 : 0;
2105         Symbols.NewSymbol(SymAfter);
2106     }
2107 }
2108 
2109 
MakeMissingRelocation(int32_t Section,uint32_t Offset,uint32_t RelType,uint32_t TargetType,uint32_t TargetScope,uint32_t SourceSize,uint32_t RefPoint)2110 uint32_t CDisassembler::MakeMissingRelocation(int32_t Section, uint32_t Offset, uint32_t RelType, uint32_t TargetType, uint32_t TargetScope, uint32_t SourceSize, uint32_t RefPoint) {
2111     // Make a relocation and its target symbol from inline address
2112     /* This function is used for executable files that have already been
2113     relocated for making the relocation information that has been
2114     lost as well as the symbol record that the relocation should
2115     point to.
2116     Parameters:
2117     Section     Section of relocation source
2118     Offset      Offset of relocation source
2119     RelType     Relocation type: 1 = direct, 2 = self relative, 4 = image relative, 0x10 = relative to reference point
2120     TargetType  Symbol type for target
2121     TargetScope Scope for target symbol
2122     SourceSize  Size of source field (0 = default for relocation type and WordSize)
2123     RefPoint    Reference point if RelType = 0x10 (symbol old index)
2124 
2125     The return value is a symbol new index for the target, or zero if failure
2126 
2127     The size of the relocation source is implied from RelType
2128     A symbol record for the target will be made if none exists.
2129     The scope of the target symbol will be file local (2)
2130     */
2131 
2132     SARelocation Rel;                             // Temporary relocation record
2133     SASymbol Sym;                                 // Temporary symbol record for target
2134     Sym.Reset();
2135     int32_t  irel;                                  // Relocation index
2136     uint32_t isym = 0;                              // Symbol new index
2137     int64_t  InlineA;                               // Inline address or displacement
2138     int64_t  TargetAbsAddr;                         // Absolute address of target
2139 
2140     // Check if Section valid
2141     if (Section <= 0 || (uint32_t)Section >= Sections.GetNumEntries() || Offset >= Sections[Section].InitSize || !Sections[Section].Start) {
2142         return 0;
2143     }
2144 
2145     // Check if a relocation would be missing
2146     if (RelType & 1) {
2147         // Direct relocation
2148         if (RelocationsInSource & 0x20) return 0;  // Source file has base relocations. There would be a relocation here if needed
2149     }
2150     else if (RelType & 4) {
2151         // Image relative
2152         if (!ExeType) return 0;                    // Object file. There would be a relocation here if needed
2153     }
2154 
2155     // Check if a relocation already exists
2156     Rel.Section = Section;
2157     Rel.Offset  = Offset;
2158     irel = Relocations.Exists(Rel);
2159     if (irel > 0) return 0;                       // Relocation exists. Don't do anything
2160 
2161     if (SourceSize == 0) {
2162         // Source size not specified. Get default source size
2163         if ((TargetType & 0xFF) == 0x81) {
2164             // Short jump
2165             SourceSize = 1;
2166         }
2167         else if (RelType & 1) {
2168             // Direct relocation. Size depends on word size
2169             SourceSize = WordSize / 8;
2170         }
2171         else if (RelType & 0x12) {
2172             // Self relative or relative to table base
2173             SourceSize = (WordSize == 16) ? 2 : 4;
2174         }
2175         else if (RelType & 4 && WordSize > 16) {
2176             // Image relative
2177             SourceSize = 4;
2178         }
2179         else {
2180             // Other value. Ignore
2181             return 0;
2182         }
2183     }
2184 
2185     // Get inline address or displacement from source address
2186     if (SourceSize == 8) {
2187         InlineA = *(int64_t*)(Sections[Section].Start + Offset);
2188     }
2189     else if (SourceSize == 4) {
2190         InlineA = *(int32_t*)(Sections[Section].Start + Offset);
2191     }
2192     else if (SourceSize == 2) {
2193         InlineA = *(int16_t*)(Sections[Section].Start + Offset);
2194     }
2195     else { // 1
2196         InlineA = *(int8_t*)(Sections[Section].Start + Offset);
2197     }
2198 
2199     // Get absolute virtual address of target
2200     if (RelType & 1) {
2201         // Direct address
2202         TargetAbsAddr = InlineA;
2203     }
2204     else if (RelType & 2) {
2205         // Self relative. Translate self-relative to absolute address
2206         TargetAbsAddr = InlineA + ImageBase + SectionAddress + IEnd;
2207     }
2208     else if (RelType & 0x10) {
2209         // Relative to reference point. Translate relative to absolute address
2210         uint32_t RefSym = Symbols.Old2NewIndex(RefPoint);
2211         TargetAbsAddr = InlineA + Symbols[RefSym].Offset + Sections[Symbols[RefSym].Section].SectionAddress;
2212     }
2213     else {
2214         // Image relative
2215         TargetAbsAddr = InlineA + ImageBase;
2216     }
2217 
2218     if (ExeType) {
2219         // Executable file
2220         // Translate to section:offset address
2221         if (TranslateAbsAddress(TargetAbsAddr, Sym.Section, Sym.Offset)) {
2222 
2223             // Make a symbol for this address if none exists
2224             Sym.Scope = TargetScope;
2225             Sym.Type  = TargetType;
2226             isym = Symbols.NewSymbol(Sym);
2227         }
2228         else if (TargetAbsAddr == ImageBase && TargetAbsAddr) {
2229             // Reference to image base (nonzero)
2230             // Make a symbol for image base if none exists
2231             Sym.Scope = 0x20;
2232             Sym.Type  = 0;
2233             isym = Symbols.NewSymbol(Sym);
2234             if (isym && Symbols[isym].Name == 0) {
2235                 Symbols.AssignName(isym, "__ImageBase");
2236             }
2237         }
2238     }
2239     else {
2240         // Object file
2241         Sym.Section = Section;
2242         Sym.Offset  = (uint32_t)TargetAbsAddr - SectionAddress;
2243 
2244         // Make a symbol for this address if none exists
2245         Sym.Scope = TargetScope;
2246         Sym.Type  = TargetType;
2247         isym = Symbols.NewSymbol(Sym);
2248     }
2249 
2250     if ((RelType & 2) && (TargetType & 0xF0) == 0x80 && Sym.Section == Section && CodeMode == 1) {
2251         // Relocation not needed for relative jump/call within same section
2252         return isym;
2253     }
2254 
2255     if (isym) {
2256         // Relocation addend
2257         int32_t Addend = -(int32_t)InlineA;
2258         if (RelType & 2) {
2259             // Correct self-relative record for bias
2260             if (s.MFlags & 0x100) {
2261                 // rip-relative address
2262                 Addend -= IEnd - s.AddressField;
2263             }
2264             else {
2265                 // self-relative jump etc.
2266                 Addend -= SourceSize;
2267             }
2268         }
2269 
2270         // Make a relocation record
2271         AddRelocation (Section, Offset, Addend, RelType, SourceSize, Symbols[isym].OldIndex, RefPoint);
2272 
2273         // Update s.AddressRelocation and s.ImmediateRelocation
2274         if (CodeMode & 3) {
2275             FindRelocations();
2276 
2277             // Remove warning for absolute address
2278             s.Warnings1 &= ~0x8000;
2279         }
2280     }
2281     return isym;
2282 }
2283 
2284 
CheckImportSymbol(uint32_t symi)2285 void CDisassembler::CheckImportSymbol(uint32_t symi) {
2286     // Check for indirect jump to import table entry
2287 
2288     if (Symbols[symi].DLLName) {
2289         // Instruction is an indirect jump to symbol table entry
2290         // Find label at current instruction
2291         uint32_t sym2 = Symbols.FindByAddress(Section, IBegin);
2292         if (sym2 && Symbols[sym2].Name == 0) {
2293             // Label at current instruction has no name
2294             // Give current instruction the import name without "_imp" prefix
2295             const char * ImpName = Symbols.GetName(symi);
2296             if (strncmp(ImpName, Symbols.ImportTablePrefix, (uint32_t)strlen(Symbols.ImportTablePrefix)) == 0) {
2297                 Symbols.AssignName(sym2, ImpName + (uint32_t)strlen(Symbols.ImportTablePrefix));
2298             }
2299         }
2300     }
2301 }
2302 
MarkCodeAsDubious()2303 void CDisassembler::MarkCodeAsDubious() {
2304     // Remember that this may be data in a code segment
2305     uint32_t sym1, sym2 = 0, sym3 = 0;              // Preceding and succeding symbols
2306 
2307     // Check likelihood that this is data rather than code
2308     if (((s.Errors & 0x4000) && ((s.Warnings1 & 0x10000000) || CountErrors > 1))
2309         || CountErrors > 5) {
2310             // There are more than 5 errors, or consecutive zeroes and at
2311             // least one more error or inaccessible code.
2312             // Consider this sufficient evidence that this is very unlikely
2313             // to be code. Show it as data only
2314             CodeMode = 4;
2315     }
2316     if (CodeMode < 4) {
2317         // This may be code containing errors or interpreted out of phase.
2318         // Set CodeMode to dubious so that it will be shown as both code and data
2319         CodeMode = 2;
2320     }
2321 
2322     if (Pass & 0x0F) {
2323         // Pass 1. Mark preceding label as dubious
2324 
2325         // Check nearest preceding label
2326         if (LabelBegin == 0) {
2327             // There is no preceding label. Make one
2328             Symbols.NewSymbol(Section, IBegin, 1);
2329             LabelBegin = 0;
2330             FindLabels();
2331         }
2332 
2333         // Find symbol index for nearest preceding label
2334         sym1 = Symbols.FindByAddress(Section, LabelBegin, &sym2, &sym3);
2335 
2336         if (sym1 && sym2) {
2337             // Mark symbol as dubious or data
2338             Symbols[sym2].Type = (Symbols[sym2].Type & ~0xF000000) | (CodeMode << 24);
2339         }
2340 
2341         // Request repetition of pass 1
2342         Pass |= 0x100;
2343 
2344         /* Skip to next label.
2345         This is removed because we want to accumulate errors as evidence for
2346         determined whether this is code or data
2347         // Is there a label after this?
2348         if (sym3) {
2349         // Skip to next label
2350         if (Symbols[sym3].Offset > IEnd) {
2351         IBegin = IEnd = Symbols[sym3].Offset;
2352         }
2353         }
2354         else {
2355         // No next label. Skip to section end
2356         IBegin = IEnd = SectionEnd;
2357         }
2358         */
2359     }
2360 }
2361 
2362 
NextInstruction1()2363 int CDisassembler::NextInstruction1() {
2364     // Go to next instruction or data item. Return 0 if none. Pass 1
2365     IBegin = IEnd;
2366 
2367     // Reset everything in s field
2368     s.Reset();
2369 
2370     // Return if there are more instructions
2371     return (IBegin < SectionEnd);
2372 }
2373 
NextInstruction2()2374 int CDisassembler::NextInstruction2() {
2375     // Go to next instruction or data item. Return 0 if none. Pass 2
2376     IBegin = IEnd;
2377 
2378     // Reset everything in s field
2379     s.Reset();
2380 
2381     // Return if there are more instructions
2382     return (IBegin < FunctionEnd && IBegin < LabelEnd && IBegin < SectionEnd);
2383 }
2384 
ParseInstruction()2385 void CDisassembler::ParseInstruction() {
2386     // Parse one opcode
2387     FlagPrevious = 0;                             // Reset flag from previous instruction
2388 
2389     s.OpcodeStart1 = IBegin;                      // Index to start of instruction
2390 
2391     // Scan prefixes first
2392     ScanPrefixes();
2393 
2394     // Find opcode map entry
2395     FindMapEntry();                               // Find entry in opcode maps
2396 
2397     // Find operands
2398     FindOperands();                               // Interpret mod/reg/rm and SIB bytes and find operands
2399 
2400     // Determine the types of each operand
2401     FindOperandTypes();
2402 
2403     if (s.Prefixes[3] == 0x62) {
2404         if (s.Prefixes[6] & 0x20) { // EVEX
2405             FindBroadcast();                      // Find broadcast and offet multiplier for EVEX code
2406         }
2407         else {  // MVEX
2408             SwizTableLookup(); // Find swizzle table record if MVEX prefix
2409         }
2410     }
2411 
2412     // Find any relocation sources in this instruction
2413     FindRelocations();
2414 
2415     // Find any reasons for warnings
2416     FindWarnings();
2417 
2418     // Find any errors
2419     FindErrors();
2420 
2421     if (!s.Errors && CodeMode == 1) {
2422         // Find instruction set
2423         FindInstructionSet();
2424 
2425         // Update symbol types for operands of this instruction
2426         UpdateSymbols();
2427 
2428         // Trace register values
2429         UpdateTracer();
2430     }
2431 }
2432 
2433 
ScanPrefixes()2434 void CDisassembler::ScanPrefixes() {
2435     // Scan prefixes
2436     uint32_t i;                                            // Index to current byte
2437     uint8_t  Byte;                                         // Current byte of code
2438     for (i = IBegin; i < SectionEnd; i++) {
2439 
2440         // Read code byte
2441         Byte = Buffer[i];
2442 
2443         // Check if Byte is a prefix
2444         if (WordSize == 64 && (Byte & 0xF0) == 0x40) {
2445 
2446             // This is a REX prefix
2447             if (Byte & 0x08) {
2448                 // REX.W prefix
2449                 StorePrefix(4, 0x48);                    // REX.W also in category operand size
2450             }
2451             StorePrefix(7, Byte);                        // Store in category REX
2452         }
2453         else if (i+1 < SectionEnd &&
2454             ((((Byte & 0xFE) == 0xC4 || Byte == 0x62) && (WordSize == 64 || (Buffer[i+1] >= 0xC0)))
2455             || (Byte == 0x8F && (Buffer[i+1] & 0x38)))) {
2456                 // This is a VEX, EVEX, MVEX or XOP prefix
2457 
2458                 // Check for invalid prefixes before this
2459                 if (s.Prefixes[5] | s.Prefixes[7]) s.Warnings1 |= 0x800;
2460 
2461                 // Get equivalent prefixes
2462                 uint8_t prefix3 = Byte;                    // Repeat prefix (F2, F3) or VEX prefix (C4, C5, 62)
2463                 uint8_t prefix4;                           // 66, 48 Operand size prefix
2464                 uint8_t prefix5;                           // 66, F2, F3 operand type prefixes
2465                 uint8_t prefix6;                           // VEX.mmmmm and VEX.L
2466                 uint8_t prefix7;                           // equivalent to REX prefix
2467                 uint8_t vvvv;                              // vvvv register operand
2468                 if (Byte == 0xC5) {
2469                     // 2-bytes VEX prefix
2470                     if (i+2 >= SectionEnd) {
2471                         IEnd = i+2;
2472                         s.Errors |= 0x10; return;        // End of buffer reached
2473                     }
2474                     Byte = Buffer[++i];                  // Second byte
2475                     prefix5 = Byte & 3;                  // pp bits
2476                     prefix6 = (Byte << 3) & 0x20;        // L bit
2477                     prefix6 |= 1;                        // mmmmm bits = 1 for 0F map
2478                     vvvv = (~Byte >> 3) & 0x0F;          // vvvv operand
2479                     prefix7 = 0x10;                      // Indicate 2-bytes VEX prefix
2480                     prefix7 |= (~Byte >> 5) & 4;         // R bit
2481                 }
2482                 else {
2483                     // 3 or 4-bytes VEX/EVEX/MVEX prefix or XOP prefix
2484                     if (i+3+(Byte==0x62) >= SectionEnd) {
2485                         IEnd = i+3+(Byte==0x62);
2486                         s.Errors |= 0x10; return;        // End of buffer reached
2487                     }
2488                     prefix7 = (Byte == 0x8F) ? 0x80 : 0x20;// Indicate 3/4-bytes VEX prefix or XOP prefix
2489                     Byte = Buffer[++i];                  // Second byte
2490                     prefix6 = Byte & 0x1F;               // mmmmm bits
2491                     prefix7 |= (~Byte >> 5) & 7;         // R,X,B bits
2492                     Byte = Buffer[++i];                  // Third byte
2493                     prefix5 = Byte & 3;                  // pp bits
2494                     prefix6 |= (Byte << 3) & 0x20;       // VEX: L bit, MVEX: 0, EVEX: 1
2495                     vvvv = (~Byte >> 3) & 0x0F;          // vvvv operand
2496                     prefix7 |= (Byte >> 4) & 8;          // W bit
2497                     if (prefix3 == 0x62) {
2498                         // 4-bytes EVEX or MVEX prefix
2499                         prefix6 |= 0x40;                 // Indicates EVEX or MVEX prefix, bit 5 is 0 for MVEX, 1 for EVEX
2500                         Byte = Buffer[++i];              // Fourth byte
2501                         s.Kreg = Byte & 0x07;            // kkk mask register
2502                         vvvv |= (~Byte & 8) << 1;        // extra v bit
2503                         s.Esss = Byte >> 4;              // EVEX: zLLb, MVEX: Esss bits
2504                     }
2505                 }
2506                 StorePrefix(3, prefix3);                 // VEX prefix
2507                 // Get operand size prefix
2508                 prefix4 = (prefix5 == 1) ? 0x66 : 0;
2509                 if (prefix7 & 8) prefix4 = 0x48;
2510                 StorePrefix(4, prefix4);                // Operand size prefix
2511                 // Translate operand type prefix values
2512                 static const uint8_t PrefixValues[4] = {0, 0x66, 0xF3, 0xF2};
2513                 prefix5 = PrefixValues[prefix5];
2514                 StorePrefix(5, prefix5);                // Operand type prefix
2515                 StorePrefix(6, prefix6);                // VEX mmmmm,L
2516                 StorePrefix(7, prefix7);                // REX prefix equivalent
2517                 s.Vreg = vvvv;                          // Store vvvv operand
2518                 // Next byte cannot be a prefix. Stop searching for prefixes
2519                 s.OpcodeStart1 = i + 1;
2520                 return;
2521         }
2522         else if (OpcodeMap0[Byte].InstructionFormat & 0x8000) {
2523 
2524             // This is a prefix (other than REX/VEX)
2525             switch (Byte) {
2526             case 0x26: case 0x2E: case 0x36: case 0x3E: case 0x64: case 0x65:
2527                 // Segment prefix
2528                 StorePrefix(0, Byte);                // Store prefix
2529                 if (Byte == 0x64) MasmOptions |= 2;  // Remember FS used
2530                 if (Byte == 0x65) MasmOptions |= 4;  // Remember GS used
2531                 break;
2532 
2533             case 0x67:
2534                 // Address size prefix
2535                 StorePrefix(1, Byte);  break;
2536 
2537             case 0xF0:
2538                 // Lock prefix
2539                 StorePrefix(2, Byte);  break;
2540 
2541             case 0xF2: case 0xF3:
2542                 // Repeat prefix
2543                 StorePrefix(3, Byte);  // Both in category repeat and operand type
2544                 StorePrefix(5, Byte);  break;
2545 
2546             case 0x66:
2547                 // Operand size
2548                 StorePrefix(4, Byte);  // Both in category operand size and operand type
2549                 StorePrefix(5, Byte);  break;
2550 
2551             default:
2552                 err.submit(9000);
2553             }
2554         }
2555         else {
2556             // This is not a prefix
2557             s.OpcodeStart1 = i;
2558             return;
2559         }
2560     }
2561     // Error: end of block reached before end of prefixes
2562     IEnd = i;
2563     s.Errors |= 0x10;
2564 }
2565 
2566 
StorePrefix(uint32_t Category,uint8_t Byte)2567 void CDisassembler::StorePrefix(uint32_t Category, uint8_t Byte) {
2568     // Store prefix according to category
2569     if (Category > 7) {err.submit(9000); return;} // Out of range
2570 
2571     // Check if we already have a prefix in this category
2572     if (s.Prefixes[Category]) {
2573         // We already have a prefix in this category
2574         if (s.Prefixes[Category] != Byte || Category == 7) {
2575             // Conflicting prefixes in this category
2576             s.Conflicts[Category]++;
2577         }
2578         else {
2579             // Same prefix occurs more than once
2580             s.Warnings1 |= 0x100;
2581         }
2582     }
2583     // Check if REX prefix before this
2584     if (s.Prefixes[7]) s.Errors |= 0x20;
2585 
2586     // Save prefix in category
2587     s.Prefixes[Category] = Byte;
2588 }
2589 
2590 
FindMapEntry()2591 void CDisassembler::FindMapEntry() {
2592     // Find entry in opcode maps
2593     uint32_t i = s.OpcodeStart1;                    // Index to current byte
2594     uint16_t Link;                                  // Link to another map
2595     uint8_t  Byte = Buffer[i];                      // Current byte of code or index into map
2596     uint32_t MapNumber = 0;                         // Map number in opcodes.cpp
2597     uint32_t StartPage;                             // Index to start page in opcode map
2598     uint32_t MapNumber0 = 0;                        // Fallback start page if no map entry found in StartPage
2599     SOpcodeDef const * MapEntry;                  // Point to current opcode map entry
2600 
2601     // Get start page from VEX.mmmm or XOP.mmmm bits if any
2602     switch (s.Prefixes[3]) {
2603     default:   // no multibyte prefix
2604         StartPage = 0;
2605         MapEntry = OpcodeTables[StartPage] + Byte;
2606         break;
2607     case 0xC4: case 0xC5: case 0x62:                // 2-, 3-, or 4-bytes VEX prefix
2608         StartPage = s.Prefixes[6] & 0x0F;           // 4 mmmm bits or 0 if no VEX or XOP prefix
2609         if (StartPage >= NumOpcodeStartPageVEX) {
2610             s.Errors |= 0x10000; StartPage = 0;     // mmmm bits out of range
2611         }
2612         MapNumber = OpcodeStartPageVEX[StartPage];
2613         if (StartPage == 1) MapNumber0 = 1;
2614         if (StartPage == 2 && s.Prefixes[3] == 0x62) {
2615             if ((s.Prefixes[5] & 0xFE) == 0xF2) {   // shortcut for EVEX F2 0F 38 and EVEX F3 0F 38
2616                 StartPage = 8 + (s.Prefixes[5] & 1);
2617                 MapNumber0 = MapNumber;
2618                 MapNumber = OpcodeStartPageVEX[StartPage];
2619             }
2620         }
2621 
2622         // Get entry [Byte] in map
2623         MapEntry  = OpcodeTables[MapNumber] + Byte;
2624 
2625         // There are two entries for mm = 1: OpcodeMap1 for legacy code and OpcodeMapB1 for VEX-only code.
2626         // There are two entries for mm = 2: OpcodeMap2 for legacy code and OpcodeMapB2 for EVEX-only code with F3 prefix.
2627         // We don't want to have the same code in two different maps because this may cause errors if a code
2628         // is updated only in one of the maps.
2629         // Search the shortcut map first, then the default map
2630         if ((MapEntry->Name == 0 && MapEntry->TableLink == 0) || Byte >= OpcodeTableLength[MapNumber]) {
2631             // not found here, try in default map
2632             MapNumber = MapNumber0;
2633             MapEntry  = OpcodeTables[MapNumber] + Byte;
2634         }
2635         if (MapNumber == 0) s.Errors |= 0x10000;   // no map found
2636         break;
2637     case 0x8F:  // XOP prefix
2638         StartPage = (s.Prefixes[6] & 0x1F) - 8;    // 4 mmmm bits or 0 if no VEX or XOP prefix
2639         if (StartPage >= NumOpcodeStartPageXOP) {
2640             s.Errors |= 0x10000; StartPage = 0;     // mmmm bits out of range
2641         }
2642         MapEntry = OpcodeStartPageXOP[StartPage] + Byte;// Get entry [Byte] in map
2643     }
2644 
2645     // Save previous opcode and options
2646     *(uint32_t*)&PreviousOpcodei = *(uint32_t*)&Opcodei;
2647     *(uint32_t*)&Opcodei = 0;
2648 
2649     // Loop through map tree (exit loop when Link == 0)
2650     while (1) {
2651 
2652         // Check if MapEntry has a link to another map
2653         Link = MapEntry->TableLink;
2654 
2655         switch (Link) {
2656         case 0:      // No link
2657             // Final map entry found
2658             s.OpcodeStart2 = i;
2659             s.OpcodeDef    = MapEntry;
2660 
2661             // Save opcode and options
2662             Opcodei = (MapNumber << 8) | Byte;
2663             OpcodeOptions = MapEntry->Options;
2664 
2665             // Return success
2666             return;
2667 
2668         case 1:      // Use following byte as index into next table
2669             if (i >= SectionEnd) {
2670                 // Instruction extends beyond end of block
2671                 IEnd = i;  s.Errors |= 0x10;
2672                 s.OpcodeStart2 = i;
2673                 return;
2674             }
2675             Byte = Buffer[++i];                     // Get next byte of code as index
2676             break;
2677 
2678         case 2:      // Use reg field of mod/reg/rm byte as index into next table
2679             Byte = (Buffer[i+1] >> 3) & 7;          // Read reg bits
2680             break;
2681 
2682         case 3:      // Use mod < 3 vs. mod == 3 as index into next table
2683             Byte = (Buffer[i+1] & 0xC0) == 0xC0;    // 1 if mod == 3
2684             break;
2685 
2686         case 4:      // Use mod and reg fields of mod/reg/rm byte as index into next table,
2687             // first 8 entries indexed by reg for mod < 3, next 8 entries indexed by reg for mod = 3.
2688             Byte = (Buffer[i+1] >> 3) & 7;          // Read reg bits
2689             if ((Buffer[i+1] & 0xC0) == 0xC0) Byte += 8; // Add 8 if mod == 3
2690             break;
2691 
2692         case 5:      // Use rm bits of mod/reg/rm byte as index into next table
2693             Byte = Buffer[i+1] & 7;                 // Read r/m bits
2694             break;
2695 
2696         case 6:      // Use immediate byte after any other operands as index into next table
2697             s.OpcodeStart2 = i;
2698             s.OpcodeDef    = MapEntry;
2699             FindOperands();                         // Find size of all operand fields and end of instruction
2700             Byte = Buffer[IEnd - 1];                // Last byte of instruction
2701             break;
2702 
2703         case 7:      // Use mode as index into next table (16, 32, 64 bits)
2704             switch (WordSize) {
2705             case 16:
2706                 Byte = 0;  break;
2707             case 32: default:
2708                 Byte = 1;  break;
2709             case 64:
2710                 Byte = 2;
2711             }
2712             break;
2713 
2714         case 8:      // Use operand size as index into next table (16, 32, 64 bits)
2715             switch (WordSize) {
2716             case 64:
2717                 if (s.Prefixes[4] == 0x48) {         // REX.W prefix = 64 bit
2718                     Byte = 2;  break;
2719                 }
2720             // Else continue in case 32:
2721             case 32: default:
2722                 Byte = (s.Prefixes[4] == 0x66) ? 0 : 1;  break;
2723             case 16:
2724                 Byte = (s.Prefixes[4] == 0x66) ? 1 : 0;  break;
2725             }
2726             break;
2727 
2728         case 9:      // Use operand type prefixes as index into next table (none, 66, F2, F3)
2729             switch (s.Prefixes[5]) {
2730             case 0: default:
2731                 Byte = 0;  break;
2732             case 0x66:
2733                 Byte = 1;
2734                 if (s.Prefixes[3] == 0xF2) Byte = 2;      // F2/F3 take precedence over 66 in (tzcnt instruction)
2735                 else if (s.Prefixes[3] == 0xF3) Byte = 3;
2736                 break;
2737             case 0xF2:
2738                 Byte = 2;  break;
2739             case 0xF3:
2740                 Byte = 3;  break;
2741             }
2742             break;
2743 
2744         case 0xA:    // Use address size as index into next table (16, 32, 64 bits)
2745             switch (WordSize) {
2746             case 64:
2747                 Byte = (s.Prefixes[1] == 0x67) ? 1 : 2;  break;
2748             case 32: default:
2749                 Byte = (s.Prefixes[1] == 0x67) ? 0 : 1;  break;
2750             case 16:
2751                 Byte = (s.Prefixes[1] == 0x67) ? 1 : 0;  break;
2752             }
2753             break;
2754 
2755         case 0x0B:  // Use VEX prefix and VEX.L bits as index into next table
2756             // 0: VEX absent, 1: VEX.L=0, 2: VEX.L=1, 3:MVEX or EVEX.LL=2, 4: EVEX.LL=3
2757             // (VEX absent, VEX.L=0, VEX.L=1)
2758             if ((s.Prefixes[7] & 0xB0) == 0) {
2759                 Byte = 0;                            // VEX absent
2760             }
2761             else if ((s.Prefixes[6] & 0x60) == 0x60) { // EVEX
2762                 Byte = ((s.Esss >> 1) & 3) + 1; // EVEX.LL bits
2763             }
2764             else if ((s.Prefixes[6] & 0x60) == 0x40) { // MVEX
2765                 Byte = 3;
2766             }
2767             else {  // VEX
2768                 Byte = 1 + (s.Prefixes[6] >> 5 & 1); // 1 + VEX.L
2769             }
2770             break;
2771 
2772         case 0x0C:   // Use VEX.W bit as index into next table
2773             Byte = (s.Prefixes[7] & 0x08) >> 3;
2774             break;
2775 
2776         case 0x0D:   // Use vector size by VEX.L bit and EVEX/MVEX as index into next table
2777             // 0: VEX.L=0, 1: VEX.L=1, 2:MVEX or EVEX.LL=2, 3: EVEX.LL=3
2778             Byte = (s.Prefixes[6] >> 5) & 1;        // VEX.L indicates xmm or ymm
2779             if (s.Prefixes[3] == 0x62) {
2780                 if (s.Prefixes[6] & 0x20) {
2781                     // EVEX. Use LL bits
2782                     Byte = (s.Esss >> 1) & 3;
2783                 }
2784                 else {
2785                     // MVEX. Always 512 bits
2786                     Byte = 2;
2787                 }
2788             }
2789             break;
2790 
2791         case 0x0E:   // Use VEX type as index into next table: 0 = 2 or 3 bytes VEX, 1 = 4 bytes EVEX
2792             Byte = (s.Prefixes[3] == 0x62);         // EVEX
2793             break;
2794 
2795         case 0x0F:   // Use MVEX.E bit as index into next table
2796             Byte = (s.Prefixes[3] == 0x62 && (s.Esss & 8));         // MVEX.E bit
2797             break;
2798 
2799         case 0x10:   // Use assembly language dialect as index into next table
2800             Byte = Syntax;
2801             break;
2802 
2803         case 0x11:   // Use VEX prefix type as index into next table. (0: none, 1: VEX prefix, 2: EVEX prefix, 3: MVEX prefix)
2804             if ((s.Prefixes[3] & ~1) == 0xC4) Byte = 1;   // 2 or 3-bytes VEX prefix
2805             else if (s.Prefixes[3] == 0x62) {             // EVEX or MVEX
2806                 if (s.Prefixes[6] & 0x20) Byte = 2;       // EVEX
2807                 else Byte = 3;                            // MVEX
2808             }
2809             else Byte = 0;                                // no VEX
2810             break;
2811 
2812         default:     // Internal error in map tree
2813             err.submit(9007, MapNumber);
2814             s.OpcodeStart2 = i;
2815             return;
2816         }
2817 
2818         // Get next map from branched tree of maps
2819         MapNumber = MapEntry->InstructionSet;
2820         if (MapNumber >= NumOpcodeTables1 || OpcodeTableLength[MapNumber] == 0) {
2821             err.submit(9007, MapNumber);  return;   // Map number out of range
2822         }
2823 
2824         // Use Byte as index into new map. Check if within range
2825         if (Byte >= OpcodeTableLength[MapNumber]) {
2826             // Points outside map. Get last entry in map containing default
2827             Byte = OpcodeTableLength[MapNumber] - 1;
2828         }
2829         // Point to entry [Byte] in new map
2830         MapEntry = OpcodeTables[MapNumber] + Byte;
2831         if (MapEntry == 0) {
2832             err.submit(9007, MapNumber);  return;   // Map missing
2833         }
2834 
2835     }  // Loop end. Go to next
2836 }
2837 
2838 
FindOperands()2839 void CDisassembler::FindOperands() {
2840     // Interpret mod/reg/rm and SIB bytes and find operands
2841     s.MFlags = 0;                                 // Memory operand flags:
2842     // 1 = has memory operand,
2843     // 2 = has mod/reg/rm byte,
2844     // 4 = has SIB byte,
2845     // 8 = has DREX byte (AMD SSE5 instructions never implemented),
2846     // 0x10 = is rip-relative
2847     uint8_t ModRegRM;                               // mod/reg/rm byte
2848     uint8_t SIB;                                    // SIB byte
2849 
2850     // Get address size
2851     if (WordSize == 64) s.AddressSize = (s.Prefixes[1] == 0x67) ? 32 : 64;
2852     else s.AddressSize = (WordSize == 16) ^ (s.Prefixes[1] == 0x67) ? 16 : 32;
2853 
2854     s.AddressFieldSize = s.ImmediateFieldSize = 0;// Initialize
2855 
2856     // Position of next element in opcode
2857     s.AddressField = s.OpcodeStart2 + 1;
2858 
2859     // Check if there is a mod/reg/rm byte
2860     if (s.OpcodeDef->InstructionFormat & 0x10) {
2861 
2862         // There is a mod/reg/rm byte
2863         s.MFlags |= 2;
2864 
2865         if (s.OpcodeStart2 + 1 >= FunctionEnd) {
2866             CheckForMisplacedLabel();
2867         }
2868 
2869         // Read mod/reg/rm byte
2870         ModRegRM = Buffer[s.AddressField++];
2871         s.Mod =  ModRegRM >> 6;                    // mod = bit 6-7
2872         s.Reg = (ModRegRM >> 3) & 7;               // reg = bit 3-5
2873         s.RM  =  ModRegRM & 7;                     // RM  = bit 0-2
2874 
2875         // Check if there is a SIB byte
2876         if (s.AddressSize > 16 && s.Mod != 3 && s.RM == 4) {
2877             // There is a SIB byte
2878             s.MFlags |= 4;                          // Remember we have a SIB byte
2879             SIB = Buffer[s.AddressField++];         // Read SIB byte
2880             // Get scale, index, base
2881             s.Scale = SIB >> 6;               // Scale = bit 6-7
2882             s.IndexReg = (SIB >> 3) & 7;      // Index = bit 3-5
2883             s.BaseReg = SIB & 7;              // Base  = bit 0-2
2884         }
2885 
2886         // Check if there is a DREX byte (AMD SSE5 instructions never implemented):
2887         if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x14) {
2888             s.MFlags |= 8;                          // Remember we have a DREX byte
2889             s.Vreg = Buffer[s.AddressField++];      // Read DREX byte
2890             // The R,X,B bits of Vreg are equivalent to the corresponding bits of a REX prefix:
2891             s.Prefixes[7] |= (s.Vreg & 7) | 0x80;
2892         }
2893 
2894         if (s.AddressField > FunctionEnd) {
2895             CheckForMisplacedLabel();
2896         }
2897 
2898         // Check REX prefix
2899         if (s.Prefixes[7] & 4) s.Reg |= 8;         // Add REX.R to reg field
2900         if (s.Prefixes[7] & 1) s.RM  |= 8;         // Add REX.B to RM  field
2901 
2902         // Interpretation of mod/reg/rm byte is different for 16 bit address size
2903         if (s.AddressSize == 16) {
2904 
2905             if (s.Mod != 3) {
2906                 // There is a memory operand
2907                 s.MFlags |= 1;
2908 
2909                 // Get size of address/displacement operand from mod bits
2910                 // (Will be overwritten later if none)
2911                 if (s.Mod == 1) {
2912                     s.AddressFieldSize = 1;           // Size of displacement field
2913                 }
2914                 else if (s.Mod == 2) {
2915                     s.AddressFieldSize = 2;           // Size of displacement field
2916                 }
2917 
2918                 // Check if direct memory operand
2919                 if (s.Mod == 0 && s.RM == 6) {
2920                     // Direct memory operand and nothing else
2921                     s.AddressFieldSize = 2;           // Size of address field
2922                 }
2923                 else {
2924                     // Indirect memory operand
2925                     // Get base and index registers
2926                     // [bx+si], [bx+di], [bp+si], [bp+di], [si], [di], [bp], [bx]
2927                     static const uint8_t BaseRegister [8] = {3+1, 3+1, 5+1, 5+1, 0, 0, 5+1, 3+1};
2928                     static const uint8_t IndexRegister[8] = {6+1, 7+1, 6+1, 7+1, 6+1, 7+1, 0, 0};
2929                     // Save register number + 1, because 0 means none.
2930                     s.BaseReg  = BaseRegister [s.RM]; // Base register = BX or BP or none
2931                     s.IndexReg = IndexRegister[s.RM]; // Index register = SI or DI or none
2932                     s.Scale = 0;                      // No scale factor in 16 bit mode
2933                 }
2934             }
2935         }
2936         else {
2937             // Address size is 32 or 64 bits
2938 
2939             if (s.Mod != 3) {
2940                 // There is a memory operand
2941                 s.MFlags |= 1;
2942 
2943                 // Get size of address/displacement operand from mod bits
2944                 // (Will be overwritten later if none)
2945                 if (s.Mod == 1) {
2946                     s.AddressFieldSize = 1;              // Size of displacement field
2947                 }
2948                 else if (s.Mod == 2) {
2949                     s.AddressFieldSize = 4;              // Size of displacement field
2950                 }
2951 
2952                 // Check if direct memory operand
2953                 if (s.Mod == 0 && (s.RM & 7) == 5) {
2954                     // Direct memory operand and nothing else
2955                     s.AddressFieldSize = 4;           // Size of address field
2956                 }
2957                 else if ((s.RM & 7) == 4) {
2958                     // There is a SIB byte
2959 
2960                     // Check REX prefix
2961                     if (s.Prefixes[7] & 2) s.IndexReg |= 8; // Add REX.X to index
2962                     if (s.Prefixes[7] & 1) s.BaseReg  |= 8; // Add REX.B to base
2963                     s.RM &= 7;                              // Remove REX.B from RM
2964 
2965                     s.BaseReg++;                      // Add 1 so that 0 means none
2966                     if (s.IndexReg == 4 && (s.OpcodeDef->InstructionFormat & 0x1F) != 0x1E) {
2967                         // No index register
2968                         s.IndexReg = 0;
2969                     }
2970                     else {
2971                         s.IndexReg++;                  // Add 1 so that 0 means none
2972                     }
2973 
2974                     if (s.Mod == 0 && s.BaseReg == 5+1) {
2975                         // No base register, 32 bit address
2976                         s.AddressFieldSize = 4;
2977                         s.BaseReg = 0;
2978                     }
2979                 }
2980                 else {
2981                     // Indirect memory operand and no SIB byte
2982                     s.BaseReg = s.RM;                 // Get base register from RM bits
2983                     s.BaseReg++;                      // Add 1 because 0 means none
2984                 }
2985             }
2986             else {
2987                 // No memory operand. Address size is 32 or 64 bits
2988             }
2989             // Check if rip-relative
2990             if (WordSize == 64 && (s.MFlags & 7) == 3 && !s.BaseReg && s.AddressFieldSize == 4) {
2991                 // Memory operand is rip-relative
2992                 s.MFlags |= 0x100;
2993             }
2994         }
2995         if (s.Prefixes[3] == 0x62) {
2996             // EVEX prefix gives another extra register bit
2997             s.Reg += ~(s.Prefixes[6]) & 0x10;        // extra r bit = highest m bit
2998             if (s.Mod == 3) {
2999                 // Register operands only. B bit extended by X bit
3000                 s.RM += (s.Prefixes[7] & 2) << 3;
3001             }
3002             else if (s.IndexReg && s.OpcodeDef->InstructionFormat == 0x1E) {
3003                 // VSIB byte. Index register extended by one of the v bits, base register < 16
3004                 s.IndexReg += s.Vreg & 0x10;
3005             }
3006         }
3007     }
3008 
3009     // Get operand size
3010     uint32_t OpSizePrefix = 0;
3011     if (s.Prefixes[4] == 0x66 && (s.OpcodeDef->AllowedPrefixes & 0x100))  OpSizePrefix = 1; // Operand size prefix
3012     if (s.Prefixes[4] == 0x48 && (s.OpcodeDef->AllowedPrefixes & 0x1000)) OpSizePrefix = 2; // Rex.W prefix
3013     s.OperandSize = (WordSize == 16) ^ (OpSizePrefix & 1) ? 16 : 32;
3014     if (OpSizePrefix == 2) s.OperandSize = 64;
3015     if ((s.OpcodeDef->AllowedPrefixes & 0x3000) == 0x3000 && WordSize == 64 && (OpSizePrefix & 2)) s.OperandSize = 64;
3016 
3017     // Get any immediate operand
3018     // Offset to immediate operand field, if any
3019     s.ImmediateField = s.AddressField + s.AddressFieldSize;
3020 
3021     // Check InstructionFormat for immediate and direct operands
3022     switch (s.OpcodeDef->InstructionFormat & 0x0FE0) {
3023     case 0x20:  // Has 2 bytes immediate operand
3024         s.ImmediateFieldSize = 2;  break;
3025 
3026     case 0x40:  // Has 1 byte immediate operand or short jump
3027         s.ImmediateFieldSize = 1;  break;
3028 
3029     case 0x60:  // Has 3 bytes immediate operand (enter)
3030         s.ImmediateFieldSize = 3;  break;
3031 
3032     case 0x80:  // Has 2 or 4 bytes immediate operand or near jump/call
3033         if ((s.OpcodeDef->Destination & 0xFE) == 0x82) {
3034             // Near jump/call address size depends on WordSize and operand size prefix,
3035             // but not on address size prefix
3036             s.ImmediateFieldSize = (WordSize == 16) ^ (s.Prefixes[4] == 0x66) ? 2 : 4;
3037             if (WordSize == 64) s.ImmediateFieldSize = 4; // 66 prefix ignored in 64 bit mode
3038         }
3039         else {
3040             // Size of other immediate data depend on operand size
3041             s.ImmediateFieldSize = (s.OperandSize == 16) ? 2 : 4;
3042         }
3043         break;
3044 
3045     case 0x100:  // Has 2, 4 or 8 bytes immediate operand
3046         s.ImmediateFieldSize = s.OperandSize / 8;
3047         break;
3048 
3049     case 0x200:  // Has 2+2 or 4+2 bytes far direct jump/call operand
3050         s.ImmediateFieldSize = (WordSize == 16) ^ (s.Prefixes[4] == 0x66) ? 4 : 6;
3051         break;
3052 
3053     case 0x400:  // Has 2, 4 or 8 bytes direct memory operand
3054         s.AddressFieldSize = s.AddressSize / 8;
3055         s.AddressField = s.ImmediateField;
3056         s.ImmediateField = s.AddressField + s.AddressFieldSize;
3057         s.ImmediateFieldSize = 0;
3058         break;
3059 
3060     default:     // No immediate operand
3061         s.ImmediateFieldSize = 0;
3062     }
3063 
3064     // Find instruction end
3065     IEnd = s.ImmediateField + s.ImmediateFieldSize;
3066     if (IEnd > FunctionEnd) {
3067         CheckForMisplacedLabel();
3068         if (IEnd > SectionEnd) {
3069             // instruction extends outside code block
3070             s.Errors |= 0x10;
3071             IEnd = SectionEnd;
3072         }
3073     }
3074 }
3075 
FindBroadcast()3076 void CDisassembler::FindBroadcast() {
3077     // Find broadcast and offset multiplier for EVEX code
3078     if (s.Mod != 3) {
3079         // has memory operand
3080         uint32_t m;       // find memory operand
3081         for (m = 0; m < s.MaxNumOperands; m++) {
3082             if (s.Operands[m] & 0x2000) break;
3083         }
3084         if (m == s.MaxNumOperands) return;   // no memory operand found. should not occur
3085         uint32_t r;       // find largest vector operand
3086         uint32_t vectortype = 0;
3087         for (r = 0; r < s.MaxNumOperands; r++) {
3088             if ((s.Operands[r] & 0xF00) > vectortype) vectortype = s.Operands[r] & 0xF00;
3089         }
3090         uint32_t vectorsize = GetDataItemSize(vectortype);
3091         if (m < s.MaxNumOperands) {
3092             if ((s.OpcodeDef->EVEX & 1) && (s.Esss & 1)) {
3093                 // broadcasting. multiplier = element size
3094                 s.OffsetMultiplier = GetDataElementSize(s.Operands[m]);
3095                 // operand size = element size
3096                 s.Operands[m] &= ~0xF00;
3097                 if (s.OffsetMultiplier >= vectorsize) {
3098                     s.Warnings2 |= 0x200; // broadcasting to scalar
3099                 }
3100             }
3101             else if (s.OpcodeDef->EVEX & 0x1000) {
3102                 //  multiplier = element size, not broadcasting
3103                 s.OffsetMultiplier = GetDataElementSize(s.Operands[m]);
3104             }
3105             else if (s.OpcodeDef->EVEX & 0x2000) {
3106                 // multiplier = fraction of largest vector size
3107                 s.OffsetMultiplier = vectorsize >> ((s.OpcodeDef->EVEX & 0x600) >> 9);
3108             }
3109             else {
3110                 // not broadcasting. multiplier = vector size
3111                 s.OffsetMultiplier = GetDataItemSize(s.Operands[m]);
3112             }
3113         }
3114     }
3115 }
3116 
3117 
SwizTableLookup()3118 void CDisassembler::SwizTableLookup() {
3119     // Find the swizzle table record that correspond to the instruction and the sss bits for MVEX instructions
3120     int sw = (s.OpcodeDef->MVEX & 0x1F);  // swizzle metatable index
3121     int opsize = 0;                          // operand size override
3122     if (s.OpcodeDef->Options & 1) {
3123         // operand size depends on prefix bits
3124         if (s.OpcodeDef->AllowedPrefixes & 0x1000) {
3125             // operand size depends on W bit
3126             if (s.Prefixes[7] & 8) opsize = 1;
3127         }
3128         else if (s.OpcodeDef->AllowedPrefixes & 0x300) {
3129             // operand size depends on 66 implied prefix
3130             if (s.Prefixes[5] == 0x66) opsize = 1;
3131         }
3132     }
3133     int IsMem = s.Mod != 3;                  // has memory operand
3134     // find record in swizzle tables
3135     s.SwizRecord = &(SwizTables[sw | opsize][IsMem][s.Esss & 7]);
3136     // find offset multiplier
3137     if (s.OpcodeDef->MVEX & 0x40) {
3138         // address single element
3139         s.OffsetMultiplier = s.SwizRecord->elementsize;
3140     }
3141     else {
3142         // address vector or subvector
3143         s.OffsetMultiplier = s.SwizRecord->memopsize;
3144         if (s.OffsetMultiplier == 0) {
3145             // no swizzle, use vector size
3146             uint16_t source = s.OpcodeDef->Source2;                 // last source operand
3147             if (!(source & 0xF00)) source = s.OpcodeDef->Source1; // if source2 is not a vector, use source1
3148             switch ((source >> 8) & 0xF) {
3149             case 2:
3150                 // vector size depends on prefixes, currently only zmm supported when EVEX prefix is present
3151                 s.OffsetMultiplier = 0x40;  break;
3152             case 4:
3153                 s.OffsetMultiplier = 0x10;  break;
3154             case 5:
3155                 s.OffsetMultiplier = 0x20;  break;
3156             case 6:
3157                 s.OffsetMultiplier = 0x40;  break;
3158             }
3159         }
3160     }
3161 }
3162 
FindOperandTypes()3163 void CDisassembler::FindOperandTypes() {
3164     // Determine the type of each operand
3165     uint32_t i, j, k;                               // Operands index
3166     int nimm = 0;                                 // Number of immediate operands
3167     uint32_t AllowedPref = s.OpcodeDef->AllowedPrefixes;
3168     uint32_t oper;                                  // current operand definition
3169 
3170     s.MaxNumOperands = 4;  // may be 5 in the future in cases where EVEX field is used as an extra operand
3171 
3172     // Copy all operands from opcode map and zero-extend
3173     for (i = 0; i < s.MaxNumOperands; i++) {
3174         s.Operands[i] = (&s.OpcodeDef->Destination)[i];
3175     }
3176 
3177     // Check instruction format
3178     switch (s.OpcodeDef->InstructionFormat & 0x1F) {
3179 
3180     case 2: // No operands or only immediate operand
3181         break;
3182 
3183     case 3: // Register operand indicated by bits 0-2 of opcode
3184         // Find which of the operands it applies to
3185         if ((s.Operands[0] & 0xFF) > 0 && (s.Operands[0] & 0xFF) < 0xB) i = 0; else i = 1;
3186         // Indicate this operand uses opcode bits
3187         s.Operands[i] |= 0x20000;
3188         break;
3189 
3190     case 4: // Register operand indicated by VEX.vvvv bits
3191         // Find which of the operands it applies to
3192         if ((s.Operands[0] & 0xFF) < 0xB || (s.Operands[0] & 0xFF) == 0x95) i = 0; else i = 1;
3193         // Indicate this operand uses VEX.vvvv bits
3194         s.Operands[i] |= 0x60000;
3195         break;
3196 
3197     case 0x11: // There is a mod/reg/rm byte and one operand
3198         // Find which of the operands it applies to
3199         for (j = k = 0; j < 2; j++) {
3200             if (s.Operands[j]) {
3201                 switch (s.Operands[j] & 0xF0) {
3202                 case 0: case 0x40: case 0x50:
3203                     // This operand can have use rm bits
3204                     k |= j+1;
3205                 }
3206             }
3207         }
3208         if (k < 1 || k > 2) {
3209             // There must be one, and only one, operand that can use rm bits
3210             s.Errors |= 0x80000;  // Error in opcode table
3211         }
3212         else {
3213             // Indicate this operand uses mod and rm bits
3214             s.Operands[k-1] |= 0x30000;
3215         }
3216         break;
3217 
3218     case 0x12: // There is a mod/reg/rm byte and two operands. Destination is reg
3219         // Destination operand uses s.Reg bits
3220         s.Operands[0] |= 0x40000;
3221         // Source operand uses mod and rm bits
3222         s.Operands[1] |= 0x30000;
3223         break;
3224 
3225     case 0x13: // There is a mod/reg/rm byte and two operands. Source is reg
3226         // Destination operand uses mod and rm bits
3227         s.Operands[0] |= 0x30000;
3228         // Source operand uses s.Reg bits
3229         s.Operands[1] |= 0x40000;
3230         break;
3231 
3232     case 0x14: case 0x15: { // There is a DREX byte and three or four operands
3233         // Combine OC0 from DREX byte and OC1 from opcode byte into Operand configuration
3234         int OperandConfiguration = ((s.Vreg >> 3) & 1) | ((Get<uint8_t>(s.OpcodeStart2) >> 1) & 2);
3235         // Determine operands
3236         s.Operands[0] |= 0x50000;                  // Destination determined by dest field of DREX byte
3237         if (s.OpcodeDef->InstructionFormat & 1) {
3238             // Four XMM or register operands
3239             switch (OperandConfiguration) {
3240     case 0:
3241         s.Operands[1]  = s.Operands[0];      // 1. source = same as destination
3242         s.Operands[2] |= 0x40000;            // 2. source = reg
3243         s.Operands[3] |= 0x30000;            // 3. source = rm
3244         break;
3245     case 1:
3246         s.Operands[1]  = s.Operands[0];      // 1. source = same as destination
3247         s.Operands[2] |= 0x30000;            // 2. source = rm
3248         s.Operands[3] |= 0x40000;            // 3. source = reg
3249         break;
3250     case 2:
3251         s.Operands[1] |= 0x40000;           // 1. source = reg
3252         s.Operands[2] |= 0x30000;           // 2. source = rm
3253         s.Operands[3]  = s.Operands[0];     // 3. source = same as destination
3254         break;
3255     case 3:
3256         s.Operands[1] |= 0x30000;           // 1. source = rm
3257         s.Operands[2] |= 0x40000;           // 2. source = reg
3258         s.Operands[3]  = s.Operands[0];     // 3. source = same as destination
3259         break;
3260             }
3261         }
3262         else {
3263             // Three XMM or register operands
3264             if ((OperandConfiguration & 1) == 0) {
3265                 // OC0 = 0
3266                 s.Operands[1] |= 0x40000;           // 1. source = reg
3267                 s.Operands[2] |= 0x30000;           // 2. source = rm
3268             }
3269             else {
3270                 // OC0 = 1
3271                 s.Operands[1] |= 0x30000;           // 1. source = rm
3272                 s.Operands[2] |= 0x40000;           // 2. source = reg
3273             }
3274         }
3275         break;}
3276 
3277     case 0x18: // Has VEX prefix and 2 operands
3278         // Dest = VEX.vvvv, src = rm, opcode extension in r bits.
3279         // Destination operand uses VEX.vvvv bits
3280         s.Operands[0] |= 0x60000;
3281         // Source1 operand uses mod and rm bits
3282         s.Operands[1] |= 0x30000;
3283         if (!(s.Prefixes[7] & 0xB0)) {
3284             // One operand omitted if no VEX prefix
3285             s.Operands[0] = s.Operands[1];  s.Operands[1] = 0;
3286         }
3287         break;
3288 
3289     case 0x19: // Has VEX prefix and 3 operands
3290         // Dest = r, src1 = VEX.vvvv, src2 = rm.
3291         s.Operands[0] |= 0x40000;
3292         s.Operands[1] |= 0x60000;
3293         s.Operands[2] |= 0x30000;
3294         if (!(s.Prefixes[7] & 0xB0)) {
3295             // One source operand omitted if no VEX prefix
3296             s.Operands[1] = s.Operands[2];  s.Operands[2] = 0;
3297         }
3298         // Preliminary AMD specification
3299         if ((AllowedPref & 0x7000) == 0x7000 && !(s.Prefixes[7] & 8)) {
3300             // Swap src1 and src2 if XOP prefix and XOP.W = 0
3301             k = s.Operands[1]; s.Operands[1] = s.Operands[2]; s.Operands[2] = k;
3302         }
3303         break;
3304 
3305     case 0x1A: // Has VEX prefix and 3 operands.
3306         // Dest = rm, src1 = VEX.v, src2 = r
3307         s.Operands[0] |= 0x30000;
3308         s.Operands[1] |= 0x60000;
3309         s.Operands[2] |= 0x40000;
3310         if (!(s.Prefixes[7] & 0xB0)) {
3311             // One source operand omitted if no VEX prefix
3312             s.Operands[1] = s.Operands[2];  s.Operands[2] = 0;
3313         }
3314         break;
3315 
3316     case 0x1B: // Has VEX prefix and 3 operands
3317         // Dest = r, src1 = rm, src2 = VEX.vvvv
3318         s.Operands[0] |= 0x40000;
3319         s.Operands[1] |= 0x30000;
3320         s.Operands[2] |= 0x60000;
3321         if (!(s.Prefixes[7] & 0xB0)) {
3322             // Last source operand omitted if no VEX prefix
3323             s.Operands[2] = 0;
3324         }
3325         break;
3326 
3327     case 0x1C: // Has VEX prefix and 4 operands
3328         // Dest = r,  src1 = VEX.v, src2 = rm, src3 = bits 4-7 of immediate byte
3329         s.Operands[0] |= 0x40000;
3330         s.Operands[1] |= 0x60000;
3331         s.Operands[2] |= 0x30000;
3332         s.Operands[3] |= 0x70000;
3333         if ((s.Prefixes[7] & 8) && (AllowedPref & 0x7000) == 0x7000) {
3334             // Swap src2 and src3 if VEX.W
3335             k = s.Operands[2]; s.Operands[2] = s.Operands[3]; s.Operands[3] = k;
3336         }
3337         nimm++;                                    // part of immediate byte used
3338         break;
3339 
3340     case 0x1D: // Has VEX prefix and 4 operands
3341         // Dest = r,  src1 = bits 4-7 of immediate byte, src2 = rm, src3 = VEX.vvvv
3342         s.Operands[0] |= 0x40000;
3343         s.Operands[1] |= 0x70000;
3344         s.Operands[2] |= 0x30000;
3345         s.Operands[3] |= 0x60000;
3346         if ((s.Prefixes[7] & 8) && (AllowedPref & 0x7000) == 0x7000) {
3347             // Swap src2 and src3 if VEX.W
3348             k = s.Operands[2]; s.Operands[2] = s.Operands[3]; s.Operands[3] = k;
3349         }
3350         nimm++;                                    // part of immediate byte used
3351         break;
3352 
3353     case 0x1E: // Has VEX prefix, VSIB and 1, 2 or 3 operands.
3354         if (s.Operands[0] & 0x2000) {
3355             // destination is memory
3356             // Dest = rm, src1 = r
3357             s.Operands[0] |= 0x30000;
3358             s.Operands[1] |= 0x40000;
3359             //if (s.Operands[2]) s.Operands[2] |= 0x60000;
3360         }
3361         else {
3362             // Dest = r, src1 = rm, src2 = VEX.v
3363             if (s.Operands[0]) s.Operands[0] |= 0x40000;
3364             s.Operands[1] |= 0x30000;
3365             if (s.Operands[2]) s.Operands[2] |= 0x60000;
3366         }
3367         break;
3368 
3369     default: // No explicit operands.
3370         // Check for implicit memory operands
3371         for (i = 0; i < 2; i++) {
3372             if (s.Operands[i] & 0x2000) {
3373                 // Direct memory operand
3374                 s.Operands[i] |= 0x10000;
3375                 if (s.OpcodeDef->InstructionFormat > 1) {
3376                     // There is an address field
3377                     s.AddressFieldSize = s.AddressSize / 8;
3378                     s.AddressField = s.OpcodeStart2 + 1;
3379                     s.MFlags |= 1;                    // Remember we have a memory operand
3380                 }
3381             }
3382         }
3383         break;
3384     }
3385 
3386     // Loop for destination and source operands
3387     for (i = 0; i < s.MaxNumOperands; i++) {
3388         // Ignore empty operands
3389         if (s.Operands[i] == 0) continue;
3390 
3391         // Immediate operands
3392         if ((s.Operands[i] & 0xFF) >= 0x10 && (s.Operands[i] & 0xFF) < 0x40) {
3393             if (nimm++) {
3394                 s.Operands[i] |= 0x200000;           // second immediate operand
3395             }
3396             else {
3397                 s.Operands[i] |= 0x100000;           // first immediate operand
3398             }
3399         }
3400 
3401         // Check if register or memory
3402         switch (s.Operands[i] & 0x3000) {
3403         case 0x1000:  // Must be register
3404             if ((s.Operands[i] & 0xF0000) == 0x30000 && s.Mod != 3 && (s.OpcodeDef->InstructionFormat & 0x10)) {
3405                 s.Errors |= 8;                       // Is memory. Indicate wrong operand type
3406                 s.Operands[i] = (s.Operands[i] & ~0x1000) | 0x2000;// Indicate it is memory
3407             }
3408             break;
3409 
3410         case 0x2000: // Must be memory operand
3411             if ((s.Operands[i] & 0xD0000) != 0x10000 || s.Mod == 3) {
3412                 s.Errors |= 8;                       // Is register. Indicate wrong operand type
3413                 s.Operands[i] = (s.Operands[i] & ~0x2000) | 0x1000; // Indicate it is register
3414             }
3415             break;
3416 
3417         case 0x0000: // Can be register or memory
3418             if ((s.Operands[i] & 0xF0000) == 0x10000) {
3419                 // Direct memory operand
3420                 s.Operands[i] |= 0x2000;  break;
3421             }
3422             if ((s.Operands[i] & 0xF0000) == 0x30000) {
3423                 // Indicated by mod/rm bits
3424                 if (s.Mod == 3) {
3425                     s.Operands[i] |= 0x1000;          // Is register
3426                 }
3427                 else {
3428                     s.Operands[i] |= 0x2000;          // Is memory
3429                 }
3430                 break;
3431             }
3432             if ((s.Operands[i] & 0xF0) != 0x10) {   // Not a constant
3433                 s.Operands[i] |= 0x1000;             // Anything else is register
3434             }
3435             break;
3436         }
3437 
3438         // Resolve types that depend on prefixes or WordSize
3439         switch (s.Operands[i] & 0xFF) {
3440         case 8: case 0x18: case 0x28: case 0x38: case 0xA8:
3441             // 16 or 32 bits
3442             s.Operands[i] &= ~0x0F;
3443             s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3;
3444             break;
3445 
3446         case 9: case 0x19: case 0x29: case 0x39: case 0xA9:
3447             // 8, 16, 32 or 64 bits, depending on operand size prefixes
3448             s.Operands[i] &= ~0x0F;
3449             switch (AllowedPref & 0x7000) {
3450             case 0x3000: default: // 32 or 64 depending on mode and 66 or REX.W prefix
3451                 s.Operands[i] |= (s.OperandSize == 16) ? 2 : ((s.OperandSize == 64) ? 4 : 3);
3452                 break;
3453             case 0x4000:  // VEX.W prefix determines integer (vector) operand size b/w
3454                 if ((s.Prefixes[7] & 8) == 0) {  // W bit
3455                     s.OperandSize = 8;
3456                     s.Operands[i] |= 1;
3457                 }
3458                 else {
3459                     s.OperandSize = 16;
3460                     s.Operands[i] |= 2;
3461                 }
3462                 break;
3463             case 0x5000:  // VEX.W and 66 prefix determines integer operand size b/w/d/q (mask instructions. B = 66W0, W = _W0, D = 66W1, Q = _W1)
3464                 s.Operands[i] |= (s.Prefixes[5] != 0x66) + ((s.Prefixes[7] & 8) >> 2) + 1;
3465                 break;
3466             }
3467             break;
3468 
3469         case 0xB: case 0xC: // 16, 32 or 64 bits. Fixed size = 64 in 64 bit mode
3470             s.Operands[i] &= ~0x0F;
3471             if (WordSize == 64) {
3472                 s.Operands[i] |= 4;
3473             }
3474             else {
3475                 s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3;
3476             }
3477             break;
3478 
3479         case 0xA: // 16, 32 or 64 bits. Default size = 64 in 64 bit mode
3480             s.Operands[i] &= ~0x0F;
3481             if (WordSize == 64) {
3482                 s.Operands[i] |= (s.OperandSize == 16) ? 2 : 4;
3483             }
3484             else {
3485                 s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3;
3486             }
3487             break;
3488 
3489         case 0xD: // 16+16, 32+16 or 64+16 bits far indirect pointer (jump or call)
3490             s.Operands[i] &= ~0x0F;
3491             s.Operands[i] |= (s.OperandSize == 16) ? 3 : ((s.OperandSize == 64) ? 5 : 7);
3492             break;
3493 
3494         case 0x4F: // XMM float. Size and precision depend on prefix bits
3495             s.Operands[i] &= ~0x7F;  // remove type
3496             if ((AllowedPref & 0x1000) && !((AllowedPref & 0xF00) == 0xE00)) {
3497                 // precision depends on VEX.W bit
3498                 if (s.Prefixes[7] & 8) {
3499                     s.Operands[i] |= 0x4C;
3500                 }
3501                 else {
3502                     s.Operands[i] |= 0x4B;
3503                 }
3504             }
3505             else {
3506                 // Size and precision depend on prefix: none = ps, 66 = pd, F2 = sd, F3 = ss
3507                 switch (s.Prefixes[5]) {
3508                 case 0:  // No prefix = ps
3509                     s.Operands[i] |= 0x4B;  break;
3510                 case 0x66: // 66 prefix = pd
3511                     s.Operands[i] |= 0x4C;  break;
3512                 case 0xF3: // F3 prefix = ss
3513                     s.Operands[i] |= 0x4B;
3514                     s.Operands[i] &= ~0xF00;  // make scalar
3515                     break;
3516                 case 0xF2: // F2 prefix = sd
3517                     s.Operands[i] |= 0x4C;
3518                     s.Operands[i] &= ~0xF00;  // make scalar
3519                     break;
3520                 };
3521                 break;
3522             }
3523         }
3524 
3525         // Resolve vector size
3526         switch (s.Operands[i] & 0xF00) {
3527         case 0x100: // MMX or XMM or YMM or ZMM depending on 66 prefix and VEX.L prefix and EVEX prefix
3528         case 0x200: // XMM or YMM or ZMM depending on prefixes
3529         case 0xF00: // Half the size defined by VEX.L prefix and EVEX.LL prefix. Minimum size = 8 bytes for memory, xmm for register
3530 
3531             oper = s.Operands[i] & ~0xF00;           // element type
3532             if (s.Prefixes[3] == 0x62) {             // EVEX or MVEX prefix
3533                 if (s.Prefixes[6] & 0x20) {
3534                     // EVEX prefix
3535                     // Do LL bits specify vector size when b = 1 for instructions that allow
3536                     // sae but not rounding? Perhaps not, because sae is only allowed for
3537                     // 512 bit vectors, but manual says otherwise.
3538                     // NASM version 2.11.06 sets LL = 0 when b = 1 for vrangeps instruction
3539                     //??if ((s.OpcodeDef->EVEX & 4) && (s.Mod == 3) && (s.Esss & 1)) {
3540                     if ((s.OpcodeDef->EVEX & 6) && (s.Mod == 3) && (s.Esss & 1)) {
3541                         // rounding control, register operand. L'L do not indicate vector size
3542                         oper |= 0x600;      // zmm
3543                     }
3544                     else if (s.OpcodeDef->EVEX & 8) {
3545                         // scalar
3546                         oper |= 0x400;      // xmm
3547                     }
3548                     else {
3549                         // L'L indicates vector size
3550                         oper |= 0x400 + ((s.Esss & 6) << 7); // xmm, ymm, zmm,
3551                     }
3552                 }
3553                 else {
3554                     // MVEX prefix
3555                     oper |= 0x600;          // zmm
3556                 }
3557             }
3558             else if (s.Prefixes[6] & 0x20) {
3559                 oper |= 0x500;              // VEX.L: ymm
3560             }
3561             else if (s.Prefixes[5] == 0x66 || (s.Operands[i] & 0x200)) {
3562                 oper |= 0x400;              // 66 prefix or mm not allowed: xmm
3563             }
3564             else {
3565                 oper |= 0x300;              // no prefix: mm
3566             }
3567             if ((s.Operands[i] & 0xF00) == 0xF00) {
3568                 // half size vector
3569                 oper -= 0x100;
3570                 if ((oper & 0x1000) || (s.OpcodeDef->InstructionFormat == 0x1E)) {
3571                     // is register or vsib index. minimum size is xmm
3572                     if ((oper & 0xF00) < 0x400) {
3573                         oper = (oper & ~0x300) | 0x400;
3574                     }
3575                 }
3576             }
3577             s.Operands[i] = oper;                     // save corrected vector size
3578 
3579             break;
3580         }
3581 
3582         // resolve types that depend on MVEX swizzle
3583         if ((s.Prefixes[6] & 0x60) == 0x40 && (s.Operands[i] & 0xF0000) == 0x30000) {
3584             int sw = (s.OpcodeDef->MVEX & 0x1F);
3585             if (sw) {
3586                 int optype = s.SwizRecord ? s.SwizRecord->memop : 0; //?
3587                 if (s.OpcodeDef->InstructionFormat == 0x1E) {
3588                     // vsib addressing: s.Operands[i] & 0xF00 indicates index register size, s.Operands[i] & 0xFF indicates operand size
3589                     s.Operands[i] = (s.Operands[i] & ~0xFF) | (optype & 0xFF);
3590                 }
3591                 else if (s.OpcodeDef->MVEX & 0x40) {
3592                     // operand is not a full vector
3593                     s.Operands[i] = (s.Operands[i] & ~0xFFF) | (optype & 0xFF);
3594                 }
3595                 else {
3596                     // get operand type from swizzle table only
3597                     if (optype) s.Operands[i] = optype | 0x30000;
3598                 }
3599             }
3600         }
3601     }
3602 }
3603 
3604 
FindWarnings()3605 void CDisassembler::FindWarnings() {
3606     // Find any reasons for warnings in code
3607     uint32_t i;                                     // Operand index
3608     uint32_t OperandSize;                           // Operand size
3609     uint8_t RexBits = 0;                            // Bits in REX prefix
3610 
3611     if ((s.OpcodeDef->Options & 0x80) && s.ImmediateFieldSize > 1 && s.ImmediateRelocation == 0) {
3612         // Check if sign-extended operand can be used
3613         if ((s.ImmediateFieldSize == 2 && Get<int16_t>(s.ImmediateField) == Get<int8_t>(s.ImmediateField))
3614             ||  (s.ImmediateFieldSize == 4 && Get<int32_t>(s.ImmediateField) == Get<int8_t>(s.ImmediateField))) {
3615                 s.Warnings1 |= 1; // Sign-extended operand could be used
3616         }
3617     }
3618     if (WordSize == 64 && s.ImmediateFieldSize == 8 && s.ImmediateRelocation == 0) {
3619         // We have a 64 bit immediate operand. Could it be made shorter?
3620         if (Get<uint32_t>(s.ImmediateField+4) == 0) {
3621             s.Warnings1 |= 2;                        // Upper half is zero. Could use zero-extension
3622         }
3623         else if (Get<int64_t>(s.ImmediateField) == Get<int32_t>(s.ImmediateField)) {
3624             s.Warnings1 |= 1;                        // Could use sign-extension
3625         }
3626     }
3627     // Check if displacement could be made smaller
3628     if (s.AddressFieldSize > 0 && s.AddressRelocation == 0
3629         && (s.BaseReg || (s.IndexReg && !s.BaseReg && s.Scale < 2))
3630         && s.OffsetMultiplier <= 1) {
3631             // There is a displacement which might be unnecessary
3632             switch (s.AddressFieldSize) {
3633             case 1:  // 1 byte displacement
3634                 if (Get<uint8_t>(s.AddressField) == 0
3635                     && (((s.BaseReg-1) & 7) != 5 || (s.AddressSize == 16 && s.IndexReg)))
3636                     s.Warnings1 |= 4; // Displacement is 0 and an addressing mode without displacement exists
3637                 break;
3638             case 2:  // 2 bytes displacement
3639                 if (Get<int16_t>(s.AddressField) == 0) s.Warnings1 |= 4; // Displacement is 0
3640                 else if (Get<int16_t>(s.AddressField) == Get<int8_t>(s.AddressField)) s.Warnings1 |= 8; // Could use sign extension
3641                 break;
3642             case 4:  // 4 bytes displacement
3643                 if (s.OpcodeDef->InstructionFormat != 0x1E) {
3644                     if (Get<int32_t>(s.AddressField) == 0) s.Warnings1 |= 4; // Displacement is 0
3645                     else if (Get<int32_t>(s.AddressField) == Get<int8_t>(s.AddressField)) s.Warnings1 |= 8; // Could use sign extension
3646                 }
3647                 break;
3648             case 8:  // 8 bytes displacement
3649                 if (Get<int32_t>(s.AddressField) == Get<int64_t>(s.AddressField))
3650                     // Has 8 bytes displacement. Could use sign-extended or rip-relative
3651                     s.Warnings1 |= 8;
3652                 break;
3653             }
3654     }
3655     // Check for unnecessary SIB byte
3656     if ((s.MFlags&4) && (s.BaseReg&7)!=4+1 && (s.IndexReg==0 || (s.BaseReg==0 && s.Scale==0))) {
3657         if (WordSize == 64 && s.BaseReg==0 && s.IndexReg==0) s.Warnings1 |= 0x4000; // 64-bit address not rip-relative
3658         else if ((s.Operands[0] & 0xFF) != 0x98 && (s.Operands[1] & 0xFF) != 0x98 && s.OpcodeDef->InstructionFormat != 0x1E) { // ignore if bounds register used or vsib
3659             s.Warnings1 |= 0x10; // Unnecessary SIB byte
3660         }
3661     }
3662     // Check if shorter instruction exists for register operands
3663     if ((s.OpcodeDef->Options & 0x80) && !(s.OpcodeDef->InstructionFormat & 0xFE0) && s.Mod == 3
3664         && !(WordSize == 64 && Get<uint8_t>(s.OpcodeStart1) == 0xFF)) {
3665             s.Warnings1 |= 0x20;   // No memory operand. A shorter version exists for register operand
3666     }
3667     // Check for length-changing prefix
3668     if (s.ImmediateFieldSize > 1 && s.Prefixes[4] == 0x66
3669         && (s.OpcodeDef->AllowedPrefixes & 0x100) && !(s.OpcodeDef->InstructionFormat & 0x20)) {
3670             // 66 prefix changes length of immediate field
3671             s.Warnings1 |= 0x40;
3672     }
3673     // Check for bogus length-changing prefix causing stall on Intel Core2.
3674     // Will occur if 66 prefix and first opcode byte is F7 and there is a 16 bytes boundary between opcode byte and mod/reg/rm byte
3675     if (Get<uint8_t>(s.OpcodeStart1) == 0xF7 && s.Prefixes[4] == 0x66 && ((s.OpcodeStart1+1) & 0xF) == 0 && !s.ImmediateFieldSize) {
3676         s.Warnings1 |= 0x2000000;
3677     }
3678     // Warn for address size prefix if mod/reg/rm byte
3679     // (This does not cause a stall in 64 bit mode, but I am issueing a
3680     // warning anyway because the changed address size is probably unintended)
3681     if (s.Prefixes[1] == 0x67 && (s.MFlags & 2)) {
3682         s.Warnings1 |= 0x80;
3683     }
3684     // Check for unnecessary REX.W prefix
3685     if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x2000 && s.Prefixes[7] == 0x48) {
3686         s.Warnings1 |= 0x200;  // REX.W prefix valid but unnecessary
3687     }
3688     // Check for meaningless prefixes
3689     if (!(s.OpcodeDef->InstructionFormat & 0x10) || s.Mod == 3) {
3690         // No mod/reg/rm byte or only register operand. Check for address size and segment prefixes
3691         if ((s.Prefixes[0] && !(s.OpcodeDef->AllowedPrefixes & 0xC))
3692             || (s.Prefixes[1] && !(s.OpcodeDef->AllowedPrefixes & 3))) {
3693                 s.Warnings1 |= 0x400; // Unnecessary segment or address size prefix
3694         }
3695     }
3696 
3697     // Check for meaningless segment prefixes
3698     if (s.Prefixes[0] && !(s.OpcodeDef->AllowedPrefixes & 0x0C)) {
3699         // Segment prefix is not branch hint
3700         if (WordSize == 64 && (s.Prefixes[0] & 0x02))
3701             s.Warnings1 |= 0x400; // CS, DS, ES or SS prefix in 64 bit mode has no effect
3702         if (s.Prefixes[0] == 0x3E && s.BaseReg != 4+1 && s.BaseReg != 5+1)
3703             s.Warnings1 |= 0x400; // Unnecessary DS: segment prefix
3704         if (s.Prefixes[0] == 0x36 && (s.BaseReg == 4+1 || s.BaseReg == 5+1) )
3705             s.Warnings1 |= 0x400; // Unnecessary SS: segment prefix
3706         if (Opcodei == 0x8D)
3707             s.Warnings1 |= 0x400; // Segment prefix on LEA instruction
3708         if (s.Mod == 3)
3709             s.Warnings1 |= 0x400; // mod/reg/rm byte indicates no memory operand
3710     }
3711 
3712     // Check for meaningless 66 prefix
3713     if (s.Prefixes[4] == 0x66 && !(s.OpcodeDef->AllowedPrefixes & 0x380))
3714         s.Warnings1 |= 0x400; // 66 prefix not allowed here
3715 
3716     // Check for meaningless F2 prefix
3717     if (s.Prefixes[3] == 0xF2 && !(s.OpcodeDef->AllowedPrefixes & 0x868))
3718         s.Warnings1 |= 0x400; // F2 prefix not allowed here
3719 
3720     // Check for meaningless F3 prefix
3721     if (s.Prefixes[3] == 0xF3 && !(s.OpcodeDef->AllowedPrefixes & 0x460))
3722         s.Warnings1 |= 0x400; // F3 prefix not allowed here
3723 
3724     // Check for meaningless REX prefix bits
3725     if (s.Prefixes[7]) {
3726         // REX, VEX, XOP or DREX present
3727         // Get significant bits
3728         RexBits = s.Prefixes[7] & 0x0F;
3729         // Check if empty REX prefix
3730         if (RexBits == 0 && (s.Prefixes[7] & 0x40) && (s.Operands[0] & 0xFF) != 1 && (s.Operands[1] & 0xFF) != 1) {
3731             // Empty REX prefix needed only if 8 bit register register
3732             s.Warnings1 |= 0x400;
3733         }
3734         // Clear bits that are used:
3735         // Check if REX.W bit used
3736         if (s.OpcodeDef->AllowedPrefixes & 0x3000) RexBits &= ~8;
3737         // Check if REX.R and REX.B bit used for source or destination operands
3738         for (i = 0; i < 4; i++) {
3739             switch (s.Operands[i] & 0xF0000) {
3740             case 0x40000: // uses reg bits, check if REX.R allowed
3741                 if ((s.Operands[i] & 0xF00) != 0x300 && (s.Operands[i] & 0x58) != 0x40 && (s.Operands[i] & 0xFF) != 0x91)
3742                     // REX.R used for operand and register type allows value > 7
3743                     RexBits &= ~4;
3744                 break;
3745             case 0x30000: // Uses rm bits. check if REX.B allowed
3746                 if ((s.Operands[i] & 0xF00) != 0x300 && (s.Operands[i] & 0x58) != 0x40 && (s.Operands[i] & 0xFF) != 0x91)
3747                     // REX.B used for operand and register type allows value > 7
3748                     RexBits &= ~1;
3749                 break;
3750             case 0x20000: // Register operand indicated by opcode bits and REX:B
3751                 RexBits &= ~1;
3752                 break;
3753             }
3754         }
3755         // Check if REX.X bit used for index register
3756         if (s.IndexReg) RexBits &= ~2;
3757         // Check if REX.B bit used for base register
3758         if (s.BaseReg)  RexBits &= ~1;
3759         // Check if REX.X bit used for base register with EVEX prefix
3760         if (s.Prefixes[3] == 0x62 && s.Mod == 3) RexBits &= ~2;
3761 
3762         // Check if VEX.W bit used for some purpose
3763         if ((s.OpcodeDef->AllowedPrefixes & 0x7000) != 0 && (s.Prefixes[7] & 0xB0)) RexBits &= ~8;
3764 
3765         // Any unused bits left?
3766         if (RexBits) {
3767             s.Warnings1 |= 0x400; // At least one REX bit makes no sense here
3768         }
3769     }
3770     // Check for registers not allowed in 32-bit mode
3771     if (this->WordSize < 64) {
3772         if (s.Prefixes[7] & 7 & ~RexBits) {
3773             s.Errors |= 0x200;        // Register 8-15 not allowed in this mode
3774         }
3775         if (s.Prefixes[7] & 0xB0) {
3776             // VEX present, check vvvv register operand
3777             if (s.Vreg & 8) s.Errors |= 0x200;  // Register 8-15 not allowed in this mode
3778             // Check imm[7:4] register operand
3779             if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x1C && (Get<uint8_t>(s.ImmediateField) & 8)) {
3780                 s.Errors |= 0x200;  // Register 8-15 not allowed in this mode
3781             }
3782         }
3783     }
3784 
3785     // Check for meaningless VEX prefix bits
3786     if (s.Prefixes[7] & 0xB0) {
3787         // VEX present
3788         if ((s.Prefixes[6] & 0x60) == 0x20) { // VEX.L bit set and not EVEX
3789             if (!(s.OpcodeDef->AllowedPrefixes & 0x240000)) s.Warnings1 |= 0x40000000; // L bit not allowed
3790             if ((s.OpcodeDef->AllowedPrefixes & 0x200000) && s.Prefixes[5] > 0x66) s.Warnings1 |= 0x40000000; // L bit not allowed with F2 and F3 prefix
3791         }
3792         else {
3793             if ((s.OpcodeDef->AllowedPrefixes & 0x100000) && !(s.Prefixes[6] & 0x20)) s.Warnings1 |= 0x1000; // L bit missing
3794         }
3795         if ((s.Prefixes[6] & 0x10) && s.Prefixes[3] != 0x62) {
3796             s.Warnings1 |= 0x40000000; // Uppermost m bit only allowed if EVEX prefix
3797         }
3798         // check VEX.v bits
3799         if (s.Prefixes[3] == 0x62 && s.OpcodeDef->InstructionFormat == 0x1E) {
3800             // has EVEX VSIB address
3801             if (s.Vreg & 0xF) {
3802                 s.Warnings1 |= 0x40000000; // vvvv bits not allowed, v' bit allowed
3803             }
3804         }
3805         else { // not EVEX VSIB
3806             if ((s.Vreg & 0x1F) && !(s.OpcodeDef->AllowedPrefixes & 0x80000)) {
3807                 s.Warnings1 |= 0x40000000; // vvvvv bits not allowed
3808             }
3809         }
3810     }
3811     // Check for meaningless EVEX and MVEX prefix bits
3812     if (s.Prefixes[3] == 0x62) {
3813         if (s.Prefixes[6] & 0x20) {
3814             // EVEX prefix
3815             if (s.Mod == 3) {
3816                 // register operands
3817                 if (!(s.OpcodeDef->EVEX & 6) && (s.Esss & 1)) {
3818                     s.Warnings2 |= 0x40; // rounding and sae not allowed
3819                 }
3820             }
3821             else {
3822                 // memory operand
3823                 if (!(s.OpcodeDef->EVEX & 1) && (s.Esss & 1)) {
3824                     s.Warnings2 |= 0x40; // broadcast not allowed
3825                 }
3826             }
3827             if (!(s.OpcodeDef->EVEX & 0x30) && s.Kreg) {
3828                 s.Warnings2 |= 0x40; // masking not allowed
3829             }
3830             else if (!(s.OpcodeDef->EVEX & 0x20) && (s.Esss & 8)) {
3831                 s.Warnings2 |= 0x40; // zeroing not allowed
3832             }
3833             else if ((s.OpcodeDef->EVEX & 0x40) && s.Kreg == 0) {
3834                 s.Warnings2 |= 0x100; // mask register must be nonzero
3835             }
3836         }
3837         else {
3838             // MVEX prefix.
3839             if (s.Mod == 3) {
3840                 // register operands only
3841                 if ((s.Esss & 8) && (s.OpcodeDef->MVEX & 0x600) == 0) {
3842                     s.Warnings2 |= 0x80; // E bit not allowed for register operand here
3843                 }
3844             }
3845             if (((s.OpcodeDef->MVEX & 0x1F) == 0) && (s.Esss & 7) != 0) {
3846                 s.Warnings2 |= 0x80; // sss bits not allowed here
3847             }
3848             if (s.Kreg && (s.OpcodeDef->MVEX & 0x3000) == 0) {
3849                 s.Warnings2 |= 0x80; // kkk bits not allowed here
3850             }
3851         }
3852     }
3853 
3854     // Check for conflicting prefixes
3855     if (s.OpcodeDef->AllowedPrefixes & 0x140)  s.Conflicts[5] = 0; // 66 + F2/F3 allowed for string instructions
3856     if ((s.OpcodeDef->AllowedPrefixes & 0x1200) == 0x1200) s.Conflicts[4] = 0; // 66 + REX.W allowed for e.g. movd/movq instruction
3857     if (*(int64_t*)&s.Conflicts) s.Warnings1 |= 0x800;  // Conflicting prefixes. Check all categories at once
3858 
3859     // Check for missing prefixes
3860     if ((s.OpcodeDef->AllowedPrefixes & 0x8000) && s.Prefixes[5] == 0)
3861         s.Warnings1 |= 0x1000; // Required 66/F2/F3 prefix missing
3862     if ((s.OpcodeDef->AllowedPrefixes & 0x20000) && (s.Prefixes[7] & 0xB0) == 0)
3863         s.Warnings1 |= 0x1000; // Required VEX prefix missing
3864 
3865     // Check for VEX prefix not allowed
3866     if (!(s.OpcodeDef->AllowedPrefixes & 0xC30000) && (s.Prefixes[7] & 0xB0))
3867         s.Warnings1 |= 0x40000000; // VEX prefix not allowed
3868 
3869     // Check for EVEX and MVEX prefix allowed
3870     if (s.Prefixes[3] == 0x62) {
3871 
3872         if (s.Prefixes[6] & 0x20) {
3873             if (!(s.OpcodeDef->AllowedPrefixes & 0x800000)) s.Warnings2 |= 0x10;
3874         }
3875         else {
3876             if (!(s.OpcodeDef->AllowedPrefixes & 0x400000)) s.Warnings2 |= 0x20;
3877         }
3878     }
3879 
3880     // Check for unused SIB scale factor
3881     if (s.Scale && s.IndexReg == 0) s.Warnings1 |= 0x2000; // SIB has scale factor but no index register
3882 
3883     // Check if address in 64 bit mode is rip-relative
3884     if (WordSize == 64 && s.AddressFieldSize >= 4 && s.AddressRelocation && !(s.MFlags & 0x100)) {
3885         // 32-bit address in 64 bit mode is not rip-relative. Check if image-relative
3886         if (s.AddressRelocation >= Relocations.GetNumEntries() || !(Relocations[s.AddressRelocation].Type & 0x14)) {
3887             // Not image-relative or relative to reference point
3888             if (s.AddressFieldSize == 8) {
3889                 s.Warnings1 |= 0x20000000;            // Full 64-bit address
3890             }
3891             else {
3892                 s.Warnings1 |= 0x4000;             // 32-bit absolute address
3893             }
3894         }
3895     }
3896     // Check if direct address is relocated
3897     if (s.AddressFieldSize > 1 && !s.AddressRelocation && !s.BaseReg && !s.IndexReg && (WordSize != 16 || !(s.Prefixes[0] & 0x40)))
3898         s.Warnings1 |= 0x8000;  // Direct address has no relocation, except FS: and GS:
3899 
3900     // Check if address relocation type is correct
3901     if (s.AddressFieldSize > 1 && s.AddressRelocation && (s.MFlags & 1)) {
3902         // Memory operand found. Should it be direct or self-relative
3903         if (s.MFlags & 0x100) {
3904             // Memory address should be self-relative (rip-relative)
3905             if (!(Relocations[s.AddressRelocation].Type & 2)) {
3906                 s.Warnings1 |= 0x10000;     // rip-relative relocation expected but not found
3907             }
3908         }
3909         else {
3910             // Memory address should be direct
3911             if (Relocations[s.AddressRelocation].Type & 0x302) {
3912                 s.Warnings1 |= 0x10000;     // direct address expected, other type found
3913             }
3914         }
3915 
3916         // Check if memory address has correct alignment
3917         // Loop through destination and source operands
3918         for (i = 0; i < s.MaxNumOperands; i++) {
3919             // Operand type
3920             uint32_t OperandType = s.Operands[i];
3921             if ((OperandType & 0x2000) && Opcodei != 0x8D) {
3922                 // This is a memory operand (except LEA). Get target offset
3923                 int64_t TargetOffset = 0;
3924                 switch (s.AddressFieldSize) {
3925                 case 1:
3926                     TargetOffset = Get<int8_t>(s.AddressField);  break;
3927                 case 2:
3928                     TargetOffset = Get<int16_t>(s.AddressField);  break;
3929                 case 4:
3930                     TargetOffset = Get<int32_t>(s.AddressField);
3931                     if (s.MFlags & 0x100) {
3932                         // Compute rip-relative address
3933                         TargetOffset += IEnd - s.AddressField;
3934                     }
3935                     break;
3936                 case 8:
3937                     TargetOffset = Get<int64_t>(s.AddressField);  break;
3938                 }
3939                 // Add relocation offset
3940                 TargetOffset += Relocations[s.AddressRelocation].Addend;
3941 
3942                 // Find relocation target
3943                 uint32_t SymbolOldIndex = Relocations[s.AddressRelocation].TargetOldIndex;
3944                 uint32_t SymbolNewIndex = Symbols.Old2NewIndex(SymbolOldIndex);
3945                 if (SymbolNewIndex) {
3946                     // Add relocation target offset
3947                     TargetOffset += Symbols[SymbolNewIndex].Offset;
3948                     // Target section
3949                     int32_t TargetSection = Symbols[SymbolNewIndex].Section;
3950                     if (TargetSection && (uint32_t)TargetSection < Sections.GetNumEntries()) {
3951                         // Add relocation section address
3952                         TargetOffset += Sections[TargetSection].SectionAddress;
3953                     }
3954                     if ((Relocations[s.AddressRelocation].Type & 0x10) && Relocations[s.AddressRelocation].RefOldIndex) {
3955                         // Add offset of reference point
3956                         uint32_t RefIndex = Symbols.Old2NewIndex(Relocations[s.AddressRelocation].RefOldIndex);
3957                         TargetOffset += Symbols[RefIndex].Offset;
3958                     }
3959                     if (Relocations[s.AddressRelocation].Type & 0x3000) {
3960                         // GOT entry etc. Can't check alignment
3961                         continue;
3962                     }
3963                 }
3964 
3965                 // Get operand size
3966                 OperandSize = GetDataItemSize(OperandType);
3967                 if (s.OffsetMultiplier) OperandSize = s.OffsetMultiplier;
3968                 while (OperandSize & (OperandSize-1)) {
3969                     // Not a power of 2. Get nearest lower power of 2
3970                     OperandSize = OperandSize & (OperandSize-1);
3971                 }
3972 
3973                 // Check if aligned
3974                 if ((TargetOffset & (OperandSize-1)) && !(s.Warnings1 & 0x10000)) {
3975                     // Memory operand is misaligned
3976                     if (s.OffsetMultiplier) {
3977                         // EVEX code with required alignment
3978                         s.Warnings1 |= 0x800000;           // Serious. Generates fault
3979                     }
3980                     else if (OperandSize < 16) {
3981                         // Performance penalty but no fault
3982                         s.Warnings1 |= 0x400000;           // Warn not aligned
3983                     }
3984                     else {
3985                         // XMM or larger. May generate fault
3986                         // with VEX: only explicitly aligned instructions generate fault
3987                         // without VEX: all require alignment except explicitly unaligned
3988                         if (s.OpcodeDef->Options & 0x100 || (!(s.Prefixes[7] & 0xB0) && !(s.OpcodeDef->Options & 0x200))) {
3989                             s.Warnings1 |= 0x800000;       // Serious. Generates fault
3990                         }
3991                         else {
3992                             s.Warnings1 |= 0x400000;       // Not serious. Performance penalty only
3993                         }
3994                     }
3995                 }
3996             }
3997         }
3998     }
3999 
4000     // Check if jump relocation type is correct
4001     if (s.ImmediateFieldSize > 1 && s.ImmediateRelocation && (s.OpcodeDef->Destination & 0xFE) == 0x82) {
4002         // Near jump or call. Relocation must be self-relative
4003         if (!(Relocations[s.ImmediateRelocation].Type & 2)) {
4004             s.Warnings1 |= 0x10000;  // Self-relative relocation expected but not found
4005         }
4006     }
4007     // Check operand size for jumps
4008     if ((s.OpcodeDef->AllowedPrefixes & 0x80) && s.Prefixes[4]) {
4009         // Jump instruction sensitive to operand size prefix
4010         if (WordSize == 32) s.Warnings1 |= 0x20000; // Instruction pointer truncated
4011         if (WordSize == 64) s.Warnings1 |= 0x400;   // Prefix has no effect
4012     }
4013 
4014     // Check address size for stack operations
4015     if ((s.OpcodeDef->AllowedPrefixes & 2) && s.Prefixes[1])
4016         s.Warnings1 |= 0x40000; // Stack operation has address size prefix
4017 
4018     // Check for undocumented opcode
4019     if ((s.OpcodeDef->InstructionFormat & 0x4000) && s.OpcodeDef->Name)
4020         s.Warnings1 |= 0x100000; // Undocumented opcode
4021 
4022     // Check for future opcode
4023     if (s.OpcodeDef->InstructionFormat & 0x2000)
4024         s.Warnings1 |= 0x200000; // Opcode reserved for future extensions
4025 
4026     // Check instruction set
4027     if (s.OpcodeDef->InstructionSet & 0x10000)
4028         s.Warnings2 |= 0x2; // Planned future instruction
4029 
4030     if (s.OpcodeDef->InstructionSet & 0x20000)
4031         s.Warnings2 |= 0x4; // Proposed instruction code never implemented, preliminary specification later changed
4032 
4033     // Check operand size for stack operations
4034     if ((s.OpcodeDef->AllowedPrefixes & 0x102) == 0x102) {
4035         if (s.Prefixes[4] == 0x66 || (Get<uint8_t>(s.OpcodeStart1) == 0xCF && s.OperandSize != WordSize)) {
4036             s.Warnings1 |= 0x4000000; // Non-default size for stack operation
4037         }
4038     }
4039 
4040     // Check if function ends with ret or unconditional jump (or nop)
4041     if (IEnd == FunctionEnd && !(s.OpcodeDef->Options & 0x50)) {
4042         s.Warnings1 |= 0x8000000; // Function does not end with return or jump
4043     }
4044 
4045     // Check for multi-byte NOP and UD2
4046     if (s.OpcodeDef->Options & 0x50) CheckForNops();
4047 
4048     // Check for inaccessible code
4049     if (IBegin == LabelInaccessible) {
4050         s.Warnings1 |= 0x10000000; // Inaccessible code other than NOP or UD2
4051     }
4052 }
4053 
4054 
FindErrors()4055 void CDisassembler::FindErrors() {
4056     // Find any errors in code
4057     if (IEnd - IBegin > 15) {
4058         // Instruction longer than 15 bytes
4059         s.Errors |= 1;
4060     }
4061     if (s.Prefixes[2] && (!(s.OpcodeDef->AllowedPrefixes & 0x10) || !(s.MFlags & 1))) {
4062         // Lock prefix not allowed for this instruction
4063         s.Errors |= 2;
4064     }
4065     if ( s.OpcodeDef->InstructionFormat == 0
4066         || ((s.OpcodeDef->InstructionFormat & 0x4000) && s.OpcodeDef->Name == 0)) {
4067             // Illegal instruction
4068             s.Errors |= 4;
4069     }
4070     if ((s.OpcodeDef->InstructionSet & 0x8000) && WordSize == 64) {
4071         // Instruction not allowed in 64 bit mode
4072         s.Errors |= 0x40;
4073     }
4074     if (IEnd > LabelEnd && IBegin < LabelEnd) {
4075         // Instruction crosses a label
4076         // Check if label is public
4077         uint32_t sym1 = Symbols.FindByAddress(Section, LabelEnd, 0, 0);
4078         if (sym1 && (Symbols[sym1].Scope & 0x1C)) {
4079             // Label is public. Code interpretation may be out of phase
4080             s.Errors |= 0x80;
4081             // Put interpretation in phase with label
4082             IEnd = LabelEnd;
4083         }
4084         else {
4085             // Symbol is local.
4086             // This may be a spurious label produced by misinterpretation elsewhere
4087             if (sym1) Symbols[sym1].Type = 0;       // Remove symbol type
4088             s.Warnings2 |= 1;
4089         }
4090     }
4091     if ((s.MFlags & 3) == 3 && (s.Prefixes[7] & 1) && s.BaseReg == 0 && s.AddressFieldSize == 4) {
4092         // Attempt to use R13 as base register without displacement
4093         s.Errors |= 0x100;
4094     }
4095     if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x14) {
4096         // Check validity of DREX byte
4097         if ((s.Vreg & 0x87) && WordSize < 64) {
4098             s.Errors |= 0x200;  // Attempt to use XMM8-15 in 16 or 32 bit mode (ignored, may be changed to warning)
4099         }
4100         if (s.Prefixes[7] & 0x40) {
4101             s.Errors |= 0x400;  // Both REX and DREX byte
4102         }
4103         if ((s.Vreg & 2) && !(s.MFlags & 4)) {
4104             s.Errors |= 0x800;  // DREX.X bit but no SIB byte (probably ignored, may be changed to warning)
4105         }
4106     }
4107     if ((s.OpcodeDef->InstructionFormat & 0x1F) == 0x1E) {
4108         // Instruction needs VSIB byte
4109         if (s.IndexReg == 0) s.Errors |= 8;  // Illegal operand: no index register
4110     }
4111     if (LabelEnd >= s.OpcodeStart2+2 && (
4112         Get<uint16_t>(s.OpcodeStart2) == 0
4113         || Get<uint16_t>(s.OpcodeStart2) == 0xFFFF
4114         // || Get<uint16_t>(s.OpcodeStart2) == 0xCCCC
4115         )) {
4116             // Two consecutive bytes of zero gives the instruction: add byte ptr [eax],al
4117             // This instruction is very unlikely to occur in normal code but occurs
4118             // frequently in data. Mark to code as probably data.
4119             // Two bytes of 0xFF makes no legal instruction but occurs frequently in data.
4120             // Two bytes of 0xCC is debug breaks used by debuggers for marking illegal addresses or unitialized data
4121             s.Errors = 0x4000;
4122     }
4123     if (s.Errors) {
4124         // Errors found. May be data in code segment
4125         CountErrors++;
4126         MarkCodeAsDubious();
4127     }
4128 }
4129 
4130 
FindRelocations()4131 void CDisassembler::FindRelocations() {
4132     // Find any relocation sources in this instruction
4133     SARelocation rel1, rel2;                      // Make relocation records for searching
4134     rel1.Section = Section;
4135     rel1.Offset  = IBegin;                        // rel1 marks begin of this instruction
4136     rel2.Section = Section;
4137     rel2.Offset  = IEnd;                          // rel2 marks end of this instruction
4138 
4139     // Search for relocations in this instruction
4140     uint32_t irel = Relocations.FindFirst(rel1);    // Finds first relocation source >= IBegin
4141 
4142     if (irel == 0 || irel >= Relocations.GetNumEntries()) {
4143         // No relocations found
4144         return;
4145     }
4146     if (Relocations[irel] < rel2) {
4147         // Found relocation points between IBegin and IEnd
4148         if (Relocations[irel].Offset == s.AddressField && s.AddressFieldSize) {
4149             // Relocation points to address field
4150             s.AddressRelocation = irel;
4151             if (Relocations[irel].Size > s.AddressFieldSize) {
4152                 // Right place but wrong size
4153                 s.Errors |= 0x1000;
4154             }
4155         }
4156         else if (Relocations[irel].Offset == s.ImmediateField && s.ImmediateFieldSize) {
4157             // Relocation points to immediate operand/jump address field
4158             s.ImmediateRelocation = irel;
4159             if (Relocations[irel].Size > s.ImmediateFieldSize) {
4160                 // Right place but wrong size
4161                 s.Errors |= 0x1000;
4162             }
4163         }
4164         else {
4165             // Relocation source points to a wrong address
4166             s.Errors |= 0x1000;
4167         }
4168         if (s.AddressRelocation) {
4169             // Found relocation for address field, there may be
4170             // a second relocation for the immediate field
4171             if (irel + 1 < Relocations.GetNumEntries() && Relocations[irel+1] < rel2) {
4172                 // Second relocation found
4173                 if (Relocations[irel+1].Offset == s.ImmediateField && s.ImmediateFieldSize) {
4174                     // Relocation points to immediate operand/jump address field
4175                     s.ImmediateRelocation = irel + 1;
4176                     if (Relocations[irel+1].Size > s.ImmediateFieldSize) {
4177                         // Right place but wrong size
4178                         s.Errors |= 0x1000;
4179                     }
4180                     else {
4181                         // Second relocation accepted
4182                         irel++;
4183                     }
4184                 }
4185             }
4186         }
4187         // Check if there are more relocations
4188         if (irel + 1 < Relocations.GetNumEntries() && Relocations[irel+1] < rel2) {
4189             // This relocation points before IEnd but doesn't fit any operand or overlaps previous relocation
4190             if ((s.Operands[0] & 0xFE) == 0x84 && Relocations[irel+1].Offset == s.ImmediateField + s.ImmediateFieldSize - 2) {
4191                 // Fits segment field of far jump/call
4192                 ;
4193             }
4194             else {
4195                 // Relocation doesn't fit anywhere
4196                 s.Errors |= 0x1000;
4197             }
4198         }
4199     }
4200 }
4201 
4202 
FindInstructionSet()4203 void CDisassembler::FindInstructionSet() {
4204     // Update instruction set
4205     uint16_t InstSet = s.OpcodeDef->InstructionSet;
4206     if (InstSet == 7 && s.Prefixes[5] == 0x66) {
4207         // Change MMX to SSE2 if 66 prefix
4208         InstSet = 0x12;
4209     }
4210     if ((s.Prefixes[7] & 0x30) && InstSet < 0x19) {
4211         // VEX instruction set if VEX prefix
4212         InstSet = 0x19;
4213     }
4214     if (s.Prefixes[6] & 0x40) {
4215         // EVEX or MVEX prefix
4216         if (s.Prefixes[6] & 0x20) {
4217             // EVEX prefix
4218             if (InstSet < 0x20) InstSet = 0x20;
4219         }
4220         else {
4221             // MVEX prefix
4222             if (InstSet < 0x80) InstSet = 0x80;
4223         }
4224     }
4225     if ((InstSet & 0xFF00) == 0x1000) {
4226         // AMD-specific instruction set
4227         // Set AMD-specific instruction set to max
4228         if ((InstSet & 0xFF) > InstructionSetAMDMAX) {
4229             InstructionSetAMDMAX = InstSet & 0xFF;
4230         }
4231     }
4232     else {
4233         // Set Intel or generic instruction set to maximum
4234         if ((InstSet & 0xFF) > InstructionSetMax) {
4235             InstructionSetMax = InstSet & 0xFF;
4236         }
4237     }
4238 
4239     // Set InstructionSetOR to a bitwise OR of all instruction sets encountered
4240     InstructionSetOR |= InstSet;
4241 
4242     if (s.OpcodeDef->Options & 0x10) {
4243         FlagPrevious |= 2;
4244     }
4245 }
4246 
4247 
CheckLabel()4248 void CDisassembler::CheckLabel() {
4249     // Check if there is a label at instruction, and write it
4250     // Write begin and end of function
4251 
4252     // Search in symbol table
4253     uint32_t Sym1, Sym2;                            // First and last symbol
4254 
4255     // Find all symbol table entries at this address
4256     Sym1 = Symbols.FindByAddress(Section, IBegin, &Sym2);
4257 
4258     if (Sym1) {
4259         // Found at least one symbol
4260         // Loop for all symbols with same address
4261         for (uint32_t s = Sym1; s <= Sym2; s++) {
4262 
4263             // Check if label has already been written as a function label
4264             if (!(Symbols[s].Scope & 0x100) && !(Symbols[s].Type & 0x80000000)) {
4265 
4266                 // Write label as a private or public code label
4267                 WriteCodeLabel(s);
4268             }
4269         }
4270         // Get symbol type and size
4271         DataType = Symbols[Sym2].Type;
4272         DataSize = GetDataItemSize(DataType);
4273     }
4274 }
4275 
4276 
CheckForNops()4277 void CDisassembler::CheckForNops() {
4278     // Check for multi-byte NOP and UD2 instructions
4279 
4280     switch (Opcodei) {
4281     case 0x3C00: case 0x3C01: case 0x3C02: case 0x11F:  // NOP
4282         // These opcodes are intended for NOPs. Indicate if longer than one byte
4283         if (IEnd - IBegin > 1) s.Warnings1 |= 0x1000000;
4284         // Remember NOP
4285         FlagPrevious |= 1;
4286         break;
4287 
4288     case 0x8D:   // LEA
4289         // LEA is often used as NOP with destination = base register
4290         if (s.Mod < 3 && s.Reg+1 == s.BaseReg && s.IndexReg == 0 &&
4291             s.AddressSize == s.OperandSize && s.OperandSize >= WordSize) {
4292                 // Destination is same as base register.
4293                 // Check if displacement is 0
4294                 switch (s.AddressFieldSize) {
4295     case 0:
4296         break;
4297     case 1:
4298         if (Get<int8_t>(s.AddressField) != 0) return;
4299         break;
4300     case 2:
4301         if (Get<int16_t>(s.AddressField) != 0) return;
4302         break;
4303     case 4:
4304         if (Get<int32_t>(s.AddressField) != 0) return;
4305         break;
4306     default:
4307         return;
4308                 }
4309                 // Displacement is zero. This is a multi-byte NOP
4310                 s.Warnings1 |= 0x1000000;
4311                 break;
4312         }
4313 
4314     case 0x86: case 0x87:  // XCHG
4315     case 0x88: case 0x89: case 0x8A: case 0x8B:  // MOV
4316         // Check if source and destination are the same register
4317         if (s.Mod == 3 && s.Reg == s.RM && s.OperandSize >= WordSize) {
4318             // Moving a register to itself. This is a NOP
4319             s.Warnings1 |= 0x1000000;
4320         }
4321         break;
4322     case 0x10B:  // UD2
4323         FlagPrevious |= 6;
4324         break;
4325     }
4326 
4327     if (s.Warnings1 & 0x1000000) {
4328         // A multi-byte NOP is detected.
4329         // Remove warnings for longer-than-necessary instruction
4330         s.Warnings1 &= ~ 0x873D;
4331         // Remember NOP
4332         FlagPrevious |= 1;
4333     }
4334 }
4335 
4336 
InitialErrorCheck()4337 void CDisassembler::InitialErrorCheck() {
4338     // Check for illegal relocations table entries
4339     uint32_t i;                                     // Loop counter
4340 
4341     // Loop through relocations table
4342     for (i = 1; i < Relocations.GetNumEntries(); i++) {
4343         if (Relocations[i].TargetOldIndex >= Symbols.GetLimit()) {
4344             // Nonexisting relocation target
4345             Relocations[i].TargetOldIndex = 0;
4346         }
4347         if (Relocations[i].RefOldIndex >= Symbols.GetLimit()) {
4348             // Nonexisting reference index
4349             Relocations[i].RefOldIndex = 0;
4350         }
4351         // Remember types of relocations in source
4352         RelocationsInSource |= Relocations[i].Type;
4353     }
4354 
4355     // Check opcode tables
4356     if (NumOpcodeTables1 != NumOpcodeTables2) {
4357         err.submit(9007, 0xFFFF);
4358     }
4359 }
4360 
4361 
FinalErrorCheck()4362 void CDisassembler::FinalErrorCheck() {
4363     // Check for illegal entries in symbol table and relocations table
4364     uint32_t i;                                     // Loop counter
4365     int SpaceWritten = 0;                         // Blank line written
4366 
4367     // Loop through symbol table
4368     for (i = 1; i < Symbols.GetNumEntries(); i++) {
4369         if (Symbols[i].Section <= 0 || (Symbols[i].Type & 0x80000000)) {
4370             // Constant or external symbol or section
4371             continue;
4372         }
4373         if ((uint32_t)Symbols[i].Section >= Sections.GetNumEntries()
4374             || Symbols[i].Offset > Sections[Symbols[i].Section].TotalSize) {
4375                 // Symbol has illegal address
4376                 // Blank line
4377                 if (!SpaceWritten++) OutFile.NewLine();
4378                 // Write comment
4379                 OutFile.Put(CommentSeparator);
4380                 OutFile.Put("Error: Symbol ");
4381                 // Write symbol name
4382                 OutFile.Put(Symbols.GetName(i));
4383                 // Write the illegal address
4384                 OutFile.Put(" has a non-existing address. Section: ");
4385                 if (Symbols[i].Section != ASM_SEGMENT_IMGREL) {
4386                     OutFile.PutDecimal(Symbols[i].Section, 1);
4387                 }
4388                 else {
4389                     OutFile.Put("Unknown");
4390                 }
4391                 OutFile.Put(" Offset: ");
4392                 OutFile.PutHex(Symbols[i].Offset, 1);
4393                 OutFile.NewLine();
4394         }
4395     }
4396     // Loop through relocations table
4397     for (i = 1; i < Relocations.GetNumEntries(); i++) {
4398         // Check source address
4399         if (Relocations[i].Section == 0
4400             || (uint32_t)Relocations[i].Section >= Sections.GetNumEntries()
4401             || (Sections[Relocations[i].Section].Type & 0xFF) == 3
4402             || Relocations[i].Offset >= Sections[Relocations[i].Section].InitSize) {
4403                 // Relocation has illegal source address
4404                 // Blank line
4405                 if (!SpaceWritten++) OutFile.NewLine();
4406                 // Write comment
4407                 OutFile.Put(CommentSeparator);
4408                 OutFile.Put("Error: Relocation number ");
4409                 OutFile.PutDecimal(i);
4410                 OutFile.Put(" has a non-existing source address. Section: ");
4411                 if (Relocations[i].Section != ASM_SEGMENT_IMGREL) {
4412                     OutFile.PutDecimal(Relocations[i].Section, 1);
4413                 }
4414                 else {
4415                     OutFile.Put("Unknown");
4416                 }
4417                 OutFile.Put(" Offset: ");
4418                 OutFile.PutHex(Relocations[i].Offset, 1);
4419                 OutFile.NewLine();
4420         }
4421         // Check target
4422         if (Relocations[i].TargetOldIndex == 0
4423             || Relocations[i].TargetOldIndex >= Symbols.GetLimit()
4424             || Relocations[i].RefOldIndex >= Symbols.GetLimit()) {
4425                 // Relocation has illegal target
4426                 // Blank line
4427                 if (!SpaceWritten++) OutFile.NewLine();
4428                 // Write comment
4429                 OutFile.Put(CommentSeparator);
4430                 OutFile.Put("Error: Relocation number ");
4431                 OutFile.PutDecimal(i);
4432                 OutFile.Put(" at section ");
4433                 OutFile.PutDecimal(Relocations[i].Section);
4434                 OutFile.Put(" offset ");
4435                 OutFile.PutHex(Relocations[i].Offset);
4436                 OutFile.Put(" has a non-existing target index. Target: ");
4437                 OutFile.PutDecimal(Relocations[i].TargetOldIndex, 1);
4438                 if (Relocations[i].RefOldIndex) {
4439                     OutFile.Put(", Reference point index: ");
4440                     OutFile.PutDecimal(Relocations[i].RefOldIndex, 1);
4441                 }
4442                 OutFile.NewLine();
4443         }
4444     }
4445 }
4446 
4447 
CheckNamesValid()4448 void CDisassembler::CheckNamesValid() {
4449     // Fix invalid symbol and section names
4450     uint32_t i, j;                                  // Loop counter
4451     uint32_t Len;                                   // Length of name
4452     uint32_t Changed;                               // Symbol is changed
4453     char c;                                       // Character in symbol
4454     const char * ValidCharacters = "";            // List of valid characters in symbol names
4455     // Make list of characters valid in symbol names other than alphanumeric characters
4456     switch (Syntax) {
4457     case SUBTYPE_MASM:
4458         ValidCharacters = "_$@?";  break;
4459     case SUBTYPE_NASM:
4460         ValidCharacters = "_$@?.~#";  break;
4461     case SUBTYPE_GASM:
4462         ValidCharacters = "_$.";  break;
4463     default:
4464         err.submit(9000);
4465     }
4466 
4467     // Loop through sections
4468     for (i = 1; i < Sections.GetNumEntries(); i++) {
4469         char * SecName = (char*)NameBuffer.Buf() + Sections[i].Name;
4470         if (Syntax == SUBTYPE_MASM && SecName[0] == '.') {
4471             // Name begins with dot
4472             // Check for reserved names
4473             if (stricmp(SecName, ".text") == 0
4474                 ||  stricmp(SecName, ".data") == 0
4475                 ||  stricmp(SecName, ".code") == 0
4476                 ||  stricmp(SecName, ".const") == 0) {
4477                     // Change . to _ in beginning of name to avoid reserved directive name
4478                     SecName[0] = '_';
4479             }
4480             else {
4481                 // Other name beginning with .
4482                 // Set option dotname
4483                 MasmOptions |= 1;
4484             }
4485         }
4486     }
4487     // Loop through symbols
4488     for (i = 1; i < Symbols.GetNumEntries(); i++) {
4489         if (Symbols[i].Name) {
4490             // Warning: violating const specifier in GetName():
4491             char * SymName = (char *)Symbols.GetName(i);
4492             Len = (uint32_t)strlen(SymName);  Changed = 0;
4493             // Loop through characters in symbol
4494             for (j = 0; j < Len; j++) {
4495                 c = SymName[j];
4496                 if (!(((c | 0x20) >= 'a' && (c | 0x20) <= 'z')
4497                 || (c >= '0' && c <= '9' && j != 0)
4498                 || strchr(ValidCharacters, c))) {
4499                     // Illegal character found
4500                     if (Syntax == SUBTYPE_MASM) {
4501                         if (j == 0 && c == '.') {
4502                             // Symbol beginning with dot in MASM
4503                             if (Symbols[i].Type & 0x80000000) {
4504                                 // This is a segment. Check for reserved names
4505                                 if (stricmp(SymName, ".text") == 0
4506                                     ||  stricmp(SymName, ".data") == 0
4507                                     ||  stricmp(SymName, ".code") == 0
4508                                     ||  stricmp(SymName, ".const") == 0) {
4509                                         // Change . to _ in beginning of name to avoid reserved directive name
4510                                         SymName[0] = '_';  // Warning: violating const specifier in GetName()
4511                                         break; // break out of j loop
4512                                 }
4513                             }
4514                             // Set option dotname
4515                             MasmOptions |= 1;
4516                         }
4517                         else {
4518                             // Other illegal character in MASM
4519 #if ReplaceIllegalChars
4520                             SymName[j] = '?';
4521 #endif
4522                             Changed++;
4523                         }
4524                     }
4525                     else {
4526                         // Illegal character in GAS or YASM syntax
4527 #if ReplaceIllegalChars
4528                         SymName[j] = (Syntax == SUBTYPE_NASM) ? '?' : '$';
4529 #endif
4530                         Changed++;
4531                     }
4532                 }
4533             }
4534             // Count names changed
4535             if (Changed) NamesChanged++;
4536         }
4537     }
4538 }
4539 
4540 
FixRelocationTargetAddresses()4541 void CDisassembler::FixRelocationTargetAddresses() {
4542     // Fix missing relocation target addresses
4543     // to section:offset addresses
4544     uint32_t r;                                     // Relocation index
4545     uint32_t s;                                     // Symbol index
4546     int32_t sect;
4547 
4548     // Loop through relocations
4549     for (r = 1; r < Relocations.GetNumEntries(); r++) {
4550 
4551         if (Relocations[r].TargetOldIndex == 0 && (Relocations[r].Type & 0x60)) {
4552             // Target symbol not defined. Make new symbol
4553             SASymbol sym;
4554             sym.Reset();
4555 
4556             // Find target address from relocation source
4557             sect = Relocations[r].Section;
4558             if ((uint32_t)sect >= Sections.GetNumEntries()) continue;
4559             uint8_t * pSectionData = Sections[sect].Start;
4560             if (!pSectionData) continue;
4561             int64_t TargetOffset = 0;
4562             if (Relocations[r].Size == 4) {
4563                 TargetOffset = *(int32_t*)(pSectionData + Relocations[r].Offset);
4564             }
4565             else if (Relocations[r].Size == 8) {
4566                 TargetOffset = *(int64_t*)(pSectionData + Relocations[r].Offset);
4567             }
4568             else {
4569                 // Error: wrong size
4570                 continue;
4571             }
4572             if (HighDWord(TargetOffset)) {
4573                 // Error: out of range
4574                 continue;
4575             }
4576             // Translate to section:offset address
4577             if (!(TranslateAbsAddress(TargetOffset, sym.Section, sym.Offset))) {
4578                 // Translation failed
4579                 continue;
4580             }
4581             // Default scope is file local
4582             sym.Scope = 2;
4583 
4584             // Add symbol if it doesn't exist or get index of existing symbol
4585             s = Symbols.NewSymbol(sym);
4586 
4587             // Make reference to symbol from relocation record
4588             if (s) {
4589                 Relocations[r].TargetOldIndex = Symbols[s].OldIndex;
4590             }
4591         }
4592     }
4593 }
4594 
4595 
TranslateAbsAddress(int64_t Addr,int32_t & Sect,uint32_t & Offset)4596 int CDisassembler::TranslateAbsAddress(int64_t Addr, int32_t &Sect, uint32_t &Offset) {
4597     // Translate absolute virtual address to section and offset
4598     // Returns 1 if valid address found.
4599     int32_t Section;
4600 
4601     // Get image-relative address
4602     Addr -= ImageBase;
4603     // Fail if too big
4604     if (HighDWord(Addr)) return 0;
4605 
4606     // Search through sections
4607     for (Section = 1; (uint32_t)Section < Sections.GetNumEntries(); Section++) {
4608         uint32_t SectionAddress = Sections[Section].SectionAddress;
4609         if ((uint32_t)Addr >= SectionAddress && (uint32_t)Addr < SectionAddress + Sections[Section].TotalSize) {
4610             // Address is within this section
4611             // Return section and offset
4612             Sect = Section;
4613             Offset = (uint32_t)Addr - SectionAddress;
4614             // Return 1 to indicate success
4615             return 1;
4616         }
4617     }
4618     // Not found. Return 0
4619     return 0;
4620 }
4621 
4622 
GetDataItemSize(uint32_t Type)4623 uint32_t CDisassembler::GetDataItemSize(uint32_t Type) {
4624     // Get size in bytes of data item with specified type
4625     uint32_t Size = 1;
4626 
4627     switch (Type & 0xFF) {
4628         // Scalar types
4629     case 1:
4630         Size = 1;  break;
4631     case 2: case 0x4A: case 0x95:
4632         Size = 2;  break;
4633     case 3: case 0x43: case 0x4B:
4634         Size = 4;  break;
4635     case 4: case 0x44: case 0x4C:
4636         Size = 8;  break;
4637     case 5: case 0x45:
4638         Size = 10;  break;
4639     case 7:
4640         Size = 6;  break;
4641     case 0x50: case 51:
4642         Size = 16;  break;
4643     case 0x0B: case 0x0C:
4644         // Function pointer
4645         Size = WordSize / 8;  break;
4646     case 0x0D:
4647         // Far function pointer
4648         Size = WordSize / 8 + 2;  break;
4649     }
4650     switch (Type & 0xF00) {
4651     // Override above size if vector of known size
4652     case 0x300:
4653         Size = 8;  break;
4654     case 0x400:
4655         Size = 16;  break;
4656     case 0x500:
4657         Size = 32;  break;
4658     case 0x600:
4659         Size = 64;  break;
4660     case 0x700:
4661         Size = 128;  break;
4662     }
4663     return Size;
4664 }
4665 
4666 
GetDataElementSize(uint32_t Type)4667 uint32_t CDisassembler::GetDataElementSize(uint32_t Type) {
4668     // Get size of vector element in data item with specified type
4669     if ((Type & 0xF0) == 0x50) {
4670         // Vector of unknown elements
4671         return GetDataItemSize(Type);
4672     }
4673     else {
4674         // Vector of known elements. Return element type
4675         return GetDataItemSize(Type & 7);
4676     }
4677 }
4678 
4679 
GetSegmentRegisterFromPrefix()4680 int32_t CDisassembler::GetSegmentRegisterFromPrefix() {
4681     // Translate segment prefix to segment register
4682     switch (s.Prefixes[0]) {
4683     case 0x26:  // ES:
4684         return 0;
4685     case 0x2E:  // CS:
4686         return 1;
4687     case 0x36:  // SS:
4688         return 2;
4689     case 0x3E:  // DS:
4690         return 3;
4691     case 0x64:  // FS:
4692         return 4;
4693     case 0x65:  // GS:
4694         return 5;
4695     }
4696     return -1;  // Error: none
4697 }
4698