1 /**************************** disasm1.cpp ********************************
2 * Author: Agner Fog
3 * Date created: 2007-02-25
4 * Last modified: 2016-11-09
5 * Project: objconv
6 * Module: disasm1.cpp
7 * Description:
8 * Module for disassembler.
9 *
10 * Most of the disassembler code is in this file.
11 * Instruction tables are in opcodes.cpp.
12 * All functions relating to file output are in disasm2.cpp
13 *
14 * Copyright 2007-2016 GNU General Public License http://www.gnu.org/licenses
15 *****************************************************************************/
16 #include "stdafx.h"
17
18
19 /************************** class CSymbolTable *****************************
20
21 class CSymbolTable is a container class for a sorted list of symbols. The list
22 of symbols is kept sorted by address at all times. Named symbols from the
23 original file are added to the list with AddSymbol(). New symbols for jump
24 targets and code blocks that do not have a name are added during pass 1 by
25 NewSymbol(). AssignNames() assigns names to these unnamed symbols.
26
27 A symbol in the list can be found in three different ways: By its address,
28 by its old index, and by its new index. The new index is monotonic, so that
29 consecutive new indices correspond to consecutive addresses. Unfortunately,
30 the new index of a symbol will change whenever another symbol with a lower
31 address is added to the list. Therefore, we need to use the old index rather
32 than the new index for identifying a symbol, e.g. in the relocation table.
33 The old index is a permanent, unique identifier, but in random order.
34 The old index of a symbol is usually the same as the index used in the
35 original file and in the relocation table. New symbols added during pass 1
36 will get assigned an old index which is higher than the highest value that
37 occurred in the original file. Do not make a pointer or reference to a symbol.
38 It may become invalid when new symbols are added.
39
40 To access a symbol by its old index, you have to translate it with Old2NewIndex().
41 To access a symbol by its new index, use operator [].
42 To find a symbol by its address, use FindByAddress().
43
44 ******************************************************************************/
45
CSymbolTable()46 CSymbolTable::CSymbolTable() {
47 // Constructor
48 OldNum = 1;
49 NewNum = 0; // Initialize
50 UnnamedNum = 0; // Number of unnamed symbols
51 UnnamedSymFormat = 0; // Format string for giving names to unnamed symbols
52 UnnamedSymbolsPrefix = cmd.SubType == SUBTYPE_GASM ? "$_" : "?_";// Prefix to add to unnamed symbols
53 ImportTablePrefix = "imp_"; // Prefix for pointers in import table
54
55 // Make dummy symbol number 0
56 SASymbol sym0;
57 sym0.Reset();
58 sym0.Section = 0x80000000; // Lowest possible address
59 List.PushSort(sym0); // Put into Symbols list
60
61 SymbolNameBuffer.Push(0, 1); // Make string 0 empty
62 }
63
AddSymbol(int32_t Section,uint32_t Offset,uint32_t Size,uint32_t Type,uint32_t Scope,uint32_t OldIndex,const char * Name,const char * DLLName)64 uint32_t CSymbolTable::AddSymbol(int32_t Section, uint32_t Offset, uint32_t Size,
65 uint32_t Type, uint32_t Scope, uint32_t OldIndex, const char * Name, const char * DLLName) {
66 // Add symbol from original file to symbol table.
67 // If name is not known then set Name = 0. A name will then be assigned
68 // OldIndex is the identifier used in relocation records. If the symbol is known
69 // by address rather than by index, then set OldIndex = 0. The return value will
70 // be the assigned value of OldIndex to use in relocation records. The returned value
71 // of OldIndex will be equal to the OldIndex of any previous symbols with same address.
72
73 // Symbol record
74 SASymbol NewSym; // New symbol table entry
75
76 NewSym.Section = Section;
77 NewSym.Offset = Offset;
78 NewSym.Size = Size;
79 NewSym.Type = Type;
80 NewSym.Scope = Scope;
81 NewSym.OldIndex = OldIndex;
82
83 // Store symbol name in NameBuffer
84 if (Name && *Name) {
85 NewSym.Name = SymbolNameBuffer.GetDataSize();
86 if (DLLName) {
87 // Imported from DLL. Prefix name with "imp_"
88 SymbolNameBuffer.Push(ImportTablePrefix, (uint32_t)strlen(ImportTablePrefix));
89 }
90 // Store name
91 SymbolNameBuffer.PushString(Name);
92 }
93 else {
94 NewSym.Name = 0; // Will get a name later
95 }
96 // Store DLL name in NameBuffer
97 if (DLLName && *DLLName) {
98 NewSym.DLLName = SymbolNameBuffer.PushString(DLLName);
99 }
100 else {
101 NewSym.DLLName = 0;
102 }
103
104 if (OldIndex == 0) {
105 // Make non-unique entry
106 uint32_t NewIndex = NewSymbol(NewSym);
107 // Get old index
108 OldIndex = List[NewIndex].OldIndex;
109 }
110 else {
111 // Make unique entry
112 List.PushSort(NewSym);
113 }
114
115 // Set OldNum to 1 + maximum OldIndex
116 if (OldIndex >= OldNum) OldNum = OldIndex + 1;
117
118 return OldIndex;
119 }
120
NewSymbol(SASymbol & sym)121 uint32_t CSymbolTable::NewSymbol(SASymbol & sym) {
122 // Add symbol to symbol table.
123 // Will not add a new symbol if one already exists at this address and
124 // either the new symbol or the existing symbol has no name.
125 // The return value is the new index to a new or existing symbol.
126 // The type or scope of any existing symbol will be modified if
127 // the type or scope of the new symbol is higher.
128 // The name will be applied to the existing symbol if the existing symbol
129 // has no name.
130
131 // Find new index of any existing symbol with same address
132 int32_t SIndex = FindByAddress(sym.Section, sym.Offset);
133
134 if (SIndex > 0 && !(List[SIndex].Type & 0x80000000)
135 && !(sym.Name && List[SIndex].Name)) {
136 // Existing symbol found. Update it with type and scope
137
138 // Choose between Type of existing symbol and new Type information.
139 // The highest Type value takes precedence, except near indirect jump/call,
140 // which has highest precedence
141 if (((sym.Type & 0xFF) > (List[SIndex].Type & 0xFF)
142 && ((List[SIndex].Type+1) & 0xFE) != 0x0C) || ((sym.Type+1) & 0xFE) == 0x0C) {
143 // New symbol has higher type
144 List[SIndex].Type = sym.Type;
145 }
146 if ((sym.Scope & 0xFF) > (List[SIndex].Scope & 0xFF)) {
147 // New symbol has higher Scope
148 List[SIndex].Scope = sym.Scope;
149 }
150 if (sym.Name && !List[SIndex].Name) {
151 // New symbol has name, old symbol has no name
152 List[SIndex].Name = sym.Name;
153 }
154 }
155 else {
156 // No existing symbol. Make new one
157 // Give it an old index
158 if (sym.OldIndex == 0) sym.OldIndex = OldNum++;
159
160 SIndex = List.PushSort(sym);
161 }
162
163 // Return new index
164 return SIndex;
165 }
166
167
NewSymbol(int32_t Section,uint32_t Offset,uint32_t Scope)168 uint32_t CSymbolTable::NewSymbol(int32_t Section, uint32_t Offset, uint32_t Scope) {
169 // Add symbol to jump target or code block that doesn't have a name.
170 // Will not add a new symbol if one already exists at this address.
171 // The return value is the new index to a new or existing symbol.
172 // The symbol will get a name later.
173
174 // Symbol record
175 SASymbol NewSym; // New symbol table entry
176 NewSym.Reset();
177
178 NewSym.Section = Section;
179 NewSym.Offset = Offset;
180 NewSym.Scope = Scope;
181
182 // Store new symbol record if no symbol with this address already exists
183 return NewSymbol(NewSym);
184 }
185
AssignNames()186 void CSymbolTable::AssignNames() {
187 // Assign names to symbols that do not have a name
188
189 uint32_t i; // New symbol index
190 uint32_t NumDigits; // Number of digits in new symbol names
191 char name[64]; // Buffer for making symbol name
192 static char Format[64];
193
194 // Find necessary number of digits
195 NumDigits = 3; i = NewNum;
196 while (i >= 1000) {
197 i /= 10;
198 NumDigits++;
199 }
200
201 // Format string for symbol names
202 sprintf(Format, "%s%c0%i%c", UnnamedSymbolsPrefix, '%', NumDigits, 'i');
203 UnnamedSymFormat = Format;
204
205 // Update TranslateOldIndex
206 UpdateIndex();
207
208 // Loop through symbols
209 for (i = 1; i < List.GetNumEntries(); i++) {
210 if (List[i].Name == 0 && List[i].Scope != 0) {
211 // Symbol has no name. Make one
212 sprintf(name, UnnamedSymFormat, ++UnnamedNum);
213 // Store new name
214 List[i].Name = SymbolNameBuffer.PushString(name);
215 }
216 }
217 // Round up the value of UnnamedNum in case more names are assigned later
218 if (NewNum < 1000) {
219 UnnamedNum = (UnnamedNum + 199) / 100 * 100;
220 }
221 else {
222 UnnamedNum = (UnnamedNum + 1999) / 1000 * 1000;
223 }
224
225 #if 0 //
226 // For debugging: list all symbols
227 printf("\n\nSymbols:");
228 for (i = 0; i < List.GetNumEntries(); i++) {
229
230 // if (List[i].Offset > 0x0 && List[i].Offset < 0x8)
231
232 printf("\n%3X %3X %s Sect %i Offset %X Type %X Size %i Scope %i",
233 i, List[i].OldIndex, GetName(i),
234 List[i].Section, List[i].Offset, List[i].Type, List[i].Size, List[i].Scope);
235 }
236 #endif
237 }
238
FindByAddress(int32_t Section,uint32_t Offset,uint32_t * Last,uint32_t * NextAfter)239 uint32_t CSymbolTable::FindByAddress(int32_t Section, uint32_t Offset, uint32_t * Last, uint32_t * NextAfter) {
240 // Find symbols by address
241 // The return value will be the new index to the first symbol at the
242 // specified address. The return value will be zero if no symbol found.
243 // If more than one symbol is found with the same address then Last
244 // will receive the new index of the last symbol with this address.
245 // NextAfter will receive the new index of the first symbol with an
246 // address higher than the specified address in the same section, or
247 // zero if none.
248
249 uint32_t i1; // New index of first symbol
250 uint32_t i2; // New index of last symbol
251 uint32_t i3; // New index of first symbol after address
252
253 // Make dummy symbol record for searching
254 SASymbol sym;
255 sym.Section = Section;
256 sym.Offset = Offset;
257
258 // Search List by address
259 i1 = List.FindFirst(sym);
260
261 if (i1 == 0 || i1 >= List.GetNumEntries()) {
262 // No symbol found at this address or later. Return 0
263 if (NextAfter) *NextAfter = 0;
264 return 0;
265 }
266 if (sym < List[i1]) {
267 // No symbol found at this address, but one found at higher address
268 // Check if same section
269 if (List[i1].Section != Section) i1 = 0;
270 // Return symbol at later address
271 if (NextAfter) *NextAfter = i1;
272 return 0;
273 }
274
275 // A symbol was found at this address.
276 // Search for more symbols at same address
277 i2 = i1;
278 while (i2+1 < List.GetNumEntries() && !(sym < List[i2+1])) i2++;
279
280 // Search for first symbol after this address in same section
281 if (i2+1 < List.GetNumEntries() && List[i2+1].Section == Section) {
282 i3 = i2 + 1; // Found
283 }
284 else {
285 i3 = 0; // Not found
286 }
287
288 // Return last symbol at same address
289 if (Last) *Last = i2;
290
291 // Return first symbol at higher address
292 if (NextAfter) *NextAfter = i3;
293
294 // Return first symbol at address
295 return i1;
296 }
297
FindByAddress(int32_t Section,uint32_t Offset)298 uint32_t CSymbolTable::FindByAddress(int32_t Section, uint32_t Offset) {
299 // Find symbols by address
300 // The return value will be the new index to a first symbol at the
301 // specified address. If more than one symbol is found at the same
302 // address then the one with the highest scope (and which is not
303 // a section record) is returned;
304 uint32_t s0, s1, s2 = 0;
305 uint32_t MaxScope = 0;
306 // Find all symbols at this address
307 s0 = s1 = FindByAddress(Section, Offset, &s2);
308 // Check if any symbols found
309 if (s0 == 0) return 0;
310
311 // Loop through symbols at this address
312 for (; s1 <= s2; s1++) {
313 // Look for highest scope (and not section)
314 if ((*this)[s1].Scope >= MaxScope && !((*this)[s1].Type & 0x80000000)) {
315 s0 = s1; MaxScope = (*this)[s1].Scope;
316 }
317 }
318 // Return index to symbol with highest scope
319 return s0;
320 }
321
Old2NewIndex(uint32_t OldIndex)322 uint32_t CSymbolTable::Old2NewIndex(uint32_t OldIndex) {
323 // Translate old symbol index to new symbol index
324
325 // Check if TranslateOldIndex is up to date
326 if (NewNum != List.GetNumEntries()) {
327 // New entries have been added since last update. Update TranslateOldIndex
328 UpdateIndex();
329 }
330 // Check if valid
331 if (OldIndex >= OldNum) OldIndex = 0;
332
333 // Translate old index to new index
334 uint32_t NewIndex = TranslateOldIndex[OldIndex];
335
336 // Check limit
337 if (NewIndex >= NewNum) NewIndex = 0;
338
339 // Return new index
340 return NewIndex;
341 }
342
HasName(uint32_t symo)343 const char * CSymbolTable::HasName(uint32_t symo) {
344 // Ask if symbol has a name, input = old index, output = name or 0
345 // Returns 0 if symbol has no name yet.
346 // Use HasName rather than GetName or GetNameO during pass 1 to avoid
347 // naming symbols in random order.
348
349 // Get new index
350 uint32_t symi = Old2NewIndex(symo);
351 // Check if valid
352 if (symi == 0 || symi >= NewNum) return 0;
353 // Check if symbol has a name
354 if ((*this)[symi].Name == 0) return 0;
355 // Symbol has a name
356 return GetName(symi);
357 }
358
GetName(uint32_t symi)359 const char * CSymbolTable::GetName(uint32_t symi) {
360 // Get symbol name from new index.
361 // A name will be assigned to the symbol if it doesn't have one
362
363 // Get name index from symbol record
364 uint32_t NameIndex = (*this)[symi].Name;
365 if (NameIndex == 0) {
366 // Symbol has no name
367 // Search for other symbol with same address
368 uint32_t Alias = FindByAddress((*this)[symi].Section,(*this)[symi].Offset);
369 if ((*this)[Alias].Name) {
370 // A named symbol with same address found
371 NameIndex = (*this)[Alias].Name;
372 }
373 else {
374 // Give symbol a name
375 // This should occur only if new symbols are made during pass 2
376 char name[64]; // Buffer for making symbol name
377 sprintf(name, "Unnamed_%X_%X", (*this)[symi].Section, (*this)[symi].Offset);
378 // sprintf(name, UnnamedSymFormat, ++UnnamedNum);
379 // Store new name
380 NameIndex = (*this)[symi].Name = SymbolNameBuffer.PushString(name);
381 }
382 }
383 // Check if valid
384 if (NameIndex == 0 || NameIndex >= SymbolNameBuffer.GetDataSize()) {
385 // NameIndex is invalid
386 return "ErrorNoName";
387 }
388 // Return name
389 return (char*)SymbolNameBuffer.Buf() + NameIndex;
390 }
391
GetNameO(uint32_t symo)392 const char * CSymbolTable::GetNameO(uint32_t symo) {
393 // Get symbol name by old index.
394 // A name will be assigned to the symbol if it doesn't have one
395 return GetName(Old2NewIndex(symo));
396 }
397
GetDLLName(uint32_t symi)398 const char * CSymbolTable::GetDLLName(uint32_t symi) {
399 // Get import DLL name from old index
400 if ((*this)[symi].DLLName == 0) {
401 // No name
402 return "ErrorNoName";
403 }
404 // Get name DLL index from symbol record
405 uint32_t NameIndex = (*this)[symi].DLLName;
406 // Check if valid
407 if (NameIndex == 0 || NameIndex >= SymbolNameBuffer.GetDataSize()) {
408 // NameIndex is invalid
409 return "ErrorNoName";
410 }
411 // Return name
412 return (char*)SymbolNameBuffer.Buf() + NameIndex;
413 }
414
AssignName(uint32_t symi,const char * name)415 void CSymbolTable::AssignName(uint32_t symi, const char *name) {
416 // Give symbol a specific name
417 (*this)[symi].Name = SymbolNameBuffer.PushString(name);
418 }
419
UpdateIndex()420 void CSymbolTable::UpdateIndex() {
421 // Update TranslateOldIndex
422 uint32_t i; // New index
423
424 // Allocate array with sufficient size
425 TranslateOldIndex.SetNum(OldNum);
426
427 // Initialize to zeroes
428 memset(&TranslateOldIndex[0], 0, TranslateOldIndex.GetNumEntries() * sizeof(uint32_t));
429
430 for (i = 0; i < List.GetNumEntries(); i++) {
431 if (List[i].OldIndex < OldNum) {
432 TranslateOldIndex[List[i].OldIndex] = i;
433 }
434 else {
435 // symbol index out of range
436 err.submit(2031); // Report error
437 List[i].OldIndex = 0; // Reset index that was out of range
438 }
439 }
440 NewNum = List.GetNumEntries();
441 }
442
443
444 /************************** class CDisassembler *****************************
445 Members of class CDisassembler
446 Members that relate to file output are in disasm2.cpp
447 ******************************************************************************/
448
CDisassembler()449 CDisassembler::CDisassembler() {
450 // Constructor
451 Sections.PushZero(); // Make first section entry zero
452 Relocations.PushZero(); // Make first relocation entry zero
453 NameBuffer.Push(0, 1); // Make first string entry zero
454 FunctionList.PushZero(); // Make first function entry zero
455 // Initialize variables
456 Buffer = 0;
457 InstructionSetMax = InstructionSetAMDMAX = 0;
458 InstructionSetOR = FlagPrevious = NamesChanged = 0;
459 WordSize = MasmOptions = RelocationsInSource = ExeType = 0;
460 ImageBase = 0;
461 Syntax = cmd.SubType; // Assembly syntax dialect
462 if (Syntax == SUBTYPE_GASM) {
463 CommentSeparator = "# "; // Symbol for indicating comment
464 HereOperator = "."; // Symbol for current address
465 }
466 else {
467 CommentSeparator = "; "; // Symbol for indicating comment
468 HereOperator = "$"; // Symbol for current address
469 }
470 };
471
Init(uint32_t ExeType,int64_t ImageBase)472 void CDisassembler::Init(uint32_t ExeType, int64_t ImageBase) {
473 // Define file type and imagebase if executable file
474 this->ExeType = ExeType;
475 this->ImageBase = ImageBase;
476 }
477
AddSection(uint8_t * Buffer,uint32_t InitSize,uint32_t TotalSize,uint32_t SectionAddress,uint32_t Type,uint32_t Align,uint32_t WordSize,const char * Name,uint32_t NameLength)478 void CDisassembler::AddSection(
479 uint8_t * Buffer, // Buffer containing raw data
480 uint32_t InitSize, // Size of initialized data in section
481 uint32_t TotalSize, // Size of initialized and uninitialized data in section
482 uint32_t SectionAddress, // Start address to be added to offset in listing
483 uint32_t Type, // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data
484 uint32_t Align, // Alignment = 1 << Align
485 uint32_t WordSize, // Segment word size: 16, 32 or 64
486 const char * Name, // Name of section
487 uint32_t NameLength) { // Length of name if not zero terminated
488
489 // Check values
490 if (Buffer == 0) Type = 3;
491 if (Name == 0) Name = "?";
492 if (NameLength == 0) NameLength = (uint32_t)strlen(Name);
493 if (TotalSize < InitSize) TotalSize = InitSize;
494
495 // Define section to be disassembled
496 SASection SecRec; // New section record
497
498 SecRec.Start = Buffer;
499 SecRec.SectionAddress = SectionAddress;
500 SecRec.InitSize = InitSize;
501 SecRec.TotalSize = TotalSize;
502 SecRec.Type = Type;
503 SecRec.Align = Align;
504 SecRec.WordSize = WordSize;
505 // Save name in NameBuffer
506 SecRec.Name = NameBuffer.Push(Name, NameLength);
507 // Terminate with zero
508 NameBuffer.Push(0, 1);
509 // Default group is 'flat' except in 16 bit mode
510 if (WordSize == 16 || (MasmOptions & 0x100)) {
511 // 16-bit or mixed segment size. Group is unknown
512 SecRec.Group = 0;
513 }
514 else {
515 // Pure 32 or 64 bit mode. Group = flat
516 SecRec.Group = ASM_SEGMENT_FLAT;
517 }
518
519 // Save section record
520 Sections.Push(SecRec);
521
522 // Remember WordSize
523 switch (WordSize) {
524 case 16:
525 MasmOptions |= 0x100; break;
526 case 32:
527 MasmOptions |= 0x200; break;
528 case 64:
529 MasmOptions |= 0x400; break;
530 }
531 }
532
AddSectionGroup(const char * Name,int32_t MemberSegment)533 int32_t CDisassembler::AddSectionGroup(const char * Name, int32_t MemberSegment) {
534 // Define section group (from OMF file).
535 // Must be called after all segments have been defined.
536 // To define a group with multiple members, you must call AddSectionGroup
537 // multiple times. You must finish adding members to one group before
538 // starting the definition of another group.
539 // You can define a group without defining its members by calling
540 // AddSectionGroup with MemberSegment = 0.
541
542 // Check values
543 if (Name == 0) Name = "?";
544
545 // Find preceding segment or group definition
546 int32_t LastIndex = Sections.GetNumEntries() - 1;
547 // Index of group record
548 int32_t GroupIndex = LastIndex;
549
550 const char * LastName = "?";
551 if (Sections[LastIndex].Name < NameBuffer.GetDataSize()) {
552 // Last name valid
553 LastName = (char*)NameBuffer.Buf() + Sections[LastIndex].Name;
554 }
555 // Check if group name already defined
556 if (strcmp(Name, LastName) != 0) {
557 // Not define. Make group record in Sections list
558 SASection SecRec; // New section record
559 memset(&SecRec, 0, sizeof(SecRec)); // Initialize
560
561 // Set type = group
562 SecRec.Type = 0x800;
563
564 // Save name in NameBuffer
565 SecRec.Name = NameBuffer.PushString(Name);
566
567 // Save group index = my own index
568 SecRec.Group = ++GroupIndex;
569
570 // Save section record
571 Sections.Push(SecRec);
572 }
573 // Find MemberSegment record
574 if (MemberSegment && MemberSegment < GroupIndex) {
575 // Register group index in segment record
576 Sections[MemberSegment].Group = GroupIndex;
577 }
578 // Return value is group index
579 return GroupIndex;
580 }
581
AddSymbol(int32_t Section,uint32_t Offset,uint32_t Size,uint32_t Type,uint32_t Scope,uint32_t OldIndex,const char * Name,const char * DLLName)582 uint32_t CDisassembler::AddSymbol(
583 int32_t Section, // Section number (1-based). ASM_SEGMENT_UNKNOWN = external, ASM_SEGMENT_ABSOLUTE = absolute, ASM_SEGMENT_IMGREL = image-relative
584 uint32_t Offset, // Offset into section. (Value for absolute symbol)
585 uint32_t Size, // Number of bytes used by symbol or function. 0 = unknown
586 uint32_t Type, // Symbol type. Use values listed above for SOpcodeDef operands. 0 = unknown type
587 uint32_t Scope, // 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external
588 uint32_t OldIndex, // Unique identifier used in relocation entries. Value must be > 0 and limited because an array is created with this as index.
589 const char * Name, // Name of symbol. Zero-terminated
590 const char * DLLName) { // Name of DLL if imported dynamically
591
592 // Add symbol form original file.
593 // Multiple symbols at same address are allowed.
594 // If section is not known then set Section = ASM_SEGMENT_IMGREL and Offset = image-relative address
595 // If name is not known then set Name = 0. A name will then be assigned
596 // OldIndex is the identifier used in relocation records. It must be nonzero.
597 // If the original file uses 0-based symbol indices then add 1 to OldIndex
598 // and remember to also add 1 when referring to the symbol in a relocation record.
599 // If the symbol is known by address rather than by index, then set OldIndex = 0.
600 // The return value will be the assigned value of OldIndex to use in relocation records.
601 // The returned value of OldIndex will be equal to the OldIndex of any previous symbols
602 // with same address. All symbols that have an identifier (OldIndex) must be defined
603 // before any symbol identified by address only in order to avoid using the same OldIndex.
604
605 // Check if image-relative
606 if (Section == ASM_SEGMENT_IMGREL) {
607 // Translate absolute virtual address to section and offset
608 TranslateAbsAddress(ImageBase + (int32_t)Offset, Section, Offset);
609 }
610
611 // Define symbol for disassembler
612 return Symbols.AddSymbol(Section, Offset, Size, Type, Scope, OldIndex, Name, DLLName);
613 }
614
AddRelocation(int32_t Section,uint32_t Offset,int32_t Addend,uint32_t Type,uint32_t Size,uint32_t TargetIndex,uint32_t ReferenceIndex)615 void CDisassembler::AddRelocation(
616 int32_t Section, // Section of relocation source
617 uint32_t Offset, // Offset of relocation source into section
618 int32_t Addend, // Addend to add to target address,
619 // including distance from source to instruction pointer in self-relative addresses,
620 // not including inline addend.
621 uint32_t Type, // Relocation type. See SARelocation in disasm.h for definition of values
622 uint32_t Size, // 1 = byte, 2 = word, 4 = dword, 8 = qword
623 uint32_t TargetIndex, // Symbol index of target
624 uint32_t ReferenceIndex) { // Symbol index of reference point if Type = 8 or 0x10
625
626 // Check if image-relative
627 if (Section == ASM_SEGMENT_IMGREL) {
628 // Translate absolute virtual address to section and offset
629 if (!TranslateAbsAddress(ImageBase + (int32_t)Offset, Section, Offset)) {
630 err.submit(1304);
631 }
632 }
633
634 if (Type != 0x41) {
635 // Define relocation or cross-reference for disassembler
636 SARelocation RelRec; // New relocation record
637
638 RelRec.Section = Section;
639 RelRec.Offset = Offset;
640 RelRec.Type = Type;
641 RelRec.Size = Size;
642 RelRec.Addend = Addend;
643 RelRec.TargetOldIndex = TargetIndex;
644 RelRec.RefOldIndex = ReferenceIndex;
645
646 // Save relocation record
647 Relocations.PushSort(RelRec);
648 }
649 else {
650 // Make entry in procedure linkage table
651 uint32_t targetsym = Symbols.Old2NewIndex(TargetIndex);
652 if (targetsym && Symbols[targetsym].DLLName) {
653 // Put label on entry in procedure linkage table (import table)
654 // Copy Name and DLLName from target symbol
655 SASymbol ImportSym = Symbols[targetsym];
656 ImportSym.Section = Section;
657 ImportSym.Offset = Offset;
658 ImportSym.Type = 0x0C;
659 ImportSym.OldIndex = 0;
660 ImportSym.Scope = 2;
661 Symbols.NewSymbol(ImportSym);
662 }
663 }
664 }
665
void CDisassembler::Go() {
    // Do the disassembly.
    // Runs the whole sequence: initial checks, pass 1 (twice, optionally
    // four times), naming of unnamed symbols, output file header, pass 2,
    // final checks, output file end. The member Pass tells the called
    // functions which pass is running.

    // Check for illegal entries in relocations table
    InitialErrorCheck();

    // Find missing relocation target addresses
    FixRelocationTargetAddresses();

    // Pass 1: Find symbols types and unnamed symbols.
    // Pass1() is run twice, with Pass = 1 and Pass = 2
    Pass = 1;
    Pass1();
    Pass = 2;
    Pass1();

    // Bit 0x100 of Pass is not set in this function; presumably it is set
    // by code called from Pass1() to request a repetition - TODO confirm
    if (Pass & 0x100) {
        // Repetition of pass 1 requested
        Pass = 3;
        Pass1();
        Pass = 4;
        Pass1();
    }

    // Put names on unnamed symbols
    Symbols.AssignNames();

    // Fix invalid characters in symbol and section names
    CheckNamesValid();

#if 0 //
    // Show function list. For debugging only
    printf("\n\nFunctionList:");
    for (uint32_t i = 0; i < FunctionList.GetNumEntries(); i++) {
        printf("\nsect %i, start %X, end %X, scope %i, name %s",
            FunctionList[i].Section, FunctionList[i].Start, FunctionList[i].End,
            FunctionList[i].Scope, Symbols.GetNameO(FunctionList[i].OldSymbolIndex));
    }
#endif
#if 0
    // For debugging: list all relocations
    printf("\n\nRelocations:");
    for (uint32_t i = 0; i < Relocations.GetNumEntries(); i++) {
        printf("\nsect %i, os %X, type %X, size %i, add %X, target %X",
            Relocations[i].Section, Relocations[i].Offset, Relocations[i].Type,
            Relocations[i].Size, Relocations[i].Addend, Relocations[i].TargetOldIndex);
    }
#endif
#if 0
    // For debugging: list all sections
    printf("\n\nSections:");
    for (uint32_t s = 1; s < Sections.GetNumEntries(); s++) {
        printf("\n%2i, %s", s, NameBuffer.Buf() + Sections[s].Name);
    }
#endif

    // Begin writing output file
    WriteFileBegin();

    // Pass 2: Write all sections to output file.
    // Pass = 0x10 identifies pass 2
    Pass = 0x10;
    Pass2();

    // Check for illegal entries in symbol table and relocations table
    FinalErrorCheck();

    // Finish writing output file
    WriteFileEnd();
};
734
Pass1()735 void CDisassembler::Pass1() {
736
737 /* Pass 1: does the following jobs:
738 --------------------------------
739
740 * Scans all code sections, instruction by instruction. Checks code syntax.
741
742 * Tries to identify where each function begins and ends.
743
744 * Follows all references to data in order to determine data type for
745 each data symbol.
746
747 * Assigns symbol table entries for all jump and call targets that do not
748 allready have a name.
749
750 * Follows all jump instructions to identify code blocks that are connected.
751 Code blocks in same section that are connected through jumps (not calls)
752 are joined together into the same function.
753
754 * Identifies and analyzes tables of jump addresses and call addresses,
755 e.g. switch/case tables and virtual function tables.
756
757 * Tries to identify any data in the code section. If erroneous code or
758 sequences of zeroes are found then the nearest preceding label is marked
759 as dubious and the analysis of code is skipped until the next code label.
760 Pass 1 will be repeated in this case in order to follow backwards jumps
761 from subsequent code. Dubious code will be shown as both code and data
762 in the output of pass 2.
763 */
764
765 // Loop through sections, pass 1
766 for (Section = 1; Section < Sections.GetNumEntries(); Section++) {
767
768 // Get section type
769 SectionType = Sections[Section].Type;
770 if (SectionType & 0x800) continue; // This is a group
771
772 // Code or data
773 CodeMode = (SectionType & 1) ? 1 : 4;
774 LabelBegin = FlagPrevious = CountErrors = 0;
775
776 if ((Sections[Section].Type & 0xFF) == 1) {
777 // This is a code section
778
779 // Initialize code parser
780 Buffer = Sections[Section].Start;
781 SectionEnd = FunctionEnd = LabelInaccessible = Sections[Section].TotalSize;
782 WordSize = Sections[Section].WordSize;
783 SectionAddress = Sections[Section].SectionAddress;
784 if (Buffer == 0) continue;
785
786 IBegin = IEnd = LabelEnd = 0;
787 IFunction = 0;
788
789 // Loop through instructions
790 while (NextInstruction1()) {
791
792 // check if function beings here
793 CheckForFunctionBegin();
794
795 // Find any label here
796 FindLabels();
797
798 // Check if code
799 if (CodeMode < 4) {
800 // This is code
801
802 // Parse instruction
803 ParseInstruction();
804 }
805 else {
806 // This is data. Skip to next label
807 IEnd = LabelEnd;
808 }
809 // check if function ends here
810 CheckForFunctionEnd();
811 }
812 }
813 else {
814 // This is a data section
815 // Make a single entry in FunctionList covering the whole section
816 SFunctionRecord fun = {(int)Section, 0, Sections[Section].TotalSize, 0, 0};
817 FunctionList.PushUnique(fun);
818 }
819 }
820 }
821
FindLabels()822 void CDisassembler::FindLabels() {
823 // Find any labels at current position and next during pass 1
824 uint32_t sym1, sym2 = 0, sym3 = 0; // Symbol indices
825
826 // Search for labels from IBegin
827 sym1 = Symbols.FindByAddress(Section, IBegin, &sym2, &sym3);
828
829 if (sym1 && sym2) {
830 // Set LabelBegin to address of last label at current address
831 LabelBegin = Symbols[sym2].Offset;
832 CountErrors = 0;
833
834 // Get code mode from label
835 if ((Symbols[sym2].Type & 0xF0) == 0x80) {
836 // This is known to be code
837 CodeMode = 1;
838 }
839 else if ((Symbols[sym2].Type & 0xFF) == 0) {
840 // Type is unknown
841 if ((Symbols[sym2].Scope & 4) && SectionType == 1) {
842 // Public label in code segment. Consider this code
843 CodeMode = 1;
844 }
845 // Otherwise: Assume same type as previous
846 }
847 else {
848 // This is known to be data
849 CodeMode = 4;
850 }
851 // Reset tracer
852 t.Reset();
853 }
854 if (sym3) {
855 // Set LabelEnd to address of next symbol
856 LabelEnd = Symbols[sym3].Offset;
857 if (LabelEnd > SectionEnd) LabelEnd = SectionEnd;
858 }
859 else {
860 // No next label
861 LabelEnd = SectionEnd;
862 }
863 }
864
CheckForMisplacedLabel()865 void CDisassembler::CheckForMisplacedLabel() {
866 // Remove any label placed inside function
867 // This is called if there appears to be a function end inside an instruction
868 if (FunctionEnd && FunctionEnd < SectionEnd) {
869 FunctionEnd = IEnd;
870 FunctionList[IFunction].Scope |= 0x10000;
871 }
872 else {
873 s.Errors |= 0x10;
874 }
875 }
876
NextLabel()877 int CDisassembler::NextLabel() {
878 // Loop through labels from IEnd. Pass 2
879 uint32_t sym, sym1, sym2 = 0, sym3 = 0; // Symbol indices
880
881 // Make ready for next instruction
882 IBegin = IEnd;
883
884 // Reset tracer
885 t.Reset();
886
887 // Check if end of function/section
888 if (IEnd >= FunctionEnd || IEnd >= SectionEnd) {
889 // No more labels in this function or section
890 return 0;
891 }
892
893 // Search for labels from IEnd
894 sym1 = Symbols.FindByAddress(Section, IEnd, &sym2, &sym3);
895
896 if (sym1) {
897 // Symbol found
898 for (sym = sym1; sym <= sym2; sym++) {
899 // Remember symbol address
900 LabelBegin = Symbols[sym].Offset;
901 CountErrors = 0;
902
903 if ((SectionType & 0xFF) == 1) {
904 // Code section. Get CodeMode
905 if ((Symbols[sym].Type >> 24) & 0xF) {
906 // Get CodeMode from last label. 1 = code, 2 = dubiuos, 4 = data
907 CodeMode = (Symbols[sym].Type >> 24) & 0xF;
908 }
909 else if (Symbols[sym].Type & 0x80) {
910 // Type defined as jump/call. This is known to be code
911 CodeMode = 1;
912 }
913 else if (Symbols[sym].Type == 0) {
914 // Type is unknown. (Assume same type as previous) changed to:
915 // Type is unknown. Assume code
916 CodeMode = 1;
917 }
918 else {
919 // This has been accessed as data
920 CodeMode = 4;
921 }
922 }
923 else {
924 // This is a data segment
925 CodeMode = 4;
926 }
927 // Get symbol type and size, except for section type
928 if (!(Symbols[sym].Type & 0x80000000)) {
929 DataType = Symbols[sym].Type;
930 DataSize = GetDataItemSize(DataType);
931 if (((DataType+1) & 0xFE) == 0x0C && Symbols[sym].Size) {
932 // Jump table can have different sizes for direct or image relative
933 DataSize = Symbols[sym].Size;
934 }
935 }
936 }
937 }
938 if (sym3) {
939 // Next label found
940 LabelEnd = Symbols[sym3].Offset;
941 return 1;
942 }
943 // No new label found. Continue to FunctionEnd
944 LabelEnd = FunctionEnd;
945 return 1;
946 }
947
NextFunction2()948 int CDisassembler::NextFunction2() {
949 // Loop through function blocks in pass 2. Return 0 if finished
950
951 SFunctionRecord Fun; // Dummy function record for search and compare
952
953 if (IFunction == 0) {
954 // Begin of section. Find first function block
955 Fun.Section = Section;
956 Fun.Start = IBegin;
957 IFunction = FunctionList.FindFirst(Fun);
958 }
959 else {
960 // Try next function block
961 IFunction++;
962 }
963 // Check if IFunction is valid
964 if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) {
965 // Not valid
966 IFunction = 0;
967 return 0;
968 }
969 // Check if IFunction is within current section
970 Fun.Section = Section;
971 Fun.Start = SectionEnd;
972 if (Fun < FunctionList[IFunction]) {
973 // Past end of current section
974 IFunction = 0;
975 return 0;
976 }
977 // IFunction is within current section
978 // End of function
979 FunctionEnd = FunctionList[IFunction].End;
980
981 // Check if function has a defined size
982 if (FunctionEnd <= FunctionList[IFunction].Start) {
983 // Size unknown. Continue until begin of next function
984 if (IFunction+1 < FunctionList.GetNumEntries()
985 && FunctionList[IFunction+1] < Fun
986 && FunctionList[IFunction] < FunctionList[IFunction+1]) {
987 FunctionEnd = FunctionList[IFunction+1].Start;
988 }
989 else {
990 // No next function. Continue until end of section
991 FunctionEnd = SectionEnd;
992 }
993 }
994
995 // return IFunction for success
996 return 1;
997 }
998
CheckForFunctionBegin()999 void CDisassembler::CheckForFunctionBegin() {
1000 // Check if function begins at current position
1001 uint32_t sym1, sym2 = 0, sym3 = 0; // Symbol indices
1002 SFunctionRecord fun; // New function record
1003 IBegin = IEnd;
1004
1005 if (IFunction == 0) {
1006 // No function defined. Begin new function here
1007
1008 // Search for nearest labels
1009 sym1 = Symbols.FindByAddress(Section, IEnd, &sym2, &sym3);
1010
1011 if (sym1 == 0) {
1012 // There is no label here. Make one with Scope = 0
1013 sym1 = Symbols.NewSymbol(Section, IEnd, 0);
1014 // Update labels
1015 LabelBegin = LabelEnd = CountErrors = 0;
1016 FindLabels();
1017 }
1018 // Check that sym1 is valid
1019 if (sym1 == 0 || sym1 >= Symbols.GetNumEntries()) {
1020 err.submit(9000); return;
1021 }
1022
1023 // Make function record for FunctionList
1024 fun.Section = Section;
1025 fun.Start = IBegin;
1026 fun.End = IBegin;
1027 fun.Scope = Symbols[sym1].Scope;
1028 fun.OldSymbolIndex = Symbols[sym1].OldIndex;
1029
1030 // Add to function list
1031 IFunction = FunctionList.PushUnique(fun);
1032
1033 // End of function not known yet
1034 FunctionEnd = SectionEnd; LabelEnd = 0;
1035 }
1036 }
1037
CheckForFunctionEnd()1038 void CDisassembler::CheckForFunctionEnd() {
1039 // Check if function ends at current position
1040 if (IFunction >= FunctionList.GetNumEntries()) {
1041 // Should not occur
1042 err.submit(9000); IFunction = 0; return;
1043 }
1044
1045 // Function ends if section ends here
1046 if (IEnd >= SectionEnd) {
1047 // Current function must end because section ends here
1048 FunctionList[IFunction].End = SectionEnd;
1049 FunctionList[IFunction].Scope &= ~0x10000;
1050 IFunction = 0;
1051
1052 // Check if return instruction
1053 if (s.OpcodeDef && !(s.OpcodeDef->Options & 0x10) && (Pass & 0x10)) {
1054 // No return or unconditional jump. Write error message
1055 s.Errors |= 0x10000;
1056 WriteErrorsAndWarnings();
1057 }
1058 return;
1059 }
1060
1061 // Function ends after ret or unconditional jump and preceding code had no
1062 // jumps beyond this position:
1063 if (s.OpcodeDef && s.OpcodeDef->Options & 0x10) {
1064 // A return or unconditional jump instruction was found.
1065 FlagPrevious |= 2;
1066
1067 // Mark this position as inaccessible if there is no reference to this place
1068 Symbols.NewSymbol(Section, IEnd, 0);
1069 // Update labels
1070 LabelBegin = LabelEnd = CountErrors = 0;
1071 FindLabels();
1072
1073 if (IEnd >= FunctionList[IFunction].End) {
1074 // Indicate current function ends here
1075 FunctionList[IFunction].End = IEnd;
1076 FunctionList[IFunction].Scope &= ~0x10000;
1077 IFunction = 0;
1078 return;
1079 }
1080 }
1081
1082 // Function ends at next label if preceding label is inaccessible and later end not known
1083 if (IFunction && FunctionList[IFunction].Scope == 0 && IEnd >= FunctionList[IFunction].End) {
1084 if (Symbols.FindByAddress(Section, IEnd)) {
1085 // Previous label was inaccessible. There is a new label here. Begin new function here
1086 IFunction = 0;
1087 return;
1088 }
1089 }
1090
1091 // Function does not end here
1092 return;
1093 }
1094
1095
CheckRelocationTarget(uint32_t IRel,uint32_t TargetType,uint32_t TargetSize)1096 void CDisassembler::CheckRelocationTarget(uint32_t IRel, uint32_t TargetType, uint32_t TargetSize) {
1097 // Update relocation record and its target.
1098 // This function updates the symbol type and size of a relocation target.
1099 // If the relocation target is a section:offset address then a new
1100 // symbol record is made
1101 uint32_t SymOldI; // Old index of target symbol
1102 uint32_t SymNewI; // New index of target symbol
1103 int32_t TargetSection; // Section of target symbol
1104 uint32_t TargetOffset; // Offset of target symbol
1105
1106 // Check if relocation valid
1107 if (!IRel || IRel >= Relocations.GetNumEntries() || !Relocations[IRel].TargetOldIndex
1108 || Relocations[IRel].Section <= 0 || uint32_t(Relocations[IRel].Section) >= Sections.GetNumEntries()) {
1109 return;
1110 }
1111
1112 // Find target symbol
1113 SymOldI = Relocations[IRel].TargetOldIndex;
1114
1115 // Look up in symbol table
1116 SymNewI = Symbols.Old2NewIndex(SymOldI);
1117
1118 // Check if valid
1119 if (!Symbols[SymNewI].OldIndex) return;
1120
1121 if (Symbols[SymNewI].Type & 0x80000000) {
1122 // Symbol is a section record. Relocation refers to a section-relative address
1123 // Make a new symbol for this data item. The symbol will get a name later
1124
1125 // Get address of new symbol
1126 TargetSection = Symbols[SymNewI].Section;
1127 TargetOffset = Symbols[SymNewI].Offset + Relocations[IRel].Addend;
1128
1129 // Pointer to relocation source address
1130 uint8_t * RelSource = Sections[Relocations[IRel].Section].Start + Relocations[IRel].Offset;
1131
1132 // Inline Addend;
1133 int32_t InlineA = 0;
1134 switch (Relocations[IRel].Size) {
1135 case 1:
1136 InlineA = *(int8_t*)RelSource; break;
1137 case 2:
1138 InlineA = *(int16_t*)RelSource; break;
1139 case 4: case 8:
1140 InlineA = *(int32_t*)RelSource; break;
1141 }
1142 // Add inline addend to target address
1143 TargetOffset += InlineA;
1144
1145 if (Relocations[IRel].Type & 2) {
1146 // Address is self-relative
1147 if ((s.AddressFieldSize && (s.MFlags & 0x100)) || s.ImmediateFieldSize) {
1148 // Relative jump or rip-relative address
1149 TargetOffset += IEnd - s.AddressField;
1150 InlineA += IEnd - s.AddressField;
1151 }
1152 else {
1153 // Self-relative address in data segment or unknown
1154 // This may occur in position-independent code
1155 // We can't calculate the intended target
1156 // Make sure there is a symbol, but don't change existing symbol if there is one
1157 SymNewI = Symbols.NewSymbol(TargetSection, 0, 2);
1158 return;
1159 }
1160 }
1161 // Make new symbol in symbol table if none exists
1162 SymNewI = Symbols.NewSymbol(TargetSection, TargetOffset, 2);
1163
1164 if (SymNewI) {
1165 // Get old index
1166 SymOldI = Symbols[SymNewI].OldIndex;
1167
1168 // Change relocation record to point to new symbol
1169 Relocations[IRel].TargetOldIndex = SymOldI;
1170
1171 // Compensate for inline addend and rip-relative address
1172 Relocations[IRel].Addend = -InlineA;
1173 }
1174 }
1175
1176 // Check if symbol has a scope assigned
1177 if (Symbols[SymNewI].Scope == 0) Symbols[SymNewI].Scope = 2;
1178
1179 // Choose between Symbols[SymNewI].Type and TargetType the one that has the highest priority
1180 if ((TargetType & 0xFF) > (Symbols[SymNewI].Type & 0xFF)
1181 || (((TargetType+1) & 0xFE) == 0x0C && (Symbols[SymNewI].Type & 0xFF) > 0x0C)) {
1182
1183 // No type assigned yet, or new type overrides old type
1184 Symbols[SymNewI].Type = TargetType;
1185
1186 // Choose biggest size. Size for code pointer takes precedence
1187 if (TargetSize > Symbols[SymNewI].Size || ((TargetType+1) & 0xFE) == 0x0C) {
1188 Symbols[SymNewI].Size = TargetSize;
1189 }
1190 }
1191 }
1192
1193
CheckJumpTarget(uint32_t symi)1194 void CDisassembler::CheckJumpTarget(uint32_t symi) {
1195 // Extend range of current function to jump target, if needed
1196
1197 // Check if current section is valid
1198 if (Section == 0 || Section >= Sections.GetNumEntries()) return;
1199
1200 // Check if current function is valid
1201 if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) return;
1202
1203 // Check if target is in same section
1204 if (Symbols[symi].Section != (int32_t)Section) return;
1205
1206 // Check if target extends current function
1207 if (Symbols[symi].Offset > FunctionList[IFunction].End && Symbols[symi].Offset <= Sections[Section].InitSize) {
1208 // Target is after tentative end of current function but within section
1209
1210 // Check if it is a known function
1211 if ((Symbols[symi].Type & 0xFF) == 0x83 || (Symbols[symi].Type & 0xFF) == 0x85
1212 || (Symbols[symi].Scope & 0x1C)) {
1213 // Target is known as public or a function. No need to extend current function
1214 return;
1215 }
1216 // Extend current function forward to include target offset
1217 FunctionList[IFunction].End = Symbols[symi].Offset;
1218 FunctionList[IFunction].Scope |= 0x10000;
1219 }
1220 else if (Symbols[symi].Offset < FunctionList[IFunction].Start) {
1221 // Target is before tentative begin of current function but within section
1222
1223 // Check if target is already in function table
1224 SFunctionRecord fun;
1225 fun.Section = Symbols[symi].Section;
1226 fun.Start = Symbols[symi].Offset;
1227 uint32_t IFun = FunctionList.Exists(fun);
1228 if (IFun > 0 && IFun < FunctionList.GetNumEntries()) {
1229 // Target is the beginning of a known function. No need to extend current function
1230 return;
1231 }
1232
1233 /* Removed: This is a mess. Looks better when functions are separate
1234 // Target points inside a previously defined function. Join the two functions into one
1235 IFun = FunctionList.FindFirst(fun) - 1;
1236 if (IFun > 0 && IFun < FunctionList.GetNumEntries() && FunctionList[IFun].Section == Section) {
1237
1238 // Get maximum scope of the two functions
1239 if (FunctionList[IFun].Scope < FunctionList[IFunction].Scope) {
1240 FunctionList[IFun].Scope = FunctionList[IFunction].Scope;
1241 }
1242
1243 // Get maximum end of the two functions
1244 if (FunctionList[IFun].End < FunctionList[IFunction].End) {
1245 FunctionList[IFun].End = FunctionList[IFunction].End;
1246 }
1247
1248 // Remove entry IFunction from FunctionList
1249 FunctionList.Remove(IFunction);
1250
1251 // Set current function to IFun
1252 IFunction = IFun;
1253 }
1254 */
1255 }
1256 }
1257
1258
Pass2()1259 void CDisassembler::Pass2() {
1260
1261 /* Pass 2: does the following jobs:
1262 --------------------------------
1263
1264 * Scans through all sections, code and data.
1265
1266 * Code is analyzed, instruction by instruction. Checks code syntax.
1267
1268 * Outputs warnings for suboptimal instruction codes and error messages
1269 for erroneous code and erroneous relocations.
1270
1271 * Outputs disassembly of all instructions, operands and relocations,
1272 followed by the binary code listing as comment.
1273
1274 * Outputs disassembly of all data, followed by alternative representations
1275 as comment.
1276
1277 * Outputs dubious code as both code and data in order to allow a re-assembly
1278 to produce identical code.
1279 */
1280
1281 // Loop through sections, pass 2
1282 for (Section = 1; Section < Sections.GetNumEntries(); Section++) {
1283
1284 // Get section type
1285 SectionType = Sections[Section].Type;
1286 if (SectionType & 0x800) continue; // This is a group
1287
1288 if (((SectionType & 0xFF) == 0x10) && cmd.DebugInfo == CMDL_DEBUG_STRIP) {
1289 // Skip debug section
1290 cmd.CountDebugRemoved();
1291 continue;
1292 }
1293 if (((SectionType & 0xFF) == 0x11) && cmd.ExeptionInfo == CMDL_EXCEPTION_STRIP) {
1294 // Skip exception section
1295 cmd.CountExceptionRemoved();
1296 continue;
1297 }
1298 // Is this code or data?
1299 CodeMode = ((SectionType & 0xFF) == 1) ? 1 : 4;
1300
1301 // Initialize
1302 LabelBegin = FlagPrevious = CountErrors = 0;
1303 Buffer = Sections[Section].Start;
1304 SectionEnd = Sections[Section].TotalSize;
1305 LabelInaccessible = Sections[Section].InitSize;
1306 WordSize = Sections[Section].WordSize;
1307 SectionAddress = Sections[Section].SectionAddress;
1308
1309 // Write segment directive
1310 WriteSegmentBegin();
1311
1312 IBegin = IEnd = LabelEnd = IFunction = DataType = DataSize = 0;
1313
1314 // Loop through function blocks in this section
1315 while (NextFunction2()) {
1316
1317 // Check CodeMode from label
1318 NextLabel();
1319
1320 // Write begin function
1321 if (CodeMode & 3) WriteFunctionBegin();
1322
1323 // Loop through labels
1324 while (NextLabel()) {
1325
1326 // Loop through code
1327 while (NextInstruction2()) {
1328
1329 if (CodeMode & 3) {
1330 // Interpret this as code
1331
1332 // Write label if any
1333 CheckLabel();
1334
1335 // Parse instruction
1336 ParseInstruction();
1337
1338 // Check for filling space
1339 if (((s.Warnings1 & 0x10000000) || s.Warnings1 == 0x1000000) && WriteFillers()) {
1340 // Code is inaccessible fillers. Has been written by CheckForFillers()
1341 continue;
1342 }
1343
1344 // Write any error and warning messages to OutFile
1345 WriteErrorsAndWarnings();
1346
1347 // Write instruction to OutFile
1348 WriteInstruction();
1349
1350 // Write hex code as comment after instruction
1351 WriteCodeComment();
1352 }
1353 if (CodeMode & 6) {
1354
1355 // Interpret this as data
1356 WriteDataItems();
1357 }
1358 if (IEnd <= IBegin) {
1359
1360 // Prevent infinite loop
1361 IEnd++;
1362 break;
1363 }
1364 }
1365 }
1366 // Write end of function, if any
1367 if (CodeMode & 3) WriteFunctionEnd(); // End function
1368 }
1369 // Write end of segment
1370 WriteSegmentEnd();
1371 }
1372 }
1373
1374 /******************** Explanation of tracer: ***************************
1375
1376 This is a machine which can trace the contents of each register in certain
1377 situations. It is currently used for recognizing certain instruction patterns
1378 that are used by various 64 bit compilers for accessing jump tables and
1379 virtual function tables. The trace machine can be extended for other purposes.
1380
1381 A switch/case statement is typically implemented as follows by the 64 bit MS
1382 C++ compiler:
1383
1384 .code
1385 lea rbx, [__ImageBase]
1386 mov eax, [SwitchIndex]
1387 add eax, - LowerLimit
1388 cmp eax, Range
1389 ja LabelDefault
1390 cdqe
1391 mov ecx, [imagerel(SwitchTable) + rbx + rax*4]
1392 add rcx, rbx
1393 jmp rcx
1394
1395 .data
1396 SwitchTable label dword
1397 dd imagerel(Label1)
1398 dd imagerel(Label2)
1399 dd imagerel(Label3)
1400
1401 Some other compilers use the beginning of the switch table or the beginning of
1402 the code section as reference point for 32-bit jump addresses. Other
1403 compilers use 64-bit addresses in the switch table. We want to recognize
1404 all these patterns in order to disassemble a switch table in a comprehensible
1405 way and find the case label targets.
1406
1407 In order to recognize a switch table in the above example, the tracer must
1408 do the following tasks:
1409
1410 1. Calculate the rip-relative address in the lea instruction and detect
1411 that it is equal to the image base.
1412
1413 2. Remember that rbx contains the image base.
1414
1415 3. When interpreting the mov ecx instruction it recognizes that the base
1416 pointer contains the image base, therefore the displacement must be
1417 interpreted as an image-relative address. Calculate this address and
1418 give it a name.
1419
4.  Remember that ecx contains an element from the array SwitchTable.
    It is not yet known that SwitchTable is a switch table.
1422
1423 5. After add rcx,rbx remember that rcx contains an element from the array
1424 SwitchTable plus the image base.
1425
1426 6. When interpreting the jmp rcx instruction, the information about the
1427 contents of rcx is used for concluding that SwitchTable contains jump
1428 addresses, and that these addresses are image-relative. If there had
1429 been no add rcx,rbx, we would conclude that SwitchTable contains
1430 absolute virtual addresses.
1431
1432 7. Go through all elements of SwitchTable. Calculate the address that each
1433 element points to, give it a name, and extend the scope of the current
1434 function to include this target.
1435
1436 8. It would be possible to determine the length of the switch table from
1437 the cmp instruction, but the tracer does not currently use this
1438 information. Instead, it stops parsing the switch table at the first
1439 known label or the first invalid address.
1440
1441 This is quite a long way to go for acquiring this information, but it is
1442 necessary in order to tell what is code and what is data and to find out
1443 where the function ends. Unfortunately, the MS compiler puts switch tables
1444 in the code segment rather than in the data segment which would give better
1445 caching and code prefetching. If the switch table was not identified as such,
1446 it would be impossible to tell what is code and what is data.
1447
1448 The tracer is also used for identifying virtual function tables.
1449
The values of SATracer::Regist[i] tell what kind of information register i contains:
1451 0 Unknown contents
1452 1 Contains image base
1453 4 Contains a constant = Value[i]
1454 8 Contains a value < Value[i]. (Not implemented yet)
1455 0x10 Contains the value of a symbol. Value[i] contains the old index of the symbol
1456 0x11 Contains the value of an array element. Value[i] contains the symbol old index of the array
1457 0x12 Contains the value of an array element + image base. Value[i] contains the symbol old index of the array. (array may contain image-relative jump addresses)
1458 0x13 Contains the value of an array element + array base. Value[i] contains the symbol old index of the array. (array may contain jump addresses relative to array base)
1459 0x18 Contains the address of a symbol. Value[i] contains the symbol old index
1460 0x19 Contains the address of an array element. Value[i] contains the symbol old index of the array
1461 */
1462
UpdateTracer()1463 void CDisassembler::UpdateTracer() {
1464 // Trace register values. See explanation above
1465 uint32_t reg; // Destination register number
1466 uint32_t srcreg; // Source register number
1467
1468 if (s.Operands[0] & 0xFF) {
1469 // There is a destination operand
1470 if ((s.Operands[0] & 0xFF) < 5 && (s.Operands[0] & 0x1000)) {
1471 // Destination operand is a general purpose register
1472 switch (s.Operands[0] & 0xF0000) {
1473 case 0x20000:
1474 // Register indicated by last bits of opcode byte
1475 reg = Get<uint8_t>(s.OpcodeStart2) & 7;
1476 // Check REX.B prefix
1477 if (s.Prefixes[7] & 1) reg |= 8; // Add 8 if REX.B prefix
1478 break;
1479 case 0x30000:
1480 // Register indicated by rm bits of mod/reg/rm byte
1481 reg = s.RM;
1482 break;
1483 case 0x40000:
1484 // Register indicated by reg bits of mod/reg/rm byte
1485 reg = s.Reg;
1486 break;
1487 default:
1488 // Error. Don't know where to find destination register
1489 t.Reset(); return;
1490 }
1491 }
1492 else if ((s.Operands[0] & 0xFF) >= 0xA0 && (s.Operands[0] & 0xFF) <= 0xA9) {
1493 // Destination is al, ax, eax, or rax
1494 reg = 0;
1495 }
1496 else {
1497 // Destination is not a general purpose register
1498 return;
1499 }
1500 }
1501 else {
1502 // There is no destination operand
1503 return;
1504 }
1505
1506 // Destination operand is a general purpose register
1507 if (OpcodeOptions & 4) {
1508 // Destination register is not changed
1509 return;
1510 }
1511
1512 // Check the opcode to find out what has happened to this register
1513 switch (Opcodei) {
1514 case 0xB0: case 0xB1: case 0xB2: case 0xB3:
1515 case 0xB4: case 0xB5: case 0xB6: case 0xB7:
1516 case 0xB8: case 0xB9: case 0xBA: case 0xBB:
1517 case 0xBC: case 0xBD: case 0xBE: case 0xBF:
1518 // MOV register, constant
1519 t.Regist[reg] = 0;
1520 if (s.OperandSize < 32) {
1521 // Only part of register is changed
1522 return;
1523 }
1524 if (s.ImmediateRelocation) {
1525 if (s.OperandSize < WordSize || !(Relocations[s.ImmediateRelocation].Type & 0x21)) {
1526 // Wrong size or type of relocation
1527 return;
1528 }
1529 // Register contains the address of a symbol
1530 t.Regist[reg] = 0x18;
1531 t.Value [reg] = Relocations[s.ImmediateRelocation].TargetOldIndex;
1532 return;
1533 }
1534
1535 // Register value is a known constant
1536 t.Regist[reg] = 4;
1537 // Save value
1538 switch (s.ImmediateFieldSize) {
1539 case 1:
1540 t.Value[reg] = Get<uint8_t>(s.ImmediateField);
1541 break;
1542 case 2:
1543 t.Value[reg] = Get<uint16_t>(s.ImmediateField);
1544 break;
1545 case 4:
1546 case 8: // 64-bit value truncated to 32 bits
1547 t.Value[reg] = Get<uint32_t>(s.ImmediateField);
1548 break;
1549 default:
1550 // Error. Should not occur
1551 t.Regist[reg] = 0;
1552 }
1553 return;
1554 /* This part is currently unused:
1555 case 0x31: case 0x33: case 0x29: case 0x2B:
1556 // XOR or SUB. Check if source and destination is same register
1557 if ((s.Operands[0] & 0xFFFF) == (s.Operands[1] & 0xFFFF) && s.Reg == s.RM && s.OperandSize >= 32) {
1558 // XOR OR SUB with same source and destination produces zero
1559 t.Regist[reg] = 4;
1560 t.Value [reg] = 0;
1561 return;
1562 }
1563 break;
1564 */
1565
1566 case 0x8D:
1567 // LEA
1568 if (s.AddressFieldSize == 4 && s.AddressRelocation && s.OperandSize >= 32) {
1569 // Register contains the address of a symbol
1570 if (!(Relocations[s.AddressRelocation].Type & 1) && WordSize < 64) {
1571 // Cannot follow position-independent code in 32 bit mode
1572 t.Regist[reg] = 0; return;
1573 }
1574 t.Regist[reg] = 0x18;
1575 t.Value [reg] = Relocations[s.AddressRelocation].TargetOldIndex;
1576 // Check if symbol has name
1577 const char * SymName = Symbols.HasName(t.Value[reg]);
1578 if (SymName && strcmp(SymName, "__ImageBase") == 0) {
1579 // Symbol is imagebase
1580 t.Regist[reg] = 1;
1581 }
1582 // Check if base or index register
1583 if (s.BaseReg || s.IndexReg) t.Regist[reg]++;
1584 return;
1585 }
1586 if (!s.AddressRelocation && s.BaseReg && s.IndexReg && s.Scale == 0) {
1587 // LEA used as ADD
1588
1589 if (t.Regist[s.BaseReg-1] == 1 && (t.Regist[s.IndexReg-1] & 0xFE) == 0x10) {
1590 // Adding imagebase to the value of a symbol or array element
1591 t.Regist[reg] = 0x12;
1592 t.Value [reg] = t.Value[s.IndexReg-1];
1593 return;
1594 }
1595 if (t.Regist[s.IndexReg-1] == 1 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x10) {
1596 // Adding the value of a symbol or array element to the imagebase
1597 t.Regist[reg] = 0x12;
1598 t.Value [reg] = t.Value[s.BaseReg-1];
1599 return;
1600 }
1601 if ((((t.Regist[s.IndexReg-1] & 0xFE) == 0x18 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x10)
1602 || ((t.Regist[s.IndexReg-1] & 0xFE) == 0x10 && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18))
1603 && t.Value [s.IndexReg-1] == t.Value[s.BaseReg-1]) {
1604 // Adding the value of an array element to the base address of same array.
1605 // This is a computed jump address if array contains self-relative addresses
1606 t.Regist[reg] = 0x13;
1607 t.Value [reg] = t.Value[s.BaseReg-1];
1608 return;
1609 }
1610 }
1611 break;
1612
1613 case 0x89: case 0x8B: case 0x3B02:
1614 // MOV and MOVSXD instruction
1615 if (s.OperandSize < 32) break; // Only part of register is changed
1616 if (!(s.MFlags & 1)) {
1617 // MOV reg,reg. Copy register contents
1618 if (Opcodei == 0x8B || Opcodei == 0x3B02) {
1619 // Source register indicated by rm bits
1620 srcreg = s.RM;
1621 }
1622 else {
1623 // Source register indicated by reg bits
1624 srcreg = s.Reg;
1625 }
1626 t.Regist[reg] = t.Regist[srcreg];
1627 t.Value [reg] = t.Value [srcreg];
1628 return;
1629 }
1630 // MOV reg,mem
1631 if (s.AddressFieldSize == 4 && s.AddressRelocation) {
1632 // Register contains the value of a symbol
1633 if (!(Relocations[s.AddressRelocation].Type & 1) && WordSize < 64) {
1634 // Cannot follow position-independent code in 32 bit mode
1635 t.Regist[reg] = 0; return;
1636 }
1637 t.Regist[reg] = 0x10;
1638 t.Value [reg] = Relocations[s.AddressRelocation].TargetOldIndex;
1639
1640 // Check if base or index register
1641 if (s.BaseReg || s.IndexReg) t.Regist[reg]++;
1642 return;
1643 }
1644 if (s.BaseReg && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18) {
1645 // Memory operand has a base register which contains the address of a symbol
1646 // Destination register will contain value of same symbol
1647 t.Regist[reg] = 0x10;
1648 t.Value [reg] = t.Value[s.BaseReg-1];
1649 if (s.IndexReg || s.AddressFieldSize || (t.Regist[s.BaseReg-1] & 1)) {
1650 // There is an offset
1651 t.Regist[reg] |= 1;
1652 }
1653 return;
1654 }
1655 if (s.IndexReg && (t.Regist[s.IndexReg-1] & 0xFE) == 0x18 && s.BaseReg && s.Scale == 0) {
1656 // Same as above, base and index registers swapped, scale factor = 1
1657 t.Regist[reg] = 0x10;
1658 t.Value [reg] = t.Value[s.IndexReg-1];
1659 if (s.AddressFieldSize || (t.Regist[s.IndexReg-1] & 1)) {
1660 // There is an offset
1661 t.Regist[reg] |= 1;
1662 }
1663 return;
1664 }
1665 break;
1666
1667 case 0x01: case 0x03:
1668 // ADD instruction
1669 if (s.OperandSize < 32) break; // Only part of register is changed
1670 if (Opcodei == 0x03) {
1671 // Source register indicated by rm bits
1672 srcreg = s.RM;
1673 }
1674 else {
1675 // Source register indicated by reg bits
1676 srcreg = s.Reg;
1677 }
1678 if (t.Regist[srcreg] == 1 && (t.Regist[reg] & 0xFE) == 0x10) {
1679 // Adding imagebase to the value of a symbol or array element
1680 t.Regist[reg] = 0x12;
1681 return;
1682 }
1683 if (t.Regist[reg] == 1 && (t.Regist[srcreg] & 0xFE) == 0x10) {
1684 // Adding the value of a symbol or array element to the imagebase
1685 t.Regist[reg] = 0x12;
1686 t.Value [reg] = t.Value[srcreg];
1687 return;
1688 }
1689 if ((((t.Regist[srcreg] & 0xFE) == 0x18 && (t.Regist[reg] & 0xFE) == 0x10)
1690 || ((t.Regist[srcreg] & 0xFE) == 0x10 && (t.Regist[reg] & 0xFE) == 0x18))
1691 && t.Value [reg] == t.Value[srcreg]) {
1692 // Adding the value of an array element to the base address of same array.
1693 // This is a computed jump address if array contains self-relative addresses
1694 t.Regist[reg] = 0x13;
1695 return;
1696 }
1697 break;
1698
1699 case 0x3902:
1700 // CDQE. eax sign extended to rax. Ignore
1701 return;
1702 case 0x3900: case 0x3901:
1703 // CBW, CWDE. rax changed
1704 t.Regist[0] = 0;
1705 return;
1706 case 0x3A00: case 0x3A01: case 0x3A02:
1707 // CWD, CDQ, CQO. rdx changed
1708 t.Regist[2] = 0;
1709 return;
1710 }
1711 // Anything else: Remember that this register is changed
1712 t.Regist[reg] = 0;
1713
1714 if (OpcodeOptions & 8) {
1715 // Registers other than destination register may be changed
1716 t.Reset();
1717 }
1718 }
1719
1720
UpdateSymbols()1721 void CDisassembler::UpdateSymbols() {
1722 // Find unnamed symbols, determine symbol types,
1723 // update symbol list, call CheckJumpTarget if jump/call.
1724 // This function is called during pass 1 for every instruction
1725
1726 uint32_t OpI; // Operand index
1727 uint32_t OperandType; // Type of operand
1728 uint32_t SymOldI; // Symbol table old index
1729 uint32_t SymNewI; // Symbol table new index
1730
1731 // Loop through all operands for one instruction
1732 for (OpI = 0; OpI < 4; OpI++) {
1733 if (s.Operands[OpI]) {
1734 SymNewI = 0; // Reset symbol index
1735 OperandType = s.Operands[OpI]; // Operand type
1736
1737 // Check if indirect jump/call
1738 if (OpI == 0 && ((s.OpcodeDef->Destination + 1) & 0xFE) == 0x0C) {
1739 OperandType = s.OpcodeDef->Destination;
1740 }
1741
1742 // Check operand type
1743 if ((OperandType & 0xF0) == 0x80) {
1744 // This is a jump/call destination
1745
1746 if (!s.ImmediateRelocation) {
1747 // Has no reference to other symbol. Make one
1748
1749 // Relocation type
1750 uint32_t RelocationType = 2; // Self relative
1751 if ((OperandType & 0xFE) == 0x84) RelocationType = 8; // Far
1752
1753 // Scope
1754 uint32_t TargetScope = 1; // Function local
1755 if ((OperandType & 0xFF) >= 0x83) TargetScope = 2; // Call or far. File scope
1756
1757 // Make relocation and target symbol
1758 SymNewI = MakeMissingRelocation(Section, s.ImmediateField, RelocationType, OperandType, TargetScope);
1759
1760 // Update labels
1761 LabelBegin = 0;
1762 FindLabels();
1763
1764 if (TargetScope == 1 && SymNewI) {
1765 // Short or near jump (not call). Update range of current function
1766 CheckJumpTarget(SymNewI);
1767 }
1768 }
1769 else {
1770 // Jump or call to relocated symbol
1771 // Look up in Relocations table
1772 SymOldI = Relocations[s.ImmediateRelocation].TargetOldIndex;
1773
1774 // Look up in symbol table
1775 SymNewI = Symbols.Old2NewIndex(SymOldI);
1776 if (Symbols[SymNewI].OldIndex) {
1777 // Found
1778 // Check if symbol already has a scope assigned
1779 if (Symbols[SymNewI].Scope == 0) Symbols[SymNewI].Scope = 2;
1780
1781 // Check if symbol already has a type assigned
1782 if ((OperandType & 0xFF) > (Symbols[SymNewI].Type & 0xFF)) {
1783
1784 // No type assigned yet, or new type overrides old type
1785 Symbols[SymNewI].Type = (Symbols[SymNewI].Type & ~0xFF) | OperandType;
1786 }
1787 // Check if jump target is in data segment
1788 if (Symbols[SymNewI].Section > 0 && (uint16_t)(Symbols[SymNewI].Section) < Sections.GetNumEntries()
1789 && (Sections[Symbols[SymNewI].Section].Type & 0xFF) > 1) {
1790 s.Warnings1 |= 0x80000;
1791 }
1792 }
1793 }
1794 }
1795 else {
1796 // Check if reference to data symbol
1797 if ((s.Operands[OpI] & 0x2000) && (s.Operands[OpI] & 0xD0000) == 0x10000) {
1798 // Memory operand
1799
1800 if (s.AddressRelocation) {
1801 // There is a reference to a data symbol
1802
1803 // Make exception for LEA: Target type is unknown
1804 if (Opcodei == 0x8D) OperandType = 0;
1805
1806 // Check and update relocation target
1807 CheckRelocationTarget(s.AddressRelocation, OperandType, GetDataItemSize(OperandType));
1808 }
1809 else if (s.AddressFieldSize >= 4) {
1810 // Relocation missing. Make one if possible
1811 uint32_t TargetType = OperandType;
1812 if (Opcodei == 0x8D) {
1813 // Source of LEA instruction has no type
1814 TargetType = 0;
1815 }
1816 // Check addressing mode
1817 if (s.MFlags & 0x100) {
1818 // There is a rip-relative reference
1819 // Make relocation record and target record
1820 MakeMissingRelocation(Section, s.AddressField, 2, TargetType, 2);
1821 FindRelocations();
1822 }
1823 else if (s.BaseReg && t.Regist[s.BaseReg-1] == 1 && s.AddressFieldSize == 4) {
1824 // Memory operand has a base register which has been traced
1825 // to contain the image base. Make image-relative relocation
1826 MakeMissingRelocation(Section, s.AddressField, 4, TargetType, 2);
1827 FindRelocations();
1828 }
1829 else if (ImageBase && !(RelocationsInSource & 0x20) && s.AddressFieldSize >= 4) {
1830 // No base relocations in source. Make direct relocation
1831 MakeMissingRelocation(Section, s.AddressField, 1, TargetType, 2, s.AddressFieldSize);
1832 FindRelocations();
1833 }
1834 }
1835 }
1836 if ((s.Operands[OpI] & 0xF0) >= 0x10 && (s.Operands[OpI] & 0xF0) < 0x40) {
1837 // Immediate operand
1838
1839 if (!s.ImmediateRelocation && s.ImmediateFieldSize >= 4
1840 && ImageBase && !(RelocationsInSource & 0x20)
1841 && (Opcodei == 0x3000 || Opcodei == 0x68 || (Opcodei & 0xFFF8) == 0xB8)) {
1842 // instruction = MOV or PUSH, immediate operand may be an address
1843 // Make a relocation if immediate value is valid address
1844 MakeMissingRelocation(Section, s.ImmediateField, 1, 0, 2, s.ImmediateFieldSize);
1845 FindRelocations();
1846 }
1847 if (s.ImmediateRelocation) {
1848 // There is a reference to the offset of a data symbol
1849 // Check and update relocation target
1850 CheckRelocationTarget(s.ImmediateRelocation, 0, 0);
1851 }
1852 }
1853 }
1854 if (((OperandType + 1) & 0xFE) == 0x0C) {
1855 // Indirect jump or call. Find jump table or virtual table
1856
1857 // Default relocation type for jump table is direct
1858 uint32_t RelocationType = 1;
1859
1860 // Find symbol table entry for jump pointer or call pointer
1861 if (s.AddressRelocation && Relocations[s.AddressRelocation].TargetOldIndex) {
1862 // Look up in symbol table
1863 SymNewI = Symbols.Old2NewIndex(Relocations[s.AddressRelocation].TargetOldIndex);
1864 }
1865 else SymNewI = 0;
1866
1867 if (SymNewI == 0 || Symbols[SymNewI].OldIndex == 0) {
1868 // Symbol for jump table not found yet
1869 if (s.Operands[OpI] & 0x2000) {
1870 // There is a memory operand
1871 if (s.BaseReg && (t.Regist[s.BaseReg-1] & 0xFE) == 0x18) {
1872 // Memory operand has a base register which has been traced to
1873 // point to a known symbol
1874 SymNewI = Symbols.Old2NewIndex(t.Value[s.BaseReg-1]);
1875 }
1876 else if (((s.BaseReg != 0) ^ (s.IndexReg != 0)) && s.AddressFieldSize == 4 && ExeType) {
1877 // Here is a jump table with an absolute address
1878 SymNewI = MakeMissingRelocation(Section, s.AddressField, 1, 0x0B, 2, s.AddressFieldSize);
1879 }
1880 }
1881 else {
1882 // Jump or call to a register operand
1883 // Check if the register value has been traced
1884 if ((t.Regist[s.RM] & 0x1C) == 0x10) {
1885 // Register contains an array element. Get symbol for this array
1886 SymNewI = Symbols.Old2NewIndex(t.Value[s.RM]);
1887 // Check relocation type
1888 if (t.Regist[s.RM] == 0x12) {
1889 // Register contains array element plus imagebase.
1890 RelocationType = 4; // Array elements must have image-relative relocations
1891 }
1892 if (t.Regist[s.RM] == 0x13) {
1893 // Register contains array element plus base address of same array
1894 RelocationType = 0x10; // Array elements must have self-relative relocations
1895 }
1896 }
1897 }
1898 }
1899 // Check if valid symbol for jump/call table
1900 if (SymNewI && Symbols[SymNewI].OldIndex) {
1901 // Jump/call table found
1902
1903 if ((s.Operands[OpI] & 0x2000) && !s.BaseReg && !s.IndexReg && Opcodei == 0x2704) {
1904 // Simple memory operand
1905 // Assign name if symbol is import table entry
1906 CheckImportSymbol(SymNewI);
1907 }
1908
1909 // Check relocation type if memory operand
1910 if ((s.Operands[OpI] & 0x2000) && s.BaseReg && t.Regist[s.BaseReg-1] == 1) {
1911 // Memory operand has a base register which has been traced to contain the imagebase
1912 RelocationType = 4; // Array elements must have image-relative relocations
1913 }
1914
1915 // Check symbol type
1916 if ((Symbols[SymNewI].Type & 0xFF) < (OperandType & 0xFF) /*|| (Symbols[SymNewI].Type & 0xF0)*/) {
1917 // No type assigned yet, or new type overrides old type
1918 Symbols[SymNewI].Type = OperandType;
1919 }
1920
1921 // Check symbol size
1922 if (RelocationType == 4 && WordSize > 16) {
1923 Symbols[SymNewI].Size = 4; // Image relative
1924 }
1925 if (RelocationType == 0x10 && WordSize > 16) {
1926 Symbols[SymNewI].Size = 4; // Relative to table base
1927 }
1928 else {
1929 Symbols[SymNewI].Size = WordSize / 8; // Direct
1930 }
1931
1932 // Follow what the jump/call table points to
1933 FollowJumpTable(SymNewI, RelocationType);
1934 }
1935 }
1936 }
1937 }
1938 }
1939
1940
FollowJumpTable(uint32_t symi,uint32_t RelType)1941 void CDisassembler::FollowJumpTable(uint32_t symi, uint32_t RelType) {
1942 // Check jump/call table and its targets
1943 uint32_t sym1, sym2, sym3 = 0; // Symbol indices
1944 uint32_t NextLabel; // Offset of next label
1945 uint32_t Pos; // Current position
1946 SARelocation rel; // Relocation record for searching
1947 int32_t Reli; // Index to relocation
1948 uint32_t NewType = 0; // Type to assign to symbol
1949 int32_t SourceSection; // Section of relocation source
1950 uint32_t SourceOffset; // Offset of relocation source
1951 uint32_t SourceSize; // Size of relocation source
1952 uint32_t TargetType; // Type for relocation target
1953 uint32_t RefPoint = 0; // Reference point if relocationtype = 0x10
1954 int32_t Addend = 0; // Inline addend
1955
1956 // Check if sym is valid
1957 if (Symbols[symi].OldIndex == 0) return;
1958
1959 // Get type of target
1960 switch (s.OpcodeDef->Destination & 0xFF) {
1961 case 0x0B: // Near indirect jump. Target type = jump destination
1962 NewType = 0x82; break;
1963 case 0x0C: // Near indirect call. Target type = call destination
1964 NewType = 0x83; break;
1965 default: // Should not occur
1966 return;
1967 }
1968
1969 // Check symbol size
1970 if ((RelType & 4) && WordSize >= 32) {
1971 // Image relative relocation
1972 Symbols[symi].Size = 4;
1973 }
1974 else if ((RelType & 0x10) && WordSize >= 32) {
1975 // Relative to table base
1976 Symbols[symi].Size = 4;
1977 RefPoint = Symbols[symi].OldIndex; // Reference point = table base
1978 }
1979 else if ((RelType & 0x21) || Symbols[symi].Size == 0) {
1980 // Direct near relocation
1981 Symbols[symi].Size = WordSize / 8;
1982 }
1983
1984 // Check symbol type
1985 if (uint32_t(s.OpcodeDef->Destination & 0xFF) > (Symbols[symi].Type & 0xFF)) {
1986 // No type assigned yet, or new type overrides old type
1987 Symbols[symi].Type = s.OpcodeDef->Destination | 0x4000000;
1988 }
1989 // Make sure symbol is marked as data
1990 Symbols[symi].Type |= 0x4000000;
1991
1992 // Check if symbol has a scope assigned
1993 if (Symbols[symi].Scope == 0) Symbols[symi].Scope = 2;
1994
1995 // Save symbol properties
1996 // (The reference to sym will become invalid when new symbols are created)
1997 SourceSection = Symbols[symi].Section;
1998 SourceOffset = Symbols[symi].Offset;
1999 SourceSize = Symbols[symi].Size;
2000 TargetType = 0x82;
2001
2002 // Target type = jump label
2003 if ((Symbols[symi].Type & 0xFF) == 0x0C) TargetType++; // Target type = call label
2004
2005 // Find next label
2006 sym1 = Symbols.FindByAddress(SourceSection, SourceOffset, &sym2, &sym3);
2007 if (sym1 && sym3) {
2008 // Assume that table ends at next label
2009 NextLabel = Symbols[sym3].Offset;
2010 }
2011 else {
2012 // No next label. End at source section end
2013 NextLabel = Sections[SourceSection].InitSize;
2014 }
2015
2016 // Loop through table of jump/call addresses
2017 for (Pos = SourceOffset; Pos < NextLabel; Pos += SourceSize) {
2018
2019 // Search for relocation source at table entry
2020 rel.Section = SourceSection;
2021 rel.Offset = Pos;
2022 Reli = Relocations.Exists(rel);
2023
2024 if (Reli > 0) {
2025 // Relocation found. Check target
2026 CheckRelocationTarget(Reli, TargetType, 0);
2027 }
2028 else {
2029 // No relocation here. Make one if possible
2030
2031 uint32_t symi = MakeMissingRelocation(rel.Section, rel.Offset, RelType, TargetType, 2, 0, RefPoint);
2032 if (!symi) {
2033 // Failed to make a meaningful relocation. End jump table
2034 break;
2035 }
2036 int32_t TargetSection = Symbols[symi].Section;
2037 if (!TargetSection || (Sections[TargetSection].Type & 0xFF) != 1) {
2038 // Target is not in code section. End jump table
2039 break;
2040 }
2041 // Find the newly made relocation
2042 Reli = Relocations.Exists(rel);
2043 if (Reli <= 0) break;
2044 }
2045 // Relocation found. Check if valid
2046 if (!(Relocations[Reli].Type & 0x37) || !Relocations[Reli].TargetOldIndex) {
2047 // Wrong relocation type or invalid. Stop searching
2048 break;
2049 }
2050 // Find relocation target
2051 uint32_t TargetSymI = Symbols.Old2NewIndex(Relocations[Reli].TargetOldIndex);
2052 if (!TargetSymI) {
2053 // Target invalid
2054 break;
2055 }
2056
2057 // Calculate target address
2058 Addend = Relocations[Reli].Addend;
2059 // Check inline addend if target is section-relative and this is an object file
2060 if (!ExeType && Symbols[TargetSymI].Offset == 0) {
2061
2062 switch (SourceSize) {
2063 case 2:
2064 Addend += *(int16_t*)(Sections[SourceSection].Start + Pos);
2065 break;
2066 case 4: case 8:
2067 Addend += *(int32_t*)(Sections[SourceSection].Start + Pos);
2068 break;
2069 default:
2070 Addend += 0;
2071 }
2072 if (Addend) {
2073 // Make new symbol at target address
2074 uint32_t NewSymOffset = Addend;
2075 if (Relocations[Reli].Type & 2) { // relative
2076 if (RelType == 0x10) { // arbitrary reference point
2077 NewSymOffset -= (Relocations[Reli].Offset - SourceOffset);
2078 }
2079 }
2080 uint32_t NewSym = Symbols.NewSymbol(Symbols[TargetSymI].Section, NewSymOffset, 2);
2081 if (NewSym) TargetSymI = NewSym;
2082 }
2083 }
2084
2085 // Update target symbol type
2086 if ((Symbols[TargetSymI].Type & 0xFF) < NewType) {
2087 Symbols[TargetSymI].Type = (Symbols[TargetSymI].Type & ~0xFF) | NewType;
2088 }
2089 // Extend current function to include target
2090 CheckJumpTarget(TargetSymI);
2091
2092 // Update NextLabel in case new target is between Pos and NextLabel
2093 if (Symbols[TargetSymI].Section == SourceSection && Symbols[TargetSymI].Offset > Pos && Symbols[TargetSymI].Offset < NextLabel) {
2094 NextLabel = Symbols[TargetSymI].Offset;
2095 }
2096 }
2097
2098 if (Pos < NextLabel) {
2099 // There is no label after jump table. Make one with zero scope
2100 SASymbol SymAfter;
2101 SymAfter.Reset();
2102 SymAfter.Section = SourceSection;
2103 SymAfter.Offset = Pos;
2104 SymAfter.Type = (Sections[SourceSection].Type & 0xFF) == 1 ? 0x82 : 0;
2105 Symbols.NewSymbol(SymAfter);
2106 }
2107 }
2108
2109
MakeMissingRelocation(int32_t Section,uint32_t Offset,uint32_t RelType,uint32_t TargetType,uint32_t TargetScope,uint32_t SourceSize,uint32_t RefPoint)2110 uint32_t CDisassembler::MakeMissingRelocation(int32_t Section, uint32_t Offset, uint32_t RelType, uint32_t TargetType, uint32_t TargetScope, uint32_t SourceSize, uint32_t RefPoint) {
2111 // Make a relocation and its target symbol from inline address
2112 /* This function is used for executable files that have already been
2113 relocated for making the relocation information that has been
2114 lost as well as the symbol record that the relocation should
2115 point to.
2116 Parameters:
2117 Section Section of relocation source
2118 Offset Offset of relocation source
2119 RelType Relocation type: 1 = direct, 2 = self relative, 4 = image relative, 0x10 = relative to reference point
2120 TargetType Symbol type for target
2121 TargetScope Scope for target symbol
2122 SourceSize Size of source field (0 = default for relocation type and WordSize)
2123 RefPoint Reference point if RelType = 0x10 (symbol old index)
2124
2125 The return value is a symbol new index for the target, or zero if failure
2126
2127 The size of the relocation source is implied from RelType
2128 A symbol record for the target will be made if none exists.
2129 The scope of the target symbol will be file local (2)
2130 */
2131
2132 SARelocation Rel; // Temporary relocation record
2133 SASymbol Sym; // Temporary symbol record for target
2134 Sym.Reset();
2135 int32_t irel; // Relocation index
2136 uint32_t isym = 0; // Symbol new index
2137 int64_t InlineA; // Inline address or displacement
2138 int64_t TargetAbsAddr; // Absolute address of target
2139
2140 // Check if Section valid
2141 if (Section <= 0 || (uint32_t)Section >= Sections.GetNumEntries() || Offset >= Sections[Section].InitSize || !Sections[Section].Start) {
2142 return 0;
2143 }
2144
2145 // Check if a relocation would be missing
2146 if (RelType & 1) {
2147 // Direct relocation
2148 if (RelocationsInSource & 0x20) return 0; // Source file has base relocations. There would be a relocation here if needed
2149 }
2150 else if (RelType & 4) {
2151 // Image relative
2152 if (!ExeType) return 0; // Object file. There would be a relocation here if needed
2153 }
2154
2155 // Check if a relocation already exists
2156 Rel.Section = Section;
2157 Rel.Offset = Offset;
2158 irel = Relocations.Exists(Rel);
2159 if (irel > 0) return 0; // Relocation exists. Don't do anything
2160
2161 if (SourceSize == 0) {
2162 // Source size not specified. Get default source size
2163 if ((TargetType & 0xFF) == 0x81) {
2164 // Short jump
2165 SourceSize = 1;
2166 }
2167 else if (RelType & 1) {
2168 // Direct relocation. Size depends on word size
2169 SourceSize = WordSize / 8;
2170 }
2171 else if (RelType & 0x12) {
2172 // Self relative or relative to table base
2173 SourceSize = (WordSize == 16) ? 2 : 4;
2174 }
2175 else if (RelType & 4 && WordSize > 16) {
2176 // Image relative
2177 SourceSize = 4;
2178 }
2179 else {
2180 // Other value. Ignore
2181 return 0;
2182 }
2183 }
2184
2185 // Get inline address or displacement from source address
2186 if (SourceSize == 8) {
2187 InlineA = *(int64_t*)(Sections[Section].Start + Offset);
2188 }
2189 else if (SourceSize == 4) {
2190 InlineA = *(int32_t*)(Sections[Section].Start + Offset);
2191 }
2192 else if (SourceSize == 2) {
2193 InlineA = *(int16_t*)(Sections[Section].Start + Offset);
2194 }
2195 else { // 1
2196 InlineA = *(int8_t*)(Sections[Section].Start + Offset);
2197 }
2198
2199 // Get absolute virtual address of target
2200 if (RelType & 1) {
2201 // Direct address
2202 TargetAbsAddr = InlineA;
2203 }
2204 else if (RelType & 2) {
2205 // Self relative. Translate self-relative to absolute address
2206 TargetAbsAddr = InlineA + ImageBase + SectionAddress + IEnd;
2207 }
2208 else if (RelType & 0x10) {
2209 // Relative to reference point. Translate relative to absolute address
2210 uint32_t RefSym = Symbols.Old2NewIndex(RefPoint);
2211 TargetAbsAddr = InlineA + Symbols[RefSym].Offset + Sections[Symbols[RefSym].Section].SectionAddress;
2212 }
2213 else {
2214 // Image relative
2215 TargetAbsAddr = InlineA + ImageBase;
2216 }
2217
2218 if (ExeType) {
2219 // Executable file
2220 // Translate to section:offset address
2221 if (TranslateAbsAddress(TargetAbsAddr, Sym.Section, Sym.Offset)) {
2222
2223 // Make a symbol for this address if none exists
2224 Sym.Scope = TargetScope;
2225 Sym.Type = TargetType;
2226 isym = Symbols.NewSymbol(Sym);
2227 }
2228 else if (TargetAbsAddr == ImageBase && TargetAbsAddr) {
2229 // Reference to image base (nonzero)
2230 // Make a symbol for image base if none exists
2231 Sym.Scope = 0x20;
2232 Sym.Type = 0;
2233 isym = Symbols.NewSymbol(Sym);
2234 if (isym && Symbols[isym].Name == 0) {
2235 Symbols.AssignName(isym, "__ImageBase");
2236 }
2237 }
2238 }
2239 else {
2240 // Object file
2241 Sym.Section = Section;
2242 Sym.Offset = (uint32_t)TargetAbsAddr - SectionAddress;
2243
2244 // Make a symbol for this address if none exists
2245 Sym.Scope = TargetScope;
2246 Sym.Type = TargetType;
2247 isym = Symbols.NewSymbol(Sym);
2248 }
2249
2250 if ((RelType & 2) && (TargetType & 0xF0) == 0x80 && Sym.Section == Section && CodeMode == 1) {
2251 // Relocation not needed for relative jump/call within same section
2252 return isym;
2253 }
2254
2255 if (isym) {
2256 // Relocation addend
2257 int32_t Addend = -(int32_t)InlineA;
2258 if (RelType & 2) {
2259 // Correct self-relative record for bias
2260 if (s.MFlags & 0x100) {
2261 // rip-relative address
2262 Addend -= IEnd - s.AddressField;
2263 }
2264 else {
2265 // self-relative jump etc.
2266 Addend -= SourceSize;
2267 }
2268 }
2269
2270 // Make a relocation record
2271 AddRelocation (Section, Offset, Addend, RelType, SourceSize, Symbols[isym].OldIndex, RefPoint);
2272
2273 // Update s.AddressRelocation and s.ImmediateRelocation
2274 if (CodeMode & 3) {
2275 FindRelocations();
2276
2277 // Remove warning for absolute address
2278 s.Warnings1 &= ~0x8000;
2279 }
2280 }
2281 return isym;
2282 }
2283
2284
CheckImportSymbol(uint32_t symi)2285 void CDisassembler::CheckImportSymbol(uint32_t symi) {
2286 // Check for indirect jump to import table entry
2287
2288 if (Symbols[symi].DLLName) {
2289 // Instruction is an indirect jump to symbol table entry
2290 // Find label at current instruction
2291 uint32_t sym2 = Symbols.FindByAddress(Section, IBegin);
2292 if (sym2 && Symbols[sym2].Name == 0) {
2293 // Label at current instruction has no name
2294 // Give current instruction the import name without "_imp" prefix
2295 const char * ImpName = Symbols.GetName(symi);
2296 if (strncmp(ImpName, Symbols.ImportTablePrefix, (uint32_t)strlen(Symbols.ImportTablePrefix)) == 0) {
2297 Symbols.AssignName(sym2, ImpName + (uint32_t)strlen(Symbols.ImportTablePrefix));
2298 }
2299 }
2300 }
2301 }
2302
MarkCodeAsDubious()2303 void CDisassembler::MarkCodeAsDubious() {
2304 // Remember that this may be data in a code segment
2305 uint32_t sym1, sym2 = 0, sym3 = 0; // Preceding and succeding symbols
2306
2307 // Check likelihood that this is data rather than code
2308 if (((s.Errors & 0x4000) && ((s.Warnings1 & 0x10000000) || CountErrors > 1))
2309 || CountErrors > 5) {
2310 // There are more than 5 errors, or consecutive zeroes and at
2311 // least one more error or inaccessible code.
2312 // Consider this sufficient evidence that this is very unlikely
2313 // to be code. Show it as data only
2314 CodeMode = 4;
2315 }
2316 if (CodeMode < 4) {
2317 // This may be code containing errors or interpreted out of phase.
2318 // Set CodeMode to dubious so that it will be shown as both code and data
2319 CodeMode = 2;
2320 }
2321
2322 if (Pass & 0x0F) {
2323 // Pass 1. Mark preceding label as dubious
2324
2325 // Check nearest preceding label
2326 if (LabelBegin == 0) {
2327 // There is no preceding label. Make one
2328 Symbols.NewSymbol(Section, IBegin, 1);
2329 LabelBegin = 0;
2330 FindLabels();
2331 }
2332
2333 // Find symbol index for nearest preceding label
2334 sym1 = Symbols.FindByAddress(Section, LabelBegin, &sym2, &sym3);
2335
2336 if (sym1 && sym2) {
2337 // Mark symbol as dubious or data
2338 Symbols[sym2].Type = (Symbols[sym2].Type & ~0xF000000) | (CodeMode << 24);
2339 }
2340
2341 // Request repetition of pass 1
2342 Pass |= 0x100;
2343
2344 /* Skip to next label.
2345 This is removed because we want to accumulate errors as evidence for
2346 determined whether this is code or data
2347 // Is there a label after this?
2348 if (sym3) {
2349 // Skip to next label
2350 if (Symbols[sym3].Offset > IEnd) {
2351 IBegin = IEnd = Symbols[sym3].Offset;
2352 }
2353 }
2354 else {
2355 // No next label. Skip to section end
2356 IBegin = IEnd = SectionEnd;
2357 }
2358 */
2359 }
2360 }
2361
2362
NextInstruction1()2363 int CDisassembler::NextInstruction1() {
2364 // Go to next instruction or data item. Return 0 if none. Pass 1
2365 IBegin = IEnd;
2366
2367 // Reset everything in s field
2368 s.Reset();
2369
2370 // Return if there are more instructions
2371 return (IBegin < SectionEnd);
2372 }
2373
NextInstruction2()2374 int CDisassembler::NextInstruction2() {
2375 // Go to next instruction or data item. Return 0 if none. Pass 2
2376 IBegin = IEnd;
2377
2378 // Reset everything in s field
2379 s.Reset();
2380
2381 // Return if there are more instructions
2382 return (IBegin < FunctionEnd && IBegin < LabelEnd && IBegin < SectionEnd);
2383 }
2384
ParseInstruction()2385 void CDisassembler::ParseInstruction() {
2386 // Parse one opcode
2387 FlagPrevious = 0; // Reset flag from previous instruction
2388
2389 s.OpcodeStart1 = IBegin; // Index to start of instruction
2390
2391 // Scan prefixes first
2392 ScanPrefixes();
2393
2394 // Find opcode map entry
2395 FindMapEntry(); // Find entry in opcode maps
2396
2397 // Find operands
2398 FindOperands(); // Interpret mod/reg/rm and SIB bytes and find operands
2399
2400 // Determine the types of each operand
2401 FindOperandTypes();
2402
2403 if (s.Prefixes[3] == 0x62) {
2404 if (s.Prefixes[6] & 0x20) { // EVEX
2405 FindBroadcast(); // Find broadcast and offet multiplier for EVEX code
2406 }
2407 else { // MVEX
2408 SwizTableLookup(); // Find swizzle table record if MVEX prefix
2409 }
2410 }
2411
2412 // Find any relocation sources in this instruction
2413 FindRelocations();
2414
2415 // Find any reasons for warnings
2416 FindWarnings();
2417
2418 // Find any errors
2419 FindErrors();
2420
2421 if (!s.Errors && CodeMode == 1) {
2422 // Find instruction set
2423 FindInstructionSet();
2424
2425 // Update symbol types for operands of this instruction
2426 UpdateSymbols();
2427
2428 // Trace register values
2429 UpdateTracer();
2430 }
2431 }
2432
2433
ScanPrefixes()2434 void CDisassembler::ScanPrefixes() {
2435 // Scan prefixes
2436 uint32_t i; // Index to current byte
2437 uint8_t Byte; // Current byte of code
2438 for (i = IBegin; i < SectionEnd; i++) {
2439
2440 // Read code byte
2441 Byte = Buffer[i];
2442
2443 // Check if Byte is a prefix
2444 if (WordSize == 64 && (Byte & 0xF0) == 0x40) {
2445
2446 // This is a REX prefix
2447 if (Byte & 0x08) {
2448 // REX.W prefix
2449 StorePrefix(4, 0x48); // REX.W also in category operand size
2450 }
2451 StorePrefix(7, Byte); // Store in category REX
2452 }
2453 else if (i+1 < SectionEnd &&
2454 ((((Byte & 0xFE) == 0xC4 || Byte == 0x62) && (WordSize == 64 || (Buffer[i+1] >= 0xC0)))
2455 || (Byte == 0x8F && (Buffer[i+1] & 0x38)))) {
2456 // This is a VEX, EVEX, MVEX or XOP prefix
2457
2458 // Check for invalid prefixes before this
2459 if (s.Prefixes[5] | s.Prefixes[7]) s.Warnings1 |= 0x800;
2460
2461 // Get equivalent prefixes
2462 uint8_t prefix3 = Byte; // Repeat prefix (F2, F3) or VEX prefix (C4, C5, 62)
2463 uint8_t prefix4; // 66, 48 Operand size prefix
2464 uint8_t prefix5; // 66, F2, F3 operand type prefixes
2465 uint8_t prefix6; // VEX.mmmmm and VEX.L
2466 uint8_t prefix7; // equivalent to REX prefix
2467 uint8_t vvvv; // vvvv register operand
2468 if (Byte == 0xC5) {
2469 // 2-bytes VEX prefix
2470 if (i+2 >= SectionEnd) {
2471 IEnd = i+2;
2472 s.Errors |= 0x10; return; // End of buffer reached
2473 }
2474 Byte = Buffer[++i]; // Second byte
2475 prefix5 = Byte & 3; // pp bits
2476 prefix6 = (Byte << 3) & 0x20; // L bit
2477 prefix6 |= 1; // mmmmm bits = 1 for 0F map
2478 vvvv = (~Byte >> 3) & 0x0F; // vvvv operand
2479 prefix7 = 0x10; // Indicate 2-bytes VEX prefix
2480 prefix7 |= (~Byte >> 5) & 4; // R bit
2481 }
2482 else {
2483 // 3 or 4-bytes VEX/EVEX/MVEX prefix or XOP prefix
2484 if (i+3+(Byte==0x62) >= SectionEnd) {
2485 IEnd = i+3+(Byte==0x62);
2486 s.Errors |= 0x10; return; // End of buffer reached
2487 }
2488 prefix7 = (Byte == 0x8F) ? 0x80 : 0x20;// Indicate 3/4-bytes VEX prefix or XOP prefix
2489 Byte = Buffer[++i]; // Second byte
2490 prefix6 = Byte & 0x1F; // mmmmm bits
2491 prefix7 |= (~Byte >> 5) & 7; // R,X,B bits
2492 Byte = Buffer[++i]; // Third byte
2493 prefix5 = Byte & 3; // pp bits
2494 prefix6 |= (Byte << 3) & 0x20; // VEX: L bit, MVEX: 0, EVEX: 1
2495 vvvv = (~Byte >> 3) & 0x0F; // vvvv operand
2496 prefix7 |= (Byte >> 4) & 8; // W bit
2497 if (prefix3 == 0x62) {
2498 // 4-bytes EVEX or MVEX prefix
2499 prefix6 |= 0x40; // Indicates EVEX or MVEX prefix, bit 5 is 0 for MVEX, 1 for EVEX
2500 Byte = Buffer[++i]; // Fourth byte
2501 s.Kreg = Byte & 0x07; // kkk mask register
2502 vvvv |= (~Byte & 8) << 1; // extra v bit
2503 s.Esss = Byte >> 4; // EVEX: zLLb, MVEX: Esss bits
2504 }
2505 }
2506 StorePrefix(3, prefix3); // VEX prefix
2507 // Get operand size prefix
2508 prefix4 = (prefix5 == 1) ? 0x66 : 0;
2509 if (prefix7 & 8) prefix4 = 0x48;
2510 StorePrefix(4, prefix4); // Operand size prefix
2511 // Translate operand type prefix values
2512 static const uint8_t PrefixValues[4] = {0, 0x66, 0xF3, 0xF2};
2513 prefix5 = PrefixValues[prefix5];
2514 StorePrefix(5, prefix5); // Operand type prefix
2515 StorePrefix(6, prefix6); // VEX mmmmm,L
2516 StorePrefix(7, prefix7); // REX prefix equivalent
2517 s.Vreg = vvvv; // Store vvvv operand
2518 // Next byte cannot be a prefix. Stop searching for prefixes
2519 s.OpcodeStart1 = i + 1;
2520 return;
2521 }
2522 else if (OpcodeMap0[Byte].InstructionFormat & 0x8000) {
2523
2524 // This is a prefix (other than REX/VEX)
2525 switch (Byte) {
2526 case 0x26: case 0x2E: case 0x36: case 0x3E: case 0x64: case 0x65:
2527 // Segment prefix
2528 StorePrefix(0, Byte); // Store prefix
2529 if (Byte == 0x64) MasmOptions |= 2; // Remember FS used
2530 if (Byte == 0x65) MasmOptions |= 4; // Remember GS used
2531 break;
2532
2533 case 0x67:
2534 // Address size prefix
2535 StorePrefix(1, Byte); break;
2536
2537 case 0xF0:
2538 // Lock prefix
2539 StorePrefix(2, Byte); break;
2540
2541 case 0xF2: case 0xF3:
2542 // Repeat prefix
2543 StorePrefix(3, Byte); // Both in category repeat and operand type
2544 StorePrefix(5, Byte); break;
2545
2546 case 0x66:
2547 // Operand size
2548 StorePrefix(4, Byte); // Both in category operand size and operand type
2549 StorePrefix(5, Byte); break;
2550
2551 default:
2552 err.submit(9000);
2553 }
2554 }
2555 else {
2556 // This is not a prefix
2557 s.OpcodeStart1 = i;
2558 return;
2559 }
2560 }
2561 // Error: end of block reached before end of prefixes
2562 IEnd = i;
2563 s.Errors |= 0x10;
2564 }
2565
2566
StorePrefix(uint32_t Category,uint8_t Byte)2567 void CDisassembler::StorePrefix(uint32_t Category, uint8_t Byte) {
2568 // Store prefix according to category
2569 if (Category > 7) {err.submit(9000); return;} // Out of range
2570
2571 // Check if we already have a prefix in this category
2572 if (s.Prefixes[Category]) {
2573 // We already have a prefix in this category
2574 if (s.Prefixes[Category] != Byte || Category == 7) {
2575 // Conflicting prefixes in this category
2576 s.Conflicts[Category]++;
2577 }
2578 else {
2579 // Same prefix occurs more than once
2580 s.Warnings1 |= 0x100;
2581 }
2582 }
2583 // Check if REX prefix before this
2584 if (s.Prefixes[7]) s.Errors |= 0x20;
2585
2586 // Save prefix in category
2587 s.Prefixes[Category] = Byte;
2588 }
2589
2590
FindMapEntry()2591 void CDisassembler::FindMapEntry() {
2592 // Find entry in opcode maps
2593 uint32_t i = s.OpcodeStart1; // Index to current byte
2594 uint16_t Link; // Link to another map
2595 uint8_t Byte = Buffer[i]; // Current byte of code or index into map
2596 uint32_t MapNumber = 0; // Map number in opcodes.cpp
2597 uint32_t StartPage; // Index to start page in opcode map
2598 uint32_t MapNumber0 = 0; // Fallback start page if no map entry found in StartPage
2599 SOpcodeDef const * MapEntry; // Point to current opcode map entry
2600
2601 // Get start page from VEX.mmmm or XOP.mmmm bits if any
2602 switch (s.Prefixes[3]) {
2603 default: // no multibyte prefix
2604 StartPage = 0;
2605 MapEntry = OpcodeTables[StartPage] + Byte;
2606 break;
2607 case 0xC4: case 0xC5: case 0x62: // 2-, 3-, or 4-bytes VEX prefix
2608 StartPage = s.Prefixes[6] & 0x0F; // 4 mmmm bits or 0 if no VEX or XOP prefix
2609 if (StartPage >= NumOpcodeStartPageVEX) {
2610 s.Errors |= 0x10000; StartPage = 0; // mmmm bits out of range
2611 }
2612 MapNumber = OpcodeStartPageVEX[StartPage];
2613 if (StartPage == 1) MapNumber0 = 1;
2614 if (StartPage == 2 && s.Prefixes[3] == 0x62) {
2615 if ((s.Prefixes[5] & 0xFE) == 0xF2) { // shortcut for EVEX F2 0F 38 and EVEX F3 0F 38
2616 StartPage = 8 + (s.Prefixes[5] & 1);
2617 MapNumber0 = MapNumber;
2618 MapNumber = OpcodeStartPageVEX[StartPage];
2619 }
2620 }
2621
2622 // Get entry [Byte] in map
2623 MapEntry = OpcodeTables[MapNumber] + Byte;
2624
2625 // There are two entries for mm = 1: OpcodeMap1 for legacy code and OpcodeMapB1 for VEX-only code.
2626 // There are two entries for mm = 2: OpcodeMap2 for legacy code and OpcodeMapB2 for EVEX-only code with F3 prefix.
2627 // We don't want to have the same code in two different maps because this may cause errors if a code
2628 // is updated only in one of the maps.
2629 // Search the shortcut map first, then the default map
2630 if ((MapEntry->Name == 0 && MapEntry->TableLink == 0) || Byte >= OpcodeTableLength[MapNumber]) {
2631 // not found here, try in default map
2632 MapNumber = MapNumber0;
2633 MapEntry = OpcodeTables[MapNumber] + Byte;
2634 }
2635 if (MapNumber == 0) s.Errors |= 0x10000; // no map found
2636 break;
2637 case 0x8F: // XOP prefix
2638 StartPage = (s.Prefixes[6] & 0x1F) - 8; // 4 mmmm bits or 0 if no VEX or XOP prefix
2639 if (StartPage >= NumOpcodeStartPageXOP) {
2640 s.Errors |= 0x10000; StartPage = 0; // mmmm bits out of range
2641 }
2642 MapEntry = OpcodeStartPageXOP[StartPage] + Byte;// Get entry [Byte] in map
2643 }
2644
2645 // Save previous opcode and options
2646 *(uint32_t*)&PreviousOpcodei = *(uint32_t*)&Opcodei;
2647 *(uint32_t*)&Opcodei = 0;
2648
2649 // Loop through map tree (exit loop when Link == 0)
2650 while (1) {
2651
2652 // Check if MapEntry has a link to another map
2653 Link = MapEntry->TableLink;
2654
2655 switch (Link) {
2656 case 0: // No link
2657 // Final map entry found
2658 s.OpcodeStart2 = i;
2659 s.OpcodeDef = MapEntry;
2660
2661 // Save opcode and options
2662 Opcodei = (MapNumber << 8) | Byte;
2663 OpcodeOptions = MapEntry->Options;
2664
2665 // Return success
2666 return;
2667
2668 case 1: // Use following byte as index into next table
2669 if (i >= SectionEnd) {
2670 // Instruction extends beyond end of block
2671 IEnd = i; s.Errors |= 0x10;
2672 s.OpcodeStart2 = i;
2673 return;
2674 }
2675 Byte = Buffer[++i]; // Get next byte of code as index
2676 break;
2677
2678 case 2: // Use reg field of mod/reg/rm byte as index into next table
2679 Byte = (Buffer[i+1] >> 3) & 7; // Read reg bits
2680 break;
2681
2682 case 3: // Use mod < 3 vs. mod == 3 as index into next table
2683 Byte = (Buffer[i+1] & 0xC0) == 0xC0; // 1 if mod == 3
2684 break;
2685
2686 case 4: // Use mod and reg fields of mod/reg/rm byte as index into next table,
2687 // first 8 entries indexed by reg for mod < 3, next 8 entries indexed by reg for mod = 3.
2688 Byte = (Buffer[i+1] >> 3) & 7; // Read reg bits
2689 if ((Buffer[i+1] & 0xC0) == 0xC0) Byte += 8; // Add 8 if mod == 3
2690 break;
2691
2692 case 5: // Use rm bits of mod/reg/rm byte as index into next table
2693 Byte = Buffer[i+1] & 7; // Read r/m bits
2694 break;
2695
2696 case 6: // Use immediate byte after any other operands as index into next table
2697 s.OpcodeStart2 = i;
2698 s.OpcodeDef = MapEntry;
2699 FindOperands(); // Find size of all operand fields and end of instruction
2700 Byte = Buffer[IEnd - 1]; // Last byte of instruction
2701 break;
2702
2703 case 7: // Use mode as index into next table (16, 32, 64 bits)
2704 switch (WordSize) {
2705 case 16:
2706 Byte = 0; break;
2707 case 32: default:
2708 Byte = 1; break;
2709 case 64:
2710 Byte = 2;
2711 }
2712 break;
2713
2714 case 8: // Use operand size as index into next table (16, 32, 64 bits)
2715 switch (WordSize) {
2716 case 64:
2717 if (s.Prefixes[4] == 0x48) { // REX.W prefix = 64 bit
2718 Byte = 2; break;
2719 }
2720 // Else continue in case 32:
2721 case 32: default:
2722 Byte = (s.Prefixes[4] == 0x66) ? 0 : 1; break;
2723 case 16:
2724 Byte = (s.Prefixes[4] == 0x66) ? 1 : 0; break;
2725 }
2726 break;
2727
2728 case 9: // Use operand type prefixes as index into next table (none, 66, F2, F3)
2729 switch (s.Prefixes[5]) {
2730 case 0: default:
2731 Byte = 0; break;
2732 case 0x66:
2733 Byte = 1;
2734 if (s.Prefixes[3] == 0xF2) Byte = 2; // F2/F3 take precedence over 66 in (tzcnt instruction)
2735 else if (s.Prefixes[3] == 0xF3) Byte = 3;
2736 break;
2737 case 0xF2:
2738 Byte = 2; break;
2739 case 0xF3:
2740 Byte = 3; break;
2741 }
2742 break;
2743
2744 case 0xA: // Use address size as index into next table (16, 32, 64 bits)
2745 switch (WordSize) {
2746 case 64:
2747 Byte = (s.Prefixes[1] == 0x67) ? 1 : 2; break;
2748 case 32: default:
2749 Byte = (s.Prefixes[1] == 0x67) ? 0 : 1; break;
2750 case 16:
2751 Byte = (s.Prefixes[1] == 0x67) ? 1 : 0; break;
2752 }
2753 break;
2754
2755 case 0x0B: // Use VEX prefix and VEX.L bits as index into next table
2756 // 0: VEX absent, 1: VEX.L=0, 2: VEX.L=1, 3:MVEX or EVEX.LL=2, 4: EVEX.LL=3
2757 // (VEX absent, VEX.L=0, VEX.L=1)
2758 if ((s.Prefixes[7] & 0xB0) == 0) {
2759 Byte = 0; // VEX absent
2760 }
2761 else if ((s.Prefixes[6] & 0x60) == 0x60) { // EVEX
2762 Byte = ((s.Esss >> 1) & 3) + 1; // EVEX.LL bits
2763 }
2764 else if ((s.Prefixes[6] & 0x60) == 0x40) { // MVEX
2765 Byte = 3;
2766 }
2767 else { // VEX
2768 Byte = 1 + (s.Prefixes[6] >> 5 & 1); // 1 + VEX.L
2769 }
2770 break;
2771
2772 case 0x0C: // Use VEX.W bit as index into next table
2773 Byte = (s.Prefixes[7] & 0x08) >> 3;
2774 break;
2775
2776 case 0x0D: // Use vector size by VEX.L bit and EVEX/MVEX as index into next table
2777 // 0: VEX.L=0, 1: VEX.L=1, 2:MVEX or EVEX.LL=2, 3: EVEX.LL=3
2778 Byte = (s.Prefixes[6] >> 5) & 1; // VEX.L indicates xmm or ymm
2779 if (s.Prefixes[3] == 0x62) {
2780 if (s.Prefixes[6] & 0x20) {
2781 // EVEX. Use LL bits
2782 Byte = (s.Esss >> 1) & 3;
2783 }
2784 else {
2785 // MVEX. Always 512 bits
2786 Byte = 2;
2787 }
2788 }
2789 break;
2790
2791 case 0x0E: // Use VEX type as index into next table: 0 = 2 or 3 bytes VEX, 1 = 4 bytes EVEX
2792 Byte = (s.Prefixes[3] == 0x62); // EVEX
2793 break;
2794
2795 case 0x0F: // Use MVEX.E bit as index into next table
2796 Byte = (s.Prefixes[3] == 0x62 && (s.Esss & 8)); // MVEX.E bit
2797 break;
2798
2799 case 0x10: // Use assembly language dialect as index into next table
2800 Byte = Syntax;
2801 break;
2802
2803 case 0x11: // Use VEX prefix type as index into next table. (0: none, 1: VEX prefix, 2: EVEX prefix, 3: MVEX prefix)
2804 if ((s.Prefixes[3] & ~1) == 0xC4) Byte = 1; // 2 or 3-bytes VEX prefix
2805 else if (s.Prefixes[3] == 0x62) { // EVEX or MVEX
2806 if (s.Prefixes[6] & 0x20) Byte = 2; // EVEX
2807 else Byte = 3; // MVEX
2808 }
2809 else Byte = 0; // no VEX
2810 break;
2811
2812 default: // Internal error in map tree
2813 err.submit(9007, MapNumber);
2814 s.OpcodeStart2 = i;
2815 return;
2816 }
2817
2818 // Get next map from branched tree of maps
2819 MapNumber = MapEntry->InstructionSet;
2820 if (MapNumber >= NumOpcodeTables1 || OpcodeTableLength[MapNumber] == 0) {
2821 err.submit(9007, MapNumber); return; // Map number out of range
2822 }
2823
2824 // Use Byte as index into new map. Check if within range
2825 if (Byte >= OpcodeTableLength[MapNumber]) {
2826 // Points outside map. Get last entry in map containing default
2827 Byte = OpcodeTableLength[MapNumber] - 1;
2828 }
2829 // Point to entry [Byte] in new map
2830 MapEntry = OpcodeTables[MapNumber] + Byte;
2831 if (MapEntry == 0) {
2832 err.submit(9007, MapNumber); return; // Map missing
2833 }
2834
2835 } // Loop end. Go to next
2836 }
2837
2838
FindOperands()2839 void CDisassembler::FindOperands() {
2840 // Interpret mod/reg/rm and SIB bytes and find operands
2841 s.MFlags = 0; // Memory operand flags:
2842 // 1 = has memory operand,
2843 // 2 = has mod/reg/rm byte,
2844 // 4 = has SIB byte,
2845 // 8 = has DREX byte (AMD SSE5 instructions never implemented),
2846 // 0x10 = is rip-relative
2847 uint8_t ModRegRM; // mod/reg/rm byte
2848 uint8_t SIB; // SIB byte
2849
2850 // Get address size
2851 if (WordSize == 64) s.AddressSize = (s.Prefixes[1] == 0x67) ? 32 : 64;
2852 else s.AddressSize = (WordSize == 16) ^ (s.Prefixes[1] == 0x67) ? 16 : 32;
2853
2854 s.AddressFieldSize = s.ImmediateFieldSize = 0;// Initialize
2855
2856 // Position of next element in opcode
2857 s.AddressField = s.OpcodeStart2 + 1;
2858
2859 // Check if there is a mod/reg/rm byte
2860 if (s.OpcodeDef->InstructionFormat & 0x10) {
2861
2862 // There is a mod/reg/rm byte
2863 s.MFlags |= 2;
2864
2865 if (s.OpcodeStart2 + 1 >= FunctionEnd) {
2866 CheckForMisplacedLabel();
2867 }
2868
2869 // Read mod/reg/rm byte
2870 ModRegRM = Buffer[s.AddressField++];
2871 s.Mod = ModRegRM >> 6; // mod = bit 6-7
2872 s.Reg = (ModRegRM >> 3) & 7; // reg = bit 3-5
2873 s.RM = ModRegRM & 7; // RM = bit 0-2
2874
2875 // Check if there is a SIB byte
2876 if (s.AddressSize > 16 && s.Mod != 3 && s.RM == 4) {
2877 // There is a SIB byte
2878 s.MFlags |= 4; // Remember we have a SIB byte
2879 SIB = Buffer[s.AddressField++]; // Read SIB byte
2880 // Get scale, index, base
2881 s.Scale = SIB >> 6; // Scale = bit 6-7
2882 s.IndexReg = (SIB >> 3) & 7; // Index = bit 3-5
2883 s.BaseReg = SIB & 7; // Base = bit 0-2
2884 }
2885
2886 // Check if there is a DREX byte (AMD SSE5 instructions never implemented):
2887 if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x14) {
2888 s.MFlags |= 8; // Remember we have a DREX byte
2889 s.Vreg = Buffer[s.AddressField++]; // Read DREX byte
2890 // The R,X,B bits of Vreg are equivalent to the corresponding bits of a REX prefix:
2891 s.Prefixes[7] |= (s.Vreg & 7) | 0x80;
2892 }
2893
2894 if (s.AddressField > FunctionEnd) {
2895 CheckForMisplacedLabel();
2896 }
2897
2898 // Check REX prefix
2899 if (s.Prefixes[7] & 4) s.Reg |= 8; // Add REX.R to reg field
2900 if (s.Prefixes[7] & 1) s.RM |= 8; // Add REX.B to RM field
2901
2902 // Interpretation of mod/reg/rm byte is different for 16 bit address size
2903 if (s.AddressSize == 16) {
2904
2905 if (s.Mod != 3) {
2906 // There is a memory operand
2907 s.MFlags |= 1;
2908
2909 // Get size of address/displacement operand from mod bits
2910 // (Will be overwritten later if none)
2911 if (s.Mod == 1) {
2912 s.AddressFieldSize = 1; // Size of displacement field
2913 }
2914 else if (s.Mod == 2) {
2915 s.AddressFieldSize = 2; // Size of displacement field
2916 }
2917
2918 // Check if direct memory operand
2919 if (s.Mod == 0 && s.RM == 6) {
2920 // Direct memory operand and nothing else
2921 s.AddressFieldSize = 2; // Size of address field
2922 }
2923 else {
2924 // Indirect memory operand
2925 // Get base and index registers
2926 // [bx+si], [bx+di], [bp+si], [bp+di], [si], [di], [bp], [bx]
2927 static const uint8_t BaseRegister [8] = {3+1, 3+1, 5+1, 5+1, 0, 0, 5+1, 3+1};
2928 static const uint8_t IndexRegister[8] = {6+1, 7+1, 6+1, 7+1, 6+1, 7+1, 0, 0};
2929 // Save register number + 1, because 0 means none.
2930 s.BaseReg = BaseRegister [s.RM]; // Base register = BX or BP or none
2931 s.IndexReg = IndexRegister[s.RM]; // Index register = SI or DI or none
2932 s.Scale = 0; // No scale factor in 16 bit mode
2933 }
2934 }
2935 }
2936 else {
2937 // Address size is 32 or 64 bits
2938
2939 if (s.Mod != 3) {
2940 // There is a memory operand
2941 s.MFlags |= 1;
2942
2943 // Get size of address/displacement operand from mod bits
2944 // (Will be overwritten later if none)
2945 if (s.Mod == 1) {
2946 s.AddressFieldSize = 1; // Size of displacement field
2947 }
2948 else if (s.Mod == 2) {
2949 s.AddressFieldSize = 4; // Size of displacement field
2950 }
2951
2952 // Check if direct memory operand
2953 if (s.Mod == 0 && (s.RM & 7) == 5) {
2954 // Direct memory operand and nothing else
2955 s.AddressFieldSize = 4; // Size of address field
2956 }
2957 else if ((s.RM & 7) == 4) {
2958 // There is a SIB byte
2959
2960 // Check REX prefix
2961 if (s.Prefixes[7] & 2) s.IndexReg |= 8; // Add REX.X to index
2962 if (s.Prefixes[7] & 1) s.BaseReg |= 8; // Add REX.B to base
2963 s.RM &= 7; // Remove REX.B from RM
2964
2965 s.BaseReg++; // Add 1 so that 0 means none
2966 if (s.IndexReg == 4 && (s.OpcodeDef->InstructionFormat & 0x1F) != 0x1E) {
2967 // No index register
2968 s.IndexReg = 0;
2969 }
2970 else {
2971 s.IndexReg++; // Add 1 so that 0 means none
2972 }
2973
2974 if (s.Mod == 0 && s.BaseReg == 5+1) {
2975 // No base register, 32 bit address
2976 s.AddressFieldSize = 4;
2977 s.BaseReg = 0;
2978 }
2979 }
2980 else {
2981 // Indirect memory operand and no SIB byte
2982 s.BaseReg = s.RM; // Get base register from RM bits
2983 s.BaseReg++; // Add 1 because 0 means none
2984 }
2985 }
2986 else {
2987 // No memory operand. Address size is 32 or 64 bits
2988 }
2989 // Check if rip-relative
2990 if (WordSize == 64 && (s.MFlags & 7) == 3 && !s.BaseReg && s.AddressFieldSize == 4) {
2991 // Memory operand is rip-relative
2992 s.MFlags |= 0x100;
2993 }
2994 }
2995 if (s.Prefixes[3] == 0x62) {
2996 // EVEX prefix gives another extra register bit
2997 s.Reg += ~(s.Prefixes[6]) & 0x10; // extra r bit = highest m bit
2998 if (s.Mod == 3) {
2999 // Register operands only. B bit extended by X bit
3000 s.RM += (s.Prefixes[7] & 2) << 3;
3001 }
3002 else if (s.IndexReg && s.OpcodeDef->InstructionFormat == 0x1E) {
3003 // VSIB byte. Index register extended by one of the v bits, base register < 16
3004 s.IndexReg += s.Vreg & 0x10;
3005 }
3006 }
3007 }
3008
3009 // Get operand size
3010 uint32_t OpSizePrefix = 0;
3011 if (s.Prefixes[4] == 0x66 && (s.OpcodeDef->AllowedPrefixes & 0x100)) OpSizePrefix = 1; // Operand size prefix
3012 if (s.Prefixes[4] == 0x48 && (s.OpcodeDef->AllowedPrefixes & 0x1000)) OpSizePrefix = 2; // Rex.W prefix
3013 s.OperandSize = (WordSize == 16) ^ (OpSizePrefix & 1) ? 16 : 32;
3014 if (OpSizePrefix == 2) s.OperandSize = 64;
3015 if ((s.OpcodeDef->AllowedPrefixes & 0x3000) == 0x3000 && WordSize == 64 && (OpSizePrefix & 2)) s.OperandSize = 64;
3016
3017 // Get any immediate operand
3018 // Offset to immediate operand field, if any
3019 s.ImmediateField = s.AddressField + s.AddressFieldSize;
3020
3021 // Check InstructionFormat for immediate and direct operands
3022 switch (s.OpcodeDef->InstructionFormat & 0x0FE0) {
3023 case 0x20: // Has 2 bytes immediate operand
3024 s.ImmediateFieldSize = 2; break;
3025
3026 case 0x40: // Has 1 byte immediate operand or short jump
3027 s.ImmediateFieldSize = 1; break;
3028
3029 case 0x60: // Has 3 bytes immediate operand (enter)
3030 s.ImmediateFieldSize = 3; break;
3031
3032 case 0x80: // Has 2 or 4 bytes immediate operand or near jump/call
3033 if ((s.OpcodeDef->Destination & 0xFE) == 0x82) {
3034 // Near jump/call address size depends on WordSize and operand size prefix,
3035 // but not on address size prefix
3036 s.ImmediateFieldSize = (WordSize == 16) ^ (s.Prefixes[4] == 0x66) ? 2 : 4;
3037 if (WordSize == 64) s.ImmediateFieldSize = 4; // 66 prefix ignored in 64 bit mode
3038 }
3039 else {
3040 // Size of other immediate data depend on operand size
3041 s.ImmediateFieldSize = (s.OperandSize == 16) ? 2 : 4;
3042 }
3043 break;
3044
3045 case 0x100: // Has 2, 4 or 8 bytes immediate operand
3046 s.ImmediateFieldSize = s.OperandSize / 8;
3047 break;
3048
3049 case 0x200: // Has 2+2 or 4+2 bytes far direct jump/call operand
3050 s.ImmediateFieldSize = (WordSize == 16) ^ (s.Prefixes[4] == 0x66) ? 4 : 6;
3051 break;
3052
3053 case 0x400: // Has 2, 4 or 8 bytes direct memory operand
3054 s.AddressFieldSize = s.AddressSize / 8;
3055 s.AddressField = s.ImmediateField;
3056 s.ImmediateField = s.AddressField + s.AddressFieldSize;
3057 s.ImmediateFieldSize = 0;
3058 break;
3059
3060 default: // No immediate operand
3061 s.ImmediateFieldSize = 0;
3062 }
3063
3064 // Find instruction end
3065 IEnd = s.ImmediateField + s.ImmediateFieldSize;
3066 if (IEnd > FunctionEnd) {
3067 CheckForMisplacedLabel();
3068 if (IEnd > SectionEnd) {
3069 // instruction extends outside code block
3070 s.Errors |= 0x10;
3071 IEnd = SectionEnd;
3072 }
3073 }
3074 }
3075
FindBroadcast()3076 void CDisassembler::FindBroadcast() {
3077 // Find broadcast and offset multiplier for EVEX code
3078 if (s.Mod != 3) {
3079 // has memory operand
3080 uint32_t m; // find memory operand
3081 for (m = 0; m < s.MaxNumOperands; m++) {
3082 if (s.Operands[m] & 0x2000) break;
3083 }
3084 if (m == s.MaxNumOperands) return; // no memory operand found. should not occur
3085 uint32_t r; // find largest vector operand
3086 uint32_t vectortype = 0;
3087 for (r = 0; r < s.MaxNumOperands; r++) {
3088 if ((s.Operands[r] & 0xF00) > vectortype) vectortype = s.Operands[r] & 0xF00;
3089 }
3090 uint32_t vectorsize = GetDataItemSize(vectortype);
3091 if (m < s.MaxNumOperands) {
3092 if ((s.OpcodeDef->EVEX & 1) && (s.Esss & 1)) {
3093 // broadcasting. multiplier = element size
3094 s.OffsetMultiplier = GetDataElementSize(s.Operands[m]);
3095 // operand size = element size
3096 s.Operands[m] &= ~0xF00;
3097 if (s.OffsetMultiplier >= vectorsize) {
3098 s.Warnings2 |= 0x200; // broadcasting to scalar
3099 }
3100 }
3101 else if (s.OpcodeDef->EVEX & 0x1000) {
3102 // multiplier = element size, not broadcasting
3103 s.OffsetMultiplier = GetDataElementSize(s.Operands[m]);
3104 }
3105 else if (s.OpcodeDef->EVEX & 0x2000) {
3106 // multiplier = fraction of largest vector size
3107 s.OffsetMultiplier = vectorsize >> ((s.OpcodeDef->EVEX & 0x600) >> 9);
3108 }
3109 else {
3110 // not broadcasting. multiplier = vector size
3111 s.OffsetMultiplier = GetDataItemSize(s.Operands[m]);
3112 }
3113 }
3114 }
3115 }
3116
3117
SwizTableLookup()3118 void CDisassembler::SwizTableLookup() {
3119 // Find the swizzle table record that correspond to the instruction and the sss bits for MVEX instructions
3120 int sw = (s.OpcodeDef->MVEX & 0x1F); // swizzle metatable index
3121 int opsize = 0; // operand size override
3122 if (s.OpcodeDef->Options & 1) {
3123 // operand size depends on prefix bits
3124 if (s.OpcodeDef->AllowedPrefixes & 0x1000) {
3125 // operand size depends on W bit
3126 if (s.Prefixes[7] & 8) opsize = 1;
3127 }
3128 else if (s.OpcodeDef->AllowedPrefixes & 0x300) {
3129 // operand size depends on 66 implied prefix
3130 if (s.Prefixes[5] == 0x66) opsize = 1;
3131 }
3132 }
3133 int IsMem = s.Mod != 3; // has memory operand
3134 // find record in swizzle tables
3135 s.SwizRecord = &(SwizTables[sw | opsize][IsMem][s.Esss & 7]);
3136 // find offset multiplier
3137 if (s.OpcodeDef->MVEX & 0x40) {
3138 // address single element
3139 s.OffsetMultiplier = s.SwizRecord->elementsize;
3140 }
3141 else {
3142 // address vector or subvector
3143 s.OffsetMultiplier = s.SwizRecord->memopsize;
3144 if (s.OffsetMultiplier == 0) {
3145 // no swizzle, use vector size
3146 uint16_t source = s.OpcodeDef->Source2; // last source operand
3147 if (!(source & 0xF00)) source = s.OpcodeDef->Source1; // if source2 is not a vector, use source1
3148 switch ((source >> 8) & 0xF) {
3149 case 2:
3150 // vector size depends on prefixes, currently only zmm supported when EVEX prefix is present
3151 s.OffsetMultiplier = 0x40; break;
3152 case 4:
3153 s.OffsetMultiplier = 0x10; break;
3154 case 5:
3155 s.OffsetMultiplier = 0x20; break;
3156 case 6:
3157 s.OffsetMultiplier = 0x40; break;
3158 }
3159 }
3160 }
3161 }
3162
FindOperandTypes()3163 void CDisassembler::FindOperandTypes() {
3164 // Determine the type of each operand
3165 uint32_t i, j, k; // Operands index
3166 int nimm = 0; // Number of immediate operands
3167 uint32_t AllowedPref = s.OpcodeDef->AllowedPrefixes;
3168 uint32_t oper; // current operand definition
3169
3170 s.MaxNumOperands = 4; // may be 5 in the future in cases where EVEX field is used as an extra operand
3171
3172 // Copy all operands from opcode map and zero-extend
3173 for (i = 0; i < s.MaxNumOperands; i++) {
3174 s.Operands[i] = (&s.OpcodeDef->Destination)[i];
3175 }
3176
3177 // Check instruction format
3178 switch (s.OpcodeDef->InstructionFormat & 0x1F) {
3179
3180 case 2: // No operands or only immediate operand
3181 break;
3182
3183 case 3: // Register operand indicated by bits 0-2 of opcode
3184 // Find which of the operands it applies to
3185 if ((s.Operands[0] & 0xFF) > 0 && (s.Operands[0] & 0xFF) < 0xB) i = 0; else i = 1;
3186 // Indicate this operand uses opcode bits
3187 s.Operands[i] |= 0x20000;
3188 break;
3189
3190 case 4: // Register operand indicated by VEX.vvvv bits
3191 // Find which of the operands it applies to
3192 if ((s.Operands[0] & 0xFF) < 0xB || (s.Operands[0] & 0xFF) == 0x95) i = 0; else i = 1;
3193 // Indicate this operand uses VEX.vvvv bits
3194 s.Operands[i] |= 0x60000;
3195 break;
3196
3197 case 0x11: // There is a mod/reg/rm byte and one operand
3198 // Find which of the operands it applies to
3199 for (j = k = 0; j < 2; j++) {
3200 if (s.Operands[j]) {
3201 switch (s.Operands[j] & 0xF0) {
3202 case 0: case 0x40: case 0x50:
3203 // This operand can have use rm bits
3204 k |= j+1;
3205 }
3206 }
3207 }
3208 if (k < 1 || k > 2) {
3209 // There must be one, and only one, operand that can use rm bits
3210 s.Errors |= 0x80000; // Error in opcode table
3211 }
3212 else {
3213 // Indicate this operand uses mod and rm bits
3214 s.Operands[k-1] |= 0x30000;
3215 }
3216 break;
3217
3218 case 0x12: // There is a mod/reg/rm byte and two operands. Destination is reg
3219 // Destination operand uses s.Reg bits
3220 s.Operands[0] |= 0x40000;
3221 // Source operand uses mod and rm bits
3222 s.Operands[1] |= 0x30000;
3223 break;
3224
3225 case 0x13: // There is a mod/reg/rm byte and two operands. Source is reg
3226 // Destination operand uses mod and rm bits
3227 s.Operands[0] |= 0x30000;
3228 // Source operand uses s.Reg bits
3229 s.Operands[1] |= 0x40000;
3230 break;
3231
3232 case 0x14: case 0x15: { // There is a DREX byte and three or four operands
3233 // Combine OC0 from DREX byte and OC1 from opcode byte into Operand configuration
3234 int OperandConfiguration = ((s.Vreg >> 3) & 1) | ((Get<uint8_t>(s.OpcodeStart2) >> 1) & 2);
3235 // Determine operands
3236 s.Operands[0] |= 0x50000; // Destination determined by dest field of DREX byte
3237 if (s.OpcodeDef->InstructionFormat & 1) {
3238 // Four XMM or register operands
3239 switch (OperandConfiguration) {
3240 case 0:
3241 s.Operands[1] = s.Operands[0]; // 1. source = same as destination
3242 s.Operands[2] |= 0x40000; // 2. source = reg
3243 s.Operands[3] |= 0x30000; // 3. source = rm
3244 break;
3245 case 1:
3246 s.Operands[1] = s.Operands[0]; // 1. source = same as destination
3247 s.Operands[2] |= 0x30000; // 2. source = rm
3248 s.Operands[3] |= 0x40000; // 3. source = reg
3249 break;
3250 case 2:
3251 s.Operands[1] |= 0x40000; // 1. source = reg
3252 s.Operands[2] |= 0x30000; // 2. source = rm
3253 s.Operands[3] = s.Operands[0]; // 3. source = same as destination
3254 break;
3255 case 3:
3256 s.Operands[1] |= 0x30000; // 1. source = rm
3257 s.Operands[2] |= 0x40000; // 2. source = reg
3258 s.Operands[3] = s.Operands[0]; // 3. source = same as destination
3259 break;
3260 }
3261 }
3262 else {
3263 // Three XMM or register operands
3264 if ((OperandConfiguration & 1) == 0) {
3265 // OC0 = 0
3266 s.Operands[1] |= 0x40000; // 1. source = reg
3267 s.Operands[2] |= 0x30000; // 2. source = rm
3268 }
3269 else {
3270 // OC0 = 1
3271 s.Operands[1] |= 0x30000; // 1. source = rm
3272 s.Operands[2] |= 0x40000; // 2. source = reg
3273 }
3274 }
3275 break;}
3276
3277 case 0x18: // Has VEX prefix and 2 operands
3278 // Dest = VEX.vvvv, src = rm, opcode extension in r bits.
3279 // Destination operand uses VEX.vvvv bits
3280 s.Operands[0] |= 0x60000;
3281 // Source1 operand uses mod and rm bits
3282 s.Operands[1] |= 0x30000;
3283 if (!(s.Prefixes[7] & 0xB0)) {
3284 // One operand omitted if no VEX prefix
3285 s.Operands[0] = s.Operands[1]; s.Operands[1] = 0;
3286 }
3287 break;
3288
3289 case 0x19: // Has VEX prefix and 3 operands
3290 // Dest = r, src1 = VEX.vvvv, src2 = rm.
3291 s.Operands[0] |= 0x40000;
3292 s.Operands[1] |= 0x60000;
3293 s.Operands[2] |= 0x30000;
3294 if (!(s.Prefixes[7] & 0xB0)) {
3295 // One source operand omitted if no VEX prefix
3296 s.Operands[1] = s.Operands[2]; s.Operands[2] = 0;
3297 }
3298 // Preliminary AMD specification
3299 if ((AllowedPref & 0x7000) == 0x7000 && !(s.Prefixes[7] & 8)) {
3300 // Swap src1 and src2 if XOP prefix and XOP.W = 0
3301 k = s.Operands[1]; s.Operands[1] = s.Operands[2]; s.Operands[2] = k;
3302 }
3303 break;
3304
3305 case 0x1A: // Has VEX prefix and 3 operands.
3306 // Dest = rm, src1 = VEX.v, src2 = r
3307 s.Operands[0] |= 0x30000;
3308 s.Operands[1] |= 0x60000;
3309 s.Operands[2] |= 0x40000;
3310 if (!(s.Prefixes[7] & 0xB0)) {
3311 // One source operand omitted if no VEX prefix
3312 s.Operands[1] = s.Operands[2]; s.Operands[2] = 0;
3313 }
3314 break;
3315
3316 case 0x1B: // Has VEX prefix and 3 operands
3317 // Dest = r, src1 = rm, src2 = VEX.vvvv
3318 s.Operands[0] |= 0x40000;
3319 s.Operands[1] |= 0x30000;
3320 s.Operands[2] |= 0x60000;
3321 if (!(s.Prefixes[7] & 0xB0)) {
3322 // Last source operand omitted if no VEX prefix
3323 s.Operands[2] = 0;
3324 }
3325 break;
3326
3327 case 0x1C: // Has VEX prefix and 4 operands
3328 // Dest = r, src1 = VEX.v, src2 = rm, src3 = bits 4-7 of immediate byte
3329 s.Operands[0] |= 0x40000;
3330 s.Operands[1] |= 0x60000;
3331 s.Operands[2] |= 0x30000;
3332 s.Operands[3] |= 0x70000;
3333 if ((s.Prefixes[7] & 8) && (AllowedPref & 0x7000) == 0x7000) {
3334 // Swap src2 and src3 if VEX.W
3335 k = s.Operands[2]; s.Operands[2] = s.Operands[3]; s.Operands[3] = k;
3336 }
3337 nimm++; // part of immediate byte used
3338 break;
3339
3340 case 0x1D: // Has VEX prefix and 4 operands
3341 // Dest = r, src1 = bits 4-7 of immediate byte, src2 = rm, src3 = VEX.vvvv
3342 s.Operands[0] |= 0x40000;
3343 s.Operands[1] |= 0x70000;
3344 s.Operands[2] |= 0x30000;
3345 s.Operands[3] |= 0x60000;
3346 if ((s.Prefixes[7] & 8) && (AllowedPref & 0x7000) == 0x7000) {
3347 // Swap src2 and src3 if VEX.W
3348 k = s.Operands[2]; s.Operands[2] = s.Operands[3]; s.Operands[3] = k;
3349 }
3350 nimm++; // part of immediate byte used
3351 break;
3352
3353 case 0x1E: // Has VEX prefix, VSIB and 1, 2 or 3 operands.
3354 if (s.Operands[0] & 0x2000) {
3355 // destination is memory
3356 // Dest = rm, src1 = r
3357 s.Operands[0] |= 0x30000;
3358 s.Operands[1] |= 0x40000;
3359 //if (s.Operands[2]) s.Operands[2] |= 0x60000;
3360 }
3361 else {
3362 // Dest = r, src1 = rm, src2 = VEX.v
3363 if (s.Operands[0]) s.Operands[0] |= 0x40000;
3364 s.Operands[1] |= 0x30000;
3365 if (s.Operands[2]) s.Operands[2] |= 0x60000;
3366 }
3367 break;
3368
3369 default: // No explicit operands.
3370 // Check for implicit memory operands
3371 for (i = 0; i < 2; i++) {
3372 if (s.Operands[i] & 0x2000) {
3373 // Direct memory operand
3374 s.Operands[i] |= 0x10000;
3375 if (s.OpcodeDef->InstructionFormat > 1) {
3376 // There is an address field
3377 s.AddressFieldSize = s.AddressSize / 8;
3378 s.AddressField = s.OpcodeStart2 + 1;
3379 s.MFlags |= 1; // Remember we have a memory operand
3380 }
3381 }
3382 }
3383 break;
3384 }
3385
3386 // Loop for destination and source operands
3387 for (i = 0; i < s.MaxNumOperands; i++) {
3388 // Ignore empty operands
3389 if (s.Operands[i] == 0) continue;
3390
3391 // Immediate operands
3392 if ((s.Operands[i] & 0xFF) >= 0x10 && (s.Operands[i] & 0xFF) < 0x40) {
3393 if (nimm++) {
3394 s.Operands[i] |= 0x200000; // second immediate operand
3395 }
3396 else {
3397 s.Operands[i] |= 0x100000; // first immediate operand
3398 }
3399 }
3400
3401 // Check if register or memory
3402 switch (s.Operands[i] & 0x3000) {
3403 case 0x1000: // Must be register
3404 if ((s.Operands[i] & 0xF0000) == 0x30000 && s.Mod != 3 && (s.OpcodeDef->InstructionFormat & 0x10)) {
3405 s.Errors |= 8; // Is memory. Indicate wrong operand type
3406 s.Operands[i] = (s.Operands[i] & ~0x1000) | 0x2000;// Indicate it is memory
3407 }
3408 break;
3409
3410 case 0x2000: // Must be memory operand
3411 if ((s.Operands[i] & 0xD0000) != 0x10000 || s.Mod == 3) {
3412 s.Errors |= 8; // Is register. Indicate wrong operand type
3413 s.Operands[i] = (s.Operands[i] & ~0x2000) | 0x1000; // Indicate it is register
3414 }
3415 break;
3416
3417 case 0x0000: // Can be register or memory
3418 if ((s.Operands[i] & 0xF0000) == 0x10000) {
3419 // Direct memory operand
3420 s.Operands[i] |= 0x2000; break;
3421 }
3422 if ((s.Operands[i] & 0xF0000) == 0x30000) {
3423 // Indicated by mod/rm bits
3424 if (s.Mod == 3) {
3425 s.Operands[i] |= 0x1000; // Is register
3426 }
3427 else {
3428 s.Operands[i] |= 0x2000; // Is memory
3429 }
3430 break;
3431 }
3432 if ((s.Operands[i] & 0xF0) != 0x10) { // Not a constant
3433 s.Operands[i] |= 0x1000; // Anything else is register
3434 }
3435 break;
3436 }
3437
3438 // Resolve types that depend on prefixes or WordSize
3439 switch (s.Operands[i] & 0xFF) {
3440 case 8: case 0x18: case 0x28: case 0x38: case 0xA8:
3441 // 16 or 32 bits
3442 s.Operands[i] &= ~0x0F;
3443 s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3;
3444 break;
3445
3446 case 9: case 0x19: case 0x29: case 0x39: case 0xA9:
3447 // 8, 16, 32 or 64 bits, depending on operand size prefixes
3448 s.Operands[i] &= ~0x0F;
3449 switch (AllowedPref & 0x7000) {
3450 case 0x3000: default: // 32 or 64 depending on mode and 66 or REX.W prefix
3451 s.Operands[i] |= (s.OperandSize == 16) ? 2 : ((s.OperandSize == 64) ? 4 : 3);
3452 break;
3453 case 0x4000: // VEX.W prefix determines integer (vector) operand size b/w
3454 if ((s.Prefixes[7] & 8) == 0) { // W bit
3455 s.OperandSize = 8;
3456 s.Operands[i] |= 1;
3457 }
3458 else {
3459 s.OperandSize = 16;
3460 s.Operands[i] |= 2;
3461 }
3462 break;
3463 case 0x5000: // VEX.W and 66 prefix determines integer operand size b/w/d/q (mask instructions. B = 66W0, W = _W0, D = 66W1, Q = _W1)
3464 s.Operands[i] |= (s.Prefixes[5] != 0x66) + ((s.Prefixes[7] & 8) >> 2) + 1;
3465 break;
3466 }
3467 break;
3468
3469 case 0xB: case 0xC: // 16, 32 or 64 bits. Fixed size = 64 in 64 bit mode
3470 s.Operands[i] &= ~0x0F;
3471 if (WordSize == 64) {
3472 s.Operands[i] |= 4;
3473 }
3474 else {
3475 s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3;
3476 }
3477 break;
3478
3479 case 0xA: // 16, 32 or 64 bits. Default size = 64 in 64 bit mode
3480 s.Operands[i] &= ~0x0F;
3481 if (WordSize == 64) {
3482 s.Operands[i] |= (s.OperandSize == 16) ? 2 : 4;
3483 }
3484 else {
3485 s.Operands[i] |= (s.OperandSize == 16) ? 2 : 3;
3486 }
3487 break;
3488
3489 case 0xD: // 16+16, 32+16 or 64+16 bits far indirect pointer (jump or call)
3490 s.Operands[i] &= ~0x0F;
3491 s.Operands[i] |= (s.OperandSize == 16) ? 3 : ((s.OperandSize == 64) ? 5 : 7);
3492 break;
3493
3494 case 0x4F: // XMM float. Size and precision depend on prefix bits
3495 s.Operands[i] &= ~0x7F; // remove type
3496 if ((AllowedPref & 0x1000) && !((AllowedPref & 0xF00) == 0xE00)) {
3497 // precision depends on VEX.W bit
3498 if (s.Prefixes[7] & 8) {
3499 s.Operands[i] |= 0x4C;
3500 }
3501 else {
3502 s.Operands[i] |= 0x4B;
3503 }
3504 }
3505 else {
3506 // Size and precision depend on prefix: none = ps, 66 = pd, F2 = sd, F3 = ss
3507 switch (s.Prefixes[5]) {
3508 case 0: // No prefix = ps
3509 s.Operands[i] |= 0x4B; break;
3510 case 0x66: // 66 prefix = pd
3511 s.Operands[i] |= 0x4C; break;
3512 case 0xF3: // F3 prefix = ss
3513 s.Operands[i] |= 0x4B;
3514 s.Operands[i] &= ~0xF00; // make scalar
3515 break;
3516 case 0xF2: // F2 prefix = sd
3517 s.Operands[i] |= 0x4C;
3518 s.Operands[i] &= ~0xF00; // make scalar
3519 break;
3520 };
3521 break;
3522 }
3523 }
3524
3525 // Resolve vector size
3526 switch (s.Operands[i] & 0xF00) {
3527 case 0x100: // MMX or XMM or YMM or ZMM depending on 66 prefix and VEX.L prefix and EVEX prefix
3528 case 0x200: // XMM or YMM or ZMM depending on prefixes
3529 case 0xF00: // Half the size defined by VEX.L prefix and EVEX.LL prefix. Minimum size = 8 bytes for memory, xmm for register
3530
3531 oper = s.Operands[i] & ~0xF00; // element type
3532 if (s.Prefixes[3] == 0x62) { // EVEX or MVEX prefix
3533 if (s.Prefixes[6] & 0x20) {
3534 // EVEX prefix
3535 // Do LL bits specify vector size when b = 1 for instructions that allow
3536 // sae but not rounding? Perhaps not, because sae is only allowed for
3537 // 512 bit vectors, but manual says otherwise.
3538 // NASM version 2.11.06 sets LL = 0 when b = 1 for vrangeps instruction
3539 //??if ((s.OpcodeDef->EVEX & 4) && (s.Mod == 3) && (s.Esss & 1)) {
3540 if ((s.OpcodeDef->EVEX & 6) && (s.Mod == 3) && (s.Esss & 1)) {
3541 // rounding control, register operand. L'L do not indicate vector size
3542 oper |= 0x600; // zmm
3543 }
3544 else if (s.OpcodeDef->EVEX & 8) {
3545 // scalar
3546 oper |= 0x400; // xmm
3547 }
3548 else {
3549 // L'L indicates vector size
3550 oper |= 0x400 + ((s.Esss & 6) << 7); // xmm, ymm, zmm,
3551 }
3552 }
3553 else {
3554 // MVEX prefix
3555 oper |= 0x600; // zmm
3556 }
3557 }
3558 else if (s.Prefixes[6] & 0x20) {
3559 oper |= 0x500; // VEX.L: ymm
3560 }
3561 else if (s.Prefixes[5] == 0x66 || (s.Operands[i] & 0x200)) {
3562 oper |= 0x400; // 66 prefix or mm not allowed: xmm
3563 }
3564 else {
3565 oper |= 0x300; // no prefix: mm
3566 }
3567 if ((s.Operands[i] & 0xF00) == 0xF00) {
3568 // half size vector
3569 oper -= 0x100;
3570 if ((oper & 0x1000) || (s.OpcodeDef->InstructionFormat == 0x1E)) {
3571 // is register or vsib index. minimum size is xmm
3572 if ((oper & 0xF00) < 0x400) {
3573 oper = (oper & ~0x300) | 0x400;
3574 }
3575 }
3576 }
3577 s.Operands[i] = oper; // save corrected vector size
3578
3579 break;
3580 }
3581
3582 // resolve types that depend on MVEX swizzle
3583 if ((s.Prefixes[6] & 0x60) == 0x40 && (s.Operands[i] & 0xF0000) == 0x30000) {
3584 int sw = (s.OpcodeDef->MVEX & 0x1F);
3585 if (sw) {
3586 int optype = s.SwizRecord ? s.SwizRecord->memop : 0; //?
3587 if (s.OpcodeDef->InstructionFormat == 0x1E) {
3588 // vsib addressing: s.Operands[i] & 0xF00 indicates index register size, s.Operands[i] & 0xFF indicates operand size
3589 s.Operands[i] = (s.Operands[i] & ~0xFF) | (optype & 0xFF);
3590 }
3591 else if (s.OpcodeDef->MVEX & 0x40) {
3592 // operand is not a full vector
3593 s.Operands[i] = (s.Operands[i] & ~0xFFF) | (optype & 0xFF);
3594 }
3595 else {
3596 // get operand type from swizzle table only
3597 if (optype) s.Operands[i] = optype | 0x30000;
3598 }
3599 }
3600 }
3601 }
3602 }
3603
3604
FindWarnings()3605 void CDisassembler::FindWarnings() {
3606 // Find any reasons for warnings in code
3607 uint32_t i; // Operand index
3608 uint32_t OperandSize; // Operand size
3609 uint8_t RexBits = 0; // Bits in REX prefix
3610
3611 if ((s.OpcodeDef->Options & 0x80) && s.ImmediateFieldSize > 1 && s.ImmediateRelocation == 0) {
3612 // Check if sign-extended operand can be used
3613 if ((s.ImmediateFieldSize == 2 && Get<int16_t>(s.ImmediateField) == Get<int8_t>(s.ImmediateField))
3614 || (s.ImmediateFieldSize == 4 && Get<int32_t>(s.ImmediateField) == Get<int8_t>(s.ImmediateField))) {
3615 s.Warnings1 |= 1; // Sign-extended operand could be used
3616 }
3617 }
3618 if (WordSize == 64 && s.ImmediateFieldSize == 8 && s.ImmediateRelocation == 0) {
3619 // We have a 64 bit immediate operand. Could it be made shorter?
3620 if (Get<uint32_t>(s.ImmediateField+4) == 0) {
3621 s.Warnings1 |= 2; // Upper half is zero. Could use zero-extension
3622 }
3623 else if (Get<int64_t>(s.ImmediateField) == Get<int32_t>(s.ImmediateField)) {
3624 s.Warnings1 |= 1; // Could use sign-extension
3625 }
3626 }
3627 // Check if displacement could be made smaller
3628 if (s.AddressFieldSize > 0 && s.AddressRelocation == 0
3629 && (s.BaseReg || (s.IndexReg && !s.BaseReg && s.Scale < 2))
3630 && s.OffsetMultiplier <= 1) {
3631 // There is a displacement which might be unnecessary
3632 switch (s.AddressFieldSize) {
3633 case 1: // 1 byte displacement
3634 if (Get<uint8_t>(s.AddressField) == 0
3635 && (((s.BaseReg-1) & 7) != 5 || (s.AddressSize == 16 && s.IndexReg)))
3636 s.Warnings1 |= 4; // Displacement is 0 and an addressing mode without displacement exists
3637 break;
3638 case 2: // 2 bytes displacement
3639 if (Get<int16_t>(s.AddressField) == 0) s.Warnings1 |= 4; // Displacement is 0
3640 else if (Get<int16_t>(s.AddressField) == Get<int8_t>(s.AddressField)) s.Warnings1 |= 8; // Could use sign extension
3641 break;
3642 case 4: // 4 bytes displacement
3643 if (s.OpcodeDef->InstructionFormat != 0x1E) {
3644 if (Get<int32_t>(s.AddressField) == 0) s.Warnings1 |= 4; // Displacement is 0
3645 else if (Get<int32_t>(s.AddressField) == Get<int8_t>(s.AddressField)) s.Warnings1 |= 8; // Could use sign extension
3646 }
3647 break;
3648 case 8: // 8 bytes displacement
3649 if (Get<int32_t>(s.AddressField) == Get<int64_t>(s.AddressField))
3650 // Has 8 bytes displacement. Could use sign-extended or rip-relative
3651 s.Warnings1 |= 8;
3652 break;
3653 }
3654 }
3655 // Check for unnecessary SIB byte
3656 if ((s.MFlags&4) && (s.BaseReg&7)!=4+1 && (s.IndexReg==0 || (s.BaseReg==0 && s.Scale==0))) {
3657 if (WordSize == 64 && s.BaseReg==0 && s.IndexReg==0) s.Warnings1 |= 0x4000; // 64-bit address not rip-relative
3658 else if ((s.Operands[0] & 0xFF) != 0x98 && (s.Operands[1] & 0xFF) != 0x98 && s.OpcodeDef->InstructionFormat != 0x1E) { // ignore if bounds register used or vsib
3659 s.Warnings1 |= 0x10; // Unnecessary SIB byte
3660 }
3661 }
3662 // Check if shorter instruction exists for register operands
3663 if ((s.OpcodeDef->Options & 0x80) && !(s.OpcodeDef->InstructionFormat & 0xFE0) && s.Mod == 3
3664 && !(WordSize == 64 && Get<uint8_t>(s.OpcodeStart1) == 0xFF)) {
3665 s.Warnings1 |= 0x20; // No memory operand. A shorter version exists for register operand
3666 }
3667 // Check for length-changing prefix
3668 if (s.ImmediateFieldSize > 1 && s.Prefixes[4] == 0x66
3669 && (s.OpcodeDef->AllowedPrefixes & 0x100) && !(s.OpcodeDef->InstructionFormat & 0x20)) {
3670 // 66 prefix changes length of immediate field
3671 s.Warnings1 |= 0x40;
3672 }
3673 // Check for bogus length-changing prefix causing stall on Intel Core2.
3674 // Will occur if 66 prefix and first opcode byte is F7 and there is a 16 bytes boundary between opcode byte and mod/reg/rm byte
3675 if (Get<uint8_t>(s.OpcodeStart1) == 0xF7 && s.Prefixes[4] == 0x66 && ((s.OpcodeStart1+1) & 0xF) == 0 && !s.ImmediateFieldSize) {
3676 s.Warnings1 |= 0x2000000;
3677 }
3678 // Warn for address size prefix if mod/reg/rm byte
3679 // (This does not cause a stall in 64 bit mode, but I am issueing a
3680 // warning anyway because the changed address size is probably unintended)
3681 if (s.Prefixes[1] == 0x67 && (s.MFlags & 2)) {
3682 s.Warnings1 |= 0x80;
3683 }
3684 // Check for unnecessary REX.W prefix
3685 if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x2000 && s.Prefixes[7] == 0x48) {
3686 s.Warnings1 |= 0x200; // REX.W prefix valid but unnecessary
3687 }
3688 // Check for meaningless prefixes
3689 if (!(s.OpcodeDef->InstructionFormat & 0x10) || s.Mod == 3) {
3690 // No mod/reg/rm byte or only register operand. Check for address size and segment prefixes
3691 if ((s.Prefixes[0] && !(s.OpcodeDef->AllowedPrefixes & 0xC))
3692 || (s.Prefixes[1] && !(s.OpcodeDef->AllowedPrefixes & 3))) {
3693 s.Warnings1 |= 0x400; // Unnecessary segment or address size prefix
3694 }
3695 }
3696
3697 // Check for meaningless segment prefixes
3698 if (s.Prefixes[0] && !(s.OpcodeDef->AllowedPrefixes & 0x0C)) {
3699 // Segment prefix is not branch hint
3700 if (WordSize == 64 && (s.Prefixes[0] & 0x02))
3701 s.Warnings1 |= 0x400; // CS, DS, ES or SS prefix in 64 bit mode has no effect
3702 if (s.Prefixes[0] == 0x3E && s.BaseReg != 4+1 && s.BaseReg != 5+1)
3703 s.Warnings1 |= 0x400; // Unnecessary DS: segment prefix
3704 if (s.Prefixes[0] == 0x36 && (s.BaseReg == 4+1 || s.BaseReg == 5+1) )
3705 s.Warnings1 |= 0x400; // Unnecessary SS: segment prefix
3706 if (Opcodei == 0x8D)
3707 s.Warnings1 |= 0x400; // Segment prefix on LEA instruction
3708 if (s.Mod == 3)
3709 s.Warnings1 |= 0x400; // mod/reg/rm byte indicates no memory operand
3710 }
3711
3712 // Check for meaningless 66 prefix
3713 if (s.Prefixes[4] == 0x66 && !(s.OpcodeDef->AllowedPrefixes & 0x380))
3714 s.Warnings1 |= 0x400; // 66 prefix not allowed here
3715
3716 // Check for meaningless F2 prefix
3717 if (s.Prefixes[3] == 0xF2 && !(s.OpcodeDef->AllowedPrefixes & 0x868))
3718 s.Warnings1 |= 0x400; // F2 prefix not allowed here
3719
3720 // Check for meaningless F3 prefix
3721 if (s.Prefixes[3] == 0xF3 && !(s.OpcodeDef->AllowedPrefixes & 0x460))
3722 s.Warnings1 |= 0x400; // F3 prefix not allowed here
3723
3724 // Check for meaningless REX prefix bits
3725 if (s.Prefixes[7]) {
3726 // REX, VEX, XOP or DREX present
3727 // Get significant bits
3728 RexBits = s.Prefixes[7] & 0x0F;
3729 // Check if empty REX prefix
3730 if (RexBits == 0 && (s.Prefixes[7] & 0x40) && (s.Operands[0] & 0xFF) != 1 && (s.Operands[1] & 0xFF) != 1) {
3731 // Empty REX prefix needed only if 8 bit register register
3732 s.Warnings1 |= 0x400;
3733 }
3734 // Clear bits that are used:
3735 // Check if REX.W bit used
3736 if (s.OpcodeDef->AllowedPrefixes & 0x3000) RexBits &= ~8;
3737 // Check if REX.R and REX.B bit used for source or destination operands
3738 for (i = 0; i < 4; i++) {
3739 switch (s.Operands[i] & 0xF0000) {
3740 case 0x40000: // uses reg bits, check if REX.R allowed
3741 if ((s.Operands[i] & 0xF00) != 0x300 && (s.Operands[i] & 0x58) != 0x40 && (s.Operands[i] & 0xFF) != 0x91)
3742 // REX.R used for operand and register type allows value > 7
3743 RexBits &= ~4;
3744 break;
3745 case 0x30000: // Uses rm bits. check if REX.B allowed
3746 if ((s.Operands[i] & 0xF00) != 0x300 && (s.Operands[i] & 0x58) != 0x40 && (s.Operands[i] & 0xFF) != 0x91)
3747 // REX.B used for operand and register type allows value > 7
3748 RexBits &= ~1;
3749 break;
3750 case 0x20000: // Register operand indicated by opcode bits and REX:B
3751 RexBits &= ~1;
3752 break;
3753 }
3754 }
3755 // Check if REX.X bit used for index register
3756 if (s.IndexReg) RexBits &= ~2;
3757 // Check if REX.B bit used for base register
3758 if (s.BaseReg) RexBits &= ~1;
3759 // Check if REX.X bit used for base register with EVEX prefix
3760 if (s.Prefixes[3] == 0x62 && s.Mod == 3) RexBits &= ~2;
3761
3762 // Check if VEX.W bit used for some purpose
3763 if ((s.OpcodeDef->AllowedPrefixes & 0x7000) != 0 && (s.Prefixes[7] & 0xB0)) RexBits &= ~8;
3764
3765 // Any unused bits left?
3766 if (RexBits) {
3767 s.Warnings1 |= 0x400; // At least one REX bit makes no sense here
3768 }
3769 }
3770 // Check for registers not allowed in 32-bit mode
3771 if (this->WordSize < 64) {
3772 if (s.Prefixes[7] & 7 & ~RexBits) {
3773 s.Errors |= 0x200; // Register 8-15 not allowed in this mode
3774 }
3775 if (s.Prefixes[7] & 0xB0) {
3776 // VEX present, check vvvv register operand
3777 if (s.Vreg & 8) s.Errors |= 0x200; // Register 8-15 not allowed in this mode
3778 // Check imm[7:4] register operand
3779 if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x1C && (Get<uint8_t>(s.ImmediateField) & 8)) {
3780 s.Errors |= 0x200; // Register 8-15 not allowed in this mode
3781 }
3782 }
3783 }
3784
3785 // Check for meaningless VEX prefix bits
3786 if (s.Prefixes[7] & 0xB0) {
3787 // VEX present
3788 if ((s.Prefixes[6] & 0x60) == 0x20) { // VEX.L bit set and not EVEX
3789 if (!(s.OpcodeDef->AllowedPrefixes & 0x240000)) s.Warnings1 |= 0x40000000; // L bit not allowed
3790 if ((s.OpcodeDef->AllowedPrefixes & 0x200000) && s.Prefixes[5] > 0x66) s.Warnings1 |= 0x40000000; // L bit not allowed with F2 and F3 prefix
3791 }
3792 else {
3793 if ((s.OpcodeDef->AllowedPrefixes & 0x100000) && !(s.Prefixes[6] & 0x20)) s.Warnings1 |= 0x1000; // L bit missing
3794 }
3795 if ((s.Prefixes[6] & 0x10) && s.Prefixes[3] != 0x62) {
3796 s.Warnings1 |= 0x40000000; // Uppermost m bit only allowed if EVEX prefix
3797 }
3798 // check VEX.v bits
3799 if (s.Prefixes[3] == 0x62 && s.OpcodeDef->InstructionFormat == 0x1E) {
3800 // has EVEX VSIB address
3801 if (s.Vreg & 0xF) {
3802 s.Warnings1 |= 0x40000000; // vvvv bits not allowed, v' bit allowed
3803 }
3804 }
3805 else { // not EVEX VSIB
3806 if ((s.Vreg & 0x1F) && !(s.OpcodeDef->AllowedPrefixes & 0x80000)) {
3807 s.Warnings1 |= 0x40000000; // vvvvv bits not allowed
3808 }
3809 }
3810 }
3811 // Check for meaningless EVEX and MVEX prefix bits
3812 if (s.Prefixes[3] == 0x62) {
3813 if (s.Prefixes[6] & 0x20) {
3814 // EVEX prefix
3815 if (s.Mod == 3) {
3816 // register operands
3817 if (!(s.OpcodeDef->EVEX & 6) && (s.Esss & 1)) {
3818 s.Warnings2 |= 0x40; // rounding and sae not allowed
3819 }
3820 }
3821 else {
3822 // memory operand
3823 if (!(s.OpcodeDef->EVEX & 1) && (s.Esss & 1)) {
3824 s.Warnings2 |= 0x40; // broadcast not allowed
3825 }
3826 }
3827 if (!(s.OpcodeDef->EVEX & 0x30) && s.Kreg) {
3828 s.Warnings2 |= 0x40; // masking not allowed
3829 }
3830 else if (!(s.OpcodeDef->EVEX & 0x20) && (s.Esss & 8)) {
3831 s.Warnings2 |= 0x40; // zeroing not allowed
3832 }
3833 else if ((s.OpcodeDef->EVEX & 0x40) && s.Kreg == 0) {
3834 s.Warnings2 |= 0x100; // mask register must be nonzero
3835 }
3836 }
3837 else {
3838 // MVEX prefix.
3839 if (s.Mod == 3) {
3840 // register operands only
3841 if ((s.Esss & 8) && (s.OpcodeDef->MVEX & 0x600) == 0) {
3842 s.Warnings2 |= 0x80; // E bit not allowed for register operand here
3843 }
3844 }
3845 if (((s.OpcodeDef->MVEX & 0x1F) == 0) && (s.Esss & 7) != 0) {
3846 s.Warnings2 |= 0x80; // sss bits not allowed here
3847 }
3848 if (s.Kreg && (s.OpcodeDef->MVEX & 0x3000) == 0) {
3849 s.Warnings2 |= 0x80; // kkk bits not allowed here
3850 }
3851 }
3852 }
3853
3854 // Check for conflicting prefixes
3855 if (s.OpcodeDef->AllowedPrefixes & 0x140) s.Conflicts[5] = 0; // 66 + F2/F3 allowed for string instructions
3856 if ((s.OpcodeDef->AllowedPrefixes & 0x1200) == 0x1200) s.Conflicts[4] = 0; // 66 + REX.W allowed for e.g. movd/movq instruction
3857 if (*(int64_t*)&s.Conflicts) s.Warnings1 |= 0x800; // Conflicting prefixes. Check all categories at once
3858
3859 // Check for missing prefixes
3860 if ((s.OpcodeDef->AllowedPrefixes & 0x8000) && s.Prefixes[5] == 0)
3861 s.Warnings1 |= 0x1000; // Required 66/F2/F3 prefix missing
3862 if ((s.OpcodeDef->AllowedPrefixes & 0x20000) && (s.Prefixes[7] & 0xB0) == 0)
3863 s.Warnings1 |= 0x1000; // Required VEX prefix missing
3864
3865 // Check for VEX prefix not allowed
3866 if (!(s.OpcodeDef->AllowedPrefixes & 0xC30000) && (s.Prefixes[7] & 0xB0))
3867 s.Warnings1 |= 0x40000000; // VEX prefix not allowed
3868
3869 // Check for EVEX and MVEX prefix allowed
3870 if (s.Prefixes[3] == 0x62) {
3871
3872 if (s.Prefixes[6] & 0x20) {
3873 if (!(s.OpcodeDef->AllowedPrefixes & 0x800000)) s.Warnings2 |= 0x10;
3874 }
3875 else {
3876 if (!(s.OpcodeDef->AllowedPrefixes & 0x400000)) s.Warnings2 |= 0x20;
3877 }
3878 }
3879
3880 // Check for unused SIB scale factor
3881 if (s.Scale && s.IndexReg == 0) s.Warnings1 |= 0x2000; // SIB has scale factor but no index register
3882
3883 // Check if address in 64 bit mode is rip-relative
3884 if (WordSize == 64 && s.AddressFieldSize >= 4 && s.AddressRelocation && !(s.MFlags & 0x100)) {
3885 // 32-bit address in 64 bit mode is not rip-relative. Check if image-relative
3886 if (s.AddressRelocation >= Relocations.GetNumEntries() || !(Relocations[s.AddressRelocation].Type & 0x14)) {
3887 // Not image-relative or relative to reference point
3888 if (s.AddressFieldSize == 8) {
3889 s.Warnings1 |= 0x20000000; // Full 64-bit address
3890 }
3891 else {
3892 s.Warnings1 |= 0x4000; // 32-bit absolute address
3893 }
3894 }
3895 }
3896 // Check if direct address is relocated
3897 if (s.AddressFieldSize > 1 && !s.AddressRelocation && !s.BaseReg && !s.IndexReg && (WordSize != 16 || !(s.Prefixes[0] & 0x40)))
3898 s.Warnings1 |= 0x8000; // Direct address has no relocation, except FS: and GS:
3899
3900 // Check if address relocation type is correct
3901 if (s.AddressFieldSize > 1 && s.AddressRelocation && (s.MFlags & 1)) {
3902 // Memory operand found. Should it be direct or self-relative
3903 if (s.MFlags & 0x100) {
3904 // Memory address should be self-relative (rip-relative)
3905 if (!(Relocations[s.AddressRelocation].Type & 2)) {
3906 s.Warnings1 |= 0x10000; // rip-relative relocation expected but not found
3907 }
3908 }
3909 else {
3910 // Memory address should be direct
3911 if (Relocations[s.AddressRelocation].Type & 0x302) {
3912 s.Warnings1 |= 0x10000; // direct address expected, other type found
3913 }
3914 }
3915
3916 // Check if memory address has correct alignment
3917 // Loop through destination and source operands
3918 for (i = 0; i < s.MaxNumOperands; i++) {
3919 // Operand type
3920 uint32_t OperandType = s.Operands[i];
3921 if ((OperandType & 0x2000) && Opcodei != 0x8D) {
3922 // This is a memory operand (except LEA). Get target offset
3923 int64_t TargetOffset = 0;
3924 switch (s.AddressFieldSize) {
3925 case 1:
3926 TargetOffset = Get<int8_t>(s.AddressField); break;
3927 case 2:
3928 TargetOffset = Get<int16_t>(s.AddressField); break;
3929 case 4:
3930 TargetOffset = Get<int32_t>(s.AddressField);
3931 if (s.MFlags & 0x100) {
3932 // Compute rip-relative address
3933 TargetOffset += IEnd - s.AddressField;
3934 }
3935 break;
3936 case 8:
3937 TargetOffset = Get<int64_t>(s.AddressField); break;
3938 }
3939 // Add relocation offset
3940 TargetOffset += Relocations[s.AddressRelocation].Addend;
3941
3942 // Find relocation target
3943 uint32_t SymbolOldIndex = Relocations[s.AddressRelocation].TargetOldIndex;
3944 uint32_t SymbolNewIndex = Symbols.Old2NewIndex(SymbolOldIndex);
3945 if (SymbolNewIndex) {
3946 // Add relocation target offset
3947 TargetOffset += Symbols[SymbolNewIndex].Offset;
3948 // Target section
3949 int32_t TargetSection = Symbols[SymbolNewIndex].Section;
3950 if (TargetSection && (uint32_t)TargetSection < Sections.GetNumEntries()) {
3951 // Add relocation section address
3952 TargetOffset += Sections[TargetSection].SectionAddress;
3953 }
3954 if ((Relocations[s.AddressRelocation].Type & 0x10) && Relocations[s.AddressRelocation].RefOldIndex) {
3955 // Add offset of reference point
3956 uint32_t RefIndex = Symbols.Old2NewIndex(Relocations[s.AddressRelocation].RefOldIndex);
3957 TargetOffset += Symbols[RefIndex].Offset;
3958 }
3959 if (Relocations[s.AddressRelocation].Type & 0x3000) {
3960 // GOT entry etc. Can't check alignment
3961 continue;
3962 }
3963 }
3964
3965 // Get operand size
3966 OperandSize = GetDataItemSize(OperandType);
3967 if (s.OffsetMultiplier) OperandSize = s.OffsetMultiplier;
3968 while (OperandSize & (OperandSize-1)) {
3969 // Not a power of 2. Get nearest lower power of 2
3970 OperandSize = OperandSize & (OperandSize-1);
3971 }
3972
3973 // Check if aligned
3974 if ((TargetOffset & (OperandSize-1)) && !(s.Warnings1 & 0x10000)) {
3975 // Memory operand is misaligned
3976 if (s.OffsetMultiplier) {
3977 // EVEX code with required alignment
3978 s.Warnings1 |= 0x800000; // Serious. Generates fault
3979 }
3980 else if (OperandSize < 16) {
3981 // Performance penalty but no fault
3982 s.Warnings1 |= 0x400000; // Warn not aligned
3983 }
3984 else {
3985 // XMM or larger. May generate fault
3986 // with VEX: only explicitly aligned instructions generate fault
3987 // without VEX: all require alignment except explicitly unaligned
3988 if (s.OpcodeDef->Options & 0x100 || (!(s.Prefixes[7] & 0xB0) && !(s.OpcodeDef->Options & 0x200))) {
3989 s.Warnings1 |= 0x800000; // Serious. Generates fault
3990 }
3991 else {
3992 s.Warnings1 |= 0x400000; // Not serious. Performance penalty only
3993 }
3994 }
3995 }
3996 }
3997 }
3998 }
3999
4000 // Check if jump relocation type is correct
4001 if (s.ImmediateFieldSize > 1 && s.ImmediateRelocation && (s.OpcodeDef->Destination & 0xFE) == 0x82) {
4002 // Near jump or call. Relocation must be self-relative
4003 if (!(Relocations[s.ImmediateRelocation].Type & 2)) {
4004 s.Warnings1 |= 0x10000; // Self-relative relocation expected but not found
4005 }
4006 }
4007 // Check operand size for jumps
4008 if ((s.OpcodeDef->AllowedPrefixes & 0x80) && s.Prefixes[4]) {
4009 // Jump instruction sensitive to operand size prefix
4010 if (WordSize == 32) s.Warnings1 |= 0x20000; // Instruction pointer truncated
4011 if (WordSize == 64) s.Warnings1 |= 0x400; // Prefix has no effect
4012 }
4013
4014 // Check address size for stack operations
4015 if ((s.OpcodeDef->AllowedPrefixes & 2) && s.Prefixes[1])
4016 s.Warnings1 |= 0x40000; // Stack operation has address size prefix
4017
4018 // Check for undocumented opcode
4019 if ((s.OpcodeDef->InstructionFormat & 0x4000) && s.OpcodeDef->Name)
4020 s.Warnings1 |= 0x100000; // Undocumented opcode
4021
4022 // Check for future opcode
4023 if (s.OpcodeDef->InstructionFormat & 0x2000)
4024 s.Warnings1 |= 0x200000; // Opcode reserved for future extensions
4025
4026 // Check instruction set
4027 if (s.OpcodeDef->InstructionSet & 0x10000)
4028 s.Warnings2 |= 0x2; // Planned future instruction
4029
4030 if (s.OpcodeDef->InstructionSet & 0x20000)
4031 s.Warnings2 |= 0x4; // Proposed instruction code never implemented, preliminary specification later changed
4032
4033 // Check operand size for stack operations
4034 if ((s.OpcodeDef->AllowedPrefixes & 0x102) == 0x102) {
4035 if (s.Prefixes[4] == 0x66 || (Get<uint8_t>(s.OpcodeStart1) == 0xCF && s.OperandSize != WordSize)) {
4036 s.Warnings1 |= 0x4000000; // Non-default size for stack operation
4037 }
4038 }
4039
4040 // Check if function ends with ret or unconditional jump (or nop)
4041 if (IEnd == FunctionEnd && !(s.OpcodeDef->Options & 0x50)) {
4042 s.Warnings1 |= 0x8000000; // Function does not end with return or jump
4043 }
4044
4045 // Check for multi-byte NOP and UD2
4046 if (s.OpcodeDef->Options & 0x50) CheckForNops();
4047
4048 // Check for inaccessible code
4049 if (IBegin == LabelInaccessible) {
4050 s.Warnings1 |= 0x10000000; // Inaccessible code other than NOP or UD2
4051 }
4052 }
4053
4054
FindErrors()4055 void CDisassembler::FindErrors() {
4056 // Find any errors in code
4057 if (IEnd - IBegin > 15) {
4058 // Instruction longer than 15 bytes
4059 s.Errors |= 1;
4060 }
4061 if (s.Prefixes[2] && (!(s.OpcodeDef->AllowedPrefixes & 0x10) || !(s.MFlags & 1))) {
4062 // Lock prefix not allowed for this instruction
4063 s.Errors |= 2;
4064 }
4065 if ( s.OpcodeDef->InstructionFormat == 0
4066 || ((s.OpcodeDef->InstructionFormat & 0x4000) && s.OpcodeDef->Name == 0)) {
4067 // Illegal instruction
4068 s.Errors |= 4;
4069 }
4070 if ((s.OpcodeDef->InstructionSet & 0x8000) && WordSize == 64) {
4071 // Instruction not allowed in 64 bit mode
4072 s.Errors |= 0x40;
4073 }
4074 if (IEnd > LabelEnd && IBegin < LabelEnd) {
4075 // Instruction crosses a label
4076 // Check if label is public
4077 uint32_t sym1 = Symbols.FindByAddress(Section, LabelEnd, 0, 0);
4078 if (sym1 && (Symbols[sym1].Scope & 0x1C)) {
4079 // Label is public. Code interpretation may be out of phase
4080 s.Errors |= 0x80;
4081 // Put interpretation in phase with label
4082 IEnd = LabelEnd;
4083 }
4084 else {
4085 // Symbol is local.
4086 // This may be a spurious label produced by misinterpretation elsewhere
4087 if (sym1) Symbols[sym1].Type = 0; // Remove symbol type
4088 s.Warnings2 |= 1;
4089 }
4090 }
4091 if ((s.MFlags & 3) == 3 && (s.Prefixes[7] & 1) && s.BaseReg == 0 && s.AddressFieldSize == 4) {
4092 // Attempt to use R13 as base register without displacement
4093 s.Errors |= 0x100;
4094 }
4095 if ((s.OpcodeDef->InstructionFormat & 0x1E) == 0x14) {
4096 // Check validity of DREX byte
4097 if ((s.Vreg & 0x87) && WordSize < 64) {
4098 s.Errors |= 0x200; // Attempt to use XMM8-15 in 16 or 32 bit mode (ignored, may be changed to warning)
4099 }
4100 if (s.Prefixes[7] & 0x40) {
4101 s.Errors |= 0x400; // Both REX and DREX byte
4102 }
4103 if ((s.Vreg & 2) && !(s.MFlags & 4)) {
4104 s.Errors |= 0x800; // DREX.X bit but no SIB byte (probably ignored, may be changed to warning)
4105 }
4106 }
4107 if ((s.OpcodeDef->InstructionFormat & 0x1F) == 0x1E) {
4108 // Instruction needs VSIB byte
4109 if (s.IndexReg == 0) s.Errors |= 8; // Illegal operand: no index register
4110 }
4111 if (LabelEnd >= s.OpcodeStart2+2 && (
4112 Get<uint16_t>(s.OpcodeStart2) == 0
4113 || Get<uint16_t>(s.OpcodeStart2) == 0xFFFF
4114 // || Get<uint16_t>(s.OpcodeStart2) == 0xCCCC
4115 )) {
4116 // Two consecutive bytes of zero gives the instruction: add byte ptr [eax],al
4117 // This instruction is very unlikely to occur in normal code but occurs
4118 // frequently in data. Mark to code as probably data.
4119 // Two bytes of 0xFF makes no legal instruction but occurs frequently in data.
4120 // Two bytes of 0xCC is debug breaks used by debuggers for marking illegal addresses or unitialized data
4121 s.Errors = 0x4000;
4122 }
4123 if (s.Errors) {
4124 // Errors found. May be data in code segment
4125 CountErrors++;
4126 MarkCodeAsDubious();
4127 }
4128 }
4129
4130
FindRelocations()4131 void CDisassembler::FindRelocations() {
4132 // Find any relocation sources in this instruction
4133 SARelocation rel1, rel2; // Make relocation records for searching
4134 rel1.Section = Section;
4135 rel1.Offset = IBegin; // rel1 marks begin of this instruction
4136 rel2.Section = Section;
4137 rel2.Offset = IEnd; // rel2 marks end of this instruction
4138
4139 // Search for relocations in this instruction
4140 uint32_t irel = Relocations.FindFirst(rel1); // Finds first relocation source >= IBegin
4141
4142 if (irel == 0 || irel >= Relocations.GetNumEntries()) {
4143 // No relocations found
4144 return;
4145 }
4146 if (Relocations[irel] < rel2) {
4147 // Found relocation points between IBegin and IEnd
4148 if (Relocations[irel].Offset == s.AddressField && s.AddressFieldSize) {
4149 // Relocation points to address field
4150 s.AddressRelocation = irel;
4151 if (Relocations[irel].Size > s.AddressFieldSize) {
4152 // Right place but wrong size
4153 s.Errors |= 0x1000;
4154 }
4155 }
4156 else if (Relocations[irel].Offset == s.ImmediateField && s.ImmediateFieldSize) {
4157 // Relocation points to immediate operand/jump address field
4158 s.ImmediateRelocation = irel;
4159 if (Relocations[irel].Size > s.ImmediateFieldSize) {
4160 // Right place but wrong size
4161 s.Errors |= 0x1000;
4162 }
4163 }
4164 else {
4165 // Relocation source points to a wrong address
4166 s.Errors |= 0x1000;
4167 }
4168 if (s.AddressRelocation) {
4169 // Found relocation for address field, there may be
4170 // a second relocation for the immediate field
4171 if (irel + 1 < Relocations.GetNumEntries() && Relocations[irel+1] < rel2) {
4172 // Second relocation found
4173 if (Relocations[irel+1].Offset == s.ImmediateField && s.ImmediateFieldSize) {
4174 // Relocation points to immediate operand/jump address field
4175 s.ImmediateRelocation = irel + 1;
4176 if (Relocations[irel+1].Size > s.ImmediateFieldSize) {
4177 // Right place but wrong size
4178 s.Errors |= 0x1000;
4179 }
4180 else {
4181 // Second relocation accepted
4182 irel++;
4183 }
4184 }
4185 }
4186 }
4187 // Check if there are more relocations
4188 if (irel + 1 < Relocations.GetNumEntries() && Relocations[irel+1] < rel2) {
4189 // This relocation points before IEnd but doesn't fit any operand or overlaps previous relocation
4190 if ((s.Operands[0] & 0xFE) == 0x84 && Relocations[irel+1].Offset == s.ImmediateField + s.ImmediateFieldSize - 2) {
4191 // Fits segment field of far jump/call
4192 ;
4193 }
4194 else {
4195 // Relocation doesn't fit anywhere
4196 s.Errors |= 0x1000;
4197 }
4198 }
4199 }
4200 }
4201
4202
FindInstructionSet()4203 void CDisassembler::FindInstructionSet() {
4204 // Update instruction set
4205 uint16_t InstSet = s.OpcodeDef->InstructionSet;
4206 if (InstSet == 7 && s.Prefixes[5] == 0x66) {
4207 // Change MMX to SSE2 if 66 prefix
4208 InstSet = 0x12;
4209 }
4210 if ((s.Prefixes[7] & 0x30) && InstSet < 0x19) {
4211 // VEX instruction set if VEX prefix
4212 InstSet = 0x19;
4213 }
4214 if (s.Prefixes[6] & 0x40) {
4215 // EVEX or MVEX prefix
4216 if (s.Prefixes[6] & 0x20) {
4217 // EVEX prefix
4218 if (InstSet < 0x20) InstSet = 0x20;
4219 }
4220 else {
4221 // MVEX prefix
4222 if (InstSet < 0x80) InstSet = 0x80;
4223 }
4224 }
4225 if ((InstSet & 0xFF00) == 0x1000) {
4226 // AMD-specific instruction set
4227 // Set AMD-specific instruction set to max
4228 if ((InstSet & 0xFF) > InstructionSetAMDMAX) {
4229 InstructionSetAMDMAX = InstSet & 0xFF;
4230 }
4231 }
4232 else {
4233 // Set Intel or generic instruction set to maximum
4234 if ((InstSet & 0xFF) > InstructionSetMax) {
4235 InstructionSetMax = InstSet & 0xFF;
4236 }
4237 }
4238
4239 // Set InstructionSetOR to a bitwise OR of all instruction sets encountered
4240 InstructionSetOR |= InstSet;
4241
4242 if (s.OpcodeDef->Options & 0x10) {
4243 FlagPrevious |= 2;
4244 }
4245 }
4246
4247
CheckLabel()4248 void CDisassembler::CheckLabel() {
4249 // Check if there is a label at instruction, and write it
4250 // Write begin and end of function
4251
4252 // Search in symbol table
4253 uint32_t Sym1, Sym2; // First and last symbol
4254
4255 // Find all symbol table entries at this address
4256 Sym1 = Symbols.FindByAddress(Section, IBegin, &Sym2);
4257
4258 if (Sym1) {
4259 // Found at least one symbol
4260 // Loop for all symbols with same address
4261 for (uint32_t s = Sym1; s <= Sym2; s++) {
4262
4263 // Check if label has already been written as a function label
4264 if (!(Symbols[s].Scope & 0x100) && !(Symbols[s].Type & 0x80000000)) {
4265
4266 // Write label as a private or public code label
4267 WriteCodeLabel(s);
4268 }
4269 }
4270 // Get symbol type and size
4271 DataType = Symbols[Sym2].Type;
4272 DataSize = GetDataItemSize(DataType);
4273 }
4274 }
4275
4276
CheckForNops()4277 void CDisassembler::CheckForNops() {
4278 // Check for multi-byte NOP and UD2 instructions
4279
4280 switch (Opcodei) {
4281 case 0x3C00: case 0x3C01: case 0x3C02: case 0x11F: // NOP
4282 // These opcodes are intended for NOPs. Indicate if longer than one byte
4283 if (IEnd - IBegin > 1) s.Warnings1 |= 0x1000000;
4284 // Remember NOP
4285 FlagPrevious |= 1;
4286 break;
4287
4288 case 0x8D: // LEA
4289 // LEA is often used as NOP with destination = base register
4290 if (s.Mod < 3 && s.Reg+1 == s.BaseReg && s.IndexReg == 0 &&
4291 s.AddressSize == s.OperandSize && s.OperandSize >= WordSize) {
4292 // Destination is same as base register.
4293 // Check if displacement is 0
4294 switch (s.AddressFieldSize) {
4295 case 0:
4296 break;
4297 case 1:
4298 if (Get<int8_t>(s.AddressField) != 0) return;
4299 break;
4300 case 2:
4301 if (Get<int16_t>(s.AddressField) != 0) return;
4302 break;
4303 case 4:
4304 if (Get<int32_t>(s.AddressField) != 0) return;
4305 break;
4306 default:
4307 return;
4308 }
4309 // Displacement is zero. This is a multi-byte NOP
4310 s.Warnings1 |= 0x1000000;
4311 break;
4312 }
4313
4314 case 0x86: case 0x87: // XCHG
4315 case 0x88: case 0x89: case 0x8A: case 0x8B: // MOV
4316 // Check if source and destination are the same register
4317 if (s.Mod == 3 && s.Reg == s.RM && s.OperandSize >= WordSize) {
4318 // Moving a register to itself. This is a NOP
4319 s.Warnings1 |= 0x1000000;
4320 }
4321 break;
4322 case 0x10B: // UD2
4323 FlagPrevious |= 6;
4324 break;
4325 }
4326
4327 if (s.Warnings1 & 0x1000000) {
4328 // A multi-byte NOP is detected.
4329 // Remove warnings for longer-than-necessary instruction
4330 s.Warnings1 &= ~ 0x873D;
4331 // Remember NOP
4332 FlagPrevious |= 1;
4333 }
4334 }
4335
4336
InitialErrorCheck()4337 void CDisassembler::InitialErrorCheck() {
4338 // Check for illegal relocations table entries
4339 uint32_t i; // Loop counter
4340
4341 // Loop through relocations table
4342 for (i = 1; i < Relocations.GetNumEntries(); i++) {
4343 if (Relocations[i].TargetOldIndex >= Symbols.GetLimit()) {
4344 // Nonexisting relocation target
4345 Relocations[i].TargetOldIndex = 0;
4346 }
4347 if (Relocations[i].RefOldIndex >= Symbols.GetLimit()) {
4348 // Nonexisting reference index
4349 Relocations[i].RefOldIndex = 0;
4350 }
4351 // Remember types of relocations in source
4352 RelocationsInSource |= Relocations[i].Type;
4353 }
4354
4355 // Check opcode tables
4356 if (NumOpcodeTables1 != NumOpcodeTables2) {
4357 err.submit(9007, 0xFFFF);
4358 }
4359 }
4360
4361
FinalErrorCheck()4362 void CDisassembler::FinalErrorCheck() {
4363 // Check for illegal entries in symbol table and relocations table
4364 uint32_t i; // Loop counter
4365 int SpaceWritten = 0; // Blank line written
4366
4367 // Loop through symbol table
4368 for (i = 1; i < Symbols.GetNumEntries(); i++) {
4369 if (Symbols[i].Section <= 0 || (Symbols[i].Type & 0x80000000)) {
4370 // Constant or external symbol or section
4371 continue;
4372 }
4373 if ((uint32_t)Symbols[i].Section >= Sections.GetNumEntries()
4374 || Symbols[i].Offset > Sections[Symbols[i].Section].TotalSize) {
4375 // Symbol has illegal address
4376 // Blank line
4377 if (!SpaceWritten++) OutFile.NewLine();
4378 // Write comment
4379 OutFile.Put(CommentSeparator);
4380 OutFile.Put("Error: Symbol ");
4381 // Write symbol name
4382 OutFile.Put(Symbols.GetName(i));
4383 // Write the illegal address
4384 OutFile.Put(" has a non-existing address. Section: ");
4385 if (Symbols[i].Section != ASM_SEGMENT_IMGREL) {
4386 OutFile.PutDecimal(Symbols[i].Section, 1);
4387 }
4388 else {
4389 OutFile.Put("Unknown");
4390 }
4391 OutFile.Put(" Offset: ");
4392 OutFile.PutHex(Symbols[i].Offset, 1);
4393 OutFile.NewLine();
4394 }
4395 }
4396 // Loop through relocations table
4397 for (i = 1; i < Relocations.GetNumEntries(); i++) {
4398 // Check source address
4399 if (Relocations[i].Section == 0
4400 || (uint32_t)Relocations[i].Section >= Sections.GetNumEntries()
4401 || (Sections[Relocations[i].Section].Type & 0xFF) == 3
4402 || Relocations[i].Offset >= Sections[Relocations[i].Section].InitSize) {
4403 // Relocation has illegal source address
4404 // Blank line
4405 if (!SpaceWritten++) OutFile.NewLine();
4406 // Write comment
4407 OutFile.Put(CommentSeparator);
4408 OutFile.Put("Error: Relocation number ");
4409 OutFile.PutDecimal(i);
4410 OutFile.Put(" has a non-existing source address. Section: ");
4411 if (Relocations[i].Section != ASM_SEGMENT_IMGREL) {
4412 OutFile.PutDecimal(Relocations[i].Section, 1);
4413 }
4414 else {
4415 OutFile.Put("Unknown");
4416 }
4417 OutFile.Put(" Offset: ");
4418 OutFile.PutHex(Relocations[i].Offset, 1);
4419 OutFile.NewLine();
4420 }
4421 // Check target
4422 if (Relocations[i].TargetOldIndex == 0
4423 || Relocations[i].TargetOldIndex >= Symbols.GetLimit()
4424 || Relocations[i].RefOldIndex >= Symbols.GetLimit()) {
4425 // Relocation has illegal target
4426 // Blank line
4427 if (!SpaceWritten++) OutFile.NewLine();
4428 // Write comment
4429 OutFile.Put(CommentSeparator);
4430 OutFile.Put("Error: Relocation number ");
4431 OutFile.PutDecimal(i);
4432 OutFile.Put(" at section ");
4433 OutFile.PutDecimal(Relocations[i].Section);
4434 OutFile.Put(" offset ");
4435 OutFile.PutHex(Relocations[i].Offset);
4436 OutFile.Put(" has a non-existing target index. Target: ");
4437 OutFile.PutDecimal(Relocations[i].TargetOldIndex, 1);
4438 if (Relocations[i].RefOldIndex) {
4439 OutFile.Put(", Reference point index: ");
4440 OutFile.PutDecimal(Relocations[i].RefOldIndex, 1);
4441 }
4442 OutFile.NewLine();
4443 }
4444 }
4445 }
4446
4447
CheckNamesValid()4448 void CDisassembler::CheckNamesValid() {
4449 // Fix invalid symbol and section names
4450 uint32_t i, j; // Loop counter
4451 uint32_t Len; // Length of name
4452 uint32_t Changed; // Symbol is changed
4453 char c; // Character in symbol
4454 const char * ValidCharacters = ""; // List of valid characters in symbol names
4455 // Make list of characters valid in symbol names other than alphanumeric characters
4456 switch (Syntax) {
4457 case SUBTYPE_MASM:
4458 ValidCharacters = "_$@?"; break;
4459 case SUBTYPE_NASM:
4460 ValidCharacters = "_$@?.~#"; break;
4461 case SUBTYPE_GASM:
4462 ValidCharacters = "_$."; break;
4463 default:
4464 err.submit(9000);
4465 }
4466
4467 // Loop through sections
4468 for (i = 1; i < Sections.GetNumEntries(); i++) {
4469 char * SecName = (char*)NameBuffer.Buf() + Sections[i].Name;
4470 if (Syntax == SUBTYPE_MASM && SecName[0] == '.') {
4471 // Name begins with dot
4472 // Check for reserved names
4473 if (stricmp(SecName, ".text") == 0
4474 || stricmp(SecName, ".data") == 0
4475 || stricmp(SecName, ".code") == 0
4476 || stricmp(SecName, ".const") == 0) {
4477 // Change . to _ in beginning of name to avoid reserved directive name
4478 SecName[0] = '_';
4479 }
4480 else {
4481 // Other name beginning with .
4482 // Set option dotname
4483 MasmOptions |= 1;
4484 }
4485 }
4486 }
4487 // Loop through symbols
4488 for (i = 1; i < Symbols.GetNumEntries(); i++) {
4489 if (Symbols[i].Name) {
4490 // Warning: violating const specifier in GetName():
4491 char * SymName = (char *)Symbols.GetName(i);
4492 Len = (uint32_t)strlen(SymName); Changed = 0;
4493 // Loop through characters in symbol
4494 for (j = 0; j < Len; j++) {
4495 c = SymName[j];
4496 if (!(((c | 0x20) >= 'a' && (c | 0x20) <= 'z')
4497 || (c >= '0' && c <= '9' && j != 0)
4498 || strchr(ValidCharacters, c))) {
4499 // Illegal character found
4500 if (Syntax == SUBTYPE_MASM) {
4501 if (j == 0 && c == '.') {
4502 // Symbol beginning with dot in MASM
4503 if (Symbols[i].Type & 0x80000000) {
4504 // This is a segment. Check for reserved names
4505 if (stricmp(SymName, ".text") == 0
4506 || stricmp(SymName, ".data") == 0
4507 || stricmp(SymName, ".code") == 0
4508 || stricmp(SymName, ".const") == 0) {
4509 // Change . to _ in beginning of name to avoid reserved directive name
4510 SymName[0] = '_'; // Warning: violating const specifier in GetName()
4511 break; // break out of j loop
4512 }
4513 }
4514 // Set option dotname
4515 MasmOptions |= 1;
4516 }
4517 else {
4518 // Other illegal character in MASM
4519 #if ReplaceIllegalChars
4520 SymName[j] = '?';
4521 #endif
4522 Changed++;
4523 }
4524 }
4525 else {
4526 // Illegal character in GAS or YASM syntax
4527 #if ReplaceIllegalChars
4528 SymName[j] = (Syntax == SUBTYPE_NASM) ? '?' : '$';
4529 #endif
4530 Changed++;
4531 }
4532 }
4533 }
4534 // Count names changed
4535 if (Changed) NamesChanged++;
4536 }
4537 }
4538 }
4539
4540
FixRelocationTargetAddresses()4541 void CDisassembler::FixRelocationTargetAddresses() {
4542 // Fix missing relocation target addresses
4543 // to section:offset addresses
4544 uint32_t r; // Relocation index
4545 uint32_t s; // Symbol index
4546 int32_t sect;
4547
4548 // Loop through relocations
4549 for (r = 1; r < Relocations.GetNumEntries(); r++) {
4550
4551 if (Relocations[r].TargetOldIndex == 0 && (Relocations[r].Type & 0x60)) {
4552 // Target symbol not defined. Make new symbol
4553 SASymbol sym;
4554 sym.Reset();
4555
4556 // Find target address from relocation source
4557 sect = Relocations[r].Section;
4558 if ((uint32_t)sect >= Sections.GetNumEntries()) continue;
4559 uint8_t * pSectionData = Sections[sect].Start;
4560 if (!pSectionData) continue;
4561 int64_t TargetOffset = 0;
4562 if (Relocations[r].Size == 4) {
4563 TargetOffset = *(int32_t*)(pSectionData + Relocations[r].Offset);
4564 }
4565 else if (Relocations[r].Size == 8) {
4566 TargetOffset = *(int64_t*)(pSectionData + Relocations[r].Offset);
4567 }
4568 else {
4569 // Error: wrong size
4570 continue;
4571 }
4572 if (HighDWord(TargetOffset)) {
4573 // Error: out of range
4574 continue;
4575 }
4576 // Translate to section:offset address
4577 if (!(TranslateAbsAddress(TargetOffset, sym.Section, sym.Offset))) {
4578 // Translation failed
4579 continue;
4580 }
4581 // Default scope is file local
4582 sym.Scope = 2;
4583
4584 // Add symbol if it doesn't exist or get index of existing symbol
4585 s = Symbols.NewSymbol(sym);
4586
4587 // Make reference to symbol from relocation record
4588 if (s) {
4589 Relocations[r].TargetOldIndex = Symbols[s].OldIndex;
4590 }
4591 }
4592 }
4593 }
4594
4595
TranslateAbsAddress(int64_t Addr,int32_t & Sect,uint32_t & Offset)4596 int CDisassembler::TranslateAbsAddress(int64_t Addr, int32_t &Sect, uint32_t &Offset) {
4597 // Translate absolute virtual address to section and offset
4598 // Returns 1 if valid address found.
4599 int32_t Section;
4600
4601 // Get image-relative address
4602 Addr -= ImageBase;
4603 // Fail if too big
4604 if (HighDWord(Addr)) return 0;
4605
4606 // Search through sections
4607 for (Section = 1; (uint32_t)Section < Sections.GetNumEntries(); Section++) {
4608 uint32_t SectionAddress = Sections[Section].SectionAddress;
4609 if ((uint32_t)Addr >= SectionAddress && (uint32_t)Addr < SectionAddress + Sections[Section].TotalSize) {
4610 // Address is within this section
4611 // Return section and offset
4612 Sect = Section;
4613 Offset = (uint32_t)Addr - SectionAddress;
4614 // Return 1 to indicate success
4615 return 1;
4616 }
4617 }
4618 // Not found. Return 0
4619 return 0;
4620 }
4621
4622
GetDataItemSize(uint32_t Type)4623 uint32_t CDisassembler::GetDataItemSize(uint32_t Type) {
4624 // Get size in bytes of data item with specified type
4625 uint32_t Size = 1;
4626
4627 switch (Type & 0xFF) {
4628 // Scalar types
4629 case 1:
4630 Size = 1; break;
4631 case 2: case 0x4A: case 0x95:
4632 Size = 2; break;
4633 case 3: case 0x43: case 0x4B:
4634 Size = 4; break;
4635 case 4: case 0x44: case 0x4C:
4636 Size = 8; break;
4637 case 5: case 0x45:
4638 Size = 10; break;
4639 case 7:
4640 Size = 6; break;
4641 case 0x50: case 51:
4642 Size = 16; break;
4643 case 0x0B: case 0x0C:
4644 // Function pointer
4645 Size = WordSize / 8; break;
4646 case 0x0D:
4647 // Far function pointer
4648 Size = WordSize / 8 + 2; break;
4649 }
4650 switch (Type & 0xF00) {
4651 // Override above size if vector of known size
4652 case 0x300:
4653 Size = 8; break;
4654 case 0x400:
4655 Size = 16; break;
4656 case 0x500:
4657 Size = 32; break;
4658 case 0x600:
4659 Size = 64; break;
4660 case 0x700:
4661 Size = 128; break;
4662 }
4663 return Size;
4664 }
4665
4666
GetDataElementSize(uint32_t Type)4667 uint32_t CDisassembler::GetDataElementSize(uint32_t Type) {
4668 // Get size of vector element in data item with specified type
4669 if ((Type & 0xF0) == 0x50) {
4670 // Vector of unknown elements
4671 return GetDataItemSize(Type);
4672 }
4673 else {
4674 // Vector of known elements. Return element type
4675 return GetDataItemSize(Type & 7);
4676 }
4677 }
4678
4679
GetSegmentRegisterFromPrefix()4680 int32_t CDisassembler::GetSegmentRegisterFromPrefix() {
4681 // Translate segment prefix to segment register
4682 switch (s.Prefixes[0]) {
4683 case 0x26: // ES:
4684 return 0;
4685 case 0x2E: // CS:
4686 return 1;
4687 case 0x36: // SS:
4688 return 2;
4689 case 0x3E: // DS:
4690 return 3;
4691 case 0x64: // FS:
4692 return 4;
4693 case 0x65: // GS:
4694 return 5;
4695 }
4696 return -1; // Error: none
4697 }
4698