1 /**************************** disasm2.cpp ********************************
2 * Author: Agner Fog
3 * Date created: 2007-02-25
4 * Last modified: 2020-06-08
5 * Project: objconv
6 * Module: disasm2.cpp
7 * Description:
8 * Module for disassembler containing file output functions
9 *
10 * Changes that relate to assembly language syntax should be done in this file only.
11 *
12 * Copyright 2007-2020 GNU General Public License http://www.gnu.org/licenses
13 *****************************************************************************/
14 #include "stdafx.h"
15
16 /********************** Warning and error texts ***************************
17 These texts are inserted in disassembled code in case of warnings or errors.
18
19 The occurrence of an error makes the disassembler mark the code block between
20 the nearest known code labels as dubious. This means that the byte sequence
21 might be data in the code segment or the disassembler might be out of phase
22 with instruction boundaries. Dubious code will be shown both as code and as
23 data.
24
25 A warning will be shown as 'Note:' before the instruction it applies to.
26 This might indicate suboptimal coding or a possible cause for concern.
27
The criterion for distinguishing between warnings and errors is not the
severity of consequences, but whether the condition is likely to be caused
by common programming errors or by data in the code segment.
31
32 Some of the warning messages are quite benign, e.g. an unnecessary prefix.
33 Other warning messages can have severe consequences, e.g. a function missing
34 a return statement.
35
Still other warnings are no cause for concern, but indicate a condition
requiring attention. For example, the message "Multi-byte NOP. Replace with ALIGN"
might actually indicate well-optimized code. But it requires attention
because the assembler cannot re-create the multi-byte NOP if the code
is assembled again. The programmer needs to decide what level of alignment
is optimal and replace the NOP with an align statement.
42
43 *****************************************************************************/
44
// Define error texts.
// Each entry pairs a numeric key with the message text that is inserted in the
// disassembly output when the corresponding error is detected.
// Every key in this table is a single-bit value, so presumably several errors
// can be combined in one bit mask - confirm against the code that reads this table.
// Note that the values 0x20000 and 0x40000 have no entry here.
SIntTxt AsmErrorTexts[] = {
    {1,         "Instruction longer than 15 bytes"},
    {2,         "Lock prefix not allowed for this opcode"},
    {4,         "Illegal opcode"},
    {8,         "Illegal operands for this opcode"},
    {0x10,      "Instruction extends beyond end of code block"},
    {0x20,      "Prefix after REX prefix not allowed"},
    {0x40,      "This instruction is not allowed in 64 bit mode"},
    {0x80,      "Instruction out of phase with next label"},
    {0x100,     "Attempt to use R13 as base register without displacement"},
    {0x200,     "Register 8 - 15 only allowed in 64 bit mode (Ignored)."},
    {0x400,     "REX prefix not allowed on instruction with DREX byte"},
    {0x800,     "VEX has X bit but no SIB byte (Probably ignored)"},
    {0x1000,    "Relocation source does not match address or operand field"},
    {0x2000,    "Overlapping relocations"},
    {0x4000,    "This is unlikely to be code"}, // Consecutive bytes of 0 found
    {0x8000,    "VEX.L bit not allowed here"},
    {0x10000,   "VEX.mmmm bits out of range"},
    {0x80000,   "Internal error in opcode table in opcodes.cpp"}
};
66
// Warning texts 1: Warnings about conditions that could be intentional and suboptimal code.
// Each entry pairs a numeric key with the text of the note written to the output.
// Every key is a single-bit value (bits 0 through 30 are all used), so presumably
// the keys are tested against a warning bit mask - confirm against the reader of this table.
SIntTxt AsmWarningTexts1[] = {
    {1,          "Immediate operand could be made smaller by sign extension"},
    {2,          "Immediate operand could be made smaller by zero extension"},
    {4,          "Zero displacement could be omitted"},
    {8,          "Displacement could be made smaller by sign extension"},
    {0x10,       "SIB byte unnecessary here"},
    {0x20,       "A shorter instruction exists for register operand"},
    {0x40,       "Length-changing prefix causes delay on Intel processors"},
    {0x80,       "Address size prefix should be avoided"},
    {0x100,      "Same prefix occurs more than once"},
    {0x200,      "Prefix valid but unnecessary"},
    {0x400,      "Prefix bit or byte has no meaning in this context"},
    {0x800,      "Contradicting prefixes"},
    {0x1000,     "Required prefix missing"},
    {0x2000,     "Address has scale factor but no index register"},
    {0x4000,     "Address is not rip-relative"},
    {0x8000,     "Absolute memory address without relocation"},
    {0x10000,    "Unusual relocation type for this operand"},
    {0x20000,    "Instruction pointer truncated by operand size prefix"},
    {0x40000,    "Stack pointer truncated by address size prefix"},
    {0x80000,    "Jump or call to data segment not allowed"},
    {0x100000,   "Undocumented opcode"},
    {0x200000,   "Unknown opcode reserved for future extensions"},
    {0x400000,   "Memory operand is misaligned. Performance penalty"},
    {0x800000,   "Alignment fault. Memory operand must be aligned"},
    {0x1000000,  "Multi-byte NOP. Replace with ALIGN"},
    {0x2000000,  "Bogus length-changing prefix causes delay on Intel processors here"},
    {0x4000000,  "Non-default size for stack operation"},
    {0x8000000,  "Function does not end with ret or jmp"},
    {0x10000000, "No jump seems to point here"},
    {0x20000000, "Full 64-bit address"},
    {0x40000000, "VEX prefix bits not allowed here"}
};
101
102 // Warning texts 2: Warnings about possible misinterpretation; serious warnings
103 SIntTxt AsmWarningTexts2[] = {
104 {1, "Label out of phase with instruction. Possibly spurious"},
105 {2, "Planned future instruction, according to preliminary specification"},
106 {4, "This instruction has been planned but never implemented because plans were changed. Will not work"},
107 {0x10, "EVEX prefix not allowed for this instruction"},
108 {0x20, "MVEX prefix not allowed for this instruction"},
109 {0x40, "EVEX prefix option bits not allowed here"},
110 {0x80, "MVEX prefix option bits not allowed here"},
111 {0x100, "Mask register must be nonzero"},
112 {0x200, "Broadcasting to scalar not allowd"},
113 };
114
115
// Indication of relocation types in comments:
// Each entry pairs a relocation type code with the short tag written for it.
// Unlike the warning and error tables, several codes here have more than one
// bit set (e.g. 0x021, 0x1001), and three different codes share the tag "(d)".
SIntTxt RelocationTypeNames[] = {
    {0x001,      "(d)"        },   // Direct address in flat address space
    {0x002,      "(rel)"      },   // Self-relative
    {0x004,      "(imgrel)"   },   // Image-relative
    {0x008,      "(segrel)"   },   // Segment-relative
    {0x010,      "(refpoint)" },   // Relative to arbitrary point (position-independent code in Mach-O)
    {0x021,      "(d)"        },   // Direct (adjust by image base)
    {0x041,      "(d)"        },   // Direct (make procedure linkage table entry)
    {0x081,      "(indirect)" },   // Gnu indirect function dispatcher (make procedure linkage table entry?)
    {0x100,      "(seg)"      },   // Segment address or descriptor
    {0x200,      "(sseg)"     },   // Segment of symbol
    {0x400,      "(far)"      },   // Far segment:offset address
    {0x1001,     "(GOT)"      },   // GOT entry
    {0x1002,     "(GOT r)"    },   // self-relative to GOT entry
    {0x2002,     "(PLT r)"    }    // self-relative to PLT entry
};
133
// Instruction set names
// Positional table: the array index is the instruction set code, and the comment
// at the end of each row gives the range of (hexadecimal) indexes in that row.
// Entries must therefore never be inserted or removed without shifting the rest.
// Empty strings and "?" are placeholders for codes without a name in this table.
const char * InstructionSetNames[] = {
    "8086", "80186", "80286", "80386",                  // 0 - 3
    "80486", "Pentium", "Pentium Pro", "MMX",           // 4 - 7
    "Pentium II", "", "", "",                           // 8 - B
    "", "", "", "",                                     // C - F
    "", "SSE", "SSE2", "SSE3",                          // 10 - 13
    "Supplementary SSE3", "SSE4.1", "SSE4.2", "AES",    // 14 - 17
    "CLMUL", "AVX", "FMA3", "?",                        // 18 - 1B
    "AVX2", "BMI etc.", "?", "?",                       // 1C - 1F
    "AVX-512", "AVX512PF/ER/CD", "MPX,SHA,TBD", "AVX512IFMA/VBMI", // 20 - 23
    "AVX512_4FMAPS", "?", "?", "?",                     // 24 - 27
    "?", "?", "?", "?",                                 // 28 - 2B
    "?", "?", "?", "?",                                 // 2C - 2F
    "?", "?", "?", "?",                                 // 30 - 33
    "?", "?", "?", "?",                                 // 34 - 37
    "?", "?", "?", "?",                                 // 38 - 3B
    "?", "?", "?", "?",                                 // 3C - 3F
    "?", "?", "?", "?",                                 // 40 - 43
    "?", "?", "?", "?",                                 // 44 - 47
    "?", "?", "?", "?",                                 // 48 - 4B
    "?", "?", "?", "?",                                 // 4C - 4F
    "?", "?", "?", "?",                                 // 50 - 53
    "?", "?", "?", "?",                                 // 54 - 57
    "?", "?", "?", "?",                                 // 58 - 5B
    "?", "?", "?", "?",                                 // 5C - 5F
    "?", "?", "?", "?",                                 // 60 - 63
    "?", "?", "?", "?",                                 // 64 - 67
    "?", "?", "?", "?",                                 // 68 - 6B
    "?", "?", "?", "?",                                 // 6C - 6F
    "?", "?", "?", "?",                                 // 70 - 73
    "?", "?", "?", "?",                                 // 74 - 77
    "?", "?", "?", "?",                                 // 78 - 7B
    "?", "?", "?", "?",                                 // 7C - 7F
    "Knights Corner", "?", "?", "?",                    // 80 - 83
    "?", "?", "?", "?"                                  // 84 - 87
};

// Length of the InstructionSetNames table, as computed by the TableSize macro
// (defined elsewhere; presumably the number of array elements - confirm).
const int InstructionSetNamesLen = TableSize(InstructionSetNames);
173
174
175 /************************** class CDisassembler *****************************
176 Most member functions of CDisassembler are defined in disasm1.cpp
177
178 Only the functions that produce output are defined here:
179 ******************************************************************************/
180
WriteShortRegOperand(uint32_t Type)181 void CDisassembler::WriteShortRegOperand(uint32_t Type) {
182 // Write register operand from lower 3 bits of opcode byte to OutFile
183 uint32_t rnum = Get<uint8_t>(s.OpcodeStart2) & 7;
184 // Check REX.B prefix
185 if (s.Prefixes[7] & 1) rnum |= 8; // Add 8 if REX.B prefix
186 // Write register name
187 WriteRegisterName(rnum, Type);
188 }
189
WriteRegOperand(uint32_t Type)190 void CDisassembler::WriteRegOperand(uint32_t Type) {
191 // Write register operand from reg bits
192 uint32_t Num = s.Reg; // Register number
193
194 // Write register name
195 WriteRegisterName(Num, Type);
196 }
197
WriteRMOperand(uint32_t Type)198 void CDisassembler::WriteRMOperand(uint32_t Type) {
199 // Write memory or register operand from mod/rm bits of mod/reg/rm byte
200 // and possibly SIB byte or direct memory operand to OutFile.
201 // Also used for writing direct memory operand
202
203 if ((Type & 0xFF) == 0) {
204 // No explicit operand
205 return;
206 }
207
208 uint32_t Components = 0; // Count number of addends inside []
209 int64_t Addend = 0; // Inline displacement or addend
210 int AddressingMode = 0; // 0: 16- or 32 bit addressing mode
211 // 1: 64-bit pointer
212 // 2: 32-bit absolute in 64-bit mode
213 // 4: 64-bit rip-relative
214 // 8: 64-bit absolute
215 // Check if register or memory
216 if (s.Mod == 3) {
217 // Register operand
218 WriteRegisterName(s.RM, Type);
219 return;
220 }
221
222 // Find addend, if any
223 switch (s.AddressFieldSize) {
224 case 1: // 1 byte displacement
225 Addend = Get<int8_t>(s.AddressField);
226 break;
227 case 2: // 2 bytes displacement
228 Addend = Get<int16_t>(s.AddressField);
229 break;
230 case 4: // 4 bytes displacement
231 Addend = Get<int32_t>(s.AddressField);
232 if ((s.MFlags & 0x100) && !s.AddressRelocation) {
233 // rip-relative
234 Addend += ImageBase + uint64_t(SectionAddress + IEnd);
235 }
236 break;
237 case 8: // 8 bytes address
238 Addend = Get<int64_t>(s.AddressField);
239 break;
240 }
241 // Get AddressingMode
242 if (s.AddressSize > 32) {
243 if (s.MFlags & 0x100) {
244 AddressingMode = 4; // 64-bit rip-relative
245 }
246 else if (s.AddressFieldSize == 8) {
247 AddressingMode = 8; // 64-bit absolute
248 }
249 else if (s.AddressRelocation || (s.BaseReg==0 && s.IndexReg==0)) {
250 AddressingMode = 2; // 32-bit absolute in 64-bit mode
251 }
252 else {
253 AddressingMode = 1; // 64-bit pointer
254 }
255 }
256
257 // Make exception for LEA with no type
258 if (Opcodei == 0x8D) {
259 Type = 0;
260 }
261 // Write type override
262 if ((s.OpcodeDef->InstructionFormat & 0x1F) == 0x1E) {
263 WriteOperandType(Type & 0xFF); // has vsib address: write element type rather than vector type
264 }
265 else if (!(s.OpcodeDef->Options & 0x800)) {
266 WriteOperandType(Type); // write operand type
267 }
268
269 if (Syntax != SUBTYPE_MASM) {
270 // Write "[" around memory operands, before segment
271 OutFile.Put("[");
272 }
273
274 // Write segment prefix, if any
275 if (s.Prefixes[0]) {
276 OutFile.Put(RegisterNamesSeg[GetSegmentRegisterFromPrefix()]);
277 OutFile.Put(":");
278 }
279 else if (!s.BaseReg && !s.IndexReg && (!s.AddressRelocation || (s.Warnings1 & 0x10000)) && Syntax != SUBTYPE_NASM) {
280 // No pointer register and no memory reference or wrong type of memory reference.
281 // Write segment register to indicate that we have a memory operand
282 OutFile.Put("DS:");
283 }
284
285 if (Syntax == SUBTYPE_MASM) {
286 // Write "[" around memory operands, after segment
287 OutFile.Put("[");
288 }
289
290 if (Syntax == SUBTYPE_NASM && (AddressingMode & 0x0E)) {
291 // Specify absolute or relative addressing mode
292 switch (AddressingMode) {
293 case 2: OutFile.Put("abs "); break;
294 case 4: OutFile.Put("rel "); break;
295 case 8: OutFile.Put("abs qword "); break;
296 }
297 }
298
299 // Write relocation target, if any
300 if (s.AddressRelocation) {
301 // Write cross reference
302 WriteRelocationTarget(s.AddressRelocation, 4 | (s.MFlags & 0x100), Addend);
303 // Addend has been written, don't write it again
304 Addend = 0;
305 // Remember that something has been written
306 Components++;
307 }
308
309 // Check address size for pointer registers
310 //const char * * PointerRegisterNames;
311 uint32_t RegisterType = 0;
312 switch (s.AddressSize) {
313 case 16:
314 RegisterType = 2; break;
315 case 32:
316 RegisterType = 3; break;
317 case 64:
318 RegisterType = 4; break;
319 }
320
321 // Write base register, if any
322 if (s.BaseReg) {
323 if (Components++) OutFile.Put("+"); // Put "+" if anything before
324 WriteRegisterName(s.BaseReg - 1, RegisterType);
325 }
326
327 // Write index register, if any
328 if (s.IndexReg) {
329 if (Components++) OutFile.Put("+"); // Put "+" if anything before
330 if ((s.OpcodeDef->InstructionFormat & 0x1F) != 0x1E) {
331 // normal index register
332 WriteRegisterName(s.IndexReg - 1, RegisterType);
333 }
334 else {
335 // VSIB byte specifies vector index register
336 WriteRegisterName(s.IndexReg - 1, Type & 0xF00);
337 }
338 // Write scale factor, if any
339 if (s.Scale) {
340 OutFile.Put("*");
341 OutFile.PutDecimal(1 << s.Scale);
342 }
343 }
344
345 // Write +/- before addend
346 if (Components && Addend) {
347 // Displacement comes after base/index registers
348 if (Addend >= 0 || s.AddressFieldSize == 8) {
349 // Positive. Write +
350 OutFile.Put("+");
351 }
352 else {
353 // Negative. Write -
354 OutFile.Put("-");
355 Addend = -Addend;
356 }
357 }
358
359 if (Addend || Components == 0) {
360 // Find minimum number of digits needed
361 uint32_t AddendSize = s.AddressFieldSize;
362 if ((uint64_t)Addend < 0x100 && AddendSize > 1) AddendSize = 1;
363 else if ((uint64_t)Addend < 0x10000 && AddendSize > 2) AddendSize = 2;
364
365 // Write address or addend as hexadecimal
366 OutFile.PutHex((uint64_t)Addend, 2);
367
368 // Check if offset multiplier needed
369 if (s.OffsetMultiplier && s.AddressFieldSize == 1 && Addend) {
370 OutFile.Put("*");
371 OutFile.PutHex(s.OffsetMultiplier, 2);
372 }
373 }
374
375 if (Syntax == SUBTYPE_GASM && (AddressingMode == 4)) {
376 // Need to specify rip-relative address
377 OutFile.Put("+rip");
378 }
379
380 // End with "]"
381 OutFile.Put("]");
382 }
383
384
WriteOperandType(uint32_t type)385 void CDisassembler::WriteOperandType(uint32_t type) {
386 switch (Syntax) {
387 case SUBTYPE_MASM:
388 WriteOperandTypeMASM(type); break;
389 case SUBTYPE_NASM:
390 WriteOperandTypeYASM(type); break;
391 case SUBTYPE_GASM:
392 WriteOperandTypeGASM(type); break;
393 }
394 }
395
WriteOperandTypeMASM(uint32_t type)396 void CDisassembler::WriteOperandTypeMASM(uint32_t type) {
397 // Write type override before operand, e.g. "dword ", MASM syntax
398 if (type & 0xF00) {
399 type &= 0xF00; // Ignore element type for vectors
400 }
401 else {
402 type &= 0xFF; // Use operand type only
403 }
404
405 switch (type) {
406 case 1: // 8 bits
407 OutFile.Put("byte "); break;
408 case 2: // 16 bits
409 OutFile.Put("word "); break;
410 case 3: // 32 bits
411 OutFile.Put("dword "); break;
412 case 4: // 64 bits
413 OutFile.Put("qword "); break;
414 case 5: // 80 bits
415 if ((s.OpcodeDef->Destination & 0xFF) == 0xD) {
416 // 64+16 bit far pointer. Not supported by MASM
417 OutFile.Put("fword ");
418 s.OpComment = "64+16 bit. Need REX.W prefix";
419 }
420 else {
421 OutFile.Put("tbyte ");}
422 break;
423 case 6: case 0x40: case 0x48: case 0:
424 // Other size. Write nothing
425 break;
426 case 7: case 0x0D: // 48 bits or far
427 OutFile.Put("fword ");
428 if ((s.OpcodeDef->Destination & 0xFF) == 0xD && WordSize == 64) {
429 // All assemblers I have tried forget the REX.W prefix here. Make a notice
430 s.OpComment = "32+16 bit. Possibly forgot REX.W prefix";
431 }
432 break;
433 case 0x4A: // 16 bits float
434 OutFile.Put("word "); break;
435 case 0x43: // 32 bits float (x87)
436 case 0x4B: // 32 bits float (SSE2)
437 OutFile.Put("dword "); break;
438 case 0x44: // 64 bits float
439 case 0x4C: // 64 bits float (SSE2)
440 OutFile.Put("qword "); break;
441 case 0x45: // 80 bits float
442 OutFile.Put("tbyte "); break;
443 case 0x84: case 0x85: // far call
444 OutFile.Put("far "); break;
445 case 0x95: // 16 bits mask register
446 OutFile.Put("word "); break;
447 case 0x300: // MMX
448 OutFile.Put("qword "); break;
449 case 0x400: // XMM
450 OutFile.Put("xmmword "); break;
451 case 0x500: // YMM
452 OutFile.Put("ymmword "); break;
453 case 0x600: // ZMM
454 OutFile.Put("zmmword "); break;
455 case 0x700: // future 1024 bit
456 OutFile.Put("?mmword "); break;
457 }
458 if (type) OutFile.Put("ptr ");
459 }
460
WriteOperandTypeYASM(uint32_t type)461 void CDisassembler::WriteOperandTypeYASM(uint32_t type) {
462 // Write type override before operand, e.g. "dword", NASM/YASM syntax
463 if (type & 0xF00) {
464 type &= 0xF00; // Ignore element type for vectors
465 }
466 else {
467 type &= 0xFF; // Use operand type only
468 }
469 uint32_t Dest = s.OpcodeDef->Destination & 0xFF;// Destination operand
470 if (Dest >= 0xB && Dest < 0x10) {
471 // This is a pointer
472 if (Dest < 0x0D) {
473 OutFile.Put("near "); // Near indirect jump/call
474 }
475 else {
476 // Far pointer
477 if ((WordSize == 16 && type == 3) || (WordSize == 32 && type == 7)) {
478 OutFile.Put("far ");
479 }
480 else {
481 // Size currently not supported by YASM
482 switch (type) {
483 case 3: OutFile.Put("far ");
484 s.OpComment = "16+16 bit. Needs 66H prefix";
485 break;
486 case 7: OutFile.Put("far ");
487 s.OpComment = "32+16 bit. Possibly forgot REX.W prefix";
488 break;
489 case 5: OutFile.Put("far ");
490 s.OpComment = "64+16 bit. Needs REX.W prefix";
491 break;
492 }
493 }
494 }
495 return;
496 }
497 switch (type) {
498 case 1: // 8 bits
499 OutFile.Put("byte "); break;
500 case 2: // 16 bits
501 OutFile.Put("word "); break;
502 case 3: // 32 bits
503 OutFile.Put("dword "); break;
504 case 4: // 64 bits
505 OutFile.Put("qword "); break;
506 case 5: // 80 bits
507 OutFile.Put("tbyte "); break;
508 case 7: // 48 bits
509 OutFile.Put("fword "); break;
510 case 0x4A: // 16 bits float
511 OutFile.Put("word "); break;
512 case 0x43: // 32 bits float (x87)
513 case 0x4B: // 32 bits float (SSE2)
514 OutFile.Put("dword "); break;
515 case 0x44: // 64 bits float
516 case 0x4C: // 64 bits float (SSE2)
517 OutFile.Put("qword "); break;
518 case 0x45: // 80 bits float
519 OutFile.Put("tbyte "); break;
520 case 0x84: case 0x85: // far call
521 OutFile.Put("far "); break;
522 case 0x95: // 16 bits mask register
523 OutFile.Put("word "); break;
524 case 0x300: // MMX
525 OutFile.Put("qword "); break;
526 case 0x400: // XMM
527 OutFile.Put("oword "); break;
528 case 0x500: // YMM
529 OutFile.Put("yword "); break;
530 case 0x600: // ZMM
531 OutFile.Put("zword "); break;
532 case 0x700: // Future 128 bytes
533 OutFile.Put("?word "); break;
534 default:; // Anything else: write nothing
535 }
536 }
537
WriteOperandTypeGASM(uint32_t type)538 void CDisassembler::WriteOperandTypeGASM(uint32_t type) {
539 // Write type override before operand, e.g. "dword ", GAS syntax
540 if (type & 0xF00) {
541 type &= 0xF00; // Ignore element type for vectors
542 }
543 else {
544 type &= 0xFF; // Use operand type only
545 }
546
547 switch (type) {
548 case 1: // 8 bits
549 OutFile.Put("byte "); break;
550 case 2: // 16 bits
551 OutFile.Put("word "); break;
552 case 3: // 32 bits
553 OutFile.Put("dword "); break;
554 case 4: // 64 bits
555 OutFile.Put("qword "); break;
556 case 5: // 80 bits
557 if ((s.OpcodeDef->Destination & 0xFF) == 0xD) {
558 // 64+16 bit far pointer. Not supported by Gas
559 OutFile.Put("fword ");
560 s.OpComment = "64+16 bit. Needs REX.W prefix";
561 }
562 else {
563 OutFile.Put("tbyte ");}
564 break;
565 case 6: case 0x40: case 0x48: case 0:
566 // Other size. Write nothing
567 break;
568 case 7: // 48 bits
569 OutFile.Put("fword ");
570 if ((s.OpcodeDef->Destination & 0xFF) == 0xD && WordSize == 64) {
571 // All assemblers I have tried forget the REX.W prefix here. Make a notice
572 s.OpComment = "32+16 bit. Possibly forgot REX.W prefix";
573 }
574 break;
575 case 0x4A: // 16 bits float
576 OutFile.Put("word "); break;
577 case 0x43: // 32 bits float (x87)
578 case 0x4B: // 32 bits float (SSE2)
579 OutFile.Put("dword "); break;
580 case 0x44: // 64 bits float
581 case 0x4C: // 64 bits float (SSE2)
582 OutFile.Put("qword "); break;
583 case 0x45: // 80 bits float
584 OutFile.Put("tbyte "); break;
585 case 0x84: case 0x85: // far call
586 OutFile.Put("far "); break;
587 case 0x95: // 16 bits mask register
588 OutFile.Put("word "); break;
589 case 0x300: // MMX
590 OutFile.Put("qword "); break;
591 case 0x400: // XMM
592 OutFile.Put("xmmword "); break;
593 case 0x500: // YMM
594 OutFile.Put("ymmword "); break;
595 case 0x600: // ZMM
596 OutFile.Put("zmmword "); break;
597 case 0x700: // future 1024 bit
598 OutFile.Put("?mmword "); break;
599 }
600 }
601
602
WriteDREXOperand(uint32_t Type)603 void CDisassembler::WriteDREXOperand(uint32_t Type) {
604 // Write register operand from dest bits of DREX byte (AMD only)
605 uint32_t Num = s.Vreg >> 4; // Register number
606 // Write register name
607 WriteRegisterName(Num, Type);
608 }
609
WriteVEXOperand(uint32_t Type,int i)610 void CDisassembler::WriteVEXOperand(uint32_t Type, int i) {
611 // Write register operand from VEX.vvvv bits or immediate bits
612 uint32_t Num; // Register number
613 switch (i) {
614 case 0: // Use VEX.vvvv bits
615 Num = s.Vreg & 0x1F; break;
616 case 1: // Use immediate bits 4-7
617 Num = Get<uint8_t>(s.ImmediateField) >> 4; break;
618 case 2: // Use immediate bits 0-3 (Unused. For possible future use)
619 Num = Get<uint8_t>(s.ImmediateField) & 0x0F; break;
620 default:
621 Num = 0;
622 }
623 // Write register name
624 WriteRegisterName(Num, Type);
625 }
626
627
WriteOperandAttributeEVEX(int i,int isMem)628 void CDisassembler::WriteOperandAttributeEVEX(int i, int isMem) {
629 // Write operand attributes and instruction attributes from EVEX z, LL, b and aaa bits
630 // i = operand number (0 = destination, 1 = first source, 2 = second source,
631 // 98 = after last SIMD operand, 99 = after last operand)
632 // isMem: true if memory operand, false if register operand
633 uint32_t swiz = s.OpcodeDef->EVEX; // indicates meaning of EVEX attribute bits
634
635 if ((swiz & 0x30) && (i == 0 || (s.OpcodeDef->Destination == 0 && i == 1))) { // first operand
636 // write mask
637 if (s.Kreg || (swiz & 0xC0)) {
638 OutFile.Put(" {k");
639 OutFile.PutDecimal(s.Kreg);
640 OutFile.Put("}");
641 if ((swiz & 0x20) && (s.Esss & 8)) {
642 // zeroing
643 OutFile.Put("{z}");
644 }
645 }
646 }
647 if (swiz & 0x07) {
648 // broadcast, rounding or sae allowed
649 if (isMem && i < 8) {
650 // memory operand
651 if ((swiz & 0x01) && (s.Esss & 1)) {
652 // write memory broadcast
653 // calculate broadcast factor
654 uint32_t op = s.Operands[i]; // operand
655 uint32_t elementsize = GetDataElementSize(op); // element size
656 uint32_t opv = s.Operands[0]; // any vector operand
657 if (!(opv & 0xF00)) opv = s.Operands[1]; // first operand is not a vector, use next
658 uint32_t vectorsize = GetDataItemSize(opv); // vector size
659 if (vectorsize > elementsize) { // avoid broadcasting to scalar
660 if (elementsize) { // avoid division by zero
661 OutFile.Put(" {1to");
662 OutFile.PutDecimal(vectorsize/elementsize);
663 OutFile.Put("}");
664 }
665 else {
666 OutFile.Put("{unknown broadcast}");
667 }
668 }
669 }
670 }
671 if (i == 98 && s.Mod == 3) { // after last SIMD operand. no memory operand
672 // NASM has rounding mode and sae decoration after last SIMD operand with a comma.
673 // No spec. for other assemblers available yet (2014).
674 // use i == 99 if it should be placed after last operand.
675 // Perhaps the comma should be removed for other assemblers?
676 if ((swiz & 0x4) && (s.Esss & 1)) {
677 // write rounding mode
678 uint32_t rounding = (s.Esss >> 1) & 3;
679 OutFile.Put(", {");
680 OutFile.Put(EVEXRoundingNames[rounding]);
681 OutFile.Put("}");
682 }
683 else if ((swiz & 0x2) && (s.Esss & 1)) {
684 // no rounding mode. write sae
685 OutFile.Put(", {");
686 OutFile.Put(EVEXRoundingNames[4]);
687 OutFile.Put("}");
688 }
689 }
690 }
691 }
692
693
WriteOperandAttributeMVEX(int i,int isMem)694 void CDisassembler::WriteOperandAttributeMVEX(int i, int isMem) {
695 // Write operand attributes and instruction attributes from MVEX sss, e and kkk bits.
696 // i = operand number (0 = destination, 1 = first source, 2 = second source, 99 = after last operand)
697 // isMem: true if memory operand, false if register operand
698 uint32_t swiz = s.OpcodeDef->MVEX; // indicates meaning of MVEX attribute bits
699 const int R_sae_syntax = 0; // syntax alternatives for rounding mode + sae
700 // 0: {rn-sae}, 1: {rn}{sae}
701 const char * text = 0; // temporary text pointer
702
703 if ((swiz & 0x1000) && (i == 0 || (s.OpcodeDef->Destination == 0 && i == 1))) { // first operand
704 // write mask
705 if (s.Kreg || (swiz & 0x2000)) {
706 OutFile.Put(" {k");
707 OutFile.PutDecimal(s.Kreg);
708 OutFile.Put("}");
709 }
710 }
711 if (swiz & 0x1F) {
712 // swizzle allowed
713 if (isMem && i < 90) {
714 // write memory broadcast/up/down conversion
715 text = s.SwizRecord->name;
716 if (text && *text) {
717 OutFile.Put(" {"); OutFile.Put(text); OutFile.Put("}");
718 }
719 }
720 //if (i == 2 || ((s.OpcodeDef->Source2 & 0xF0F00) == 0 && i == 1)) {
721 if (i == 98) { // after last SIMD operand
722 // last register or memory operand
723 if (s.Mod == 3 && !((swiz & 0x700) && (s.Esss & 8))) { // skip alternative meaning of sss field for register operand when E=1
724 // write register swizzle
725 text = s.SwizRecord->name;
726 if (text && *text) {
727 OutFile.Put(" {"); OutFile.Put(text); OutFile.Put("}");
728 }
729 }
730 }
731 if (i == 99) { // after last operand
732 if (s.Mod == 3 && (swiz & 0x300) && (s.Esss & 8)) {
733 // alternative meaning of sss field for register operand when E=1
734 switch (swiz & 0x300) {
735 case 0x100: // rounding mode and not sae
736 text = SwizRoundTables[0][0][s.Esss & 3].name;
737 break;
738 case 0x200: // suppress all exceptions
739 if ((s.Esss & 4) && !(swiz & 0x800)) text = "sae";
740 break;
741 case 0x300: // rounding mode and sae
742 text = SwizRoundTables[0][R_sae_syntax][s.Esss & 7].name;
743 break;
744 }
745 }
746 if (text && *text) {
747 OutFile.Put(", {"); OutFile.Put(text); OutFile.Put("}");
748 }
749 }
750 }
751 if (isMem && (s.Esss & 8) && !(swiz & 0x800)) {
752 // cache eviction hint after memory operand
753 OutFile.Put(" {eh}");
754 }
755 }
756
WriteRegisterName(uint32_t Value,uint32_t Type)757 void CDisassembler::WriteRegisterName(uint32_t Value, uint32_t Type) {
758 // Write name of register to OutFile
759 if (Type & 0xF00) {
760 // vector register
761 Type &= 0xF00;
762 }
763 else {
764 // Other register
765 Type &= 0xFF; // Remove irrelevant bits
766 }
767
768 // Check fixed registers (do not depend on Value)
769 switch (Type) {
770 case 0xA1: // al
771 Type = 1; Value = 0;
772 break;
773
774 case 0xA2: // ax
775 Type = 2; Value = 0;
776 break;
777
778 case 0xA3: // eax
779 Type = 3; Value = 0;
780 break;
781
782 case 0xA4: // rax
783 Type = 4; Value = 0;
784 break;
785
786 case 0xAE: // xmm0
787 Type = 0x400; Value = 0;
788 break;
789
790 case 0xAF: // st(0)
791 Type = 0x40; Value = 0;
792 break;
793
794 case 0xB2: // dx
795 Type = 2; Value = 2;
796 break;
797
798 case 0xB3: // cl
799 Type = 1; Value = 1;
800 break;
801 }
802
803 // Get register number limit
804 uint32_t RegNumLimit = 7; // largest register number
805 if (WordSize >= 64) {
806 RegNumLimit = 15;
807 if ((s.Prefixes[6] & 0x40) && (Type & 0xF40)) {
808 // EVEX or MVEX prefix and vector
809 RegNumLimit = 31;
810 }
811 }
812
813 switch (Type) {
814 case 0x91: // segment register
815 RegNumLimit = 5;
816 break;
817 case 0x300: // mmx
818 case 0x40: // st register
819 case 0x95: // k mask register
820 RegNumLimit = 7;
821 break;
822 case 0x98: // bounds register
823 RegNumLimit = 3;
824 break;
825 }
826 if (Value > RegNumLimit) {
827 // register number out of range
828 OutFile.Put("unknown register ");
829 switch (Type) {
830 case 1:
831 OutFile.Put("(8 bit) "); break;
832 case 2:
833 OutFile.Put("(16 bit) "); break;
834 case 3:
835 OutFile.Put("(32 bit) "); break;
836 case 4:
837 OutFile.Put("(64 bit) "); break;
838 case 0x40: // st register
839 OutFile.Put("st"); break;
840 case 0x91: // Segment register
841 OutFile.Put("seg"); break;
842 case 0x92: // Control register
843 OutFile.Put("cr"); break;
844 case 0x95: // k mask register
845 OutFile.Put("k"); break;
846 case 0x300: // mmx register
847 OutFile.Put("mm"); break;
848 case 0x400: // xmm register
849 OutFile.Put("xmm"); break;
850 case 0x500: // ymm register
851 OutFile.Put("ymm"); break;
852 case 0x600: // zmm register
853 OutFile.Put("zmm"); break;
854 case 0x700: // future 1024 bit register
855 OutFile.Put("?mm"); break;
856 }
857 OutFile.PutDecimal(Value);
858 }
859 else {
860 // Write register name depending on type
861 switch (Type) {
862 case 1: // 8 bit register. Depends on any REX prefix
863 OutFile.Put(s.Prefixes[7] ? RegisterNames8x[Value] : RegisterNames8[Value & 7]);
864 break;
865
866 case 2: // 16 bit register
867 OutFile.Put(RegisterNames16[Value]);
868 break;
869
870 case 3: // 32 bit register
871 OutFile.Put(RegisterNames32[Value]);
872 break;
873
874 case 4: // 64 bit register
875 OutFile.Put(RegisterNames64[Value]);
876 break;
877
878 case 0x300: // mmx register
879 OutFile.Put("mm");
880 OutFile.PutDecimal(Value);
881 break;
882
883 case 0x400: // xmm register (packed integer or float)
884 case 0x48: case 0x4B: case 0x4C: // xmm register (scalar float)
885 OutFile.Put("xmm");
886 OutFile.PutDecimal(Value);
887 break;
888
889 case 0x500: // ymm register (packed)
890 OutFile.Put("ymm");
891 OutFile.PutDecimal(Value);
892 break;
893
894 case 0x600: // zmm register (packed)
895 OutFile.Put("zmm");
896 OutFile.PutDecimal(Value);
897 break;
898
899 case 0x700: // future 1024 bit register
900 OutFile.Put("?mm");
901 OutFile.PutDecimal(Value);
902 break;
903
904 case 0x40: // st register
905 if (Syntax == SUBTYPE_NASM) {
906 // NASM, YASM and GAS-AT&T use st0
907 OutFile.Put("st");
908 OutFile.PutDecimal(Value);
909 }
910 else {
911 // MASM and GAS-Intel use st(0),
912 OutFile.Put("st(");
913 OutFile.PutDecimal(Value);
914 OutFile.Put(")");
915 }
916 break;
917
918 case 0x91: // Segment register
919 OutFile.Put(RegisterNamesSeg[Value & 7]);
920 break;
921
922 case 0x92: // Control register
923 OutFile.Put(RegisterNamesCR[Value]);
924 break;
925
926 case 0x93: // Debug register
927 OutFile.Put("dr");
928 OutFile.PutDecimal(Value);
929 break;
930
931 case 0x94: // Test register (obsolete)
932 OutFile.Put("tr");
933 OutFile.PutDecimal(Value);
934 break;
935
936 case 0x95: // k mask register
937 OutFile.Put("k");
938 OutFile.PutDecimal(Value);
939 break;
940
941 case 0x98: // bounds register
942 OutFile.Put("bnd");
943 OutFile.PutDecimal(Value);
944 break;
945
946 case 0xB1: // 1
947 OutFile.Put("1");
948 break;
949
950 default: // Unexpected
951 OutFile.Put("UNKNOWN REGISTER TYPE ");
952 OutFile.PutDecimal(Value);
953 break;
954 }
955 }
956 }
957
958
WriteImmediateOperand(uint32_t Type)959 void CDisassembler::WriteImmediateOperand(uint32_t Type) {
960 // Write immediate operand or direct jump/call address
961 int WriteFormat; // 0: unsigned, 1: signed, 2: hexadecimal
962 int Components = 0; // Number of components in immediate operand
963 uint32_t OSize; // Operand size
964 uint32_t FieldPointer; // Pointer to field containing value
965 uint32_t FieldSize; // Size of field containing value
966 int64_t Value = 0; // Value of immediate operand
967
968 // Check if far
969 if ((Type & 0xFE) == 0x84) {
970 // Write far
971 WriteOperandType(Type);
972 }
973
974 // Check if type override needed
975 if ((s.OpcodeDef->AllowedPrefixes & 2) && s.Prefixes[4] == 0x66
976 && (Opcodei == 0x68 || Opcodei == 0x6A)) {
977 // Push immediate with non-default operand size needs type override
978 WriteOperandType(s.OperandSize == 16 ? 2 : 3);
979 }
980
981 FieldPointer = s.ImmediateField;
982 FieldSize = s.ImmediateFieldSize;
983
984 if (Syntax == SUBTYPE_NASM && (Type & 0x0F) == 4 && FieldSize == 8) {
985 // Write type override to make sure we get 8 bytes address in case there is a relocation here
986 WriteOperandType(4);
987 }
988
989 if (Type & 0x200000) {
990 if (FieldSize > 1) {
991 // Uses second part of field. Single byte only
992 FieldPointer += FieldSize-1;
993 FieldSize = 1;
994 }
995 else {
996 // Uses half a byte
997 FieldSize = 0;
998 }
999 }
1000
1001 // Get inline value
1002 switch (FieldSize) {
1003 case 0: // 4 bits
1004 Value = Get<uint8_t>(FieldPointer) & 0x0F;
1005 break;
1006
1007 case 1: // 8 bits
1008 Value = Get<int8_t>(FieldPointer);
1009 break;
1010
1011 case 2: // 16 bits
1012 Value = Get<int16_t>(FieldPointer); break;
1013
1014 case 6: // 48 bits
1015 Value = Get<int32_t>(FieldPointer);
1016 Value += (uint64_t)Get<uint16_t>(FieldPointer + 4) << 32;
1017 break;
1018
1019 case 4: // 32 bits
1020 Value = Get<int32_t>(FieldPointer); break;
1021
1022 case 8: // 64 bits
1023 Value = Get<int64_t>(FieldPointer); break;
1024
1025 case 3: // 16+8 bits ("Enter" instruction)
1026 if ((Type & 0xFF) == 0x12) {
1027 // First 16 bits
1028 FieldSize = 2; Value = Get<int16_t>(FieldPointer); break;
1029 }
1030 // else continue in default case to get error message
1031
1032 default: // Other sizes should not occur
1033 err.submit(3000); Value = -1;
1034 }
1035
1036 // Check if relocation
1037 if (s.ImmediateRelocation) {
1038 // Write relocation target name
1039 uint32_t Context = 2;
1040 if ((Type & 0xFC) == 0x80) Context = 8; // Near jump/call destination
1041 if ((Type & 0xFC) == 0x84) Context = 0x10; // Far jump/call destination
1042
1043 // Write cross reference
1044 WriteRelocationTarget(s.ImmediateRelocation, Context, Value);
1045
1046 // Remember that Value has been written
1047 Value = 0;
1048 Components++;
1049 }
1050 // Check if AAM or AAD
1051 if (Value == 10 && (Opcodei & 0xFE) == 0xD4) {
1052 // Don't write operand for AAM or AAD if = 10
1053 return;
1054 }
1055
1056 // Write as unsigned, signed or hexadecimal:
1057 if ((Type & 0xF0) == 0x30 || (Type & 0xF0) == 0x80) {
1058 // Hexadecimal
1059 WriteFormat = 2;
1060 }
1061 else if (s.ImmediateFieldSize == 8) {
1062 // 64 bit constant
1063 if (Value == (int32_t)Value) {
1064 // Signed
1065 WriteFormat = 1;
1066 }
1067 else {
1068 // Hexadecimal
1069 WriteFormat = 2;
1070 }
1071 }
1072 else if ((Type & 0xF0) == 0x20) {
1073 // Signed
1074 WriteFormat = 1;
1075 }
1076 else {
1077 // Unsigned
1078 WriteFormat = 0;
1079 }
1080
1081 if ((Type & 0xFC) == 0x80 && !s.ImmediateRelocation) {
1082 // Self-relative jump or call without relocation. Adjust immediate value
1083 Value += IEnd; // Get absolute address of target
1084
1085 // Look for symbol at target address
1086 uint32_t ISymbol = Symbols.FindByAddress(Section, (uint32_t)Value);
1087 if (ISymbol && (Symbols[ISymbol].Name || CodeMode == 1)) {
1088 // Symbol found. Write its name
1089 OutFile.Put(Symbols.GetName(ISymbol));
1090 // No offset to write
1091 return;
1092 }
1093 // Target address has no name
1094 Type |= 0x4000; // Write target as hexadecimal
1095 }
1096
1097 // Operand size
1098 if ((s.Operands[0] & 0xFFF) <= 0xA || (s.Operands[0] & 0xF0) == 0xA0) {
1099 // Destination is general purpose register
1100 OSize = s.OperandSize;
1101 }
1102 else {
1103 // Constant probably unrelated to destination size
1104 OSize = 8;
1105 }
1106 // Check if destination is 8 bit operand
1107 //if ((s.Operands[0] & 0xFF) == 1 || (s.Operands[0] & 0xFF) == 0xA1) OSize = 8;
1108
1109 // Check if sign extended
1110 if (OSize > s.ImmediateFieldSize * 8) {
1111 if (WriteFormat == 2 && Value >= 0) {
1112 // Hexadecimal sign extended, not negative:
1113 // Does not need full length
1114 OSize = s.ImmediateFieldSize * 8;
1115 }
1116 else if (WriteFormat == 0) {
1117 // Unsigned and sign extended, change to signed
1118 WriteFormat = 1;
1119 }
1120 }
1121
1122 if (Components) {
1123 // There was a relocated name
1124 if (Value) {
1125 // Addend to relocation is not zero
1126 if (Value > 0 || WriteFormat != 1) {
1127 OutFile.Put("+"); // Put "+" between name and addend
1128 }
1129 else {
1130 OutFile.Put("-"); // Put "-" between name and addend
1131 Value = - Value; // Change sign to avoid another "-"
1132 }
1133 }
1134 else {
1135 // No addend to relocated name
1136 return;
1137 }
1138 }
1139 // Write value
1140 if (WriteFormat == 2) {
1141 // Write with hexadecimal number appropriate size
1142 switch (OSize) {
1143 case 8: // 8 bits
1144 OutFile.PutHex((uint8_t)Value, 1); break;
1145 case 16: // 16 bits
1146 if ((Type & 0xFC) == 0x84) {
1147 // Segment of far call
1148 OutFile.PutHex((uint16_t)(Value >> 16), 1);
1149 OutFile.Put(':');
1150 }
1151 OutFile.PutHex((uint16_t)Value, 2); break;
1152 case 32: // 32 bits
1153 default: // Should not occur
1154 if ((Type & 0xFC) == 0x84) {
1155 // Segment of far call
1156 OutFile.PutHex((uint16_t)(Value >> 32), 1);
1157 OutFile.Put(':');
1158 }
1159 OutFile.PutHex((uint32_t)Value, 2); break;
1160 case 64: // 64 bits
1161 OutFile.PutHex((uint64_t)Value, 2); break;
1162 }
1163 }
1164 else {
1165 // Write as signed or unsigned decimal
1166 if (WriteFormat == 0) { // unsigned
1167 switch (OSize) {
1168 case 8: // 8 bits
1169 Value &= 0x00FF; break;
1170 case 16: // 16 bits
1171 Value &= 0xFFFF; break;
1172 }
1173 }
1174 OutFile.PutDecimal((int32_t)Value, WriteFormat); // Write value. Signed or usigned decimal
1175 }
1176 }
1177
1178
WriteOtherOperand(uint32_t Type)1179 void CDisassembler::WriteOtherOperand(uint32_t Type) {
1180 // Write other type of operand
1181 const char * * OpRegisterNames; // Pointer to list of register names
1182 uint32_t RegI = 0; // Index into list of register names
1183
1184 switch (Type & 0x8FF) {
1185 case 0xA1: // AL
1186 OpRegisterNames = RegisterNames8;
1187 break;
1188 case 0xA2: // AX
1189 OpRegisterNames = RegisterNames16;
1190 break;
1191 case 0xA3: // EAX
1192 OpRegisterNames = RegisterNames32;
1193 break;
1194 case 0xA4: // RAX
1195 OpRegisterNames = RegisterNames64;
1196 break;
1197 case 0xAE: // xmm0
1198 OutFile.Put("xmm0");
1199 return;
1200 case 0xAF: // ST(0)
1201 OutFile.Put("st(0)");
1202 return;
1203 case 0xB1: // 1
1204 OutFile.Put("1");
1205 return;
1206 case 0xB2: // DX
1207 OpRegisterNames = RegisterNames16;
1208 RegI = 2;
1209 break;
1210 case 0xB3: // CL
1211 OpRegisterNames = RegisterNames8;
1212 RegI = 1;
1213 break;
1214 default:
1215 OutFile.Put("unknown operand");
1216 err.submit(3000);
1217 return;
1218 }
1219 // Write register name
1220 OutFile.Put(OpRegisterNames[RegI]);
1221 }
1222
1223
WriteErrorsAndWarnings()1224 void CDisassembler::WriteErrorsAndWarnings() {
1225 // Write errors, warnings and comments, if any
1226 uint32_t n; // Error bit
1227 if (s.Errors) {
1228 // There are errors
1229 // Loop through all bits in s.Errors
1230 for (n = 1; n; n <<= 1) {
1231 if (s.Errors & n) {
1232 if (OutFile.GetColumn()) OutFile.NewLine();
1233 OutFile.Put(CommentSeparator); // Write "\n; "
1234 OutFile.Put("Error: "); // Write "Error: "
1235 OutFile.Put(Lookup(AsmErrorTexts,n));// Write error text
1236 OutFile.NewLine();
1237 }
1238 }
1239 }
1240
1241 if (s.Warnings1) {
1242 // There are warnings 1
1243 // Loop through all bits in s.Warnings1
1244 for (n = 1; n; n <<= 1) {
1245 if (s.Warnings1 & n) {
1246 if (OutFile.GetColumn()) OutFile.NewLine();
1247 OutFile.Put(CommentSeparator); // Write "; "
1248 OutFile.Put("Note: "); // Write "Note: "
1249 OutFile.Put(Lookup(AsmWarningTexts1, n));// Write warning text
1250 OutFile.NewLine();
1251 }
1252 }
1253 }
1254 if (s.Warnings2) {
1255 // There are warnings 2
1256 // Loop through all bits in s.Warnings2
1257 for (n = 1; n; n <<= 1) {
1258 if (s.Warnings2 & n) {
1259 if (OutFile.GetColumn()) OutFile.NewLine();
1260 OutFile.Put(CommentSeparator); // Write "; "
1261 OutFile.Put("Warning: "); // Write "Warning: "
1262 OutFile.Put(Lookup(AsmWarningTexts2, n)); // Write warning text
1263 OutFile.NewLine();
1264 }
1265 }
1266 if (s.Warnings2 & 1) {
1267 // Write spurious label
1268 uint32_t sym1 = Symbols.FindByAddress(Section, LabelEnd);
1269 if (sym1) {
1270 const char * name = Symbols.GetName(sym1);
1271 OutFile.Put(CommentSeparator);
1272 OutFile.Put(name);
1273 OutFile.Put("; Misplaced symbol at address ");
1274 OutFile.PutHex(Symbols[sym1].Offset);
1275 OutFile.NewLine();
1276 }
1277 }
1278 }
1279
1280 if (s.OpcodeDef && (s.OpcodeDef->AllowedPrefixes & 8) && !s.Warnings1) {
1281 if (s.Prefixes[0]) {
1282 // Branch hint prefix. Write comment
1283 OutFile.Put(CommentSeparator); // Write "; "
1284 switch (s.Prefixes[0]) {
1285 case 0x2E:
1286 OutFile.Put("Branch hint prefix for Pentium 4: Predict no jump");
1287 break;
1288 case 0x3E:
1289 OutFile.Put("Branch hint prefix for Pentium 4: Predict jump");
1290 break;
1291 case 0x64:
1292 OutFile.Put("Branch hint prefix for Pentium 4: Predict alternate");
1293 break;
1294 default:
1295 OutFile.Put("Note: Unrecognized branch hint prefix");
1296 }
1297 OutFile.NewLine();
1298 }
1299 }
1300 }
1301
WriteSymbolName(uint32_t symi)1302 void CDisassembler::WriteSymbolName(uint32_t symi) {
1303 // Write symbol name. symi = new symbol index
1304 OutFile.Put(Symbols.GetName(symi));
1305 }
1306
WriteSectionName(int32_t SegIndex)1307 void CDisassembler::WriteSectionName(int32_t SegIndex) {
1308 // Write name of section, segment or group from section index
1309 const char * Name = 0;
1310 // Check for special index values
1311 switch (SegIndex) {
1312 case ASM_SEGMENT_UNKNOWN: // Unknown segment. Typical for external symbols
1313 Name = "Unknown"; break;
1314 case ASM_SEGMENT_ABSOLUTE: // No segment. Used for absolute symbols
1315 Name = "Absolute"; break;
1316 case ASM_SEGMENT_FLAT: // Flat segment group
1317 Name = "flat"; break;
1318 case ASM_SEGMENT_NOTHING: // No segment
1319 Name = "Nothing"; break;
1320 case ASM_SEGMENT_ERROR: // Segment register assumed to error
1321 Name = "Error"; break;
1322 case ASM_SEGMENT_IMGREL: // Segment unknown. Offset relative to image base or file base
1323 Name = "ImageBased"; break;
1324 default: // > 0 means normal segment index
1325 if ((uint32_t)SegIndex >= Sections.GetNumEntries()) {
1326 // Out of range
1327 Name = "IndexOutOfRange";
1328 }
1329 else {
1330 // Get index into NameBuffer
1331 uint32_t NameIndex = Sections[SegIndex].Name;
1332 // Check if valid
1333 if (NameIndex == 0 || NameIndex >= NameBuffer.GetDataSize()) {
1334 Name = "ErrorNameMissing";
1335 }
1336 else {
1337 // Normal valid name of segment, section or group
1338 Name = (char*)NameBuffer.Buf() + NameIndex;
1339 }
1340 }
1341 break;
1342 }
1343 if (Syntax == SUBTYPE_NASM && Name[0] == '_') {
1344 // Change leading underscore to dot
1345 OutFile.Put('.');
1346 OutFile.Put(Name+1); // Write rest of name
1347 }
1348 else {
1349 // Write name
1350 OutFile.Put(Name);
1351 }
1352 }
1353
WriteDataItems()1354 void CDisassembler::WriteDataItems() {
1355 // Write data items to output file
1356
1357 int LineState; // 0: Start of new line, write label
1358 // 1: Label written if any, write data directive
1359 // 2: Data directive written, write data
1360 // 3: First data item written, write comma and more data
1361 // 4: Last data item written, write comment
1362 // 5: Comment written if any, start new line
1363 uint32_t Pos = IBegin; // Current position
1364 uint32_t LinePos = IBegin; // Position for beginning of output line
1365 uint32_t BytesPerLine; // Number of bytes to write per line
1366 uint32_t LineEnd; // Data position for end of line
1367 uint32_t DataEnd; // End of data
1368 uint32_t ElementSize, OldElementSize; // Size of each data element
1369 uint32_t RelOffset; // Offset of relocation
1370 uint32_t irel, Oldirel; // Relocation index
1371 int64_t Value; // Inline value or addend
1372 const char * Symname; // Symbol name
1373 int SeparateLine; // Label is on separate line
1374
1375 SARelocation Rel; // Dummy relocation record
1376
1377 // Check if size is valid
1378 if (DataSize == 0) DataSize = 1;
1379 if (DataSize > 32) DataSize = 32;
1380
1381 // Expected end position
1382 if (CodeMode & 3) {
1383 // Writing data for dubious code. Make same length as code instruction
1384 DataEnd = IEnd;
1385 }
1386 else {
1387 // Regular data. End at next label
1388 DataEnd = LabelEnd;
1389 if (DataEnd > FunctionEnd) DataEnd = FunctionEnd;
1390 if (DataEnd <= Pos) DataEnd = Pos + DataSize;
1391 if (DataEnd > Sections[Section].InitSize && Pos < Sections[Section].InitSize) {
1392 DataEnd = Sections[Section].InitSize;
1393 }
1394 }
1395
1396 // Size of each data element
1397 ElementSize = DataSize;
1398
1399 // Check if packed type
1400 if (DataType & 0xF00) {
1401 // This is a packed vector type. Get element size
1402 ElementSize = GetDataElementSize(DataType);
1403 }
1404
1405 // Avoid sizes that are not powers of 2
1406 if (ElementSize == 6 || ElementSize == 10) ElementSize = 2;
1407
1408 // Set maximum element size to 8
1409 if (ElementSize > 8) ElementSize = 8;
1410
1411 // Set minimum element size to 1
1412 if (ElementSize < 1) ElementSize = 1;
1413
1414 if (Pos + ElementSize > DataEnd) {
1415 // Make sure we end at DataEnd
1416 ElementSize = 1; BytesPerLine = 8;
1417 LineEnd = DataEnd;
1418 }
1419
1420 // Set number of bytes per line
1421 BytesPerLine = (DataSize == 10) ? 10 : 8;
1422
1423 if (!(CodeMode & 3)) {
1424 // Begin new line for each data item (except in code segment)
1425 OutFile.NewLine();
1426 }
1427 LineState = 0; irel = 0;
1428
1429 // Check if alignment required
1430 if (DataSize >= 16 && (DataType & 0xC00) && (DataType & 0xFF) != 0x51
1431 && (FlagPrevious & 0x100) < (DataSize << 4) && !(IBegin & (DataSize-1))) {
1432 // Write align directive
1433 WriteAlign(DataSize);
1434 // Remember that data is aligned
1435 FlagPrevious |= (DataSize << 4);
1436 }
1437
1438 // Get symbol name for label
1439 uint32_t sym; // Current symbol index
1440 uint32_t sym1, sym2 = 0; // First and last symbol at current address
1441
1442 sym1 = Symbols.FindByAddress(Section, Pos, &sym2);
1443
1444 // Loop for one or more symbols at this address
1445 for (sym = sym1; sym <= sym2; sym++) {
1446
1447 if (sym && Symbols[sym].Scope && !(Symbols[sym].Scope & 0x100) && !(Symbols[sym].Type & 0x80000000)) {
1448
1449 // Prepare for writing symbol label
1450 Symname = Symbols.GetName(sym); // Symbol name
1451 // Check if label needs a separate line
1452 SeparateLine = (ElementSize != DataSize
1453 || Symbols[sym].Size != DataSize
1454 || strlen(Symname) > AsmTab1
1455 || sym < sym2
1456 // || (Sections[Section].Type & 0xFF) == 3
1457 || ((Symbols[sym].Type+1) & 0xFE) == 0x0C);
1458
1459 // Write symbol label
1460 switch (Syntax) {
1461 case SUBTYPE_MASM:
1462 WriteDataLabelMASM(Symname, sym, SeparateLine); break;
1463 case SUBTYPE_NASM:
1464 WriteDataLabelYASM(Symname, sym, SeparateLine); break;
1465 case SUBTYPE_GASM:
1466 WriteDataLabelGASM(Symname, sym, SeparateLine); break;
1467 }
1468 LineState = 1; // Label written
1469 if (SeparateLine) {
1470 LineState = 0;
1471 }
1472 }
1473 }
1474
1475 if ((Sections[Section].Type & 0xFF) == 3 || Pos >= Sections[Section].InitSize) {
1476 // This is an unitialized data (BSS) section
1477 // Data repeat count
1478 uint32_t DataCount = (DataEnd - Pos) / ElementSize;
1479 if (DataCount) {
1480 OutFile.Tabulate(AsmTab1);
1481 // Write data directives
1482 switch (Syntax) {
1483 case SUBTYPE_MASM:
1484 WriteUninitDataItemsMASM(ElementSize, DataCount); break;
1485 case SUBTYPE_NASM:
1486 WriteUninitDataItemsYASM(ElementSize, DataCount); break;
1487 case SUBTYPE_GASM:
1488 WriteUninitDataItemsGASM(ElementSize, DataCount); break;
1489 }
1490 // Write comment
1491 WriteDataComment(ElementSize, Pos, Pos, 0);
1492 OutFile.NewLine();
1493 LineState = 0;
1494 }
1495 // Update data position
1496 Pos += DataCount * ElementSize;
1497
1498 if (Pos < DataEnd) {
1499 // Some odd data remain. Write as bytes
1500 DataCount = DataEnd - Pos;
1501 ElementSize = 1;
1502 OutFile.Tabulate(AsmTab1);
1503 switch (Syntax) {
1504 case SUBTYPE_MASM:
1505 WriteUninitDataItemsMASM(ElementSize, DataCount); break;
1506 case SUBTYPE_NASM:
1507 WriteUninitDataItemsYASM(ElementSize, DataCount); break;
1508 case SUBTYPE_GASM:
1509 WriteUninitDataItemsGASM(ElementSize, DataCount); break;
1510 }
1511 // Write comment
1512 WriteDataComment(ElementSize, Pos, Pos, 0);
1513 OutFile.NewLine();
1514 Pos = DataEnd;
1515 LineState = 0;
1516 }
1517 }
1518 else {
1519 // Not a BSS section
1520 // Label has been written, write data
1521
1522 // Loop for one or more elements
1523 LinePos = Pos;
1524 while (Pos < DataEnd) {
1525
1526 // Find end of line position
1527 LineEnd = LinePos + BytesPerLine;
1528
1529 // Remember element size and relocation
1530 OldElementSize = ElementSize;
1531 Oldirel = irel;
1532
1533 // Check if relocation
1534 Rel.Section = Section;
1535 Rel.Offset = Pos;
1536 uint32_t irel = Relocations.FindFirst(Rel);
1537 if (irel >= Relocations.GetNumEntries() || Relocations[irel].Section != (int32_t)Section) {
1538 // No relevant relocation
1539 irel = 0;
1540 }
1541 if (irel) {
1542 // A relocation is found
1543 // Check relocation source
1544 RelOffset = Relocations[irel].Offset;
1545 if (RelOffset == Pos) {
1546 // Relocation source is here
1547 // Make sure the size fits and begin new line
1548 ElementSize = Relocations[irel].Size; BytesPerLine = 8;
1549 if (ElementSize < 1) ElementSize = WordSize / 8;
1550 if (ElementSize < 1) ElementSize = 4;
1551 LineEnd = Pos + ElementSize;
1552 if (LineState > 2) LineState = 4; // Make sure we begin at new line
1553 }
1554 else if (RelOffset < Pos + ElementSize) {
1555 // Relocation source begins before end of element with current ElementSize
1556 // Change ElementSize to make sure a new element begins at relocation source
1557 ElementSize = 1; BytesPerLine = 8;
1558 LineEnd = RelOffset;
1559 if (LineState > 2) LineState = 4; // Make sure we begin at new line
1560 irel = 0;
1561 }
1562 else {
1563 // Relocation is after this element
1564 irel = 0;
1565 }
1566 // Check for overlapping relocations
1567 if (irel && irel+1 < Relocations.GetNumEntries()
1568 && Relocations[irel+1].Section == (int32_t)Section
1569 && Relocations[irel+1].Offset < RelOffset + ElementSize) {
1570 // Overlapping relocations
1571 s.Errors |= 0x2000;
1572 WriteErrorsAndWarnings();
1573 LineEnd = Relocations[irel+1].Offset;
1574 if (LineState > 2) LineState = 4; // Make sure we begin at new line
1575 }
1576 // Drop alignment
1577 FlagPrevious &= ~0xF00;
1578 }
1579 if (irel == 0) {
1580 // No relocation here
1581 // Check if DataEnd would be exceeded
1582 if (Pos + ElementSize > DataEnd) {
1583 // Make sure we end at DataEnd unless there is a relocation source here
1584 ElementSize = 1; BytesPerLine = 8;
1585 LineEnd = DataEnd;
1586 if (LineState > 2) LineState = 4; // Make sure we begin at new line
1587 FlagPrevious &= ~0xF00; // Drop alignment
1588 }
1589 }
1590 // Check if new line needed
1591 if (LineState == 4) {
1592 // Finish this line
1593 if (!(CodeMode & 3)) {
1594 WriteDataComment(OldElementSize, LinePos, Pos, Oldirel);
1595 }
1596 // Start new line
1597 OutFile.NewLine();
1598 LineState = 0;
1599 LinePos = Pos;
1600 continue;
1601 }
1602
1603 // Tabulate
1604 OutFile.Tabulate(AsmTab1);
1605
1606 if (LineState < 2) {
1607 // Write data definition directive for appropriate size
1608 switch (Syntax) {
1609 case SUBTYPE_MASM:
1610 WriteDataDirectiveMASM(ElementSize); break;
1611 case SUBTYPE_NASM:
1612 WriteDataDirectiveYASM(ElementSize); break;
1613 case SUBTYPE_GASM:
1614 WriteDataDirectiveGASM(ElementSize); break;
1615 }
1616 LineState = 2;
1617 }
1618 else if (LineState == 3) {
1619 // Not the first element, write comma
1620 OutFile.Put(", ");
1621 }
1622 // Get inline value
1623 switch (ElementSize) {
1624 case 1: Value = Get<int8_t>(Pos); break;
1625 case 2: Value = Get<int16_t>(Pos); break;
1626 case 4: Value = Get<int32_t>(Pos); break;
1627 case 6: Value = Get<uint32_t>(Pos) + ((uint64_t)Get<uint16_t>(Pos+4) << 32); break;
1628 case 8: Value = Get<int64_t>(Pos); break;
1629 case 10: Value = Get<int64_t>(Pos); break;
1630 default: Value = 0; // should not occur
1631 }
1632 if (irel) {
1633 // There is a relocation here. Write the name etc.
1634 WriteRelocationTarget(irel, 1, Value);
1635 }
1636 else {
1637 // Write value
1638 switch (ElementSize) {
1639 case 1:
1640 OutFile.PutHex((uint8_t)Value, 1);
1641 break;
1642 case 2:
1643 OutFile.PutHex((uint16_t)Value, 1);
1644 break;
1645 case 4:
1646 OutFile.PutHex((uint32_t)Value, 1);
1647 break;
1648 case 6:
1649 OutFile.PutHex((uint16_t)(Value >> 32), 1);
1650 OutFile.Put(":");
1651 OutFile.PutHex((uint32_t)Value, 1);
1652 break;
1653 case 8:
1654 OutFile.PutHex((uint64_t)Value, 1);
1655 break;
1656 case 10:
1657 OutFile.Put("??");
1658 break;
1659 }
1660 }
1661 LineState = 3;
1662 // Increment position
1663 Pos += ElementSize;
1664
1665 // Check if end of line
1666 if (Pos >= LineEnd || Pos >= DataEnd) LineState = 4;
1667
1668 if (LineState == 4) {
1669 // End of line
1670 if (!(CodeMode & 3)) {
1671 // Write comment
1672 WriteDataComment(ElementSize, LinePos, Pos, irel);
1673 }
1674 OutFile.NewLine();
1675 LinePos = Pos;
1676 LineState = 0;
1677 }
1678 }
1679 }
1680
1681 // Indicate end
1682 if (IEnd < Pos) IEnd = Pos;
1683 if (IEnd > LabelEnd) IEnd = LabelEnd;
1684 if (IEnd > FunctionEnd && FunctionEnd) IEnd = FunctionEnd;
1685
1686 // Reset FlagPrevious if not aligned
1687 if (DataSize < 16 || (DataType & 0xFF) == 0x28) FlagPrevious = 0;
1688 }
1689
1690
WriteDataLabelMASM(const char * name,uint32_t sym,int line)1691 void CDisassembler::WriteDataLabelMASM(const char * name, uint32_t sym, int line) {
1692 // Write label before data item, MASM syntax
1693 // name = name of data item(s)
1694 // sym = symbol index
1695 // line = 1 if label is on separate line, 0 if data follows on same line
1696 // Write name
1697 OutFile.Put(name);
1698 // At least one space
1699 OutFile.Put(" ");
1700 // Tabulate
1701 OutFile.Tabulate(AsmTab1);
1702
1703 if (line) {
1704 // Write label and type on seperate line
1705 // Get size
1706 uint32_t Symsize = Symbols[sym].Size;
1707 if (Symsize == 0) Symsize = DataSize;
1708 OutFile.Put("label ");
1709 // Write type
1710 switch(Symsize) {
1711 case 1: default:
1712 OutFile.Put("byte"); break;
1713 case 2:
1714 OutFile.Put("word"); break;
1715 case 4:
1716 OutFile.Put("dword"); break;
1717 case 6:
1718 OutFile.Put("fword"); break;
1719 case 8:
1720 OutFile.Put("qword"); break;
1721 case 10:
1722 OutFile.Put("tbyte"); break;
1723 case 16:
1724 OutFile.Put("xmmword"); break;
1725 case 32:
1726 OutFile.Put("ymmword"); break;
1727 }
1728 // Check if jump table or call table
1729 if (((Symbols[sym].Type+1) & 0xFE) == 0x0C) {
1730 OutFile.Tabulate(AsmTab3);
1731 OutFile.Put(CommentSeparator);
1732 if (Symbols[sym].DLLName) {
1733 // DLL import
1734 OutFile.Put("import from ");
1735 OutFile.Put(Symbols.GetDLLName(sym));
1736 }
1737 else if (Symbols[sym].Type & 1) {
1738 OutFile.Put("switch/case jump table");
1739 }
1740 else {
1741 OutFile.Put("virtual table or function pointer");
1742 }
1743 }
1744 // New line
1745 OutFile.NewLine();
1746 }
1747 }
1748
WriteDataLabelYASM(const char * name,uint32_t sym,int line)1749 void CDisassembler::WriteDataLabelYASM(const char * name, uint32_t sym, int line) {
1750 // Write label before data item, YASM syntax
1751 // name = name of data item(s)
1752 // sym = symbol index
1753 // line = 1 if label is on separate line, 0 if data follows on same line
1754 // Write name and colon
1755 OutFile.Put(name);
1756 OutFile.Put(": ");
1757 // Tabulate
1758 OutFile.Tabulate(AsmTab1);
1759
1760 if (line) {
1761 // Write label on seperate line
1762 // Write comment
1763 OutFile.Tabulate(AsmTab3);
1764 OutFile.Put(CommentSeparator);
1765 // Check if jump table or call table
1766 if (((Symbols[sym].Type+1) & 0xFE) == 0x0C) {
1767 if (Symbols[sym].DLLName) {
1768 // DLL import
1769 OutFile.Put("import from ");
1770 OutFile.Put(Symbols.GetDLLName(sym));
1771 }
1772 else if (Symbols[sym].Type & 1) {
1773 OutFile.Put("switch/case jump table");
1774 }
1775 else {
1776 OutFile.Put("virtual table or function pointer");
1777 }
1778 }
1779 else {
1780 // Write size
1781 uint32_t Symsize = Symbols[sym].Size;
1782 if (Symsize == 0) Symsize = DataSize;
1783 switch(Symsize) {
1784 case 1: default:
1785 OutFile.Put("byte"); break;
1786 case 2:
1787 OutFile.Put("word"); break;
1788 case 4:
1789 OutFile.Put("dword"); break;
1790 case 6:
1791 OutFile.Put("fword"); break;
1792 case 8:
1793 OutFile.Put("qword"); break;
1794 case 10:
1795 OutFile.Put("tbyte"); break;
1796 case 16:
1797 OutFile.Put("oword"); break;
1798 case 32:
1799 OutFile.Put("yword"); break;
1800 case 64:
1801 OutFile.Put("zword"); break;
1802 }
1803 }
1804 // New line
1805 OutFile.NewLine();
1806 }
1807 }
1808
WriteDataLabelGASM(const char * name,uint32_t sym,int line)1809 void CDisassembler::WriteDataLabelGASM(const char * name, uint32_t sym, int line) {
1810 // Write label before data item, GAS syntax
1811 // name = name of data item(s)
1812 // sym = symbol index
1813 // line = 1 if label is on separate line, 0 if data follows on same line
1814 // Write name and colon
1815 OutFile.Put(name);
1816 OutFile.Put(": ");
1817 // Tabulate
1818 OutFile.Tabulate(AsmTab1);
1819
1820 if (line) {
1821 // Write label on seperate line
1822 // Write comment
1823 OutFile.Tabulate(AsmTab3);
1824 OutFile.Put(CommentSeparator);
1825 // Check if jump table or call table
1826 if (((Symbols[sym].Type+1) & 0xFE) == 0x0C) {
1827 if (Symbols[sym].DLLName) {
1828 // DLL import
1829 OutFile.Put("import from ");
1830 OutFile.Put(Symbols.GetDLLName(sym));
1831 }
1832 else if (Symbols[sym].Type & 1) {
1833 OutFile.Put("switch/case jump table");
1834 }
1835 else {
1836 OutFile.Put("virtual table or function pointer");
1837 }
1838 }
1839 else {
1840 // Write size
1841 uint32_t Symsize = Symbols[sym].Size;
1842 if (Symsize == 0) Symsize = DataSize;
1843 switch(Symsize) {
1844 case 1: default:
1845 OutFile.Put("byte"); break;
1846 case 2:
1847 OutFile.Put("word"); break;
1848 case 4:
1849 OutFile.Put("int"); break;
1850 case 6:
1851 OutFile.Put("farword"); break;
1852 case 8:
1853 OutFile.Put("qword"); break;
1854 case 10:
1855 OutFile.Put("tfloat"); break;
1856 case 16:
1857 OutFile.Put("xmmword"); break;
1858 case 32:
1859 OutFile.Put("ymmword"); break;
1860 }
1861 }
1862 // New line
1863 OutFile.NewLine();
1864 }
1865 }
1866
WriteUninitDataItemsMASM(uint32_t size,uint32_t count)1867 void CDisassembler::WriteUninitDataItemsMASM(uint32_t size, uint32_t count) {
1868 // Write uninitialized (BSS) data, MASM syntax
1869 // size = size of each data element
1870 // count = number of data elements on each line
1871
1872 // Write data definition directive for appropriate size
1873 switch (size) {
1874 case 1:
1875 OutFile.Put("db "); break;
1876 case 2:
1877 OutFile.Put("dw "); break;
1878 case 4:
1879 OutFile.Put("dd "); break;
1880 case 6:
1881 OutFile.Put("df "); break;
1882 case 8:
1883 OutFile.Put("dq "); break;
1884 case 10:
1885 OutFile.Put("dt "); break;
1886 }
1887 OutFile.Tabulate(AsmTab2);
1888 if (count > 1) {
1889 // Write duplication operator
1890 OutFile.PutDecimal(count);
1891 OutFile.Put(" dup (?)");
1892 }
1893 else {
1894 // DataCount == 1
1895 OutFile.Put("?");
1896 }
1897 }
1898
WriteUninitDataItemsYASM(uint32_t size,uint32_t count)1899 void CDisassembler::WriteUninitDataItemsYASM(uint32_t size, uint32_t count) {
1900 // Write uninitialized (BSS) data, YASM syntax
1901 // Write data definition directive for appropriate size
1902 switch (size) {
1903 case 1:
1904 OutFile.Put("resb "); break;
1905 case 2:
1906 OutFile.Put("resw "); break;
1907 case 4:
1908 OutFile.Put("resd "); break;
1909 case 6:
1910 OutFile.Put("resw "); count *= 3; break;
1911 case 8:
1912 OutFile.Put("resq "); break;
1913 case 10:
1914 OutFile.Put("rest "); break;
1915 }
1916 OutFile.Tabulate(AsmTab2);
1917 OutFile.PutDecimal(count);
1918 }
1919
WriteUninitDataItemsGASM(uint32_t size,uint32_t count)1920 void CDisassembler::WriteUninitDataItemsGASM(uint32_t size, uint32_t count) {
1921 // Write uninitialized (BSS) data, GAS syntax
1922 OutFile.Put(".zero");
1923 OutFile.Tabulate(AsmTab2);
1924 if (count != 1) {
1925 OutFile.PutDecimal(count); OutFile.Put(" * ");
1926 }
1927 OutFile.PutDecimal(size);
1928 }
1929
WriteDataDirectiveMASM(uint32_t size)1930 void CDisassembler::WriteDataDirectiveMASM(uint32_t size) {
1931 // Write DB, etc., MASM syntax
1932 // Write data definition directive for appropriate size
1933 switch (size) {
1934 case 1: OutFile.Put("db "); break;
1935 case 2: OutFile.Put("dw "); break;
1936 case 4: OutFile.Put("dd "); break;
1937 case 6: OutFile.Put("df "); break;
1938 case 8: OutFile.Put("dq "); break;
1939 case 10: OutFile.Put("dt "); break;
1940 case 16: OutFile.Put("xmmword "); break;
1941 case 32: OutFile.Put("ymmword "); break;
1942 default: OutFile.Put("Error "); break;
1943 }
1944 }
1945
WriteDataDirectiveYASM(uint32_t size)1946 void CDisassembler::WriteDataDirectiveYASM(uint32_t size) {
1947 // Write DB, etc., YASM syntax
1948 // Write data definition directive for appropriate size
1949 switch (size) {
1950 case 1: OutFile.Put("db "); break;
1951 case 2: OutFile.Put("dw "); break;
1952 case 4: OutFile.Put("dd "); break;
1953 case 6: OutFile.Put("df "); break;
1954 case 8: OutFile.Put("dq "); break;
1955 case 10: OutFile.Put("dt "); break;
1956 case 16: OutFile.Put("ddq "); break;
1957 default: OutFile.Put("Error "); break;
1958 }
1959 }
1960
WriteDataDirectiveGASM(uint32_t size)1961 void CDisassembler::WriteDataDirectiveGASM(uint32_t size) {
1962 // Write DB, etc., GAS syntax
1963 // Write data definition directive for appropriate size
1964 switch (size) {
1965 case 1: OutFile.Put(".byte "); break;
1966 case 2: OutFile.Put(".short "); break;
1967 case 4: OutFile.Put(".int "); break;
1968 case 8: OutFile.Put(".quad "); break;
1969 case 10: OutFile.Put(".tfloat "); break;
1970 default: OutFile.Put("Error "); break;
1971 }
1972 }
1973
1974
WriteDataComment(uint32_t ElementSize,uint32_t LinePos,uint32_t Pos,uint32_t irel)1975 void CDisassembler::WriteDataComment(uint32_t ElementSize, uint32_t LinePos, uint32_t Pos, uint32_t irel) {
1976 // Write comment after data item
1977 uint32_t pos1; // Position of data for comment
1978 uint32_t RelType = 0; // Relocation type
1979 char TextBuffer[64]; // Buffer for writing floating point number
1980
1981 OutFile.Tabulate(AsmTab3); // Tabulate to comment field
1982 OutFile.Put(CommentSeparator); // Start comment
1983
1984 // Write address
1985 if (SectionEnd + SectionAddress + (uint32_t)ImageBase > 0xFFFF) {
1986 // Write 32 bit address
1987 OutFile.PutHex(LinePos + SectionAddress + (uint32_t)ImageBase);
1988 }
1989 else {
1990 // Write 16 bit address
1991 OutFile.PutHex((uint16_t)(LinePos + SectionAddress));
1992 }
1993
1994 if ((Sections[Section].Type & 0xFF) == 3 || Pos > Sections[Section].InitSize) {
1995 // Unitialized data. Write no data
1996 return;
1997 }
1998
1999 if (irel && irel < Relocations.GetNumEntries() && Relocations[irel].Offset == LinePos) {
2000 // Value is relocated, get relocation type
2001 RelType = Relocations[irel].Type;
2002 }
2003
2004 // Space after address
2005 OutFile.Put(" _ ");
2006
2007 // Comment type depends on ElementSize and DataType
2008 switch (ElementSize) {
2009 case 1:
2010 // Bytes. Write ASCII characters
2011 for (pos1 = LinePos; pos1 < Pos; pos1++) {
2012 // Get character
2013 int8_t c = Get<int8_t>(pos1);
2014 // Avoid non-printable characters
2015 if (c < ' ' || c == 0x7F) c = '.';
2016 // Print ASCII character
2017 OutFile.Put(c);
2018 }
2019 break;
2020 case 2:
2021 // Words. Write as decimal
2022 for (pos1 = LinePos; pos1 < Pos; pos1 += 2) {
2023 if (RelType) {
2024 OutFile.PutHex(Get<uint16_t>(pos1), 1); // Write as hexadecimal
2025 }
2026 else {
2027 OutFile.PutDecimal(Get<int16_t>(pos1), 1);// Write as signed decimal
2028 }
2029 OutFile.Put(' ');
2030 }
2031 break;
2032 case 4:
2033 // Dwords
2034 for (pos1 = LinePos; pos1 < Pos; pos1 += 4) {
2035 if ((DataType & 0x47) == 0x43) {
2036 // Write as float
2037 sprintf(TextBuffer, "%.8G", Get<float>(pos1));
2038 OutFile.Put(TextBuffer);
2039 // Make sure the number has a . or E to indicate a floating point number
2040 if (!strchr(TextBuffer,'.') && !strchr(TextBuffer,'E')) OutFile.Put(".0");
2041 }
2042 else if (((DataType + 1) & 0xFF) == 0x0C || RelType) {
2043 // jump/call address or offset. Write as hexadecimal
2044 OutFile.PutHex(Get<uint32_t>(pos1));
2045 }
2046 else {
2047 // Other. Write as decimal
2048 OutFile.PutDecimal(Get<int32_t>(pos1), 1);
2049 }
2050 OutFile.Put(' ');
2051 }
2052 break;
2053 case 8:
2054 // Qwords
2055 for (pos1 = LinePos; pos1 < Pos; pos1 += 8) {
2056 if ((DataType & 0x47) == 0x44) {
2057 // Write as double
2058 sprintf(TextBuffer, "%.16G", Get<double>(pos1));
2059 OutFile.Put(TextBuffer);
2060 // Make sure the number has a . or E to indicate a floating point number
2061 if (!strchr(TextBuffer,'.') && !strchr(TextBuffer,'E')) OutFile.Put(".0");
2062 }
2063 else {
2064 // Write as hexadecimal
2065 OutFile.PutHex(Get<uint64_t>(pos1));
2066 }
2067 OutFile.Put(' ');
2068 }
2069 break;
2070 case 10:
2071 // tbyte. Many compilers do not support long doubles in sprintf. Write as bytes
2072 for (pos1 = LinePos; pos1 < Pos; pos1++) {
2073 OutFile.PutHex(Get<uint8_t>(pos1), 1);
2074 }
2075 break;
2076 }
2077 if (RelType) {
2078 // Indicate relocation type
2079 OutFile.Put(Lookup(RelocationTypeNames, RelType));
2080 }
2081 }
2082
2083
void CDisassembler::WriteRelocationTarget(uint32_t irel, uint32_t Context, int64_t Addend) {
    // Write cross reference, including addend, but not including segment override and []
    //
    // The pieces are written to OutFile in this order, each only if needed:
    // "offset ", "seg ", name of segment/group frame and ":", "imagerel(",
    // name of target symbol or segment, "@PLT", ")", "-" and reference point,
    // "-" or "+" and HereOperator, sign and hexadecimal addend.
    //
    // irel = index into Relocations
    // Context:
    // 1      = Data definition
    // 2      = Immediate data field in instruction
    // 4      = Data address in instruction
    // 8      = Near jump/call destination
    // 0x10   = Far jump/call destination
    // 0x100  = Self-relative address expected
    // Addend:  inline addend
    // Implicit parameters:
    // IBegin:  value of '$' operator
    // IEnd:    reference point for self-relative addressing
    // BaseReg, IndexReg

    uint32_t RefFrame;                            // Target segment
    int32_t  Addend2 = 0;                         // Difference between '$' and reference point

    // Get relocation type
    uint32_t RelType = Relocations[irel].Type;

    if (RelType & 0x60) {
        // Inline addend is already relocated.
        // Ignore addend and treat as direct relocation
        // (all other type bits are discarded by this assignment)
        RelType = 1;
        Addend = 0;
    }

    // Get relocation size
    uint32_t RelSize = Relocations[irel].Size;

    // Get relocation addend. It is added to the inline addend
    Addend += Relocations[irel].Addend;

    // Get relocation target (old symbol index)
    uint32_t Target = Relocations[irel].TargetOldIndex;

    // Is offset operand needed?
    // (never written in NASM syntax)
    if (Syntax != SUBTYPE_NASM && (
        ((RelType & 0xB) && (Context & 2))
        || ((RelType & 8) && (Context & 0x108)))) {
        // offset operator needed to convert memory operand to immediate address
        OutFile.Put("offset ");
    }

    // Is seg operand needed?
    if (RelType & 0x200) {
        // seg operator needed to convert memory operand to its segment
        OutFile.Put("seg ");
    }

    // Is explicit segment or frame needed?
    if ((RelType & 0x408) && (Context & 0x11B)) {
        // Write name of segment/group frame
        RefFrame = Relocations[irel].RefOldIndex;
        if (!RefFrame) {
            // No frame. Use segment of symbol
            RefFrame = Symbols[Symbols.Old2NewIndex(Target)].Section;
        }
        if (RefFrame && RefFrame < Sections.GetNumEntries()) {
            // Write segment or group name
            const char * SecName = (char*)NameBuffer.Buf()+Sections[RefFrame].Name;
            OutFile.Put(SecName);
            OutFile.Put(":");
        }
    }

    // Is imagerel operator needed?
    if (RelType & 4) {
        // imagerel operator needed to get image-relative address.
        // The matching ")" is written after the target name below
        OutFile.Put("imagerel(");
    }

    // Adjust addend
    // The three cases below are mutually exclusive.
    // Adjust offset if self-relative relocation expected and found
    if ((RelType & 2) && (Context & 0x108)) {
        // Self-relative relocation expected and found
        // Adjust by size of address field and immediate field
        Addend += IEnd - Relocations[irel].Offset;
    }
    // Subtract self-reference if unexpected self-relative relocation
    if ((RelType & 2) && !(Context & 0x108)) {
        // Self-relative relocation found but not expected
        // Fix difference between '$' and reference point
        Addend2 = Relocations[irel].Offset - IBegin;
        Addend -= Addend2;
    }
    // Add self-reference if self-relative relocation expected but not found
    if (!(RelType & 2) && (Context & 0x108)) {
        // Self-relative relocation expected but not found
        // Fix difference between '$' and reference point
        Addend += IEnd - IBegin;
    }

    if (RelType & 0x100) {
        // Target is a segment. Write name of the section of the target symbol
        RefFrame = Symbols[Symbols.Old2NewIndex(Target)].Section;
        if (RefFrame && RefFrame < Sections.GetNumEntries()) {
            const char * SecName = (char*)NameBuffer.Buf()+Sections[RefFrame].Name;
            OutFile.Put(SecName);
        }
        else {
            OutFile.Put("undefined segment");
        }
    }
    else {
        // Target is a symbol

        // Find target symbol (new symbol index)
        uint32_t TargetSym = Symbols.Old2NewIndex(Target);

        // Check if Target is appropriate
        // Only the low 32 bits of Addend are considered here
        if (((Symbols[TargetSym].Type & 0x80000000) || (int32_t)Addend)
            && !(CodeMode == 1 && s.BaseReg)) {
            // Symbol is a start-of-section entry in symbol table, or has an addend
            // Look for a more appropriate symbol, except if code with base register
            uint32_t sym, sym1, sym2 = 0;
            sym1 = Symbols.FindByAddress(Symbols[TargetSym].Section, Symbols[TargetSym].Offset + (int32_t)Addend, &sym2);
            // Search the symbols sym1..sym2. If several qualify, the last one is used
            for (sym = sym1; sym && sym <= sym2; sym++) {
                if (Symbols[sym].Scope && !(Symbols[sym].Type & 0x80000000)) {
                    // Found a better symbol name for target address
                    TargetSym = sym;
                    // The new symbol is at the target address, so only the
                    // '$' difference (if any) remains as addend
                    Addend = Addend2;
                }
            }
        }
        // Write name of target symbol
        OutFile.Put(Symbols.GetName(TargetSym));

        // NOTE(review): a RelType of 0x41 has bit 0x40 set and has therefore
        // been replaced by 1 at the top of this function, so the comparison
        // with 0x41 below apparently can never be true - confirm intent
        if (Syntax == SUBTYPE_GASM && (
            RelType == 0x41 || RelType == 0x81 || RelType == 0x2002)) {
            // make PLT entry
            OutFile.Put("@PLT");
        }
    }

    // End parenthesis if we started one
    if (RelType & 4) {
        OutFile.Put(")");
    }

    // Subtract reference point, if any
    if (RelType & 0x10) {
        OutFile.Put("-");
        // Write name of reference point symbol (old symbol index)
        uint32_t RefPoint = Relocations[irel].RefOldIndex;
        if (RefPoint) {
            // Reference point name valid
            OutFile.Put(Symbols.GetNameO(RefPoint));
        }
        else {
            OutFile.Put("Reference_Point_Missing");
        }
    }

    // Subtract self-reference if unexpected self-relative relocation
    if ((RelType & 2) && !(Context & 0x108)) {
        // Self-relative relocation found but not expected
        OutFile.Put("-"); OutFile.Put(HereOperator);
    }

    // Add self-reference if self-relative relocation expected but not found
    if (!(RelType & 2) && (Context & 0x108)) {
        // Self-relative relocation expected but not found
        OutFile.Put("+"); OutFile.Put(HereOperator);
    }

    // Write addend, if not zero
    if (Addend) {
        if (Addend < 0) {
            // Negative, write "-" and continue with the absolute value
            OutFile.Put("-");
            Addend = -Addend;
        }
        else {
            // Positive, write "+"
            OutFile.Put("+");
        }

        // Write value as hexadecimal, truncated to the size of the relocation
        switch (RelSize) {
        case 1:
            OutFile.PutHex((uint8_t)Addend, 1);
            break;
        case 2:
            OutFile.PutHex((uint16_t)Addend, 2);
            break;
        case 4:
            OutFile.PutHex((uint32_t)Addend, 2);
            break;
        case 6:
            // Written as two parts separated by ":" : bits 32-47, then bits 0-31
            OutFile.PutHex((uint16_t)(Addend >> 32), 1);
            OutFile.Put(":");
            OutFile.PutHex((uint32_t)Addend, 1);
            break;
        case 8:
            OutFile.PutHex((uint64_t)Addend, 2);
            break;
        default:
            OutFile.Put("??");                    // Should not occur
            break;
        }
    }
}
2289
2290
WriteFillers()2291 int CDisassembler::WriteFillers() {
2292 // Check if code is a series of NOPs or other fillers.
2293 // If so then write it as filler and return 1.
2294 // If not, then return 0.
2295
2296 // Check if code is filler
2297 if (!(OpcodeOptions & 0x40)) {
2298 // This instruction can not be used as filler
2299 return 0;
2300 }
2301 uint32_t FillerType; // Type of filler
2302 const char * FillerName = s.OpcodeDef->Name; // Name of filler
2303 uint32_t IFillerBegin = IBegin; // Start of filling space
2304 uint32_t IFillerEnd; // End of filling space
2305
2306 // check for CC = int 3 breakpoint, 3C00 = 90 NOP, 11F = multibyte NOP
2307 if (Opcodei == 0xCC || (Opcodei & 0xFFFE) == 0x3C00 || Opcodei == 0x11F) {
2308 // Instruction is a NOP or int 3 breakpoint
2309 FillerType = Opcodei;
2310 }
2311 else if (s.Warnings1 & 0x1000000) {
2312 // Instruction is a LEA, MOV, etc. with same source and destination
2313 // used as a multi-byte NOP
2314 FillerType = 0xFFFFFFFF;
2315 }
2316 else {
2317 // This instruction does something. Not a filler
2318 return 0;
2319 }
2320 // Save beginning position
2321 IFillerEnd = IEnd = IBegin;
2322
2323 // Loop through instructions to find all consecutive fillers
2324 while (NextInstruction2()) {
2325
2326 // Parse instruction
2327 ParseInstruction();
2328
2329 // Check if code is filler
2330 if (!(OpcodeOptions & 0x40)) {
2331 // This instruction can not be a filler
2332 // Save position of this instruction
2333 IFillerEnd = IBegin;
2334 break;
2335 }
2336 if (Opcodei != 0xCC && (Opcodei & 0xFFFE) != 0x3C00 && Opcodei != 0x11F
2337 && !(s.Warnings1 & 0x1000000)) {
2338 // Not a filler
2339 // Save position of this instruction
2340 IFillerEnd = IBegin;
2341 break;
2342 }
2343 // If loop exits here then fillers end at end of this instruction
2344 IFillerEnd = IEnd;
2345 }
2346 // Safety check
2347 if (IFillerEnd <= IFillerBegin) return 0;
2348
2349 // Size of fillers
2350 uint32_t FillerSize = IFillerEnd - IFillerBegin;
2351
2352 // Write size of filling space
2353 OutFile.Put(CommentSeparator);
2354 OutFile.Put("Filling space: ");
2355 OutFile.PutHex(FillerSize, 2);
2356 OutFile.NewLine();
2357 // Write filler type
2358 OutFile.Put(CommentSeparator);
2359 OutFile.Put("Filler type: ");
2360 switch (FillerType) {
2361 case 0xCC:
2362 FillerName = "INT 3 Debug breakpoint"; break;
2363 case 0x3C00:
2364 FillerName = "NOP"; break;
2365 case 0x3C01:
2366 FillerName = "NOP with prefixes"; break;
2367 case 0x011F:
2368 FillerName = "Multi-byte NOP";break;
2369 }
2370 OutFile.Put(FillerName);
2371 if (FillerType == 0xFFFFFFFF) {
2372 OutFile.Put(" with same source and destination");
2373 }
2374
2375 // Write as bytes
2376 uint32_t Pos;
2377 for (Pos = IFillerBegin; Pos < IFillerEnd; Pos++) {
2378 if (((Pos - IFillerBegin) & 7) == 0) {
2379 // Start new line
2380 OutFile.NewLine();
2381 OutFile.Put(CommentSeparator);
2382 OutFile.Tabulate(AsmTab1);
2383 OutFile.Put(Syntax == SUBTYPE_GASM ? ".byte " : "db ");
2384 }
2385 else {
2386 // Continue on same line
2387 OutFile.Put(", ");
2388 }
2389 // Write byte value
2390 OutFile.PutHex(Get<uint8_t>(Pos), 1);
2391 }
2392 // Blank line
2393 OutFile.NewLine(); OutFile.NewLine();
2394
2395 // Find alignment
2396 uint32_t Alignment = 4; // Limit to 2^4 = 16
2397
2398 // Check if first non-filler is aligned by this value
2399 while (Alignment && (IFillerEnd & ((1 << Alignment) - 1))) {
2400 // Not aligned by 2^Alignment
2401 Alignment--;
2402 }
2403 if (Alignment) {
2404
2405 // Check if smaller alignment would do
2406 if (Alignment > 3 && FillerSize < 1u << (Alignment-1)) {
2407 // End is aligned by 16, but there are less than 8 filler bytes.
2408 // Change to align 8
2409 Alignment--;
2410 }
2411 // Write align directive
2412 WriteAlign(1 << Alignment);
2413 // Prevent writing ALIGN again
2414 FlagPrevious &= ~1;
2415 }
2416
2417 // Restore IBegin and IEnd to beginning of first non-filler instruction
2418 IBegin = IEnd = IFillerEnd;
2419
2420 if (LabelInaccessible == IFillerBegin && IFillerEnd < LabelEnd) {
2421 // Mark first instruction after filler as inaccessible
2422 LabelInaccessible = IFillerEnd;
2423 }
2424
2425 // Return success. Fillers have been written. Don't write as normal instructions
2426 return 1;
2427 }
2428
WriteAlign(uint32_t a)2429 void CDisassembler::WriteAlign(uint32_t a) {
2430 // Write alignment directive
2431 OutFile.Put(Syntax == SUBTYPE_GASM ? ".ALIGN" : "ALIGN");
2432 OutFile.Tabulate(AsmTab1);
2433 OutFile.PutDecimal(a);
2434 OutFile.NewLine();
2435 }
2436
WriteFileBegin()2437 void CDisassembler::WriteFileBegin() {
2438 // Write begin of file
2439
2440 OutFile.SetFileType(FILETYPE_ASM);
2441
2442 // Initial comment
2443 OutFile.Put(CommentSeparator);
2444 OutFile.Put("Disassembly of file: ");
2445 OutFile.Put(cmd.InputFile);
2446 OutFile.NewLine();
2447 // Date and time.
2448 // Note: will fail after year 2038 on computers that use 32-bit time_t
2449 time_t time1 = time(0);
2450 char * timestring = ctime(&time1);
2451 if (timestring) {
2452 // Remove terminating '\n' in timestring
2453 for (char *c = timestring; *c; c++) {
2454 if (*c < ' ') *c = 0;
2455 }
2456 // Write date and time as comment
2457 OutFile.Put(CommentSeparator);
2458 OutFile.Put(timestring);
2459 OutFile.NewLine();
2460 }
2461
2462 // Write type and mode
2463 OutFile.Put(CommentSeparator);
2464 OutFile.Put("Type: ");
2465 OutFile.Put(CFileBuffer::GetFileFormatName(cmd.InputType));
2466 OutFile.PutDecimal(WordSize);
2467 OutFile.NewLine();
2468
2469 // Write syntax dialect
2470 OutFile.Put(CommentSeparator);
2471 OutFile.Put("Syntax: ");
2472 switch (Syntax) {
2473 case SUBTYPE_MASM:
2474 OutFile.Put(WordSize < 64 ? "MASM/ML" : "MASM/ML64"); break;
2475 case SUBTYPE_NASM:
2476 OutFile.Put("NASM"); break;
2477 case SUBTYPE_GASM:
2478 OutFile.Put("GAS(Intel)"); break;
2479 }
2480 OutFile.NewLine();
2481
2482 // Write instruction set as comment
2483 // Instruction set is at least .386 if 32 bit mode
2484 if (InstructionSetMax < 3 && (MasmOptions & 0x200)) InstructionSetMax = 3;
2485
2486 // Get name of basic instruction set
2487 const char * set0 = "";
2488 if (InstructionSetMax < InstructionSetNamesLen) {
2489 set0 = InstructionSetNames[InstructionSetMax];
2490 }
2491
2492 // Write as comment
2493 OutFile.Put(CommentSeparator);
2494 OutFile.Put("Instruction set: ");
2495 OutFile.Put(set0);
2496
2497 if (InstructionSetAMDMAX) {
2498 // Get name of any AMD-specific instruction set
2499 const char * setA = "";
2500 switch (InstructionSetAMDMAX) {
2501 case 1: setA = "AMD 3DNow"; break;
2502 case 2: setA = "AMD 3DNowE"; break;
2503 case 4: setA = "AMD SSE4a"; break;
2504 case 5: setA = "AMD XOP"; break;
2505 case 6: setA = "AMD FMA4"; break;
2506 case 7: setA = "AMD TBM"; break;
2507 }
2508 if (*setA) {
2509 OutFile.Put(", ");
2510 OutFile.Put(setA);
2511 }
2512 }
2513 // VIA instruction set:
2514 if (InstructionSetOR & 0x2000) OutFile.Put(", VIA");
2515
2516 // Additional instruction sets:
2517 if (WordSize > 32) OutFile.Put(", x64");
2518 if (InstructionSetOR & 0x100) OutFile.Put(", 80x87");
2519 if (InstructionSetOR & 0x800) OutFile.Put(", privileged instructions");
2520 OutFile.NewLine();
2521
2522 if (NamesChanged) {
2523 // Tell that symbol names have been changed
2524 OutFile.NewLine();
2525 OutFile.Put(CommentSeparator);
2526 OutFile.Put("Error: symbol names contain illegal characters,");
2527 OutFile.NewLine(); OutFile.Put(CommentSeparator);
2528 OutFile.PutDecimal(NamesChanged);
2529 #if ReplaceIllegalChars
2530 OutFile.Put(" Symbol names changed");
2531 #else
2532 OutFile.Put(" Symbol names not changed");
2533 #endif
2534 OutFile.NewLine();
2535 }
2536
2537 // Write syntax-specific initializations
2538 switch (Syntax) {
2539 case SUBTYPE_MASM:
2540 WriteFileBeginMASM();
2541 WritePublicsAndExternalsMASM();
2542 break;
2543 case SUBTYPE_NASM:
2544 WriteFileBeginYASM();
2545 WritePublicsAndExternalsYASMGASM();
2546 break;
2547 case SUBTYPE_GASM:
2548 WriteFileBeginGASM();
2549 WritePublicsAndExternalsYASMGASM();
2550 break;
2551 }
2552 }
2553
2554
WriteFileBeginMASM()2555 void CDisassembler::WriteFileBeginMASM() {
2556 // Write MASM-specific file init
2557 if (WordSize < 64) {
2558 // Write instruction set directive, except for 64 bit assembler
2559 const char * set1 = "";
2560 switch (InstructionSetMax) {
2561 case 0: set1 = ".8086"; break;
2562 case 1: set1 = ".186"; break;
2563 case 2: set1 = ".286"; break;
2564 case 3: set1 = ".386"; break;
2565 case 4: set1 = ".486"; break;
2566 case 5: set1 = ".586"; break;
2567 case 6: default:
2568 set1 = ".686"; break;
2569 }
2570 // Write basic instruction set
2571 OutFile.NewLine();
2572 OutFile.Put(set1);
2573 if (InstructionSetOR & 0x800) {
2574 // Privileged. Add "p"
2575 OutFile.Put("p");
2576 }
2577 OutFile.NewLine();
2578 // Write extended instruction set
2579 if (InstructionSetOR & 0x100) {
2580 // Floating point
2581 if (InstructionSetMax < 3) {
2582 OutFile.Put(".8087"); OutFile.NewLine();
2583 }
2584 else if (InstructionSetMax < 5) {
2585 OutFile.Put(".387"); OutFile.NewLine();
2586 }
2587 }
2588 if (InstructionSetMax >= 0x11) {
2589 // .xmm directive. Not differentiated between SSE, SSE2, etc.
2590 OutFile.Put(".xmm"); OutFile.NewLine();
2591 }
2592 else if (InstructionSetMax >= 7) {
2593 // .mmx directive
2594 OutFile.Put(".mmx"); OutFile.NewLine();
2595 }
2596 }
2597 if (MasmOptions & 1) {
2598 // Need dotname option
2599 OutFile.Put("option dotname"); OutFile.NewLine();
2600 }
2601 if (WordSize == 32) {
2602 // Write .model flat if 32 bit mode
2603 OutFile.Put(".model flat"); OutFile.NewLine();
2604 }
2605 // Initialize Assumes for segment registers
2606 if (!(MasmOptions & 0x100)) {
2607 // No 16-bit segments. Assume CS=DS=ES=SS=flat
2608 Assumes[0]=Assumes[1]=Assumes[2]=Assumes[3] = ASM_SEGMENT_FLAT;
2609 }
2610 else {
2611 // 16-bit segmented model. Segment register values unknown
2612 Assumes[0]=Assumes[1]=Assumes[2]=Assumes[3] = ASM_SEGMENT_UNKNOWN;
2613 }
2614 // FS and GS assumed to ERROR
2615 Assumes[4] = Assumes[5] = ASM_SEGMENT_ERROR;
2616
2617 // Write assume if FS or GS used
2618 // This is superfluous because an assume directive will be written at first use of FS/GS
2619 if (MasmOptions & 2) {
2620 OutFile.Put("assume fs:nothing"); OutFile.NewLine();
2621 }
2622 if (MasmOptions & 4) {
2623 OutFile.Put("assume gs:nothing"); OutFile.NewLine();
2624 }
2625 OutFile.NewLine(); // Blank line
2626 }
2627
WriteFileBeginYASM()2628 void CDisassembler::WriteFileBeginYASM() {
2629 // Write YASM-specific file init
2630 OutFile.NewLine();
2631 if (WordSize == 64) {
2632 OutFile.Put("default rel"); OutFile.NewLine();
2633 }
2634 //if (InstructionSetMax >= 0x11) {OutFile.Put("%define xmmword oword"); OutFile.NewLine();}
2635 //if (InstructionSetMax >= 0x19) {OutFile.Put("%define ymmword"); OutFile.NewLine();}
2636 OutFile.NewLine();
2637 }
2638
WriteFileBeginGASM()2639 void CDisassembler::WriteFileBeginGASM() {
2640 // Write GAS-specific file init
2641 OutFile.NewLine();
2642 OutFile.Put(CommentSeparator);
2643 OutFile.Put("Note: Uses Intel syntax with destination operand first. Remember to");
2644 OutFile.NewLine();
2645 OutFile.Put(CommentSeparator);
2646 OutFile.Put("put syntax directives in the beginning and end of inline assembly:");
2647 OutFile.NewLine();
2648 OutFile.Put(".intel_syntax noprefix ");
2649 OutFile.NewLine(); OutFile.NewLine();
2650 }
2651
WritePublicsAndExternalsMASM()2652 void CDisassembler::WritePublicsAndExternalsMASM() {
2653 // Write public and external symbol definitions
2654 uint32_t i; // Loop counter
2655 uint32_t LinesWritten = 0; // Count lines written
2656 const char * XName; // Name of external symbols
2657
2658 // Loop through public symbols
2659 for (i = 0; i < Symbols.GetNumEntries(); i++) {
2660 if (Symbols[i].Scope & 0x1C) {
2661 // Symbol is public
2662 OutFile.Put("public ");
2663 // Write name
2664 OutFile.Put(Symbols.GetName(i));
2665 // Check if weak or communal
2666 if (Symbols[i].Scope & 0x18) {
2667 // Scope is weak or communal
2668 OutFile.Tabulate(AsmTab3);
2669 OutFile.Put(CommentSeparator);
2670 if (Symbols[i].Scope & 8) OutFile.Put("Note: Weak. Not supported by MASM ");
2671 if (Symbols[i].Scope & 0x10) OutFile.Put("Note: Communal. Not supported by MASM");
2672 }
2673 OutFile.NewLine(); LinesWritten++;
2674 }
2675 }
2676 // Blank line if anything written
2677 if (LinesWritten) {
2678 OutFile.NewLine();
2679 LinesWritten = 0;
2680 }
2681 // Loop through external symbols
2682 for (i = 0; i < Symbols.GetNumEntries(); i++) {
2683
2684 if (Symbols[i].Scope & 0x20) {
2685 // Symbol is external
2686 OutFile.Put("extern ");
2687 // Get name
2688 XName = Symbols.GetName(i);
2689 // Check for dynamic import
2690 if (Symbols[i].DLLName && strncmp(XName, Symbols.ImportTablePrefix, (uint32_t)strlen(Symbols.ImportTablePrefix)) == 0) {
2691 // Remove "_imp" prefix from name
2692 XName += (uint32_t)strlen(Symbols.ImportTablePrefix);
2693 }
2694
2695 // Write name
2696 OutFile.Put(XName);
2697 OutFile.Put(": ");
2698
2699 // Write type
2700 if ((Symbols[i].Type & 0xFE) == 0x84) {
2701 // Far
2702 OutFile.Put("far");
2703 }
2704 else if ((Symbols[i].Type & 0xF0) == 0x80 || Symbols[i].DLLName) {
2705 // Near
2706 OutFile.Put("near");
2707 }
2708 else {
2709 // Data. Write size
2710 switch (GetDataItemSize(Symbols[i].Type)) {
2711 case 1: default: OutFile.Put("byte"); break;
2712 case 2: OutFile.Put("word"); break;
2713 case 4: OutFile.Put("dword"); break;
2714 case 6: OutFile.Put("fword"); break;
2715 case 8: OutFile.Put("qword"); break;
2716 case 10: OutFile.Put("tbyte"); break;
2717 case 16: OutFile.Put("xmmword"); break;
2718 case 32: OutFile.Put("ymmword"); break;
2719 }
2720 }
2721 // Add comment if DLL import
2722 if (Symbols[i].DLLName) {
2723 OutFile.Tabulate(AsmTab3);
2724 OutFile.Put(CommentSeparator);
2725 OutFile.Put(Symbols.GetDLLName(i));
2726 }
2727 // Finished line
2728 OutFile.NewLine(); LinesWritten++;
2729 }
2730 }
2731 // Blank line if anything written
2732 if (LinesWritten) {
2733 OutFile.NewLine();
2734 LinesWritten = 0;
2735 }
2736 // Write the value of any constants
2737 // Loop through symbols
2738 for (i = 0; i < Symbols.GetNumEntries(); i++) {
2739 // Local symbols included because there might be a rip-relative address to a named constant = 0
2740 if (Symbols[i].Section == ASM_SEGMENT_ABSOLUTE /*&& (Symbols[i].Scope & 0x1C)*/) {
2741 // Symbol is constant
2742 // Write name
2743 OutFile.Put(Symbols.GetName(i));
2744 OutFile.Put(" equ ");
2745 // Write value as hexadecimal
2746 OutFile.PutHex(Symbols[i].Offset, 1);
2747 // Write decimal value as comment
2748 OutFile.Tabulate(AsmTab3);
2749 OutFile.Put(CommentSeparator);
2750 OutFile.PutDecimal(Symbols[i].Offset, 1);
2751 OutFile.NewLine(); LinesWritten++;
2752 }
2753 }
2754 // Blank line if anything written
2755 if (LinesWritten) {
2756 OutFile.NewLine();
2757 LinesWritten = 0;
2758 }
2759 // Write any group definitions
2760 int32_t GroupId, SegmentId;
2761 // Loop through sections to search for group definitions
2762 for (GroupId = 1; GroupId < (int32_t)Sections.GetNumEntries(); GroupId++) {
2763
2764 // Get section type
2765 uint32_t SectionType = Sections[GroupId].Type;
2766 if (SectionType & 0x800) {
2767 // This is a segment group definition
2768 // Count number of members
2769 uint32_t NumMembers = 0;
2770 // Write group name
2771 WriteSectionName(GroupId);
2772 // Write "group"
2773 OutFile.Put(" "); OutFile.Tabulate(AsmTab1); OutFile.Put("GROUP ");
2774 // Search for group members
2775 for (SegmentId = 1; SegmentId < (int32_t)Sections.GetNumEntries(); SegmentId++) {
2776 if (Sections[SegmentId].Group == GroupId && !(Sections[SegmentId].Type & 0x800)) {
2777 // is this first member?
2778 if (NumMembers++) {
2779 // Not first member. Write comma
2780 OutFile.Put(", ");
2781 }
2782 // Write group member
2783 WriteSectionName(SegmentId);
2784 }
2785 }
2786 // End line
2787 OutFile.NewLine(); LinesWritten++;
2788 }
2789 }
2790 // Blank line if anything written
2791 if (LinesWritten) {
2792 OutFile.NewLine();
2793 LinesWritten = 0;
2794 }
2795 }
2796
2797
WritePublicsAndExternalsYASMGASM()2798 void CDisassembler::WritePublicsAndExternalsYASMGASM() {
2799 // Write public and external symbol definitions, YASM and GAS syntax
2800 uint32_t i; // Loop counter
2801 uint32_t LinesWritten = 0; // Count lines written
2802 const char * XName; // Name of external symbols
2803
2804 // Loop through public symbols
2805 for (i = 0; i < Symbols.GetNumEntries(); i++) {
2806 if (Symbols[i].Scope & 0x1C) {
2807 // Symbol is public
2808 if (Syntax == SUBTYPE_GASM) OutFile.Put(".");
2809 OutFile.Put("global ");
2810 // Write name
2811 OutFile.Put(Symbols.GetName(i));
2812
2813 // Write type
2814 if ((Symbols[i].Type & 0xF0) == 0x80) {
2815 // Symbol is a function
2816 if (Syntax == SUBTYPE_NASM) {
2817 OutFile.Put(": function");
2818 }
2819 else if (Syntax == SUBTYPE_GASM) {
2820 OutFile.NewLine();
2821 OutFile.Put(".type ");
2822 OutFile.Put(Symbols.GetName(i));
2823 OutFile.Put(", @function");
2824 }
2825 }
2826
2827 // Check if weak or communal
2828 if (Symbols[i].Scope & 0x18) {
2829 // Scope is weak or communal
2830 OutFile.Tabulate(AsmTab3);
2831 OutFile.Put(CommentSeparator);
2832 if (Symbols[i].Scope & 8) OutFile.Put("Note: Weak.");
2833 if (Symbols[i].Scope & 0x10) OutFile.Put("Note: Communal.");
2834 }
2835 OutFile.NewLine(); LinesWritten++;
2836 }
2837 }
2838 // Blank line if anything written
2839 if (LinesWritten) {
2840 OutFile.NewLine();
2841 LinesWritten = 0;
2842 }
2843 // Loop through external symbols
2844 for (i = 0; i < Symbols.GetNumEntries(); i++) {
2845
2846 if (Symbols[i].Scope & 0x20) {
2847 // Symbol is external
2848 if (Syntax == SUBTYPE_GASM) OutFile.Put(".");
2849 OutFile.Put("extern ");
2850 // Get name
2851 XName = Symbols.GetName(i);
2852 // Check for dynamic import
2853 if (Symbols[i].DLLName && strncmp(XName, Symbols.ImportTablePrefix, (uint32_t)strlen(Symbols.ImportTablePrefix)) == 0) {
2854 // Remove "_imp" prefix from name
2855 XName += (uint32_t)strlen(Symbols.ImportTablePrefix);
2856 }
2857 // Write name
2858 OutFile.Put(XName);
2859 OutFile.Put(" ");
2860 OutFile.Tabulate(AsmTab3);
2861 OutFile.Put(CommentSeparator);
2862
2863 // Write type
2864 if ((Symbols[i].Type & 0xFE) == 0x84) {
2865 // Far
2866 OutFile.Put("far");
2867 }
2868 else if ((Symbols[i].Type & 0xF0) == 0x80 || Symbols[i].DLLName) {
2869 // Near
2870 OutFile.Put("near");
2871 }
2872 else {
2873 // Data. Write size
2874 switch (GetDataItemSize(Symbols[i].Type)) {
2875 case 1: default: OutFile.Put("byte"); break;
2876 case 2: OutFile.Put("word"); break;
2877 case 4: OutFile.Put("dword"); break;
2878 case 6: OutFile.Put("fword"); break;
2879 case 8: OutFile.Put("qword"); break;
2880 case 10: OutFile.Put("tbyte"); break;
2881 case 16: OutFile.Put("xmmword"); break;
2882 case 32: OutFile.Put("ymmword"); break;
2883 }
2884 }
2885 // Add comment if DLL import
2886 if (Symbols[i].DLLName) {
2887 OutFile.Tabulate(AsmTab3);
2888 OutFile.Put(CommentSeparator);
2889 OutFile.Put(Symbols.GetDLLName(i));
2890 }
2891 // Finished line
2892 OutFile.NewLine(); LinesWritten++;
2893 }
2894 }
2895 // Blank line if anything written
2896 if (LinesWritten) {
2897 OutFile.NewLine(); LinesWritten = 0;
2898 }
2899 // Write the value of any constants
2900 // Loop through symbols
2901 for (i = 0; i < Symbols.GetNumEntries(); i++) {
2902 if (Symbols[i].Section == ASM_SEGMENT_ABSOLUTE /*&& (Symbols[i].Scope & 0x1C)*/) {
2903 // Symbol is constant
2904 if (Syntax == SUBTYPE_NASM) {
2905 // Write name equ value
2906 OutFile.Put(Symbols.GetName(i));
2907 OutFile.Put(" equ ");
2908 }
2909 else {
2910 // Gas: write .equ name, value
2911 OutFile.Put(".equ ");
2912 OutFile.Tabulate(AsmTab1);
2913 OutFile.Put(Symbols.GetName(i));
2914 OutFile.Put(", ");
2915 }
2916 // Write value as hexadecimal
2917 OutFile.PutHex(Symbols[i].Offset, 1);
2918 // Write decimal value as comment
2919 OutFile.Tabulate(AsmTab3);
2920 OutFile.Put(CommentSeparator);
2921 OutFile.PutDecimal(Symbols[i].Offset, 1);
2922 OutFile.NewLine(); LinesWritten++;
2923 }
2924 }
2925 // Blank line if anything written
2926 if (LinesWritten) {
2927 OutFile.NewLine();
2928 LinesWritten = 0;
2929 }
2930 // Write any group definitions
2931 int32_t GroupId, SegmentId;
2932 // Loop through sections to search for group definitions
2933 for (GroupId = 1; GroupId < (int32_t)Sections.GetNumEntries(); GroupId++) {
2934 // Get section type
2935 uint32_t SectionType = Sections[GroupId].Type;
2936 if (SectionType & 0x800) {
2937 // This is a segment group definition
2938 // Count number of members
2939 uint32_t NumMembers = 0;
2940 // Write group name
2941 WriteSectionName(GroupId);
2942 // Write "group"
2943 OutFile.Put(" "); OutFile.Tabulate(AsmTab1); OutFile.Put("GROUP ");
2944 // Search for group members
2945 for (SegmentId = 1; SegmentId < (int32_t)Sections.GetNumEntries(); SegmentId++) {
2946 if (Sections[SegmentId].Group == GroupId && !(Sections[SegmentId].Type & 0x800)) {
2947 // is this first member?
2948 if (NumMembers++) {
2949 // Not first member. Write comma
2950 OutFile.Put(", ");
2951 }
2952 // Write group member
2953 WriteSectionName(SegmentId);
2954 }
2955 }
2956 // End line
2957 OutFile.NewLine(); LinesWritten++;
2958 }
2959 }
2960 // Blank line if anything written
2961 if (LinesWritten) {
2962 OutFile.NewLine();
2963 LinesWritten = 0;
2964 }
2965 }
2966
2967
WriteFileEnd()2968 void CDisassembler::WriteFileEnd() {
2969 // Write end of file
2970 OutFile.NewLine();
2971 switch(Syntax) {
2972 case SUBTYPE_MASM:
2973 OutFile.Put("END"); break;
2974 case SUBTYPE_GASM:
2975 OutFile.Put(CommentSeparator);
2976 OutFile.Put("Return to AT&T syntax with destination operand last:");
2977 OutFile.NewLine();
2978 OutFile.Put(".att_syntax prefix ");
2979 OutFile.NewLine();
2980 break;
2981 case SUBTYPE_NASM:
2982 break;
2983 }
2984 }
2985
2986
WriteSegmentBegin()2987 void CDisassembler::WriteSegmentBegin() {
2988 // Write begin of segment
2989 // Choose dialect
2990 switch (Syntax) {
2991 case SUBTYPE_MASM:
2992 WriteSegmentBeginMASM(); break;
2993 case SUBTYPE_NASM:
2994 WriteSegmentBeginYASM(); break;
2995 case SUBTYPE_GASM:
2996 WriteSegmentBeginGASM(); break;
2997 }
2998 }
2999
3000
WriteSegmentBeginMASM()3001 void CDisassembler::WriteSegmentBeginMASM() {
3002 // Write begin of segment
3003 OutFile.NewLine(); // Blank line
3004
3005 // Check if Section is valid
3006 if (Section == 0 || Section >= Sections.GetNumEntries()) {
3007 // Illegal segment entry
3008 OutFile.Put("UNKNOWN SEGMENT"); OutFile.NewLine();
3009 return;
3010 }
3011
3012 // Write segment name
3013 WriteSectionName(Section);
3014 // Tabulate
3015 OutFile.Put(" "); OutFile.Tabulate(AsmTab1);
3016 // Write "segment"
3017 OutFile.Put("SEGMENT ");
3018
3019 // Write alignment
3020 switch (Sections[Section].Align) {
3021 case 0: // 1
3022 OutFile.Put("BYTE "); break;
3023 case 1: // 2
3024 OutFile.Put("WORD "); break;
3025 case 2: // 4
3026 OutFile.Put("DWORD "); break;
3027 case 4: // 16
3028 OutFile.Put("PARA "); break;
3029 //case 8: // 256 or 4096. Definition is ambiguous!
3030 // OutFile.Put("PAGE "); break;
3031 default:
3032 // Non-standard alignment
3033 OutFile.Put("ALIGN(");
3034 OutFile.PutDecimal(1 << Sections[Section].Align);
3035 OutFile.Put(") ");
3036 break;
3037 }
3038 if (WordSize != 64) {
3039 // "PUBLIC" not supported by ml64 assembler
3040 OutFile.Put("PUBLIC ");
3041 // Write segment word size if necessary
3042 if (MasmOptions & 0x100) {
3043 // There is at least one 16-bit segment. Write segment word size
3044 OutFile.Put("USE");
3045 OutFile.PutDecimal(Sections[Section].WordSize);
3046 OutFile.Put(" ");
3047 }
3048 }
3049 // Write segment class
3050 switch (Sections[Section].Type & 0xFF) {
3051 case 1:
3052 OutFile.Put("'CODE'"); break;
3053 case 2:
3054 OutFile.Put("'DATA'"); break;
3055 case 3:
3056 OutFile.Put("'BSS'"); break;
3057 case 4:
3058 OutFile.Put("'CONST'"); break;
3059 default:;
3060 // Unknown class. Write nothing
3061 }
3062
3063 // Tabulate to comment
3064 OutFile.Put(" "); OutFile.Tabulate(AsmTab3);
3065 OutFile.Put(CommentSeparator);
3066 // Write section number
3067 OutFile.Put("section number ");
3068 OutFile.PutDecimal(Section);
3069
3070 // New line
3071 OutFile.NewLine();
3072
3073 if (Sections[Section].Type & 0x1000) {
3074 // Communal
3075 OutFile.Put(CommentSeparator);
3076 OutFile.Put(" Communal section not supported by MASM");
3077 OutFile.NewLine();
3078 }
3079
3080 if (WordSize == 16 && Sections[Section].Type == 1) {
3081 // 16 bit code segment. Write ASSUME CS: SEGMENTNAME
3082 OutFile.Put("ASSUME ");
3083 OutFile.Tabulate(AsmTab1);
3084 OutFile.Put("CS:");
3085 if (Sections[Section].Group) {
3086 // Group name takes precedence over segment name
3087 WriteSectionName(Sections[Section].Group);
3088 }
3089 else {
3090 WriteSectionName(Section);
3091 }
3092 OutFile.NewLine();
3093 Assumes[1] = Section;
3094 }
3095 }
3096
WriteSegmentBeginYASM()3097 void CDisassembler::WriteSegmentBeginYASM() {
3098 // Write begin of segment
3099 OutFile.NewLine(); // Blank line
3100
3101 // Check if Section is valid
3102 if (Section == 0 || Section >= Sections.GetNumEntries()) {
3103 // Illegal segment entry
3104 OutFile.Put("UNKNOWN SEGMENT"); OutFile.NewLine();
3105 return;
3106 }
3107
3108 // Write SECTION directive
3109 OutFile.Put("SECTION ");
3110 // Write segment name
3111 WriteSectionName(Section);
3112 // Tabulate
3113 OutFile.Put(" "); OutFile.Tabulate(AsmTab2);
3114 OutFile.Put("align=");
3115 OutFile.PutDecimal(1 << Sections[Section].Align);
3116 if (Sections[Section].WordSize != WordSize) {
3117 OutFile.Put(" use");
3118 OutFile.PutDecimal(Sections[Section].WordSize);
3119 }
3120 if ((Sections[Section].Type & 0xFF) == 1) {
3121 OutFile.Put(" execute");
3122 }
3123 else {
3124 OutFile.Put(" noexecute");
3125 }
3126
3127 // Tabulate to comment
3128 OutFile.Put(" "); OutFile.Tabulate(AsmTab3);
3129 OutFile.Put(CommentSeparator);
3130 // Write section number
3131 OutFile.Put("section number ");
3132 OutFile.PutDecimal(Section);
3133 // Write type
3134 OutFile.Put(", ");
3135 switch (Sections[Section].Type & 0xFF) {
3136 case 1: OutFile.Put("code"); break;
3137 case 2: OutFile.Put("data"); break;
3138 case 3: OutFile.Put("bss"); break;
3139 case 4: OutFile.Put("const"); break;
3140 default: OutFile.Put("unknown type: ");
3141 OutFile.PutHex(Sections[Section].Type & 0xFF);
3142 break;
3143 }
3144
3145 // New line
3146 OutFile.NewLine();
3147
3148 if (Sections[Section].Type & 0x1000) {
3149 // Communal
3150 OutFile.Put(CommentSeparator);
3151 OutFile.Put(" Communal section not supported by YASM");
3152 OutFile.NewLine();
3153 }
3154 }
3155
WriteSegmentBeginGASM()3156 void CDisassembler::WriteSegmentBeginGASM() {
3157 // Write begin of segment
3158 uint32_t Type; // Section type
3159
3160 OutFile.NewLine(); // Blank line
3161
3162 // Check if Section is valid
3163 if (Section == 0 || Section >= Sections.GetNumEntries()) {
3164 // Illegal segment entry
3165 OutFile.Put("UNKNOWN SEGMENT"); OutFile.NewLine();
3166 return;
3167 }
3168
3169 // Write SECTION directive
3170 OutFile.Put(".SECTION ");
3171 OutFile.Tabulate(AsmTab1);
3172 // Write segment name
3173 WriteSectionName(Section);
3174 // Tabulate
3175 OutFile.Put(" "); OutFile.Tabulate(AsmTab2);
3176 // Flags not supported by all versions of Gas. Put as comment:
3177 OutFile.Put(CommentSeparator);
3178 // Write flags
3179 OutFile.Put('"');
3180 Type = Sections[Section].Type & 0xFF;
3181 if (Type) OutFile.Put('a'); // Allocatable
3182 if (Type != 1 && Type != 4) OutFile.Put('w'); // Writeable
3183 if (Type == 1) OutFile.Put('x'); // Executable
3184 OutFile.Put('"');
3185 if (Type) OutFile.Put(", @progbits"); // Allocatable
3186
3187 // Tabulate to comment
3188 OutFile.Put(" "); OutFile.Tabulate(AsmTab3);
3189 OutFile.Put(CommentSeparator);
3190 // Write section number
3191 OutFile.Put("section number ");
3192 OutFile.PutDecimal(Section);
3193 // Write type
3194 OutFile.Put(", ");
3195 switch (Sections[Section].Type & 0xFF) {
3196 case 1: OutFile.Put("code"); break;
3197 case 2: OutFile.Put("data"); break;
3198 case 3: OutFile.Put("bss"); break;
3199 case 4: OutFile.Put("const"); break;
3200 default: OutFile.Put("unknown"); break;
3201 }
3202 OutFile.NewLine(); // Blank line
3203 if (Sections[Section].Type & 0x1000) {
3204 // Communal
3205 OutFile.Put(CommentSeparator);
3206 OutFile.Put(" Communal section ");
3207 OutFile.NewLine();
3208 }
3209
3210 // Write alignment
3211 OutFile.Tabulate(AsmTab1);
3212 OutFile.Put(".ALIGN");
3213 OutFile.Tabulate(AsmTab2);
3214 OutFile.PutDecimal(1 << Sections[Section].Align);
3215
3216 // New line
3217 OutFile.NewLine();
3218 }
3219
3220
WriteSegmentEnd()3221 void CDisassembler::WriteSegmentEnd() {
3222 // Write end of segment
3223 OutFile.NewLine();
3224
3225 if (Syntax != SUBTYPE_MASM) {
3226 // Not MASM syntax, write only blank line
3227 return;
3228 }
3229
3230 // Check if Section is valid
3231 if (Section == 0 || Section >= Sections.GetNumEntries()) {
3232 // Illegal segment entry
3233 OutFile.Put("UNKNOWN ENDS"); OutFile.NewLine();
3234 return;
3235 }
3236
3237 // Write segment name
3238 const char * segname = (char*)NameBuffer.Buf() + Sections[Section].Name;
3239 OutFile.Put(segname);
3240
3241 // Tabulate
3242 OutFile.Put(" "); OutFile.Tabulate(AsmTab1);
3243 // Write "segment"
3244 OutFile.Put("ENDS");
3245 // New line
3246 OutFile.NewLine();
3247 }
3248
3249
3250
WriteFunctionBegin()3251 void CDisassembler::WriteFunctionBegin() {
3252 // Write begin of function IFunction
3253
3254 // Check if IFunction is valid
3255 if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) {
3256 // Should not occur
3257 OutFile.Put(CommentSeparator);
3258 OutFile.Put("Internal error: undefined function begin");
3259 return;
3260 }
3261
3262 // Get symbol old index
3263 uint32_t symi = FunctionList[IFunction].OldSymbolIndex;
3264
3265 // Get symbol record
3266 uint32_t SymI = Symbols.Old2NewIndex(symi);
3267
3268 OutFile.NewLine(); // Blank line
3269
3270 // Remember that symbol has been written
3271 Symbols[SymI].Scope |= 0x100;
3272
3273 // Check alignment if preceded by NOP
3274 if ((FlagPrevious & 1) && (IBegin & 0x0F) == 0 && Sections[Section].Align >= 4) {
3275 WriteAlign(16);
3276 }
3277
3278 if (Symbols[SymI].Name == 0) {
3279 // Has no name. Probably only NOP fillers
3280 return;
3281 }
3282
3283 // Write function name etc.
3284 switch (Syntax) {
3285 case SUBTYPE_MASM:
3286 WriteFunctionBeginMASM(SymI, Symbols[SymI].Scope); break;
3287 case SUBTYPE_NASM:
3288 WriteFunctionBeginYASM(SymI, Symbols[SymI].Scope); break;
3289 case SUBTYPE_GASM:
3290 WriteFunctionBeginGASM(SymI, Symbols[SymI].Scope); break;
3291 }
3292 }
3293
WriteFunctionBeginMASM(uint32_t symi,uint32_t scope)3294 void CDisassembler::WriteFunctionBeginMASM(uint32_t symi, uint32_t scope) {
3295 // Write begin of function, MASM syntax
3296 // Write name
3297 WriteSymbolName(symi);
3298 // Space
3299 OutFile.Put(" "); OutFile.Tabulate(AsmTab1);
3300
3301 if (scope & 0x1C) {
3302 // Scope is public
3303 // Write "PROC"
3304 OutFile.Put("PROC");
3305 // Write "NEAR" unless 64 bit mode
3306 if (WordSize < 64) OutFile.Put(" NEAR");
3307 // Check if weak
3308 if (scope & 8) {
3309 OutFile.NewLine();
3310 OutFile.Put(CommentSeparator);
3311 OutFile.Put(" WEAK ");
3312 WriteSymbolName(symi);
3313 }
3314 // Check if communal
3315 if (scope & 0x10) {
3316 OutFile.NewLine();
3317 OutFile.Put(CommentSeparator);
3318 OutFile.Put(" COMDEF ");
3319 WriteSymbolName(symi);
3320 }
3321 }
3322 else {
3323 // Scope is local
3324 OutFile.Put("LABEL NEAR");
3325 }
3326 // Check if Gnu indirect
3327 if (Symbols[symi].Type & 0x40000000) {
3328 OutFile.Put(CommentSeparator);
3329 OutFile.Put("Gnu indirect function"); // Cannot be represented in Masm syntax
3330 }
3331 // End line
3332 OutFile.NewLine();
3333 }
3334
WriteFunctionBeginYASM(uint32_t symi,uint32_t scope)3335 void CDisassembler::WriteFunctionBeginYASM(uint32_t symi, uint32_t scope) {
3336 // Write begin of function, YASM syntax
3337 // Write name
3338 WriteSymbolName(symi);
3339 // Colon
3340 OutFile.Put(":"); OutFile.Tabulate(AsmTab1);
3341
3342 if (scope & 0x1C) {
3343 // Scope is public
3344 // Write comment
3345 OutFile.Put(CommentSeparator);
3346 OutFile.Put("Function begin");
3347 // Check if weak
3348 if (scope & 8) {
3349 OutFile.Put(", weak");
3350 }
3351 // Check if communal
3352 if (scope & 0x10) {
3353 OutFile.Put(", communal");
3354 }
3355 }
3356 else {
3357 // Scope is local. Write comment
3358 OutFile.Put(CommentSeparator);
3359 OutFile.Put("Local function");
3360 }
3361 // Check if Gnu indirect
3362 if (Symbols[symi].Type & 0x40000000) {
3363 OutFile.Put(CommentSeparator);
3364 OutFile.Put("Gnu indirect function"); // Cannot be represented in NASM/YASM syntax
3365 }
3366 // End line
3367 OutFile.NewLine();
3368 }
3369
WriteFunctionBeginGASM(uint32_t symi,uint32_t scope)3370 void CDisassembler::WriteFunctionBeginGASM(uint32_t symi, uint32_t scope) {
3371 // Write begin of function, GAS syntax
3372 WriteSymbolName(symi); // Write name
3373 OutFile.Put(":");
3374 OutFile.Tabulate(AsmTab3); OutFile.Put(CommentSeparator);
3375 if (scope & 3) OutFile.Put("Local ");
3376 if (scope & 8) OutFile.Put("weak ");
3377 if (scope & 0x10) OutFile.Put("communal ");
3378 OutFile.Put("Function");
3379 OutFile.NewLine();
3380 OutFile.Tabulate(AsmTab1);
3381 OutFile.Put(".type ");
3382 OutFile.Tabulate(AsmTab2);
3383 WriteSymbolName(symi); // Write name
3384 if (Symbols[symi].Type & 0x40000000) {
3385 OutFile.Put(", @gnu_indirect_function");
3386 }
3387 else {
3388 OutFile.Put(", @function");
3389 }
3390 OutFile.NewLine();
3391 }
3392
3393
WriteFunctionEnd()3394 void CDisassembler::WriteFunctionEnd() {
3395 // Write end of function
3396
3397 // Check if IFunction is valid
3398 if (IFunction == 0 || IFunction >= FunctionList.GetNumEntries()) {
3399 // Should not occur
3400 OutFile.Put(CommentSeparator);
3401 OutFile.Put("Internal error: undefined function end");
3402 return;
3403 }
3404
3405 // Get symbol index
3406 uint32_t SymOldI = FunctionList[IFunction].OldSymbolIndex;
3407 uint32_t SymNewI = Symbols.Old2NewIndex(SymOldI);
3408
3409 // check scope
3410 if (Symbols[SymNewI].Scope & 0x1C) {
3411 // Has public scope. Write end of function
3412 switch (Syntax) {
3413 case SUBTYPE_MASM:
3414 WriteFunctionEndMASM(SymNewI); break;
3415 case SUBTYPE_NASM:
3416 WriteFunctionEndYASM(SymNewI); break;
3417 case SUBTYPE_GASM:
3418 WriteFunctionEndGASM(SymNewI); break;
3419 }
3420 }
3421 }
3422
WriteFunctionEndMASM(uint32_t symi)3423 void CDisassembler::WriteFunctionEndMASM(uint32_t symi) {
3424 // Write end of function, MASM syntax
3425 // Write name
3426 WriteSymbolName(symi);
3427
3428 // Space
3429 OutFile.Put(" "); OutFile.Tabulate(AsmTab1);
3430 // Write "ENDP"
3431 OutFile.Put("ENDP");
3432 OutFile.NewLine();
3433 }
3434
WriteFunctionEndYASM(uint32_t symi)3435 void CDisassembler::WriteFunctionEndYASM(uint32_t symi) {
3436 // Write end of function, YASM syntax
3437 // Write comment
3438 OutFile.Put(CommentSeparator);
3439 // Write name
3440 WriteSymbolName(symi);
3441 OutFile.Put(" End of function");
3442 OutFile.NewLine();
3443 }
3444
WriteFunctionEndGASM(uint32_t symi)3445 void CDisassembler::WriteFunctionEndGASM(uint32_t symi){
3446 // Write end of function, GAS syntax
3447 // Write .size directive
3448 OutFile.Tabulate(AsmTab1);
3449 OutFile.Put(".size ");
3450 OutFile.Tabulate(AsmTab2);
3451 WriteSymbolName(symi); // Name of function
3452 OutFile.Put(", . - ");
3453 WriteSymbolName(symi); // Name of function
3454 OutFile.Tabulate(AsmTab3);
3455 OutFile.Put(CommentSeparator);
3456 OutFile.Put("End of function is probably here");
3457 OutFile.NewLine();
3458 }
3459
3460
WriteCodeLabel(uint32_t symi)3461 void CDisassembler::WriteCodeLabel(uint32_t symi) {
3462 // Write private or public code label. symi is new symbol index
3463
3464 // Get scope
3465 uint32_t Scope = Symbols[symi].Scope;
3466
3467 // Check scope
3468 if (Scope & 0x100) return; // Has been written as function begin
3469
3470 if (Scope == 0) {
3471 // Inaccessible. No name. Make blank line
3472 OutFile.NewLine();
3473 // Remember position for warning check
3474 LabelInaccessible = IBegin;
3475 return;
3476 }
3477
3478 // Begin on new line if preceded by another symbol
3479 if (OutFile.GetColumn()) OutFile.NewLine();
3480
3481 // Check alignment if preceded by NOP
3482 if ((Scope & 0xFF) > 1 && (FlagPrevious & 1) && (IBegin & 0x0F) == 0 && Sections[Section].Align >= 4) {
3483 WriteAlign(16);
3484 }
3485
3486 switch (Syntax) {
3487 case SUBTYPE_MASM:
3488 WriteCodeLabelMASM(symi, Symbols[symi].Scope); break;
3489 case SUBTYPE_NASM:
3490 WriteCodeLabelYASM(symi, Symbols[symi].Scope); break;
3491 case SUBTYPE_GASM:
3492 WriteCodeLabelGASM(symi, Symbols[symi].Scope); break;
3493 }
3494
3495 // Remember this has been written
3496 Symbols[symi].Scope |= 0x100;
3497 }
3498
3499
WriteCodeLabelMASM(uint32_t symi,uint32_t scope)3500 void CDisassembler::WriteCodeLabelMASM(uint32_t symi, uint32_t scope) {
3501 // Write private or public code label, MASM syntax
3502 if ((scope & 0xFF) > 1) {
3503 // Scope > function local. Write as label near
3504 // Check if extra linefeed needed
3505 // if (!(IFunction && FunctionList[IFunction].Start == IBegin))
3506 // New line
3507 OutFile.NewLine();
3508
3509 // Write name
3510 WriteSymbolName(symi);
3511 // Space
3512 OutFile.Put(" "); OutFile.Tabulate(AsmTab1);
3513 // Write "LABEL"
3514 OutFile.Put("LABEL");
3515 // Write "NEAR" even 64 bit mode
3516 OutFile.Put(" NEAR");
3517 // New line
3518 OutFile.NewLine();
3519
3520 // Check if weak
3521 if (scope & 8) {
3522 OutFile.Put(CommentSeparator);
3523 OutFile.Put(" WEAK ");
3524 WriteSymbolName(symi);
3525 OutFile.NewLine();
3526 }
3527 // Check if communal
3528 if (scope & 0x10) {
3529 OutFile.Put(CommentSeparator);
3530 OutFile.Put(" COMDEF ");
3531 WriteSymbolName(symi);
3532 OutFile.NewLine();
3533 }
3534 }
3535 else {
3536 // Symbol is local to current function. Write name with colon
3537 if (FlagPrevious & 2) {
3538 // Insert blank line if previous instruction was unconditional jump or return
3539 OutFile.NewLine();
3540 }
3541 // Write name
3542 WriteSymbolName(symi);
3543 // Write ":"
3544 OutFile.Put(":");
3545 if (OutFile.GetColumn() > AsmTab1) {
3546 // Past tabstop. Go to next line
3547 OutFile.NewLine(); // New line
3548 }
3549 }
3550 }
3551
WriteCodeLabelYASM(uint32_t symi,uint32_t scope)3552 void CDisassembler::WriteCodeLabelYASM(uint32_t symi, uint32_t scope) {
3553 // Write private or public code label, YASM syntax
3554 if ((scope & 0xFF) > 2) {
3555 // Scope is public
3556 OutFile.NewLine();
3557 // Write name
3558 WriteSymbolName(symi);
3559 OutFile.Put(":");
3560
3561 // Check if weak
3562 if (scope & 8) {
3563 OutFile.Put(CommentSeparator);
3564 OutFile.Put(" weak ");
3565 WriteSymbolName(symi);
3566 }
3567 // Check if communal
3568 if (scope & 0x10) {
3569 OutFile.Put(CommentSeparator);
3570 OutFile.Put(" communal ");
3571 WriteSymbolName(symi);
3572 }
3573 OutFile.NewLine();
3574 }
3575 else {
3576 // Symbol is local to current function. Write name with colon
3577 if (FlagPrevious & 2) {
3578 // Insert blank line if previous instruction was unconditional jump or return
3579 OutFile.NewLine();
3580 }
3581 // Write name
3582 WriteSymbolName(symi);
3583 // Write ":"
3584 OutFile.Put(":");
3585 if (OutFile.GetColumn() > AsmTab1) {
3586 // Past tabstop. Go to next line
3587 OutFile.NewLine(); // New line
3588 }
3589 }
3590 }
3591
void CDisassembler::WriteCodeLabelGASM(uint32_t symi, uint32_t scope) {
    // Write private or public code label, GAS syntax.
    // GAS labels are written exactly like YASM labels, so both arguments
    // (symi = new symbol index, scope = scope bits of the symbol) are simply
    // forwarded to the YASM writer.
    WriteCodeLabelYASM(symi, scope);
}
3596
WriteAssume()3597 void CDisassembler::WriteAssume() {
3598 // Write assume directive for segment register if MASM syntax
3599 if (Syntax != SUBTYPE_MASM) return;
3600 if (!s.AddressField) return;
3601
3602 int32_t SegReg, PrefixSeg; // Segment register used
3603 uint32_t symo; // Target symbol old index
3604 uint32_t symi; // Target symbol new index
3605 int32_t TargetSegment; // Target segment/section
3606 int32_t TargetGroup; // Group containing target segment
3607
3608 // Find which segment register is used for addressing memory operand
3609 SegReg = 3; // DS is default
3610 if (s.BaseReg == 4+1 || s.BaseReg == 5+1) {
3611 // Base register is (E)BP or ESP
3612 SegReg = 2; // SS register used unless there is a prefix
3613 }
3614 if (s.Prefixes[0]) {
3615 // There is a segment prefix
3616 PrefixSeg = GetSegmentRegisterFromPrefix();
3617 if (PrefixSeg >= 0 && PrefixSeg <= 5) {
3618 // Segment prefix is valid. Segment determined by segment prefix
3619 SegReg = PrefixSeg;
3620 }
3621 }
3622 // Default target segment is none
3623 TargetSegment = TargetGroup = 0;
3624
3625 // Find symbol referenced by next instruction
3626 if (s.AddressRelocation && s.AddressRelocation < Relocations.GetNumEntries()) {
3627 symo = Relocations[s.AddressRelocation].TargetOldIndex; // Target symbol old index
3628 if (symo) {
3629 symi = Symbols.Old2NewIndex(symo); // Target symbol new index
3630 if (symi) {
3631 TargetSegment = Symbols[symi].Section; // Target segment
3632 if (TargetSegment < 0 || TargetSegment >= (int32_t)Sections.GetNumEntries()) {
3633 TargetSegment = 0;
3634 }
3635 else {
3636 TargetGroup = Sections[TargetSegment].Group; // Group containing target segment
3637 if (TargetGroup <= ASM_SEGMENT_ERROR || TargetGroup >= (int32_t)Sections.GetNumEntries()) {
3638 TargetGroup = 0;
3639 }
3640 }
3641 }
3642 }
3643 }
3644 if (TargetSegment) {
3645 // Target has a segment. Check if it is different from currently assumed segment
3646 if (TargetSegment != Assumes[SegReg] && TargetGroup != Assumes[SegReg]) {
3647 // Assume directive needed
3648 // If segment belongs to a group then the group takes precedence
3649 if (TargetGroup) TargetSegment = TargetGroup;
3650 // Write assume directive
3651 OutFile.Put("ASSUME ");
3652 OutFile.Tabulate(AsmTab1);
3653 OutFile.Put(RegisterNamesSeg[SegReg]); // Name of segment register used
3654 OutFile.Put(":");
3655 WriteSectionName(TargetSegment); // Name of segment or group referenced
3656 OutFile.NewLine();
3657 Assumes[SegReg] = TargetSegment;
3658 }
3659 }
3660 else {
3661 // Target segment not specified. Assumed value may be anyting but 'error'
3662 if (Assumes[SegReg] <= ASM_SEGMENT_ERROR) {
3663 // Segment register is assumed to 'error'. Change assume to 'nothing'
3664 OutFile.Put("ASSUME ");
3665 OutFile.Tabulate(AsmTab1);
3666 OutFile.Put(RegisterNamesSeg[SegReg]); // Name of segment register used
3667 OutFile.Put(":NOTHING");
3668 OutFile.NewLine();
3669 Assumes[SegReg] = ASM_SEGMENT_NOTHING;
3670 }
3671 }
3672 }
3673
3674
void CDisassembler::WriteInstruction() {
    // Write instruction and operands for the instruction described by s.
    // Steps, in order: optional ASSUME directive (MASM), comment marker for
    // dubious code or explicitly coded prefix bytes, hand-over of string/xlat
    // instructions to WriteStringInstruction, prefixes written as text, opcode
    // name with optional 'v' prefix and size/type suffixes, the operands with
    // EVEX/MVEX attributes, and finally any comment embedded in the opcode name.
    uint32_t NumOperands = 0;                     // Number of operands written
    uint32_t i;                                   // Loop index, also reused as scratch value
    const char * OpName;                          // Opcode name

    if (s.AddressFieldSize && Syntax == SUBTYPE_MASM) {
        // There is a memory operand. Check if ASSUME directive needed
        WriteAssume();
    }

    if (CodeMode & 6) {
        // Code is dubious. Show as comment only
        OutFile.Put(CommentSeparator);            // Start comment
    }
    else if ((s.OpcodeDef->Options & 0x20) && s.OpcodeStart1 > IBegin) {
        // Write prefixes explicitly.
        // This is used for rare cases where the assembler cannot generate the prefix.
        // All bytes from IBegin up to the opcode are written as a data directive
        // on a line of their own
        OutFile.Tabulate(AsmTab1);                // Tabulate
        OutFile.Put(Syntax == SUBTYPE_GASM ? ".byte " : "DB ");
        OutFile.Tabulate(AsmTab2);                // Tabulate
        for (i = IBegin; i < s.OpcodeStart1; i++) {
            if (i > IBegin) OutFile.Put(", ");
            OutFile.PutHex(Get<uint8_t>(i), 1);
        }
        OutFile.Tabulate(AsmTab3);                // Tabulate
        OutFile.Put(CommentSeparator);
        if ((s.OpcodeDef->AllowedPrefixes & 8) && Get<uint8_t>(IBegin) == 0xF2) {
            OutFile.Put("BND prefix coded explicitly");  // Comment
        }
        else {
            OutFile.Put("Prefix coded explicitly");      // Comment
        }
        OutFile.NewLine();
    }

    if ((s.Operands[0] & 0xF0) == 0xC0 || (s.Operands[1] & 0xF0) == 0xC0) {
        // String instruction or xlat instruction. These have their own writer
        WriteStringInstruction();
        return;
    }

    OutFile.Tabulate(AsmTab1);                    // Tabulate

    if ((s.OpcodeDef->AllowedPrefixes & 0xC40) == 0xC40) {
        switch (s.Prefixes[5]) {
        case 0xF2:
            OutFile.Put("xacquire ");  break;     // xacquire prefix
        case 0xF3:
            OutFile.Put("xrelease ");  break;     // xrelease prefix
        }
    }
    if (s.Prefixes[2]) {
        OutFile.Put("lock ");                     // Lock prefix
    }

    // Get opcode name
    if (s.OpcodeDef->Name) {
        // Opcode name
        OpName = s.OpcodeDef->Name;
        // Search for opcode comment. The name string may have the form "name;comment"
        s.OpComment = strchr(OpName, ';');
        if (s.OpComment) s.OpComment++;           // Point to after ';'
    }
    else {
        OpName = "UNDEFINED";                     // Undefined code with no name
        s.OpComment = 0;
    }

    // Check prefix option
    if ((s.OpcodeDef->Options & 2) && (s.Prefixes[7] & 0x30)) {
        // Put prefix 'v' for VEX-prefixed instruction
        OutFile.Put('v');
    }

    // Write opcode name
    if (s.OpComment) {
        // OpName string contains opcode name and comment, separated by ';'
        while (*OpName != ';' && *OpName != 0) {  // Write opcode name until comment
            OutFile.Put(*(OpName++));
        }
    }
    else {
        OutFile.Put(OpName);                      // Write normal opcode name
    }

    // Check suffix option
    if (s.OpcodeDef->Options & 1) {
        // Append suffix for operand size or type to name.
        // Bits 12-14 of AllowedPrefixes (mask 0x7000) select how the suffix is found
        if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x1000) {
            // F.P. operand size defined by W prefix bit
            i = s.Prefixes[7] & 8;                // W prefix bit
            OutFile.Put(i ? 'd' : 's');
        }
        else if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x3000) {
            // Integer or f.p. operand size defined by W prefix bit
            bool f = false;
            // Find out if operands are integer or f.p.
            for (i = 0; i < s.MaxNumOperands; i++) {
                if ((s.Operands[i] & 0xF0) == 0x40) {
                    f = true;  break;
                }
            }
            i = s.Prefixes[7] & 8;                // W prefix bit
            if (f) {
                OutFile.Put(i ? 'd' : 's');       // float precision suffix
            }
            else {
                OutFile.Put(i ? 'q' : 'd');       // integer size suffix
            }
        }
        else if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x4000) {
            // Integer operand size defined by W prefix bit
            i = s.Prefixes[7] & 8;                // W prefix bit
            OutFile.Put(i ? 'w' : 'b');
        }
        else if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x5000) {
            // mask register operand size defined by W prefix bit and 66 prefix.
            // Index into "bwdq": bit 1 = W bit set, bit 0 = no 66 prefix
            i = (s.Prefixes[7] & 8) >> 2;         // W prefix bit
            i |= s.Prefixes[5] != 0x66;           // 66 prefix bit
            OutFile.Put("bwdq"[i]);
        }
        else if (s.OpcodeDef->AllowedPrefixes & 0xE00) {
            // F.P. operand type and size defined by prefixes
            switch (s.Prefixes[5]) {
            case 0:     // No prefix = ps
                OutFile.Put("ps");  break;
            case 0x66:  // 66 prefix = pd
                OutFile.Put("pd");  break;
            case 0xF3:  // F3 prefix = ss
                OutFile.Put("ss");  break;
            case 0xF2:  // F2 prefix = sd
                OutFile.Put("sd");  break;
            default:
                err.submit(9000);                 // Should not occur
            }
        }
        else if (s.OpcodeDef->AllowedPrefixes & 0x100){
            // Integer operand size defined by prefixes
            // Suffix for operand size. Table is indexed by operand size in bytes
            i = s.OperandSize / 8;
            if (i <= 8) {
                static const char SizeSuffixes[] = " bw d f q"; // Table of suffixes
                OutFile.Put(SizeSuffixes[i]);
            }
        }
    }
    // Alternative suffix option
    if (s.OpcodeDef->Options & 0x1000) {
        // Append alternative suffix for vector element size to name.
        // The suffix is a decimal number computed from the W prefix bit
        if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x3000) {
            // Integer operand size defined by W prefix bit
            i = ((s.Prefixes[7] & 8) + 8) * 4;    // W prefix bit -> 32 / 64
            OutFile.PutDecimal(i);
        }
        if ((s.OpcodeDef->AllowedPrefixes & 0x7000) == 0x4000) {
            i = (s.Prefixes[7] & 8) + 8;          // W prefix bit -> 8 / 16
            OutFile.PutDecimal(i);
        }
    }
    // More suffix option
    if ((s.OpcodeDef->Options & 0x400) && s.ImmediateFieldSize == 8) {
        // 64 bit immediate mov
        if (Syntax == SUBTYPE_GASM) OutFile.Put("abs");
    }

    // Space between opcode name and operands
    OutFile.Put(" "); OutFile.Tabulate(AsmTab2);  // Tabulate. At least one space

    // Loop for all operands to write
    for (i = 0; i < s.MaxNumOperands; i++) {
        if (s.Operands[i] & 0xFFFF) {

            // Write operand i
            if (NumOperands++) {
                // At least one operand before this one. Separate by ", "
                OutFile.Put(", ");
            }

            // Write constant and jump operands.
            // Note: 'continue' goes to the next operand of the enclosing for loop,
            // so the attribute handling below is skipped for these operands
            switch (s.Operands[i] & 0xF0) {
            case 0x10: case 0x20: case 0x30: case 0x80:
                WriteImmediateOperand(s.Operands[i]);
                continue;
            }

            // Write register and memory operands.
            // Bits 16-19 of the operand code tell where the operand is encoded
            uint32_t optype = (s.Operands[i] >> 16) & 0x0F;
            switch (optype) {
            case 0:  // Other type of operand
                WriteOtherOperand(s.Operands[i]);  break;

            case 0x1:  // Direct memory operand
                WriteRMOperand(s.Operands[i]);  break;

            case 0x2:  // Register operand indicated by last bits of opcode
                WriteShortRegOperand(s.Operands[i]);  break;

            case 0x3:  // Register or memory operand indicated by mod/rm bits
                WriteRMOperand(s.Operands[i]);  break;

            case 0x4:  // Register operand indicated by reg bits
                WriteRegOperand(s.Operands[i]);  break;

            case 0x5:  // Register operand indicated by dest bits of DREX byte
                WriteDREXOperand(s.Operands[i]);  break;

            case 0x6:  // Register operand indicated by VEX.vvvv bits
                WriteVEXOperand(s.Operands[i], 0);  break;

            case 0x7:  // Register operand indicated by bits 4-7 of immediate operand
                WriteVEXOperand(s.Operands[i], 1);  break;

            case 0x8:  // Register operand indicated by bits 0-3 of immediate operand
                WriteVEXOperand(s.Operands[i], 2);  break; // Unused. For future use
            }
            // Operand is a memory operand if it is a mod/rm operand and mod is not 3
            int isMem = optype == 3 && s.Mod != 3;
            if (s.Prefixes[3] == 0x62) {          // EVEX and MVEX prefix can have extra operand attributes
                if (s.Prefixes[6] & 0x20) {
                    WriteOperandAttributeEVEX(i, isMem);
                }
                else {
                    WriteOperandAttributeMVEX(i, isMem);
                }
            }
            if (s.Prefixes[3] == 0x62 && (i == s.MaxNumOperands - 1 || (s.Operands[i+1] & 0xFFF) < 0x40)) {
                // This is the last SIMD operand
                // Bit 31 of s.Operands[4] is used as a flag telling that the
                // attributes for position 98 have been written
                if (!(s.Operands[4] & 0x80000000)) {
                    s.Operands[4] |= 0x80000000;  // Make sure we don't write this twice
                    if (s.Prefixes[6] & 0x20) {
                        WriteOperandAttributeEVEX(98, isMem);
                    }
                    else {
                        WriteOperandAttributeMVEX(98, isMem);
                    }
                }
            }
        }
    }
    if (s.Prefixes[3] == 0x62) {                  // EVEX and MVEX prefix can have extra attributes after operands
        if (s.Prefixes[6] & 0x20) {
            WriteOperandAttributeEVEX(99, 0);
        }
        else {
            WriteOperandAttributeMVEX(99, 0);
        }
    }
    if (s.OpComment) {
        // Write opcode comment (the part of the opcode name string after ';')
        OutFile.Put(' ');
        OutFile.Put(CommentSeparator);
        OutFile.Put(s.OpComment);
    }
}
3929
3930
WriteStringInstruction()3931 void CDisassembler::WriteStringInstruction() {
3932 // Write string instruction or xlat instruction
3933 uint32_t NumOperands = 0; // Number of operands written
3934 uint32_t i; // Loop index
3935 uint32_t Segment; // Possible segment prefix
3936
3937 if (!(s.OpcodeDef->AllowedPrefixes & 0x1100)) {
3938 // Operand size is 8 if operand size prefixes not allowed
3939 s.OperandSize = 8;
3940 }
3941
3942 OutFile.Tabulate(AsmTab1); // Tabulate
3943
3944 if (Syntax != SUBTYPE_MASM && s.Prefixes[0] && (s.OpcodeDef->AllowedPrefixes & 4)) {
3945 // Get segment prefix
3946 Segment = GetSegmentRegisterFromPrefix(); // Interpret segment prefix
3947 // Write segment override
3948 OutFile.Put(RegisterNamesSeg[Segment]);
3949 OutFile.Put(" ");
3950 }
3951
3952 // Check repeat prefix
3953 if (s.OpcodeDef->AllowedPrefixes & 0x20) {
3954 if (s.Prefixes[3]) {
3955 // Repeat prefix
3956 OutFile.Put("rep ");
3957 }
3958 }
3959 else if (s.OpcodeDef->AllowedPrefixes & 0x40) {
3960 if (s.Prefixes[3] == 0xF2) {
3961 // repne prefix
3962 OutFile.Put("repne ");
3963 }
3964 else if (s.Prefixes[3] == 0xF3) {
3965 // repe prefix
3966 OutFile.Put("repe ");
3967 }
3968 }
3969
3970 // Write opcode name
3971 OutFile.Put(s.OpcodeDef->Name); // Opcode name
3972
3973 if (Syntax == SUBTYPE_MASM
3974 && (((s.OpcodeDef->AllowedPrefixes & 4) && s.Prefixes[0])
3975 || ((s.OpcodeDef->AllowedPrefixes & 1) && s.Prefixes[1]))) {
3976 // Has segment or address size prefix. Must write operands explicitly
3977 OutFile.Put(" "); // Space before operands
3978
3979 // Check address size for pointer registers
3980 const char * * PointerRegisterNames;
3981 switch (s.AddressSize) {
3982 case 16:
3983 PointerRegisterNames = RegisterNames16; break;
3984 case 32:
3985 PointerRegisterNames = RegisterNames32; break;
3986 case 64:
3987 PointerRegisterNames = RegisterNames64; break;
3988 default:
3989 PointerRegisterNames = 0; // should not occur
3990 }
3991
3992 // Loop for possibly two operands
3993 for (i = 0; i < 2; i++) {
3994 if (s.Operands[i]) {
3995 // Operand i defined
3996 if (NumOperands++) {
3997 // An operand before this one. Separate by ", "
3998 OutFile.Put(", ");
3999 }
4000 if (NumOperands == 1) {
4001 // Write operand size for first operand
4002 switch (s.OperandSize) {
4003 case 8:
4004 OutFile.Put("byte "); break;
4005 case 16:
4006 OutFile.Put("word "); break;
4007 case 32:
4008 OutFile.Put("dword "); break;
4009 case 64:
4010 OutFile.Put("qword "); break;
4011 }
4012 }
4013 // Get segment
4014 Segment = 1; // Default segment is DS
4015 if (s.Prefixes[0]) {
4016 Segment = GetSegmentRegisterFromPrefix(); // Interpret segment prefix
4017 }
4018 if ((s.Operands[i] & 0xCF) == 0xC2) {
4019 Segment = 0; // Segment is ES regardless of prefix for [edi] operand
4020 }
4021 // Write segment override
4022 OutFile.Put(RegisterNamesSeg[Segment]);
4023 OutFile.Put(":");
4024 // Opening "["
4025 OutFile.Put("[");
4026
4027 // Write pointer register
4028 switch (s.Operands[i] & 0xCF) {
4029 case 0xC0: // [bx], [ebx] or [rbx]
4030 OutFile.Put(PointerRegisterNames[3]);
4031 break;
4032 case 0xC1: // [si], [esi] or [rsi]
4033 OutFile.Put(PointerRegisterNames[6]);
4034 break;
4035 case 0xC2: // [di], [edi] or [rdi]
4036 OutFile.Put(PointerRegisterNames[7]);
4037 break;
4038 }
4039 // Closing "]"
4040 OutFile.Put("]");
4041 }
4042 }
4043 }
4044 else {
4045 // We don't have to write the operands
4046 // Append suffix for operand size, except for xlat
4047 if ((s.Operands[1] & 0xCF) != 0xC0) {
4048
4049 // Suffix for operand size
4050 uint32_t i = s.OperandSize / 8;
4051 if (i <= 8) {
4052 static const char SizeSuffixes[] = " bw d q"; // Table of suffixes
4053 OutFile.Put(SizeSuffixes[i]);
4054 }
4055 }
4056 }
4057 }
4058
4059
WriteCodeComment()4060 void CDisassembler::WriteCodeComment() {
4061 // Write hex listing of instruction as comment after instruction
4062 uint32_t i; // Index to current byte
4063 uint32_t FieldSize; // Number of bytes in field
4064 const char * Spacer; // Space between fields
4065
4066 OutFile.Tabulate(AsmTab3); // Tabulate to comment field
4067 OutFile.Put(CommentSeparator); // Start comment
4068
4069 // Write address
4070 if (SectionEnd + SectionAddress + (uint32_t)ImageBase > 0xFFFF) {
4071 // Write 32 bit address
4072 OutFile.PutHex(IBegin + SectionAddress + (uint32_t)ImageBase);
4073 }
4074 else {
4075 // Write 16 bit address
4076 OutFile.PutHex((uint16_t)(IBegin + SectionAddress));
4077 }
4078
4079 // Space after address
4080 OutFile.Put(" _");
4081
4082 // Start of instruction
4083 i = IBegin;
4084
4085 // Write bytes
4086 while (i < IEnd) {
4087 FieldSize = 1; // Size of field to write
4088 Spacer = " "; // Space between fields
4089
4090 // Spacer and FieldSize depends on fields
4091 if (i == s.OpcodeStart1 && i > IBegin) {
4092 Spacer = ": "; // Space between prefixes and opcode
4093 }
4094 if (i == s.OpcodeStart2 + 1) {
4095 Spacer = ". "; // Space between opcode and mod/reg/rm bytes
4096 }
4097 if (i == s.AddressField && s.AddressFieldSize) {
4098 Spacer = ", "; // Space before address field
4099 FieldSize = s.AddressFieldSize;
4100 }
4101 if (i == s.ImmediateField && s.ImmediateFieldSize) {
4102 Spacer = ", "; // Space before immediate operand field
4103 FieldSize = s.ImmediateFieldSize;
4104 }
4105 // Write space
4106 OutFile.Put(Spacer);
4107
4108 // Write byte or bytes
4109 switch (FieldSize) {
4110 case 1: // Write single byte
4111 OutFile.PutHex(Get<uint8_t>(i));
4112 break;
4113 case 2: // Write two bytes
4114 OutFile.PutHex(Get<uint16_t>(i));
4115 break;
4116 case 3: // Write three bytes (operands for "enter" instruction)
4117 OutFile.PutHex(Get<uint16_t>(i));
4118 OutFile.Put(", ");
4119 OutFile.PutHex(Get<uint8_t>(i+2));
4120 break;
4121 case 4: // Write four bytes
4122 if ((s.Operands[0] & 0xFE) == 0x84) {
4123 // Far jump/call address
4124 OutFile.PutHex(Get<uint16_t>(i));
4125 OutFile.Put(" ");
4126 OutFile.PutHex(Get<uint16_t>(i+2));
4127 }
4128 else {
4129 // Any other 32 bit operand
4130 OutFile.PutHex(Get<uint32_t>(i));
4131 }
4132 break;
4133 case 6: // Write six bytes (far jump address)
4134 OutFile.PutHex(Get<uint32_t>(i));
4135 OutFile.Put(" ");
4136 OutFile.PutHex(Get<uint16_t>(i+4));
4137 break;
4138 case 8: // Write eight bytes
4139 OutFile.PutHex(Get<uint64_t>(i));
4140 break;
4141 }
4142 // Search for relocation
4143 SARelocation rel1; // Make relocation records for searching
4144 rel1.Section = Section;
4145 rel1.Offset = i; // rel1 marks current field in instruction
4146
4147 // Is there a relocation source exactly here?
4148 int32_t irel = Relocations.Exists(rel1); // Finds relocation with source = i
4149
4150 if (irel > 0) {
4151 // This field has a relocation. Indicate relocation type
4152 // 0 = unknown, 1 = direct, 2 = self-relative, 3 = image-relative,
4153 // 4 = segment relative, 5 = relative to arbitrary ref. point, 8 = segment address/descriptor
4154 uint32_t RelType = Relocations[irel].Type;
4155 if (RelType) {
4156 OutFile.Put(Lookup(RelocationTypeNames, RelType));
4157 }
4158 if (Relocations[irel].Size > FieldSize) {
4159 // Relocation has wrong size
4160 OutFile.Put(" Misplaced relocation.");
4161 }
4162 }
4163
4164 // Point to next byte
4165 i += FieldSize;
4166 }
4167 // New line
4168 OutFile.NewLine();
4169 }
4170
4171
CountInstructions()4172 void CDisassembler::CountInstructions() {
4173 // Count total number of instructions defined in opcodes.cpp
4174 // Two instructions are regarded as the same and counted as one if they
4175 // have the same name and differ only in the bits that define register
4176 // name, operand size, etc.
4177
4178 uint32_t map; // Map number
4179 uint32_t index; // Index into map
4180 uint32_t n; // Number of instructions with same code
4181 uint32_t iset; // Instruction set
4182 uint32_t instructions = 0; // Total number of instructions
4183 uint32_t mmxinstr = 0; // Number of MMX instructions
4184 uint32_t sseinstr = 0; // Number of SSE instructions
4185 uint32_t sse2instr = 0; // Number of SSE2 instructions
4186 uint32_t sse3instr = 0; // Number of SSE3 instructions
4187 uint32_t ssse3instr = 0; // Number of SSSE3 instructions
4188 uint32_t sse41instr = 0; // Number of SSE4.1 instructions
4189 uint32_t sse42instr = 0; // Number of SSE4.2 instructions
4190 uint32_t AVXinstr = 0; // Number of AVX instructions
4191 uint32_t FMAinstr = 0; // Number of FMA3 and later instructions
4192 uint32_t AVX2instr = 0; // Number of AVX2 instructions
4193 uint32_t BMIinstr = 0; // Number of BMI instructions and other small instruction sets
4194 uint32_t AVX512instr = 0; // Number of AVX-512 instructions
4195 uint32_t MICinstr = 0; // Number of MIC instructions
4196 uint32_t AMDinstr = 0; // Number of AMD instructions
4197 uint32_t VIAinstr = 0; // Number of AMD instructions
4198 uint32_t privilinstr = 0; // Number of privileged instructions
4199 uint32_t undocinstr = 0; // Number of undocumented instructions
4200 uint32_t droppedinstr = 0; // Number of opcodes planned but never implemented
4201 uint32_t VEXdouble = 0; // Number of instructions that have both VEX and non-VEX version
4202 SOpcodeDef const * opcode; // Pointer to map entry
4203
4204 // Loop through all maps
4205 for (map = 0; map < NumOpcodeTables1; map++) {
4206 // Loop through each map
4207 for (index = 0; index < OpcodeTableLength[map]; index++) {
4208 opcode = OpcodeTables[map] + index;
4209 if (opcode->InstructionFormat && opcode->Name
4210 && !opcode->TableLink && !(opcode->InstructionFormat & 0x8000)) {
4211 // instruction is defined
4212 if ((opcode->InstructionFormat & 0xFFF) == 3
4213 && index > 0 && (opcode-1)->Name
4214 && strcmp(opcode->Name, (opcode-1)->Name) == 0) {
4215 // Same as previous instruction, just with another register
4216 continue; // Don't count this
4217 }
4218 n = 1; // Default = one instruction per map entry
4219 // Check if we have multiple instructions with different prefixes
4220 if (opcode->Options & 1) {
4221 if (opcode->AllowedPrefixes & 0x3000) {
4222 n++; // Extra instruction with W prefix bit
4223 }
4224 else if (opcode->AllowedPrefixes & 0xE00) {
4225 if (opcode->AllowedPrefixes & 0x200) n++; // Extra instruction with 66 prefix
4226 if (opcode->AllowedPrefixes & 0x400) n++; // Extra instruction with F3 prefix
4227 if (opcode->AllowedPrefixes & 0x800) n++; // Extra instruction with F2 prefix
4228 }
4229 else if (opcode->AllowedPrefixes & 0x100) {
4230 n++; // Extra instruction with 66 prefix
4231 if (opcode->AllowedPrefixes & 0x1000) n++;// Extra instruction with L prefix bit
4232 }
4233 }
4234 if (opcode->Options & 2) VEXdouble += n; // Instructions that have both VEX and non-VEX version
4235 instructions += n; // Count total instructions
4236
4237 iset = opcode->InstructionSet; // Instruction set
4238 if (iset & 0x20000) {
4239 droppedinstr += n; iset = 0; // Opcodes planned but never implemented
4240 }
4241 if (iset & 0x800) privilinstr += n; // Privileged instruction
4242 if (opcode->InstructionFormat & 0x4000) undocinstr += n; // Undocumented instruction
4243
4244 switch (iset & 0x37FF) {
4245 case 7: // MMX
4246 mmxinstr += n; break;
4247 case 0x11: // SSE
4248 sseinstr += n; break;
4249 case 0x12: // SSE2
4250 sse2instr += n; break;
4251 case 0x13: // SSE3
4252 sse3instr += n; break;
4253 case 0x14: // SSSE3
4254 ssse3instr += n; break;
4255 case 0x15: // SSE4.1
4256 sse41instr += n; break;
4257 case 0x16: // SSE4.2
4258 sse42instr += n; break;
4259 case 0x17: case 0x18: case 0x19: // VEX etc.
4260 AVXinstr += n; break;
4261 case 0x1A: case 0x1B: // FMA and later instructions
4262 FMAinstr += n; break;
4263 case 0x1C: // AVX2 instructions
4264 AVX2instr += n; break;
4265 case 0x1D: case 0x1E: // BMI and other small instruction sets
4266 BMIinstr += n; break;
4267 case 0x20: // AVX-512 instructions
4268 AVX512instr += n; break;
4269 case 0x80: // MIC instructions
4270 MICinstr += n; break;
4271 case 0x1001: case 0x1002: case 0x1004: case 0x1005: case 0x1006: // AMD
4272 AMDinstr += n; break;
4273 case 0x2001: // VIA
4274 VIAinstr += n; break;
4275 }
4276 }
4277 }
4278 }
4279
4280 // output result
4281 printf("\n\nNumber of instruction opcodes supported by disassembler:\n%5i Total, including:",
4282 instructions);
4283 printf("\n%5i Privileged instructions", privilinstr);
4284 printf("\n%5i MMX instructions", mmxinstr);
4285 printf("\n%5i SSE instructions", sseinstr);
4286 printf("\n%5i SSE2 instructions", sse2instr);
4287 printf("\n%5i SSE3 instructions", sse3instr);
4288 printf("\n%5i SSSE3 instructions", ssse3instr);
4289 printf("\n%5i SSE4.1 instructions", sse41instr);
4290 printf("\n%5i SSE4.2 instructions", sse42instr);
4291 printf("\n%5i AVX instructions etc.", AVXinstr);
4292 printf("\n%5i AVX2 instructions", AVX2instr);
4293 printf("\n%5i FMA3 instructions", FMAinstr);
4294 printf("\n%5i BMI/micsellaneous instr.", BMIinstr);
4295 printf("\n%5i AVX-512 instructions", AVX512instr);
4296 printf("\n%5i MIC/Xeon Phi instructions", MICinstr);
4297 printf("\n%5i AMD instructions", AMDinstr);
4298 printf("\n%5i VIA instructions", VIAinstr);
4299 printf("\n%5i instructions planned but never implemented in any CPU", droppedinstr);
4300 printf("\n%5i undocumented or illegal instructions", undocinstr);
4301 printf("\n%5i instructions have both VEX and non-VEX versions", VEXdouble);
4302 printf("\n");
4303
4304 #if 0 // temporary test code
4305
4306 // find entries with 0x2000 prefix code
4307 printf("\n\nInstructions with operand swap flag:\n");
4308 // Loop through all maps
4309 for (map = 0; map < NumOpcodeTables1; map++) {
4310 // Loop through each map
4311 for (index = 0; index < OpcodeTableLength[map]; index++) {
4312 opcode = OpcodeTables[map] + index;
4313 if ((opcode->AllowedPrefixes & 0x2000) == 0x2000) {
4314 printf("\n%04X %02X %s", map, index, opcode->Name);
4315 }
4316 }
4317 }
4318
4319 /*
4320 printf("\n\nTables linked by type 0x0E:\n");
4321 // Loop through all maps
4322 for (map = 0; map < NumOpcodeTables1; map++) {
4323 // Loop through each map
4324 for (index = 0; index < OpcodeTableLength[map]; index++) {
4325 opcode = OpcodeTables[map] + index;
4326 if (opcode->TableLink == 0x0E) {
4327 printf(" 0x%02X", opcode->InstructionSet);
4328 }
4329 }
4330 }*/
4331
4332 printf("\n");
4333
4334 #endif
4335 }
4336