1 /*
2 * PROJECT: ReactOS host tools
3 * LICENSE: MIT (https://spdx.org/licenses/MIT)
4 * PURPOSE: ASM preprocessor
5 * COPYRIGHT: Copyright 2021 Timo Kreuzer <timo.kreuzer@reactos.org>
6 */
7
8 // Optimize even on debug builds, because otherwise it's ridiculously slow
9 #ifdef _MSC_VER
10 #pragma optimize("gst", on)
11 #pragma auto_inline(on)
12 #else
13 #pragma GCC optimize("O3,inline")
14 #endif
15
16 #include "tokenizer.hpp"
17 #include <cstdlib>
18 #include <cstdio>
19 #include <sstream>
20 #include <ctime>
21
// Compile-time switch, currently set to 0. No use of this macro is visible
// in this part of the file.
// NOTE(review): presumably enables timing code when set to 1 - confirm.
#define PROFILING_ENABLED 0

using namespace std;

// Global time value. It is not referenced in the visible part of this file.
// NOTE(review): presumably accumulates search time for profiling - confirm.
time_t search_time;
27
// Token categories used by the token definition table (g_TokenList) and by
// the translate_* functions. Only 'Invalid' has an explicit value; all other
// values follow implicitly from the declaration order, so the order of the
// enumerators must be kept.
enum TOKEN_TYPE
{
    Invalid = -1,
    Eof,

    // White space, comments and literals
    WhiteSpace,
    NewLine,
    Comment,
    DecNumber,
    HexNumber,
    String,

    // Punctuation
    BraceOpen,
    BraceClose,
    MemRefStart,
    MemRefEnd,
    Colon,
    Operator,
    StringDef,

    // Directive keywords
    KW_include,
    KW_const,
    KW_code,
    KW_endprolog,
    KW_ALIGN,
    KW_EXTERN,
    KW_PUBLIC,
    KW_ENDM,
    KW_END,
    KW_if,
    KW_ifdef,
    KW_ifndef,
    KW_else,
    KW_endif,

    // Unwind related directives
    KW_allocstack,
    KW_savereg,
    KW_savexmm128,

    // Data definition and declaration keywords
    KW_DB,
    KW_DW,
    KW_DD,
    KW_DQ,
    KW_EQU,
    KW_TEXTEQU,
    KW_MACRO,
    KW_PROC,
    KW_FRAME,
    KW_ENDP,
    KW_RECORD,

    KW_MASK,
    KW_ERRDEF,

    // File names, instructions, registers and operand size prefixes
    Filename,
    Instruction,
    Reg8,
    Reg16,
    Reg32,
    Reg64,
    RegXmm,
    BYTE_PTR,
    WORD_PTR,
    DWORD_PTR,
    QWORD_PTR,
    XMMWORD_PTR,

    // Names
    LabelName,
    Identifier
};
97
// Drop-in replacement for printf that produces no output.
// It ignores the format string and all variadic arguments and always
// returns 0 (see the commented-out '#define printf fake_printf' below).
int fake_printf(const char* format, ...)
{
    (void)format;

    const int chars_written = 0;
    return chars_written;
}
102
103 //#define printf fake_printf
104
// FOLLOWED_BY(x) appends the pattern for the character(s) that must follow a
// token. The look-ahead variant (following characters not included in the
// match) is kept commented out; the active definition simply appends x
// unchanged after the capture group of the token pattern.
//#define FOLLOWED_BY(x) R"((?=)" x R"())"
#define FOLLOWED_BY(x) x

// Regex building blocks (raw string literals that are concatenated).
// ANY_CHAR: any character including a newline
#define ANY_CHAR R"((?:.|\n))"
// WHITESPACE: a run of spaces / tabs
// NOTE(review): '++' is possessive-quantifier syntax; verify that the regex
// engine used by the tokenizer accepts it before using this macro.
#define WHITESPACE R"((?:[ \t]++))"
// NEWLINE: a single line feed
#define NEWLINE R"([\n])"
// WS_OR_NL: one or more WHITESPACE or NEWLINE matches
#define WS_OR_NL R"((?:)" WHITESPACE "|" NEWLINE R"()+)"
// SEPARATOR: a single character that may follow an identifier
// (white space, comma, '=', arithmetic operators, ':', '~', '[' or ']')
#define SEPARATOR R"([\s,\=\+\-\*\/\:\~\[\]])"
114
// Regex alternation of instruction mnemonics, used to build the pattern of
// the Instruction token in g_TokenList. Note that entries like CMOVcc,
// FCMOVcc, Jcc, LOOPcc, SETcc and PREFETCHh are spelled literally, i.e. they
// are not expanded to the individual condition-code mnemonics.
#define INSTRUCTION \
    "AAA|AAD|AAM|AAS|ADC|ADCX|ADD|ADDPD|ADDPS|ADDSD|ADDSS|ADDSUBPD|ADDSUBPS|" \
    "ADOX|AESDEC|AESDECLAST|AESENC|AESENCLAST|AESIMC|AESKEYGENASSIST|AND|ANDN|" \
    "ANDNPD|ANDNPS|ANDPD|ANDPS|ARPL|BEXTR|BLENDPD|BLENDPS|BLENDVPD|BLENDVPS|" \
    "BLSI|BLSMSK|BLSR|BNDCL|BNDCN|BNDCU|BNDLDX|BNDMK|BNDMOV|BNDSTX|BOUND|BSF|" \
    "BSR|BSWAP|BT|BTC|BTR|BTS|BZHI|CALL|CBW|CDQ|CDQE|CLAC|CLC|CLD|CLDEMOTE|" \
    "CLFLUSH|CLFLUSHOPT|CLI|CLTS|CLWB|CMC|CMOVcc|CMP|CMPPD|CMPPS|CMPS|CMPSB|" \
    "CMPSD|CMPSQ|CMPSS|CMPSW|CMPXCHG|CMPXCHG16B|CMPXCHG8B|COMISD|COMISS|CPUID|" \
    "CQO|CRC32|CVTDQ2PD|CVTDQ2PS|CVTPD2DQ|CVTPD2PI|CVTPD2PS|CVTPI2PD|CVTPI2PS|" \
    "CVTPS2DQ|CVTPS2PD|CVTPS2PI|CVTSD2SI|CVTSD2SS|CVTSI2SD|CVTSI2SS|CVTSS2SD|" \
    "CVTSS2SI|CVTTPD2DQ|CVTTPD2PI|CVTTPS2DQ|CVTTPS2PI|CVTTSD2SI|CVTTSS2SI|CWD|" \
    "CWDE|DAA|DAS|DEC|DIV|DIVPD|DIVPS|DIVSD|DIVSS|DPPD|DPPS|EMMS|ENTER|" \
    "EXTRACTPS|F2XM1|FABS|FADD|FADDP|FBLD|FBSTP|FCHS|FCLEX|FCMOVcc|FCOM|FCOMI|" \
    "FCOMIP|FCOMP|FCOMPP|FCOS|FDECSTP|FDIV|FDIVP|FDIVR|FDIVRP|FFREE|FIADD|" \
    "FICOM|FICOMP|FIDIV|FIDIVR|FILD|FIMUL|FINCSTP|FINIT|FIST|FISTP|FISTTP|" \
    "FISUB|FISUBR|FLD|FLD1|FLDCW|FLDENV|FLDL2E|FLDL2T|FLDLG2|FLDLN2|FLDPI|" \
    "FLDZ|FMUL|FMULP|FNCLEX|FNINIT|FNOP|FNSAVE|FNSTCW|FNSTENV|FNSTSW|FPATAN|" \
    "FPREM|FPREM1|FPTAN|FRNDINT|FRSTOR|FSAVE|FSCALE|FSIN|FSINCOS|FSQRT|FST|" \
    "FSTCW|FSTENV|FSTP|FSTSW|FSUB|FSUBP|FSUBR|FSUBRP|FTST|FUCOM|FUCOMI|" \
    "FUCOMIP|FUCOMP|FUCOMPP|FWAIT|FXAM|FXCH|FXRSTOR|FXSAVE|FXTRACT|FYL2X|" \
    "FYL2XP1|GF2P8AFFINEINVQB|GF2P8AFFINEQB|GF2P8MULB|HADDPD|HADDPS|HLT|" \
    "HSUBPD|HSUBPS|IDIV|IMUL|IN|INC|INS|INSB|INSD|INSERTPS|INSW|INT|INT1|INT3|" \
    "INTO|INVD|INVLPG|INVPCID|IRET|IRETD|JMP|Jcc|KADDB|KADDD|KADDQ|KADDW|" \
    "KANDB|KANDD|KANDNB|KANDND|KANDNQ|KANDNW|KANDQ|KANDW|KMOVB|KMOVD|KMOVQ|" \
    "KMOVW|KNOTB|KNOTD|KNOTQ|KNOTW|KORB|KORD|KORQ|KORTESTB|KORTESTD|KORTESTQ|" \
    "KORTESTW|KORW|KSHIFTLB|KSHIFTLD|KSHIFTLQ|KSHIFTLW|KSHIFTRB|KSHIFTRD|" \
    "KSHIFTRQ|KSHIFTRW|KTESTB|KTESTD|KTESTQ|KTESTW|KUNPCKBW|KUNPCKDQ|KUNPCKWD|" \
    "KXNORB|KXNORD|KXNORQ|KXNORW|KXORB|KXORD|KXORQ|KXORW|LAHF|LAR|LDDQU|" \
    "LDMXCSR|LDS|LEA|LEAVE|LES|LFENCE|LFS|LGDT|LGS|LIDT|LLDT|LMSW|LOCK|LODS|" \
    "LODSB|LODSD|LODSQ|LODSW|LOOP|LOOPcc|LSL|LSS|LTR|LZCNT|MASKMOVDQU|MASKMOVQ|" \
    "MAXPD|MAXPS|MAXSD|MAXSS|MFENCE|MINPD|MINPS|MINSD|MINSS|MONITOR|MOV|MOVAPD|" \
    "MOVAPS|MOVBE|MOVD|MOVDDUP|MOVDIR64B|MOVDIRI|MOVDQ2Q|MOVDQA|MOVDQU|MOVHLPS|" \
    "MOVHPD|MOVHPS|MOVLHPS|MOVLPD|MOVLPS|MOVMSKPD|MOVMSKPS|MOVNTDQ|MOVNTDQA|" \
    "MOVNTI|MOVNTPD|MOVNTPS|MOVNTQ|MOVQ|MOVQ2DQ|MOVS|MOVSB|MOVSD|MOVSHDUP|" \
    "MOVSLDUP|MOVSQ|MOVSS|MOVSW|MOVSX|MOVSXD|MOVUPD|MOVUPS|MOVZX|MPSADBW|MUL|" \
    "MULPD|MULPS|MULSD|MULSS|MULX|MWAIT|NEG|NOP|NOT|OR|ORPD|ORPS|OUT|OUTS|" \
    "OUTSB|OUTSD|OUTSW|PABSB|PABSD|PABSQ|PABSW|PACKSSDW|PACKSSWB|PACKUSDW|" \
    "PACKUSWB|PADDB|PADDD|PADDQ|PADDSB|PADDSW|PADDUSB|PADDUSW|PADDW|PALIGNR|" \
    "PAND|PANDN|PAUSE|PAVGB|PAVGW|PBLENDVB|PBLENDW|PCLMULQDQ|PCMPEQB|PCMPEQD|" \
    "PCMPEQQ|PCMPEQW|PCMPESTRI|PCMPESTRM|PCMPGTB|PCMPGTD|PCMPGTQ|PCMPGTW|" \
    "PCMPISTRI|PCMPISTRM|PDEP|PEXT|PEXTRB|PEXTRD|PEXTRQ|PEXTRW|PHADDD|PHADDSW|" \
    "PHADDW|PHMINPOSUW|PHSUBD|PHSUBSW|PHSUBW|PINSRB|PINSRD|PINSRQ|PINSRW|" \
    "PMADDUBSW|PMADDWD|PMAXSB|PMAXSD|PMAXSQ|PMAXSW|PMAXUB|PMAXUD|PMAXUQ|PMAXUW|" \
    "PMINSB|PMINSD|PMINSQ|PMINSW|PMINUB|PMINUD|PMINUQ|PMINUW|PMOVMSKB|PMOVSX|" \
    "PMOVZX|PMULDQ|PMULHRSW|PMULHUW|PMULHW|PMULLD|PMULLQ|PMULLW|PMULUDQ|POP|" \
    "POPA|POPAD|POPCNT|POPF|POPFD|POPFQ|POR|PREFETCHW|PREFETCHh|PSADBW|PSHUFB|" \
    "PSHUFD|PSHUFHW|PSHUFLW|PSHUFW|PSIGNB|PSIGND|PSIGNW|PSLLD|PSLLDQ|PSLLQ|" \
    "PSLLW|PSRAD|PSRAQ|PSRAW|PSRLD|PSRLDQ|PSRLQ|PSRLW|PSUBB|PSUBD|PSUBQ|PSUBSB|" \
    "PSUBSW|PSUBUSB|PSUBUSW|PSUBW|PTEST|PTWRITE|PUNPCKHBW|PUNPCKHDQ|PUNPCKHQDQ|" \
    "PUNPCKHWD|PUNPCKLBW|PUNPCKLDQ|PUNPCKLQDQ|PUNPCKLWD|PUSH|PUSHA|PUSHAD|" \
    "PUSHF|PUSHFD|PUSHFQ|PXOR|RCL|RCPPS|RCPSS|RCR|RDFSBASE|RDGSBASE|RDMSR|" \
    "RDPID|RDPKRU|RDPMC|RDRAND|RDSEED|RDTSC|RDTSCP|REP|REPE|REPNE|REPNZ|REPZ|" \
    "RET|ROL|ROR|RORX|ROUNDPD|ROUNDPS|ROUNDSD|ROUNDSS|RSM|RSQRTPS|RSQRTSS|SAHF|" \
    "SAL|SAR|SARX|SBB|SCAS|SCASB|SCASD|SCASW|SETcc|SFENCE|SGDT|SHA1MSG1|" \
    "SHA1MSG2|SHA1NEXTE|SHA1RNDS4|SHA256MSG1|SHA256MSG2|SHA256RNDS2|SHL|SHLD|" \
    "SHLX|SHR|SHRD|SHRX|SHUFPD|SHUFPS|SIDT|SLDT|SMSW|SQRTPD|SQRTPS|SQRTSD|" \
    "SQRTSS|STAC|STC|STD|STI|STMXCSR|STOS|STOSB|STOSD|STOSQ|STOSW|STR|SUB|" \
    "SUBPD|SUBPS|SUBSD|SUBSS|SWAPGS|SYSCALL|SYSENTER|SYSEXIT|SYSRET|TEST|" \
    "TPAUSE|TZCNT|UCOMISD|UCOMISS|UD|UMONITOR|UMWAIT|UNPCKHPD|UNPCKHPS|" \
    "UNPCKLPD|UNPCKLPS|VALIGND|VALIGNQ|VBLENDMPD|VBLENDMPS|VBROADCAST|" \
    "VCOMPRESSPD|VCOMPRESSPS|VCVTPD2QQ|VCVTPD2UDQ|VCVTPD2UQQ|VCVTPH2PS|" \
    "VCVTPS2PH|VCVTPS2QQ|VCVTPS2UDQ|VCVTPS2UQQ|VCVTQQ2PD|VCVTQQ2PS|VCVTSD2USI|" \
    "VCVTSS2USI|VCVTTPD2QQ|VCVTTPD2UDQ|VCVTTPD2UQQ|VCVTTPS2QQ|VCVTTPS2UDQ|" \
    "VCVTTPS2UQQ|VCVTTSD2USI|VCVTTSS2USI|VCVTUDQ2PD|VCVTUDQ2PS|VCVTUQQ2PD|" \
    "VCVTUQQ2PS|VCVTUSI2SD|VCVTUSI2SS|VDBPSADBW|VERR|VERW|VEXPANDPD|VEXPANDPS|" \
    "VEXTRACTF128|VEXTRACTF32x4|VEXTRACTF32x8|VEXTRACTF64x2|VEXTRACTF64x4|" \
    "VEXTRACTI128|VEXTRACTI32x4|VEXTRACTI32x8|VEXTRACTI64x2|VEXTRACTI64x4|" \
    "VFIXUPIMMPD|VFIXUPIMMPS|VFIXUPIMMSD|VFIXUPIMMSS|VFMADD132PD|VFMADD132PS|" \
    "VFMADD132SD|VFMADD132SS|VFMADD213PD|VFMADD213PS|VFMADD213SD|VFMADD213SS|" \
    "VFMADD231PD|VFMADD231PS|VFMADD231SD|VFMADD231SS|VFMADDSUB132PD|" \
    "VFMADDSUB132PS|VFMADDSUB213PD|VFMADDSUB213PS|VFMADDSUB231PD|" \
    "VFMADDSUB231PS|VFMSUB132PD|VFMSUB132PS|VFMSUB132SD|VFMSUB132SS|" \
    "VFMSUB213PD|VFMSUB213PS|VFMSUB213SD|VFMSUB213SS|VFMSUB231PD|VFMSUB231PS|" \
    "VFMSUB231SD|VFMSUB231SS|VFMSUBADD132PD|VFMSUBADD132PS|VFMSUBADD213PD|" \
    "VFMSUBADD213PS|VFMSUBADD231PD|VFMSUBADD231PS|VFNMADD132PD|VFNMADD132PS|" \
    "VFNMADD132SD|VFNMADD132SS|VFNMADD213PD|VFNMADD213PS|VFNMADD213SD|" \
    "VFNMADD213SS|VFNMADD231PD|VFNMADD231PS|VFNMADD231SD|VFNMADD231SS|" \
    "VFNMSUB132PD|VFNMSUB132PS|VFNMSUB132SD|VFNMSUB132SS|VFNMSUB213PD|" \
    "VFNMSUB213PS|VFNMSUB213SD|VFNMSUB213SS|VFNMSUB231PD|VFNMSUB231PS|" \
    "VFNMSUB231SD|VFNMSUB231SS|VFPCLASSPD|VFPCLASSPS|VFPCLASSSD|VFPCLASSSS|" \
    "VGATHERDPD|VGATHERDPS|VGATHERQPD|VGATHERQPS|VGETEXPPD|VGETEXPPS|VGETEXPSD|" \
    "VGETEXPSS|VGETMANTPD|VGETMANTPS|VGETMANTSD|VGETMANTSS|VINSERTF128|" \
    "VINSERTF32x4|VINSERTF32x8|VINSERTF64x2|VINSERTF64x4|VINSERTI128|" \
    "VINSERTI32x4|VINSERTI32x8|VINSERTI64x2|VINSERTI64x4|VMASKMOV|VMOVDQA32|" \
    "VMOVDQA64|VMOVDQU16|VMOVDQU32|VMOVDQU64|VMOVDQU8|VPBLENDD|VPBLENDMB|" \
    "VPBLENDMD|VPBLENDMQ|VPBLENDMW|VPBROADCAST|VPBROADCASTB|VPBROADCASTD|" \
    "VPBROADCASTM|VPBROADCASTQ|VPBROADCASTW|VPCMPB|VPCMPD|VPCMPQ|VPCMPUB|" \
    "VPCMPUD|VPCMPUQ|VPCMPUW|VPCMPW|VPCOMPRESSD|VPCOMPRESSQ|VPCONFLICTD|" \
    "VPCONFLICTQ|VPERM2F128|VPERM2I128|VPERMB|VPERMD|VPERMI2B|VPERMI2D|" \
    "VPERMI2PD|VPERMI2PS|VPERMI2Q|VPERMI2W|VPERMILPD|VPERMILPS|VPERMPD|VPERMPS|" \
    "VPERMQ|VPERMT2B|VPERMT2D|VPERMT2PD|VPERMT2PS|VPERMT2Q|VPERMT2W|VPERMW|" \
    "VPEXPANDD|VPEXPANDQ|VPGATHERDD|VPGATHERDQ|VPGATHERQD|VPGATHERQQ|VPLZCNTD|" \
    "VPLZCNTQ|VPMADD52HUQ|VPMADD52LUQ|VPMASKMOV|VPMOVB2M|VPMOVD2M|VPMOVDB|" \
    "VPMOVDW|VPMOVM2B|VPMOVM2D|VPMOVM2Q|VPMOVM2W|VPMOVQ2M|VPMOVQB|VPMOVQD|" \
    "VPMOVQW|VPMOVSDB|VPMOVSDW|VPMOVSQB|VPMOVSQD|VPMOVSQW|VPMOVSWB|VPMOVUSDB|" \
    "VPMOVUSDW|VPMOVUSQB|VPMOVUSQD|VPMOVUSQW|VPMOVUSWB|VPMOVW2M|VPMOVWB|" \
    "VPMULTISHIFTQB|VPROLD|VPROLQ|VPROLVD|VPROLVQ|VPRORD|VPRORQ|VPRORVD|" \
    "VPRORVQ|VPSCATTERDD|VPSCATTERDQ|VPSCATTERQD|VPSCATTERQQ|VPSLLVD|VPSLLVQ|" \
    "VPSLLVW|VPSRAVD|VPSRAVQ|VPSRAVW|VPSRLVD|VPSRLVQ|VPSRLVW|VPTERNLOGD|" \
    "VPTERNLOGQ|VPTESTMB|VPTESTMD|VPTESTMQ|VPTESTMW|VPTESTNMB|VPTESTNMD|" \
    "VPTESTNMQ|VPTESTNMW|VRANGEPD|VRANGEPS|VRANGESD|VRANGESS|VRCP14PD|VRCP14PS|" \
    "VRCP14SD|VRCP14SS|VREDUCEPD|VREDUCEPS|VREDUCESD|VREDUCESS|VRNDSCALEPD|" \
    "VRNDSCALEPS|VRNDSCALESD|VRNDSCALESS|VRSQRT14PD|VRSQRT14PS|VRSQRT14SD|" \
    "VRSQRT14SS|VSCALEFPD|VSCALEFPS|VSCALEFSD|VSCALEFSS|VSCATTERDPD|" \
    "VSCATTERDPS|VSCATTERQPD|VSCATTERQPS|VSHUFF32x4|VSHUFF64x2|VSHUFI32x4|" \
    "VSHUFI64x2|VTESTPD|VTESTPS|VZEROALL|VZEROUPPER|WAIT|WBINVD|WRFSBASE|" \
    "WRGSBASE|WRMSR|WRPKRU|XABORT|XACQUIRE|XADD|XBEGIN|XCHG|XEND|XGETBV|XLAT|" \
    "XLATB|XOR|XORPD|XORPS|XRELEASE|XRSTOR|XRSTORS|XSAVE|XSAVEC|XSAVEOPT|" \
    "XSAVES|XSETBV|XTEST"
224
225 vector<TOKEN_DEF> g_TokenList =
226 {
227 //{ TOKEN_TYPE::WhiteSpace, R"((\s+))" },
228 { TOKEN_TYPE::WhiteSpace, R"(([ \t]+))" },
229 { TOKEN_TYPE::NewLine, R"((\n))" },
230 { TOKEN_TYPE::Comment, R"((;.*\n))" },
231 { TOKEN_TYPE::HexNumber, R"(([0-9][0-9a-f]*h))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") },
232 { TOKEN_TYPE::DecNumber, R"(([0-9]+))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") },
233 { TOKEN_TYPE::String, R"((\".*\"))" },
234
235 { TOKEN_TYPE::BraceOpen, R"((\())"},
236 { TOKEN_TYPE::BraceClose, R"((\)))"},
237 { TOKEN_TYPE::MemRefStart, R"((\[))"},
238 { TOKEN_TYPE::MemRefEnd, R"((\]))"},
239 { TOKEN_TYPE::Colon, R"((\:))"},
240 { TOKEN_TYPE::Operator, R"(([,\+\-\*\/\:]))"},
241 { TOKEN_TYPE::StringDef, R"((<.+>))" },
242
243 { TOKEN_TYPE::KW_include, R"((include))" FOLLOWED_BY(R"([\s])") },
244 { TOKEN_TYPE::KW_const, R"((\.const))" FOLLOWED_BY(R"([\s])") },
245 { TOKEN_TYPE::KW_code, R"((\.code))" FOLLOWED_BY(R"([\s])") },
246 { TOKEN_TYPE::KW_endprolog, R"((\.endprolog))" FOLLOWED_BY(R"([\s])") },
247 { TOKEN_TYPE::KW_ALIGN, R"((ALIGN))" FOLLOWED_BY(R"([\s])") },
248 { TOKEN_TYPE::KW_EXTERN, R"((EXTERN))" FOLLOWED_BY(R"([\s])") },
249 { TOKEN_TYPE::KW_EXTERN, R"((EXTRN))" FOLLOWED_BY(R"([\s])") },
250 { TOKEN_TYPE::KW_PUBLIC, R"((PUBLIC))" FOLLOWED_BY(R"([\s])") },
251 { TOKEN_TYPE::KW_ENDM, R"((ENDM))" FOLLOWED_BY(R"([\s\;])") },
252 { TOKEN_TYPE::KW_END, R"((END))" FOLLOWED_BY(R"([\s])") },
253 { TOKEN_TYPE::KW_if, R"((if))" FOLLOWED_BY(R"([\s])") },
254 { TOKEN_TYPE::KW_ifdef, R"((ifdef))" FOLLOWED_BY(R"([\s])")},
255 { TOKEN_TYPE::KW_ifndef, R"((ifndef))" FOLLOWED_BY(R"([\s])")},
256 { TOKEN_TYPE::KW_else, R"((else))" FOLLOWED_BY(R"([\s])")},
257 { TOKEN_TYPE::KW_endif, R"((endif))" FOLLOWED_BY(R"([\s])")},
258
259 { TOKEN_TYPE::KW_allocstack, R"((.allocstack))" FOLLOWED_BY(R"([\s])") },
260 { TOKEN_TYPE::KW_savereg, R"((.savereg))" FOLLOWED_BY(R"([\s])") },
261 { TOKEN_TYPE::KW_savexmm128, R"((.savexmm128))" FOLLOWED_BY(R"([\s])") },
262
263 { TOKEN_TYPE::KW_DB, R"((DB))" FOLLOWED_BY(R"([\s])") },
264 { TOKEN_TYPE::KW_DW, R"((DW))" FOLLOWED_BY(R"([\s])") },
265 { TOKEN_TYPE::KW_DD, R"((DD))" FOLLOWED_BY(R"([\s])") },
266 { TOKEN_TYPE::KW_DQ, R"((DQ))" FOLLOWED_BY(R"([\s])") },
267 { TOKEN_TYPE::KW_EQU, R"((EQU))" FOLLOWED_BY(R"([\s])") },
268 { TOKEN_TYPE::KW_TEXTEQU, R"((TEXTEQU))" FOLLOWED_BY(R"([\s])") },
269 { TOKEN_TYPE::KW_MACRO, R"((MACRO))" FOLLOWED_BY(R"([\s\;])") },
270 { TOKEN_TYPE::KW_PROC, R"((PROC))" FOLLOWED_BY(R"([\s\;])") },
271 { TOKEN_TYPE::KW_FRAME, R"((FRAME))" FOLLOWED_BY(R"([\s\;])") },
272 { TOKEN_TYPE::KW_ENDP, R"((ENDP))" FOLLOWED_BY(R"([\s\;])") },
273 { TOKEN_TYPE::KW_RECORD, R"((RECORD))" FOLLOWED_BY(R"([\s\;])") },
274 { TOKEN_TYPE::KW_MASK, R"((MASK))" FOLLOWED_BY(R"([\s\;])")},
275 { TOKEN_TYPE::KW_ERRDEF, R"((\.ERRDEF))" FOLLOWED_BY(R"([\s\;])")},
276
277 { TOKEN_TYPE::Filename, R"(([a-z_][a-z0-9_]*\.inc))" FOLLOWED_BY(R"([\s])") },
278 { TOKEN_TYPE::Instruction, "(" INSTRUCTION ")" FOLLOWED_BY(R"([\s])") },
279 { TOKEN_TYPE::Reg8, R"((al|ah|bl|bh|cl|ch|dl|dh|sil|dil|bpl|spl|r8b|r9b|r10b|r11b|r12b|r13b|r14b|r15b))" FOLLOWED_BY(R"([\s\,])") },
280 { TOKEN_TYPE::Reg16, R"((ax|bx|cx|dx|si|di|bp|sp|r8w|r9w|r10w|r11w|r12w|r13w|r14w|r15w))" FOLLOWED_BY(R"([\s\,])") },
281 { TOKEN_TYPE::Reg32, R"((eax|ebx|ecx|edx|esi|edi|ebp|esp|r8d|r9d|r10d|r11d|r12d|r13d|r14d|r15d))" FOLLOWED_BY(R"([\s\,])") },
282 { TOKEN_TYPE::Reg64, R"((rax|rbx|rcx|rdx|rsi|rdi|rbp|rsp|r8|r9|r10|r11|r12|r13|r14|r15))" FOLLOWED_BY(R"([\s\,])") },
283 { TOKEN_TYPE::RegXmm, R"((xmm0|xmm1|xmm2|xmm3|xmm4|xmm5|xmm6|xmm7|xmm8|xmm9|xmm10|xmm11|xmm12|xmm13|xmm14|xmm15))" FOLLOWED_BY(R"([\s\,])") },
284 { TOKEN_TYPE::BYTE_PTR, R"((BYTE[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
285 { TOKEN_TYPE::WORD_PTR, R"((WORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
286 { TOKEN_TYPE::DWORD_PTR, R"((DWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
287 { TOKEN_TYPE::QWORD_PTR, R"((QWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
288 { TOKEN_TYPE::XMMWORD_PTR, R"((XMMWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
289
290 { TOKEN_TYPE::Identifier, R"((@@))" FOLLOWED_BY(SEPARATOR)},
291 { TOKEN_TYPE::Identifier, R"((@[a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
292 { TOKEN_TYPE::Identifier, R"(([a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
293
294 };
295
// FIXME: use context?
// Counter for anonymous '@@' labels: it is incremented each time an '@@:'
// label is translated, and the new value is printed as the label name.
unsigned int g_label_number = 0;

// Set by translate_instruction() while an instruction recognized by
// is_jmp_or_call() is being translated. While it is set, no "[rip]" suffix
// is appended to identifiers in instruction parameters.
bool g_processing_jmp = false;
300
// Kind of a recorded identifier (see IDENTIFIER / g_identifiers).
// The visible code only records Memory, Label and Constant.
enum class IDTYPE
{
    Memory,      // data definitions (DB / DW / DD / DQ), see add_mem_id()
    Register,
    Label,       // labels ('name:') and PROC names
    Constant,    // EQU / TEXTEQU definitions
    Macro,
    Instruction,
    String,
    Unknown
};
312
// A recorded identifier: its name (the token text) and what kind of entity
// it was recorded as.
struct IDENTIFIER
{
    string Name;
    IDTYPE Type;
};
318
// All identifiers recorded so far by add_identifier(), in insertion order.
// is_mem_id() searches this list linearly.
vector<IDENTIFIER> g_identifiers;
320
321 static
322 void
add_identifier(Token & tok,IDTYPE type)323 add_identifier(Token& tok, IDTYPE type)
324 {
325 g_identifiers.push_back(IDENTIFIER{ tok.str(), type });
326 //fprintf(stderr, "Added id: '%s'\n", tok.str().c_str());
327 }
328
329 void
add_mem_id(Token & tok)330 add_mem_id(Token& tok)
331 {
332 add_identifier(tok, IDTYPE::Memory);
333 }
334
335 bool
is_mem_id(Token & tok)336 is_mem_id(Token& tok)
337 {
338 for (IDENTIFIER& identifier : g_identifiers)
339 {
340 if (identifier.Name == tok.str())
341 {
342 return identifier.Type == IDTYPE::Memory;
343 }
344 }
345
346 return true;
347 }
348
// Case-insensitive comparison of two strings.
// Returns true if both strings have the same length and all characters are
// equal after conversion with tolower(), false otherwise.
bool
iequals(const std::string &a, const std::string &b)
{
    const size_t sz = a.size();
    if (b.size() != sz)
        return false;

    for (size_t i = 0; i < sz; ++i)
    {
        // tolower() requires a value representable as unsigned char (or EOF).
        // Passing a negative plain char is undefined behavior, so convert to
        // unsigned char first.
        const int lower_a = tolower(static_cast<unsigned char>(a[i]));
        const int lower_b = tolower(static_cast<unsigned char>(b[i]));
        if (lower_a != lower_b)
            return false;
    }

    return true;
}
360
361 Token
get_expected_token(Token && tok,TOKEN_TYPE type)362 get_expected_token(Token&& tok, TOKEN_TYPE type)
363 {
364 if (tok.type() != type)
365 {
366 throw "Not white space after identifier!\n";
367 }
368
369 return tok;
370 }
371
get_ws(Token && tok)372 Token get_ws(Token&& tok)
373 {
374 int type = tok.type();
375 if (type != TOKEN_TYPE::WhiteSpace)
376 {
377 throw "Not white space after identifier!\n";
378 }
379
380 return tok;
381 }
382
get_ws_or_nl(Token && tok)383 Token get_ws_or_nl(Token&& tok)
384 {
385 int type = tok.type();
386 if ((type != TOKEN_TYPE::WhiteSpace) &&
387 (type != TOKEN_TYPE::NewLine))
388 {
389 throw "Not white space after identifier!\n";
390 }
391
392 return tok;
393 }
394
// Returns true if 'str' is equal to one of the strings in 'list'
// (exact, case-sensitive comparison), false otherwise.
// Both arguments are taken by const reference: this function is called for
// every identifier token, and passing by value copied the whole list and the
// string on each call.
bool is_string_in_list(const std::vector<std::string>& list, const std::string& str)
{
    for (const std::string& s : list)
    {
        if (s == str)
        {
            return true;
        }
    }

    return false;
}
407
408 size_t
translate_token(TokenList & tokens,size_t index,const vector<string> & macro_params)409 translate_token(TokenList& tokens, size_t index, const vector<string> ¯o_params)
410 {
411 Token tok = tokens[index];
412 switch (tok.type())
413 {
414 case TOKEN_TYPE::Comment:
415 printf("//%s", tok.str().c_str() + 1);
416 break;
417
418 case TOKEN_TYPE::DecNumber:
419 {
420 unsigned long long num = stoull(tok.str(), nullptr, 10);
421 printf("%llu", num);
422 break;
423 }
424
425 case TOKEN_TYPE::HexNumber:
426 {
427 string number = tok.str();
428 printf("0x%s", number.substr(0, number.size() - 1).c_str());
429 break;
430 }
431
432 case TOKEN_TYPE::Identifier:
433 if (is_string_in_list(macro_params, tok.str()))
434 {
435 printf("\\");
436 }
437 printf("%s", tok.str().c_str());
438 break;
439
440 // We migt want to improve these
441 case TOKEN_TYPE::BYTE_PTR:
442 case TOKEN_TYPE::WORD_PTR:
443 case TOKEN_TYPE::DWORD_PTR:
444 case TOKEN_TYPE::QWORD_PTR:
445 case TOKEN_TYPE::XMMWORD_PTR:
446
447 // Check these. valid only in instructions?
448 case TOKEN_TYPE::Reg8:
449 case TOKEN_TYPE::Reg16:
450 case TOKEN_TYPE::Reg32:
451 case TOKEN_TYPE::Reg64:
452 case TOKEN_TYPE::RegXmm:
453 case TOKEN_TYPE::Instruction:
454
455 case TOKEN_TYPE::WhiteSpace:
456 case TOKEN_TYPE::NewLine:
457 case TOKEN_TYPE::Operator:
458 printf("%s", tok.str().c_str());
459 break;
460
461 default:
462 printf("%s", tok.str().c_str());
463 break;
464 }
465
466 return index + 1;
467 }
468
complete_line(TokenList & tokens,size_t index,const vector<string> & macro_params)469 size_t complete_line(TokenList &tokens, size_t index, const vector<string> ¯o_params)
470 {
471 while (index < tokens.size())
472 {
473 Token tok = tokens[index];
474 index = translate_token(tokens, index, macro_params);
475 if ((tok.type() == TOKEN_TYPE::NewLine) ||
476 (tok.type() == TOKEN_TYPE::Comment))
477 {
478 break;
479 }
480 }
481
482 return index;
483 }
484
485 size_t
translate_expression(TokenList & tokens,size_t index,const vector<string> & macro_params)486 translate_expression(TokenList &tokens, size_t index, const vector<string> ¯o_params)
487 {
488 while (index < tokens.size())
489 {
490 Token tok = tokens[index];
491 switch (tok.type())
492 {
493 case TOKEN_TYPE::NewLine:
494 case TOKEN_TYPE::Comment:
495 return index;
496
497 case TOKEN_TYPE::KW_MASK:
498 printf("MASK_");
499 index += 2;
500 break;
501
502 case TOKEN_TYPE::Instruction:
503 if (iequals(tok.str(), "and"))
504 {
505 printf("&");
506 index += 1;
507 }
508 else if (iequals(tok.str(), "or"))
509 {
510 printf("|");
511 index += 1;
512 }
513 else if (iequals(tok.str(), "shl"))
514 {
515 printf("<<");
516 index += 1;
517 }
518 else if (iequals(tok.str(), "not"))
519 {
520 printf("!");
521 index += 1;
522 }
523 else
524 {
525 throw "Invalid expression";
526 }
527 break;
528
529 case TOKEN_TYPE::Operator:
530 if (tok.str() == ",")
531 {
532 return index;
533 }
534 case TOKEN_TYPE::WhiteSpace:
535 case TOKEN_TYPE::BraceOpen:
536 case TOKEN_TYPE::BraceClose:
537 case TOKEN_TYPE::DecNumber:
538 case TOKEN_TYPE::HexNumber:
539 case TOKEN_TYPE::Identifier:
540 index = translate_token(tokens, index, macro_params);
541 break;
542
543 default:
544 index = translate_token(tokens, index, macro_params);
545 }
546 }
547
548 return index;
549 }
550
translate_mem_ref(TokenList & tokens,size_t index,const vector<string> & macro_params)551 size_t translate_mem_ref(TokenList& tokens, size_t index, const vector<string>& macro_params)
552 {
553 unsigned int offset = 0;
554
555 Token tok = tokens[index];
556
557 if ((tok.type() == TOKEN_TYPE::DecNumber) ||
558 (tok.type() == TOKEN_TYPE::HexNumber))
559 {
560 offset = stoi(tok.str(), nullptr, 0);
561 index += 2;
562 }
563
564 index = translate_token(tokens, index, macro_params);
565
566 while (index < tokens.size())
567 {
568 Token tok = tokens[index];
569 index = translate_token(tokens, index, macro_params);
570 if (tok.type() == TOKEN_TYPE::MemRefEnd)
571 {
572 if (offset != 0)
573 {
574 printf(" + %u", offset);
575 }
576 return index;
577 }
578 }
579
580 throw "Failed to translate memory ref";
581 return index;
582 }
583
translate_instruction_param(TokenList & tokens,size_t index,const vector<string> & macro_params)584 size_t translate_instruction_param(TokenList& tokens, size_t index, const vector<string>& macro_params)
585 {
586 switch (tokens[index].type())
587 {
588 case TOKEN_TYPE::BYTE_PTR:
589 case TOKEN_TYPE::WORD_PTR:
590 case TOKEN_TYPE::DWORD_PTR:
591 case TOKEN_TYPE::QWORD_PTR:
592 case TOKEN_TYPE::XMMWORD_PTR:
593 index = translate_token(tokens, index, macro_params);
594
595 // Optional whitespace
596 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
597 {
598 index = translate_token(tokens, index, macro_params);
599 }
600 }
601
602 while (index < tokens.size())
603 {
604 Token tok = tokens[index];
605 switch (tok.type())
606 {
607 case TOKEN_TYPE::MemRefStart:
608 return translate_mem_ref(tokens, index, macro_params);
609
610 case TOKEN_TYPE::NewLine:
611 case TOKEN_TYPE::Comment:
612 return index;
613
614 case TOKEN_TYPE::Operator:
615 if (tok.str() == ",")
616 return index;
617 return translate_token(tokens, index, macro_params);
618
619 case TOKEN_TYPE::Identifier:
620 index = translate_token(tokens, index, macro_params);
621 if (is_mem_id(tok) &&
622 !is_string_in_list(macro_params, tok.str()) &&
623 !g_processing_jmp)
624 {
625 printf("[rip]");
626 }
627 break;
628
629 default:
630 index = translate_expression(tokens, index, macro_params);
631 }
632 }
633
634 return index;
635 }
636
637 static
638 bool
is_jmp_or_call(const Token & tok)639 is_jmp_or_call(const Token& tok)
640 {
641 const char* inst_list[] = {
642 "jmp", "call", "ja", "jae", "jb", "jbe", "jc", "jcxz", "je", "jecxz", "jg", "jge",
643 "jl", "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge", "jnl", "jnle",
644 "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo", "jrcxz", "js", "jz", "loop", "loope",
645 "loopne", "loopnz", "loopz"
646 };
647
648 for (const char* inst : inst_list)
649 {
650 if (iequals(tok.str(), inst))
651 {
652 return true;
653 }
654 }
655
656 return false;
657 }
658
translate_instruction(TokenList & tokens,size_t index,const vector<string> & macro_params)659 size_t translate_instruction(TokenList& tokens, size_t index, const vector<string>& macro_params)
660 {
661 // Check for jump/call instructions
662 if (is_jmp_or_call(tokens[index]))
663 {
664 g_processing_jmp = true;
665 }
666
667 // Translate the instruction itself
668 index = translate_token(tokens, index, macro_params);
669
670 // Handle instruction parameters
671 while (index < tokens.size())
672 {
673 // Optional whitespace
674 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
675 {
676 index = translate_token(tokens, index, macro_params);
677 }
678
679 // Check for parameters
680 Token tok = tokens[index];
681 switch (tok.type())
682 {
683 case TOKEN_TYPE::Comment:
684 case TOKEN_TYPE::NewLine:
685 g_processing_jmp = false;
686 return index;
687
688 case TOKEN_TYPE::WhiteSpace:
689 case TOKEN_TYPE::Operator:
690 index = translate_token(tokens, index, macro_params);
691 break;
692
693 default:
694 index = translate_instruction_param(tokens, index, macro_params);
695 break;
696 }
697 }
698
699 g_processing_jmp = false;
700 return index;
701 }
702
translate_item(TokenList & tokens,size_t index,const vector<string> & macro_params)703 size_t translate_item(TokenList& tokens, size_t index, const vector<string> ¯o_params)
704 {
705 switch (tokens[index].type())
706 {
707 case TOKEN_TYPE::DecNumber:
708 case TOKEN_TYPE::HexNumber:
709 case TOKEN_TYPE::String:
710 case TOKEN_TYPE::WhiteSpace:
711 return translate_token(tokens, index, macro_params);
712 }
713
714 throw "Failed to translate item";
715 return -1;
716 }
717
translate_list(TokenList & tokens,size_t index,const vector<string> & macro_params)718 size_t translate_list(TokenList& tokens, size_t index, const vector<string> ¯o_params)
719 {
720 while (index < tokens.size())
721 {
722 // The item itself
723 index = translate_item(tokens, index, macro_params);
724
725 // Optional white space
726 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
727 {
728 index = translate_token(tokens, index, macro_params);
729 }
730
731 // End of list?
732 if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
733 (tokens[index].type() == TOKEN_TYPE::NewLine))
734 {
735 return index;
736 }
737
738 // We expect a comma here
739 if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
740 (tokens[index].str() != ","))
741 {
742 throw "Unexpected end of list";
743 }
744
745 index = translate_token(tokens, index, macro_params);
746 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
747 {
748 index = translate_token(tokens, index, macro_params);
749 }
750 }
751
752 throw "Failed to translate list";
753 return -1;
754 }
755
756 size_t
translate_data_def(TokenList & tokens,size_t index,const vector<string> & macro_params)757 translate_data_def(TokenList& tokens, size_t index, const vector<string>& macro_params)
758 {
759 Token tok = tokens[index];
760 Token tok1 = get_ws(tokens[index + 1]);
761 string directive, need, have ="";
762
763 switch (tok.type())
764 {
765 case TOKEN_TYPE::KW_DB:
766 directive = ".byte";
767 break;
768
769 case TOKEN_TYPE::KW_DW:
770 directive = ".short";
771 break;
772
773 case TOKEN_TYPE::KW_DD:
774 directive = ".long";
775 break;
776
777 case TOKEN_TYPE::KW_DQ:
778 directive = ".quad";
779 break;
780 }
781
782 index += 2;
783
784 while (index < tokens.size())
785 {
786 // Check if we need '.ascii' for ASCII strings
787 if (tokens[index].str()[0] == '\"')
788 {
789 need = ".ascii";
790 }
791 else
792 {
793 need = directive;
794 }
795
796 // Output the directive we need (or a comma)
797 if (have == "")
798 {
799 printf("%s ", need.c_str());
800 }
801 else if (have != need)
802 {
803 printf("\n%s ", need.c_str());
804 }
805 else
806 {
807 printf(", ");
808 }
809
810 have = need;
811
812 // The item itself
813 index = translate_item(tokens, index, macro_params);
814
815 // Optional white space
816 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
817 {
818 index = translate_token(tokens, index, macro_params);
819 }
820
821 // End of list?
822 if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
823 (tokens[index].type() == TOKEN_TYPE::NewLine))
824 {
825 return index;
826 }
827
828 // We expect a comma here
829 if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
830 (tokens[index].str() != ","))
831 {
832 throw "Unexpected end of list";
833 }
834
835 // Skip comma and optional white-space
836 index++;
837 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
838 {
839 index++;
840 }
841 }
842
843 throw "Failed to translate list";
844 return -1;
845 }
846
847 size_t
translate_construct_one_param(string translated,TokenList & tokens,size_t index,const vector<string> & macro_params)848 translate_construct_one_param(string translated, TokenList& tokens, size_t index, const vector<string>& macro_params)
849 {
850 // The next token should be white space
851 Token tok1 = get_ws(tokens[index + 1]);
852
853 printf("%s%s", translated.c_str(), tok1.str().c_str());
854 return translate_expression(tokens, index + 2, macro_params);
855 }
856
857 size_t
translate_record(TokenList & tokens,size_t index,const vector<string> & macro_params)858 translate_record(TokenList &tokens, size_t index, const vector<string> ¯o_params)
859 {
860 unsigned int bits, bitpos = 0;
861 unsigned long long oldmask = 0, mask = 0;
862
863 Token tok_name = get_expected_token(tokens[index], TOKEN_TYPE::Identifier);
864 index += 4;
865 while (index < tokens.size())
866 {
867 Token tok_member = get_expected_token(tokens[index++], TOKEN_TYPE::Identifier);
868
869 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
870 {
871 index++;
872 }
873
874 if (tokens[index++].str() != ":")
875 {
876 throw "Unexpected token";
877 }
878
879 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
880 {
881 index++;
882 }
883
884 Token tok_bits = tokens[index++];
885 if ((tok_bits.type() != TOKEN_TYPE::DecNumber) &&
886 (tok_bits.type() != TOKEN_TYPE::HexNumber))
887 {
888 throw "Unexpected token";
889 }
890
891 bits = stoi(tok_bits.str(), nullptr, 0);
892
893 printf("%s = %u\n", tok_member.str().c_str(), bitpos);
894
895 oldmask = (1ULL << bitpos) - 1;
896 bitpos += bits;
897 mask = (1ULL << bitpos) - 1 - oldmask;
898 printf("MASK_%s = 0x%llx\n", tok_member.str().c_str(), mask);
899
900 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
901 {
902 index++;
903 }
904
905 if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
906 (tokens[index].type() == TOKEN_TYPE::Comment))
907 {
908 break;
909 }
910
911 if (tokens[index].str() != ",")
912 {
913 throw "unexpected token";
914 }
915
916 index++;
917 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
918 {
919 index++;
920 }
921
922 if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
923 (tokens[index].type() == TOKEN_TYPE::Comment))
924 {
925 index++;
926 }
927
928 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
929 {
930 index++;
931 }
932 }
933
934 return index;
935 }
936
937 size_t
translate_identifier_construct(TokenList & tokens,size_t index,const vector<string> & macro_params)938 translate_identifier_construct(TokenList& tokens, size_t index, const vector<string> ¯o_params)
939 {
940 Token tok = tokens[index];
941 Token tok1 = tokens[index + 1];
942
943 if (tok1.type() == TOKEN_TYPE::Colon)
944 {
945 if (tok.str() == "@@")
946 {
947 g_label_number++;
948 printf("%u:", g_label_number);
949 }
950 else
951 {
952 printf("%s:", tok.str().c_str());
953 }
954 add_identifier(tok, IDTYPE::Label);
955 return index + 2;
956 }
957
958 Token tok2 = tokens[index + 2];
959
960 switch (tok2.type())
961 {
962 case TOKEN_TYPE::KW_MACRO:
963 throw "Cannot have a nested macro!";
964
965 case TOKEN_TYPE::KW_DB:
966 case TOKEN_TYPE::KW_DW:
967 case TOKEN_TYPE::KW_DD:
968 case TOKEN_TYPE::KW_DQ:
969 printf("%s:%s", tok.str().c_str(), tok1.str().c_str());
970 add_mem_id(tok);
971 return translate_data_def(tokens, index + 2, macro_params);
972
973 case TOKEN_TYPE::KW_EQU:
974 //printf("%s%s", tok.str().c_str(), tok1.str().c_str());
975 printf("#define %s ", tok.str().c_str());
976 add_identifier(tok, IDTYPE::Constant);
977 return translate_expression(tokens, index + 3, macro_params);
978
979 case TOKEN_TYPE::KW_TEXTEQU:
980 {
981 Token tok3 = get_ws(tokens[index + 3]);
982 Token tok4 = get_expected_token(tokens[index + 4], TOKEN_TYPE::StringDef);
983
984 string textdef = tok4.str();
985 printf("#define %s %s", tok.str().c_str(), textdef.substr(1, textdef.size() - 2).c_str());
986 add_identifier(tok, IDTYPE::Constant);
987 return index + 5;
988 }
989
990 case TOKEN_TYPE::KW_PROC:
991 {
992 printf(".func %s\n", tok.str().c_str());
993 printf("%s:", tok.str().c_str());
994 index += 3;
995
996 if ((tokens[index].type() == TOKEN_TYPE::WhiteSpace) &&
997 (tokens[index + 1].type() == TOKEN_TYPE::KW_FRAME))
998 {
999 #ifdef TARGET_amd64
1000 printf("\n.seh_proc %s\n", tok.str().c_str());
1001 #else
1002 printf("\n.cfi_startproc\n");
1003 #endif
1004 index += 2;
1005 }
1006 add_identifier(tok, IDTYPE::Label);
1007 break;
1008 }
1009
1010 case TOKEN_TYPE::KW_ENDP:
1011 {
1012 printf(".seh_endproc\n.endfunc");
1013 index += 3;
1014 break;
1015 }
1016
1017 case TOKEN_TYPE::KW_RECORD:
1018 index = translate_record(tokens, index, macro_params);
1019 break;
1020
1021 default:
1022 // We don't know what it is, assume it's a macro and treat it like an instruction
1023 index = translate_instruction(tokens, index, macro_params);
1024 break;
1025 }
1026
1027 return index;
1028 }
1029
1030 size_t
translate_construct(TokenList & tokens,size_t index,const vector<string> & macro_params)1031 translate_construct(TokenList& tokens, size_t index, const vector<string> ¯o_params)
1032 {
1033 Token tok = tokens[index];
1034
1035 switch (tok.type())
1036 {
1037 case TOKEN_TYPE::WhiteSpace:
1038 case TOKEN_TYPE::NewLine:
1039 case TOKEN_TYPE::Comment:
1040 return translate_token(tokens, index, macro_params);
1041
1042 case TOKEN_TYPE::Identifier:
1043 return translate_identifier_construct(tokens, index, macro_params);
1044
1045 case TOKEN_TYPE::KW_ALIGN:
1046 index = translate_construct_one_param(".align", tokens, index, macro_params);
1047 break;
1048
1049 case TOKEN_TYPE::KW_allocstack:
1050 index = translate_construct_one_param(".seh_stackalloc", tokens, index, macro_params);
1051 break;
1052
1053 case TOKEN_TYPE::KW_code:
1054 #ifdef TARGET_amd64
1055 printf(".code64");
1056 #else
1057 printf(".code");
1058 #endif
1059 printf(" .intel_syntax noprefix");
1060 index++;
1061 break;
1062
1063 case TOKEN_TYPE::KW_const:
1064 printf(".section .rdata");
1065 index++;
1066 break;
1067
1068 case TOKEN_TYPE::KW_DB:
1069 case TOKEN_TYPE::KW_DW:
1070 case TOKEN_TYPE::KW_DD:
1071 case TOKEN_TYPE::KW_DQ:
1072 return translate_data_def(tokens, index, macro_params);
1073
1074 case TOKEN_TYPE::KW_END:
1075 printf("// END\n");
1076 return tokens.size();
1077
1078 case TOKEN_TYPE::KW_endprolog:
1079 printf(".seh_endprologue");
1080 index++;
1081 break;
1082
1083 case TOKEN_TYPE::KW_EXTERN:
1084 {
1085 Token tok1 = get_ws_or_nl(tokens[index + 1]);
1086 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Identifier);
1087 add_mem_id(tok2);
1088 printf("//");
1089 return complete_line(tokens, index, macro_params);
1090 }
1091
1092 case TOKEN_TYPE::KW_if:
1093 case TOKEN_TYPE::KW_ifdef:
1094 case TOKEN_TYPE::KW_ifndef:
1095 case TOKEN_TYPE::KW_else:
1096 case TOKEN_TYPE::KW_endif:
1097 // TODO: handle parameter differences between "if" and ".if" etc.
1098 printf(".");
1099 return complete_line(tokens, index, macro_params);
1100
1101 case TOKEN_TYPE::KW_include:
1102 {
1103 // The next token should be white space
1104 Token tok1 = get_ws_or_nl(tokens[index + 1]);
1105 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Filename);
1106 printf("#include \"%s.h\"", tok2.str().c_str());
1107 index += 3;
1108 break;
1109 }
1110
1111 case TOKEN_TYPE::KW_PUBLIC:
1112 index = translate_construct_one_param(".global", tokens, index, macro_params);
1113 break;
1114
1115 case TOKEN_TYPE::KW_savereg:
1116 printf(".seh_savereg");
1117 return complete_line(tokens, index + 1, macro_params);
1118
1119 case TOKEN_TYPE::KW_savexmm128:
1120 printf(".seh_savexmm");
1121 return complete_line(tokens, index + 1, macro_params);
1122
1123 case TOKEN_TYPE::Instruction:
1124 index = translate_instruction(tokens, index, macro_params);
1125 break;
1126
1127 case TOKEN_TYPE::KW_ERRDEF:
1128 printf("//");
1129 return complete_line(tokens, index, macro_params);
1130
1131 default:
1132 throw "failed to translate construct";
1133 }
1134
1135 // Skip optional white-space
1136 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
1137 {
1138 index++;
1139 }
1140
1141 // Line should end here!
1142 Token end = tokens[index];
1143 if ((end.type() != TOKEN_TYPE::Comment) &&
1144 (end.type() != TOKEN_TYPE::NewLine))
1145 {
1146 throw "unexpected tokens";
1147 }
1148
1149 return index;
1150 }
1151
1152 size_t
translate_macro(TokenList & tokens,size_t index)1153 translate_macro(TokenList& tokens, size_t index)
1154 {
1155 vector<string> macro_params;
1156
1157 printf(".macro %s", tokens[index].str().c_str());
1158
1159 // Parse marameters
1160 index += 3;
1161 while (index < tokens.size())
1162 {
1163 Token tok = tokens[index];
1164 switch (tok.type())
1165 {
1166 case TOKEN_TYPE::NewLine:
1167 case TOKEN_TYPE::Comment:
1168 index = translate_token(tokens, index, macro_params);
1169 break;
1170
1171 case TOKEN_TYPE::Identifier:
1172 macro_params.push_back(tok.str());
1173 printf("%s", tok.str().c_str());
1174 index++;
1175 continue;
1176
1177 case TOKEN_TYPE::WhiteSpace:
1178 case TOKEN_TYPE::Operator:
1179 index = translate_token(tokens, index, macro_params);
1180 continue;
1181 }
1182
1183 break;
1184 }
1185
1186 // Parse content
1187 while (index < tokens.size())
1188 {
1189 Token tok = tokens[index];
1190 switch (tok.type())
1191 {
1192 case TOKEN_TYPE::KW_ENDM:
1193 printf(".endm");
1194 return index + 1;
1195
1196 default:
1197 index = translate_construct(tokens, index, macro_params);
1198 }
1199 }
1200
1201 throw "Failed to translate macro";
1202 return -1;
1203 }
1204
1205 void
translate(TokenList & tokens)1206 translate(TokenList &tokens)
1207 {
1208 size_t index = 0;
1209 size_t size = tokens.size();
1210 vector<string> empty_macro_params;
1211
1212 while (index < size)
1213 {
1214 // Macros are special
1215 if ((tokens[index].type() == TOKEN_TYPE::Identifier) &&
1216 (tokens[index + 1].type() == TOKEN_TYPE::WhiteSpace) &&
1217 (tokens[index + 2].type() == TOKEN_TYPE::KW_MACRO))
1218 {
1219 index = translate_macro(tokens, index);
1220 }
1221 else
1222 {
1223 index = translate_construct(tokens, index, empty_macro_params);
1224 }
1225 }
1226 }
1227
main(int argc,char * argv[])1228 int main(int argc, char* argv[])
1229 {
1230 if (argc < 2)
1231 {
1232 fprintf(stderr, "Invalid parameter!\n");
1233 return -1;
1234 }
1235
1236 #if PROFILING_ENABLED
1237 time_t start_time = time(NULL);
1238 #endif
1239
1240 try
1241 {
1242 // Open and read the input file
1243 string filename(argv[1]);
1244 ifstream file(filename);
1245 stringstream buffer;
1246 buffer << file.rdbuf();
1247 string text = buffer.str();
1248
1249 // Create the tokenizer
1250 Tokenizer tokenizer(g_TokenList);
1251
1252 // Get a token list
1253 TokenList toklist(tokenizer, text);
1254
1255 // Now translate the tokens
1256 translate(toklist);
1257 }
1258 catch (const char* message)
1259 {
1260 fprintf(stderr, "Exception caught: '%s'\n", message);
1261 return -2;
1262 }
1263
1264 #if PROFILING_ENABLED
1265 time_t total_time = time(NULL) + 1 - start_time;
1266 fprintf(stderr, "total_time = %llu\n", total_time);
1267 fprintf(stderr, "search_time = %llu\n", search_time);
1268 fprintf(stderr, "search: %llu %%\n", search_time * 100 / total_time);
1269 #endif
1270
1271 return 0;
1272 }
1273