xref: /reactos/sdk/tools/asmpp/asmpp.cpp (revision 83e13630)
1 /*
2  * PROJECT:     ReactOS host tools
3  * LICENSE:     MIT (https://spdx.org/licenses/MIT)
4  * PURPOSE:     ASM preprocessor
5  * COPYRIGHT:   Copyright 2021 Timo Kreuzer <timo.kreuzer@reactos.org>
6  */
7 
8 // Optimize even on debug builds, because otherwise it's ridiculously slow
9 #ifdef _MSC_VER
10 #pragma optimize("gst", on)
11 #pragma auto_inline(on)
12 #else
13 #pragma GCC optimize("O3,inline")
14 #endif
15 
16 #include "tokenizer.hpp"
17 #include <cstdlib>
18 #include <cstdio>
19 #include <sstream>
20 #include <ctime>
21 
// Profiling switch (currently 0). Its uses are outside the visible part of
// this file.
#define PROFILING_ENABLED 0

using namespace std;

// Not referenced in the visible part of this file; presumably accumulates the
// time spent searching when profiling is enabled -- TODO confirm.
time_t search_time;
27 
// Token categories used by the translator. Most enumerators have a matching
// regular expression in g_TokenList; Invalid, Eof and LabelName do not.
// Values are assigned implicitly: Invalid is -1, Eof is 0, then counting up.
enum TOKEN_TYPE
{
    Invalid = -1,
    Eof,
    WhiteSpace,
    NewLine,
    Comment,
    DecNumber,
    HexNumber,
    String,

    // Punctuation
    BraceOpen,
    BraceClose,
    MemRefStart,
    MemRefEnd,
    Colon,
    Operator,
    StringDef,

    // Directives and keywords
    KW_include,
    KW_const,
    KW_code,
    KW_endprolog,
    KW_ALIGN,
    KW_EXTERN, // matched by both "EXTERN" and "EXTRN"
    KW_PUBLIC,
    KW_ENDM,
    KW_END,
    KW_if,
    KW_ifdef,
    KW_ifndef,
    KW_else,
    KW_endif,

    // Dot-prefixed prolog directives
    KW_allocstack,
    KW_savereg,
    KW_savexmm128,

    // Data definitions and declarations
    KW_DB,
    KW_DW,
    KW_DD,
    KW_DQ,
    KW_EQU,
    KW_TEXTEQU,
    KW_MACRO,
    KW_PROC,
    KW_FRAME,
    KW_ENDP,
    KW_RECORD,

    KW_MASK,
    KW_ERRDEF,

    // Instruction operands
    Filename,
    Instruction,
    Reg8,
    Reg16,
    Reg32,
    Reg64,
    RegXmm,
    BYTE_PTR,
    WORD_PTR,
    DWORD_PTR,
    QWORD_PTR,
    XMMWORD_PTR,

    LabelName,
    Identifier
};
97 
// Stand-in for printf that ignores the format string and all variadic
// arguments and always reports that zero characters were written.
int fake_printf(const char* format, ...)
{
    (void)format;

    const int characters_written = 0;
    return characters_written;
}
102 
103 //#define printf fake_printf
104 
// Use a look-ahead for following characters, not included into the match
// (the look-ahead variant is disabled; FOLLOWED_BY currently just appends the
// given character class to the pattern, after the capture group)
//#define FOLLOWED_BY(x) R"((?=)" x R"())"
#define FOLLOWED_BY(x) x

// Regular expression fragments. Only SEPARATOR is used in the visible part of
// this file.
#define ANY_CHAR R"((?:.|\n))"                              // any character, including a line feed
#define WHITESPACE R"((?:[ \t]++))"                         // one or more blanks or tabs (possessive)
#define NEWLINE R"([\n])"                                   // a single line feed
#define WS_OR_NL R"((?:)" WHITESPACE "|" NEWLINE R"()+)"    // any run of white space and line feeds
#define SEPARATOR R"([\s,\=\+\-\*\/\:\~\[\]])"              // one character that may follow an identifier
114 
// Regular expression alternation of x86 instruction mnemonics, used for the
// TOKEN_TYPE::Instruction entry of g_TokenList.
// NOTE(review): the list contains placeholder family names ("CMOVcc",
// "FCMOVcc", "Jcc", "LOOPcc", "SETcc", "PREFETCHh"). As regex alternatives
// these only match that literal text, so concrete forms such as a conditional
// jump do not appear to be covered by this list -- confirm whether that is
// intended.
#define INSTRUCTION \
    "AAA|AAD|AAM|AAS|ADC|ADCX|ADD|ADDPD|ADDPS|ADDSD|ADDSS|ADDSUBPD|ADDSUBPS|" \
    "ADOX|AESDEC|AESDECLAST|AESENC|AESENCLAST|AESIMC|AESKEYGENASSIST|AND|ANDN|" \
    "ANDNPD|ANDNPS|ANDPD|ANDPS|ARPL|BEXTR|BLENDPD|BLENDPS|BLENDVPD|BLENDVPS|" \
    "BLSI|BLSMSK|BLSR|BNDCL|BNDCN|BNDCU|BNDLDX|BNDMK|BNDMOV|BNDSTX|BOUND|BSF|" \
    "BSR|BSWAP|BT|BTC|BTR|BTS|BZHI|CALL|CBW|CDQ|CDQE|CLAC|CLC|CLD|CLDEMOTE|" \
    "CLFLUSH|CLFLUSHOPT|CLI|CLTS|CLWB|CMC|CMOVcc|CMP|CMPPD|CMPPS|CMPS|CMPSB|" \
    "CMPSD|CMPSQ|CMPSS|CMPSW|CMPXCHG|CMPXCHG16B|CMPXCHG8B|COMISD|COMISS|CPUID|" \
    "CQO|CRC32|CVTDQ2PD|CVTDQ2PS|CVTPD2DQ|CVTPD2PI|CVTPD2PS|CVTPI2PD|CVTPI2PS|" \
    "CVTPS2DQ|CVTPS2PD|CVTPS2PI|CVTSD2SI|CVTSD2SS|CVTSI2SD|CVTSI2SS|CVTSS2SD|" \
    "CVTSS2SI|CVTTPD2DQ|CVTTPD2PI|CVTTPS2DQ|CVTTPS2PI|CVTTSD2SI|CVTTSS2SI|CWD|" \
    "CWDE|DAA|DAS|DEC|DIV|DIVPD|DIVPS|DIVSD|DIVSS|DPPD|DPPS|EMMS|ENTER|" \
    "EXTRACTPS|F2XM1|FABS|FADD|FADDP|FBLD|FBSTP|FCHS|FCLEX|FCMOVcc|FCOM|FCOMI|" \
    "FCOMIP|FCOMP|FCOMPP|FCOS|FDECSTP|FDIV|FDIVP|FDIVR|FDIVRP|FFREE|FIADD|" \
    "FICOM|FICOMP|FIDIV|FIDIVR|FILD|FIMUL|FINCSTP|FINIT|FIST|FISTP|FISTTP|" \
    "FISUB|FISUBR|FLD|FLD1|FLDCW|FLDENV|FLDL2E|FLDL2T|FLDLG2|FLDLN2|FLDPI|" \
    "FLDZ|FMUL|FMULP|FNCLEX|FNINIT|FNOP|FNSAVE|FNSTCW|FNSTENV|FNSTSW|FPATAN|" \
    "FPREM|FPREM1|FPTAN|FRNDINT|FRSTOR|FSAVE|FSCALE|FSIN|FSINCOS|FSQRT|FST|" \
    "FSTCW|FSTENV|FSTP|FSTSW|FSUB|FSUBP|FSUBR|FSUBRP|FTST|FUCOM|FUCOMI|" \
    "FUCOMIP|FUCOMP|FUCOMPP|FWAIT|FXAM|FXCH|FXRSTOR|FXSAVE|FXTRACT|FYL2X|" \
    "FYL2XP1|GF2P8AFFINEINVQB|GF2P8AFFINEQB|GF2P8MULB|HADDPD|HADDPS|HLT|" \
    "HSUBPD|HSUBPS|IDIV|IMUL|IN|INC|INS|INSB|INSD|INSERTPS|INSW|INT|INT1|INT3|" \
    "INTO|INVD|INVLPG|INVPCID|IRET|IRETD|JMP|Jcc|KADDB|KADDD|KADDQ|KADDW|" \
    "KANDB|KANDD|KANDNB|KANDND|KANDNQ|KANDNW|KANDQ|KANDW|KMOVB|KMOVD|KMOVQ|" \
    "KMOVW|KNOTB|KNOTD|KNOTQ|KNOTW|KORB|KORD|KORQ|KORTESTB|KORTESTD|KORTESTQ|" \
    "KORTESTW|KORW|KSHIFTLB|KSHIFTLD|KSHIFTLQ|KSHIFTLW|KSHIFTRB|KSHIFTRD|" \
    "KSHIFTRQ|KSHIFTRW|KTESTB|KTESTD|KTESTQ|KTESTW|KUNPCKBW|KUNPCKDQ|KUNPCKWD|" \
    "KXNORB|KXNORD|KXNORQ|KXNORW|KXORB|KXORD|KXORQ|KXORW|LAHF|LAR|LDDQU|" \
    "LDMXCSR|LDS|LEA|LEAVE|LES|LFENCE|LFS|LGDT|LGS|LIDT|LLDT|LMSW|LOCK|LODS|" \
    "LODSB|LODSD|LODSQ|LODSW|LOOP|LOOPcc|LSL|LSS|LTR|LZCNT|MASKMOVDQU|MASKMOVQ|" \
    "MAXPD|MAXPS|MAXSD|MAXSS|MFENCE|MINPD|MINPS|MINSD|MINSS|MONITOR|MOV|MOVAPD|" \
    "MOVAPS|MOVBE|MOVD|MOVDDUP|MOVDIR64B|MOVDIRI|MOVDQ2Q|MOVDQA|MOVDQU|MOVHLPS|" \
    "MOVHPD|MOVHPS|MOVLHPS|MOVLPD|MOVLPS|MOVMSKPD|MOVMSKPS|MOVNTDQ|MOVNTDQA|" \
    "MOVNTI|MOVNTPD|MOVNTPS|MOVNTQ|MOVQ|MOVQ2DQ|MOVS|MOVSB|MOVSD|MOVSHDUP|" \
    "MOVSLDUP|MOVSQ|MOVSS|MOVSW|MOVSX|MOVSXD|MOVUPD|MOVUPS|MOVZX|MPSADBW|MUL|" \
    "MULPD|MULPS|MULSD|MULSS|MULX|MWAIT|NEG|NOP|NOT|OR|ORPD|ORPS|OUT|OUTS|" \
    "OUTSB|OUTSD|OUTSW|PABSB|PABSD|PABSQ|PABSW|PACKSSDW|PACKSSWB|PACKUSDW|" \
    "PACKUSWB|PADDB|PADDD|PADDQ|PADDSB|PADDSW|PADDUSB|PADDUSW|PADDW|PALIGNR|" \
    "PAND|PANDN|PAUSE|PAVGB|PAVGW|PBLENDVB|PBLENDW|PCLMULQDQ|PCMPEQB|PCMPEQD|" \
    "PCMPEQQ|PCMPEQW|PCMPESTRI|PCMPESTRM|PCMPGTB|PCMPGTD|PCMPGTQ|PCMPGTW|" \
    "PCMPISTRI|PCMPISTRM|PDEP|PEXT|PEXTRB|PEXTRD|PEXTRQ|PEXTRW|PHADDD|PHADDSW|" \
    "PHADDW|PHMINPOSUW|PHSUBD|PHSUBSW|PHSUBW|PINSRB|PINSRD|PINSRQ|PINSRW|" \
    "PMADDUBSW|PMADDWD|PMAXSB|PMAXSD|PMAXSQ|PMAXSW|PMAXUB|PMAXUD|PMAXUQ|PMAXUW|" \
    "PMINSB|PMINSD|PMINSQ|PMINSW|PMINUB|PMINUD|PMINUQ|PMINUW|PMOVMSKB|PMOVSX|" \
    "PMOVZX|PMULDQ|PMULHRSW|PMULHUW|PMULHW|PMULLD|PMULLQ|PMULLW|PMULUDQ|POP|" \
    "POPA|POPAD|POPCNT|POPF|POPFD|POPFQ|POR|PREFETCHW|PREFETCHh|PSADBW|PSHUFB|" \
    "PSHUFD|PSHUFHW|PSHUFLW|PSHUFW|PSIGNB|PSIGND|PSIGNW|PSLLD|PSLLDQ|PSLLQ|" \
    "PSLLW|PSRAD|PSRAQ|PSRAW|PSRLD|PSRLDQ|PSRLQ|PSRLW|PSUBB|PSUBD|PSUBQ|PSUBSB|" \
    "PSUBSW|PSUBUSB|PSUBUSW|PSUBW|PTEST|PTWRITE|PUNPCKHBW|PUNPCKHDQ|PUNPCKHQDQ|" \
    "PUNPCKHWD|PUNPCKLBW|PUNPCKLDQ|PUNPCKLQDQ|PUNPCKLWD|PUSH|PUSHA|PUSHAD|" \
    "PUSHF|PUSHFD|PUSHFQ|PXOR|RCL|RCPPS|RCPSS|RCR|RDFSBASE|RDGSBASE|RDMSR|" \
    "RDPID|RDPKRU|RDPMC|RDRAND|RDSEED|RDTSC|RDTSCP|REP|REPE|REPNE|REPNZ|REPZ|" \
    "RET|ROL|ROR|RORX|ROUNDPD|ROUNDPS|ROUNDSD|ROUNDSS|RSM|RSQRTPS|RSQRTSS|SAHF|" \
    "SAL|SAR|SARX|SBB|SCAS|SCASB|SCASD|SCASW|SETcc|SFENCE|SGDT|SHA1MSG1|" \
    "SHA1MSG2|SHA1NEXTE|SHA1RNDS4|SHA256MSG1|SHA256MSG2|SHA256RNDS2|SHL|SHLD|" \
    "SHLX|SHR|SHRD|SHRX|SHUFPD|SHUFPS|SIDT|SLDT|SMSW|SQRTPD|SQRTPS|SQRTSD|" \
    "SQRTSS|STAC|STC|STD|STI|STMXCSR|STOS|STOSB|STOSD|STOSQ|STOSW|STR|SUB|" \
    "SUBPD|SUBPS|SUBSD|SUBSS|SWAPGS|SYSCALL|SYSENTER|SYSEXIT|SYSRET|TEST|" \
    "TPAUSE|TZCNT|UCOMISD|UCOMISS|UD|UMONITOR|UMWAIT|UNPCKHPD|UNPCKHPS|" \
    "UNPCKLPD|UNPCKLPS|VALIGND|VALIGNQ|VBLENDMPD|VBLENDMPS|VBROADCAST|" \
    "VCOMPRESSPD|VCOMPRESSPS|VCVTPD2QQ|VCVTPD2UDQ|VCVTPD2UQQ|VCVTPH2PS|" \
    "VCVTPS2PH|VCVTPS2QQ|VCVTPS2UDQ|VCVTPS2UQQ|VCVTQQ2PD|VCVTQQ2PS|VCVTSD2USI|" \
    "VCVTSS2USI|VCVTTPD2QQ|VCVTTPD2UDQ|VCVTTPD2UQQ|VCVTTPS2QQ|VCVTTPS2UDQ|" \
    "VCVTTPS2UQQ|VCVTTSD2USI|VCVTTSS2USI|VCVTUDQ2PD|VCVTUDQ2PS|VCVTUQQ2PD|" \
    "VCVTUQQ2PS|VCVTUSI2SD|VCVTUSI2SS|VDBPSADBW|VERR|VERW|VEXPANDPD|VEXPANDPS|" \
    "VEXTRACTF128|VEXTRACTF32x4|VEXTRACTF32x8|VEXTRACTF64x2|VEXTRACTF64x4|" \
    "VEXTRACTI128|VEXTRACTI32x4|VEXTRACTI32x8|VEXTRACTI64x2|VEXTRACTI64x4|" \
    "VFIXUPIMMPD|VFIXUPIMMPS|VFIXUPIMMSD|VFIXUPIMMSS|VFMADD132PD|VFMADD132PS|" \
    "VFMADD132SD|VFMADD132SS|VFMADD213PD|VFMADD213PS|VFMADD213SD|VFMADD213SS|" \
    "VFMADD231PD|VFMADD231PS|VFMADD231SD|VFMADD231SS|VFMADDSUB132PD|" \
    "VFMADDSUB132PS|VFMADDSUB213PD|VFMADDSUB213PS|VFMADDSUB231PD|" \
    "VFMADDSUB231PS|VFMSUB132PD|VFMSUB132PS|VFMSUB132SD|VFMSUB132SS|" \
    "VFMSUB213PD|VFMSUB213PS|VFMSUB213SD|VFMSUB213SS|VFMSUB231PD|VFMSUB231PS|" \
    "VFMSUB231SD|VFMSUB231SS|VFMSUBADD132PD|VFMSUBADD132PS|VFMSUBADD213PD|" \
    "VFMSUBADD213PS|VFMSUBADD231PD|VFMSUBADD231PS|VFNMADD132PD|VFNMADD132PS|" \
    "VFNMADD132SD|VFNMADD132SS|VFNMADD213PD|VFNMADD213PS|VFNMADD213SD|" \
    "VFNMADD213SS|VFNMADD231PD|VFNMADD231PS|VFNMADD231SD|VFNMADD231SS|" \
    "VFNMSUB132PD|VFNMSUB132PS|VFNMSUB132SD|VFNMSUB132SS|VFNMSUB213PD|" \
    "VFNMSUB213PS|VFNMSUB213SD|VFNMSUB213SS|VFNMSUB231PD|VFNMSUB231PS|" \
    "VFNMSUB231SD|VFNMSUB231SS|VFPCLASSPD|VFPCLASSPS|VFPCLASSSD|VFPCLASSSS|" \
    "VGATHERDPD|VGATHERDPS|VGATHERQPD|VGATHERQPS|VGETEXPPD|VGETEXPPS|VGETEXPSD|" \
    "VGETEXPSS|VGETMANTPD|VGETMANTPS|VGETMANTSD|VGETMANTSS|VINSERTF128|" \
    "VINSERTF32x4|VINSERTF32x8|VINSERTF64x2|VINSERTF64x4|VINSERTI128|" \
    "VINSERTI32x4|VINSERTI32x8|VINSERTI64x2|VINSERTI64x4|VMASKMOV|VMOVDQA32|" \
    "VMOVDQA64|VMOVDQU16|VMOVDQU32|VMOVDQU64|VMOVDQU8|VPBLENDD|VPBLENDMB|" \
    "VPBLENDMD|VPBLENDMQ|VPBLENDMW|VPBROADCAST|VPBROADCASTB|VPBROADCASTD|" \
    "VPBROADCASTM|VPBROADCASTQ|VPBROADCASTW|VPCMPB|VPCMPD|VPCMPQ|VPCMPUB|" \
    "VPCMPUD|VPCMPUQ|VPCMPUW|VPCMPW|VPCOMPRESSD|VPCOMPRESSQ|VPCONFLICTD|" \
    "VPCONFLICTQ|VPERM2F128|VPERM2I128|VPERMB|VPERMD|VPERMI2B|VPERMI2D|" \
    "VPERMI2PD|VPERMI2PS|VPERMI2Q|VPERMI2W|VPERMILPD|VPERMILPS|VPERMPD|VPERMPS|" \
    "VPERMQ|VPERMT2B|VPERMT2D|VPERMT2PD|VPERMT2PS|VPERMT2Q|VPERMT2W|VPERMW|" \
    "VPEXPANDD|VPEXPANDQ|VPGATHERDD|VPGATHERDQ|VPGATHERQD|VPGATHERQQ|VPLZCNTD|" \
    "VPLZCNTQ|VPMADD52HUQ|VPMADD52LUQ|VPMASKMOV|VPMOVB2M|VPMOVD2M|VPMOVDB|" \
    "VPMOVDW|VPMOVM2B|VPMOVM2D|VPMOVM2Q|VPMOVM2W|VPMOVQ2M|VPMOVQB|VPMOVQD|" \
    "VPMOVQW|VPMOVSDB|VPMOVSDW|VPMOVSQB|VPMOVSQD|VPMOVSQW|VPMOVSWB|VPMOVUSDB|" \
    "VPMOVUSDW|VPMOVUSQB|VPMOVUSQD|VPMOVUSQW|VPMOVUSWB|VPMOVW2M|VPMOVWB|" \
    "VPMULTISHIFTQB|VPROLD|VPROLQ|VPROLVD|VPROLVQ|VPRORD|VPRORQ|VPRORVD|" \
    "VPRORVQ|VPSCATTERDD|VPSCATTERDQ|VPSCATTERQD|VPSCATTERQQ|VPSLLVD|VPSLLVQ|" \
    "VPSLLVW|VPSRAVD|VPSRAVQ|VPSRAVW|VPSRLVD|VPSRLVQ|VPSRLVW|VPTERNLOGD|" \
    "VPTERNLOGQ|VPTESTMB|VPTESTMD|VPTESTMQ|VPTESTMW|VPTESTNMB|VPTESTNMD|" \
    "VPTESTNMQ|VPTESTNMW|VRANGEPD|VRANGEPS|VRANGESD|VRANGESS|VRCP14PD|VRCP14PS|" \
    "VRCP14SD|VRCP14SS|VREDUCEPD|VREDUCEPS|VREDUCESD|VREDUCESS|VRNDSCALEPD|" \
    "VRNDSCALEPS|VRNDSCALESD|VRNDSCALESS|VRSQRT14PD|VRSQRT14PS|VRSQRT14SD|" \
    "VRSQRT14SS|VSCALEFPD|VSCALEFPS|VSCALEFSD|VSCALEFSS|VSCATTERDPD|" \
    "VSCATTERDPS|VSCATTERQPD|VSCATTERQPS|VSHUFF32x4|VSHUFF64x2|VSHUFI32x4|" \
    "VSHUFI64x2|VTESTPD|VTESTPS|VZEROALL|VZEROUPPER|WAIT|WBINVD|WRFSBASE|" \
    "WRGSBASE|WRMSR|WRPKRU|XABORT|XACQUIRE|XADD|XBEGIN|XCHG|XEND|XGETBV|XLAT|" \
    "XLATB|XOR|XORPD|XORPS|XRELEASE|XRSTOR|XRSTORS|XSAVE|XSAVEC|XSAVEOPT|" \
    "XSAVES|XSETBV|XTEST"
224 
225 vector<TOKEN_DEF> g_TokenList =
226 {
227     //{ TOKEN_TYPE::WhiteSpace, R"((\s+))" },
228     { TOKEN_TYPE::WhiteSpace, R"(([ \t]+))" },
229     { TOKEN_TYPE::NewLine, R"((\n))" },
230     { TOKEN_TYPE::Comment, R"((;.*\n))" },
231     { TOKEN_TYPE::HexNumber, R"(([0-9][0-9a-f]*h))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") },
232     { TOKEN_TYPE::DecNumber, R"(([0-9]+))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") },
233     { TOKEN_TYPE::String, R"((\".*\"))" },
234 
235     { TOKEN_TYPE::BraceOpen, R"((\())"},
236     { TOKEN_TYPE::BraceClose, R"((\)))"},
237     { TOKEN_TYPE::MemRefStart, R"((\[))"},
238     { TOKEN_TYPE::MemRefEnd, R"((\]))"},
239     { TOKEN_TYPE::Colon, R"((\:))"},
240     { TOKEN_TYPE::Operator, R"(([,\+\-\*\/\:]))"},
241     { TOKEN_TYPE::StringDef, R"((<.+>))" },
242 
243     { TOKEN_TYPE::KW_include, R"((include))" FOLLOWED_BY(R"([\s])") },
244     { TOKEN_TYPE::KW_const, R"((\.const))" FOLLOWED_BY(R"([\s])") },
245     { TOKEN_TYPE::KW_code, R"((\.code))" FOLLOWED_BY(R"([\s])") },
246     { TOKEN_TYPE::KW_endprolog, R"((\.endprolog))" FOLLOWED_BY(R"([\s])") },
247     { TOKEN_TYPE::KW_ALIGN, R"((ALIGN))" FOLLOWED_BY(R"([\s])") },
248     { TOKEN_TYPE::KW_EXTERN, R"((EXTERN))" FOLLOWED_BY(R"([\s])") },
249     { TOKEN_TYPE::KW_EXTERN, R"((EXTRN))" FOLLOWED_BY(R"([\s])") },
250     { TOKEN_TYPE::KW_PUBLIC, R"((PUBLIC))" FOLLOWED_BY(R"([\s])") },
251     { TOKEN_TYPE::KW_ENDM, R"((ENDM))" FOLLOWED_BY(R"([\s\;])") },
252     { TOKEN_TYPE::KW_END, R"((END))" FOLLOWED_BY(R"([\s])") },
253     { TOKEN_TYPE::KW_if, R"((if))" FOLLOWED_BY(R"([\s])") },
254     { TOKEN_TYPE::KW_ifdef, R"((ifdef))" FOLLOWED_BY(R"([\s])")},
255     { TOKEN_TYPE::KW_ifndef, R"((ifndef))" FOLLOWED_BY(R"([\s])")},
256     { TOKEN_TYPE::KW_else, R"((else))" FOLLOWED_BY(R"([\s])")},
257     { TOKEN_TYPE::KW_endif, R"((endif))" FOLLOWED_BY(R"([\s])")},
258 
259     { TOKEN_TYPE::KW_allocstack, R"((.allocstack))" FOLLOWED_BY(R"([\s])") },
260     { TOKEN_TYPE::KW_savereg, R"((.savereg))" FOLLOWED_BY(R"([\s])") },
261     { TOKEN_TYPE::KW_savexmm128, R"((.savexmm128))" FOLLOWED_BY(R"([\s])") },
262 
263     { TOKEN_TYPE::KW_DB, R"((DB))" FOLLOWED_BY(R"([\s])") },
264     { TOKEN_TYPE::KW_DW, R"((DW))" FOLLOWED_BY(R"([\s])") },
265     { TOKEN_TYPE::KW_DD, R"((DD))" FOLLOWED_BY(R"([\s])") },
266     { TOKEN_TYPE::KW_DQ, R"((DQ))" FOLLOWED_BY(R"([\s])") },
267     { TOKEN_TYPE::KW_EQU, R"((EQU))" FOLLOWED_BY(R"([\s])") },
268     { TOKEN_TYPE::KW_TEXTEQU, R"((TEXTEQU))" FOLLOWED_BY(R"([\s])") },
269     { TOKEN_TYPE::KW_MACRO, R"((MACRO))" FOLLOWED_BY(R"([\s\;])") },
270     { TOKEN_TYPE::KW_PROC, R"((PROC))" FOLLOWED_BY(R"([\s\;])") },
271     { TOKEN_TYPE::KW_FRAME, R"((FRAME))" FOLLOWED_BY(R"([\s\;])") },
272     { TOKEN_TYPE::KW_ENDP, R"((ENDP))" FOLLOWED_BY(R"([\s\;])") },
273     { TOKEN_TYPE::KW_RECORD, R"((RECORD))" FOLLOWED_BY(R"([\s\;])") },
274     { TOKEN_TYPE::KW_MASK, R"((MASK))" FOLLOWED_BY(R"([\s\;])")},
275     { TOKEN_TYPE::KW_ERRDEF, R"((\.ERRDEF))" FOLLOWED_BY(R"([\s\;])")},
276 
277     { TOKEN_TYPE::Filename, R"(([a-z_][a-z0-9_]*\.inc))" FOLLOWED_BY(R"([\s])") },
278     { TOKEN_TYPE::Instruction, "(" INSTRUCTION ")" FOLLOWED_BY(R"([\s])") },
279     { TOKEN_TYPE::Reg8, R"((al|ah|bl|bh|cl|ch|dl|dh|sil|dil|bpl|spl|r8b|r9b|r10b|r11b|r12b|r13b|r14b|r15b))" FOLLOWED_BY(R"([\s\,])") },
280     { TOKEN_TYPE::Reg16, R"((ax|bx|cx|dx|si|di|bp|sp|r8w|r9w|r10w|r11w|r12w|r13w|r14w|r15w))" FOLLOWED_BY(R"([\s\,])") },
281     { TOKEN_TYPE::Reg32, R"((eax|ebx|ecx|edx|esi|edi|ebp|esp|r8d|r9d|r10d|r11d|r12d|r13d|r14d|r15d))" FOLLOWED_BY(R"([\s\,])") },
282     { TOKEN_TYPE::Reg64, R"((rax|rbx|rcx|rdx|rsi|rdi|rbp|rsp|r8|r9|r10|r11|r12|r13|r14|r15))" FOLLOWED_BY(R"([\s\,])") },
283     { TOKEN_TYPE::RegXmm, R"((xmm0|xmm1|xmm2|xmm3|xmm4|xmm5|xmm6|xmm7|xmm8|xmm9|xmm10|xmm11|xmm12|xmm13|xmm14|xmm15))" FOLLOWED_BY(R"([\s\,])") },
284     { TOKEN_TYPE::BYTE_PTR, R"((BYTE[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
285     { TOKEN_TYPE::WORD_PTR, R"((WORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
286     { TOKEN_TYPE::DWORD_PTR, R"((DWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
287     { TOKEN_TYPE::QWORD_PTR, R"((QWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
288     { TOKEN_TYPE::XMMWORD_PTR, R"((XMMWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
289 
290     { TOKEN_TYPE::Identifier, R"((@@))" FOLLOWED_BY(SEPARATOR)},
291     { TOKEN_TYPE::Identifier, R"((@[a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
292     { TOKEN_TYPE::Identifier, R"(([a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
293 
294 };
295 
// FIXME: use context?
// Running counter used to give every anonymous "@@" label a numeric name.
unsigned int g_label_number = 0;

// Names recorded by add_mem_id() and looked up by is_mem_id().
vector<string> g_identifiers;
300 
301 void
302 add_mem_id(Token& tok)
303 {
304     g_identifiers.push_back(tok.str());
305     //fprintf(stderr, "Added mem id: '%s'\n", tok.str().c_str());
306 }
307 
308 bool
309 is_mem_id(Token& tok)
310 {
311     for (auto id : g_identifiers)
312     {
313         if (id == tok.str())
314         {
315             return true;
316         }
317     }
318 
319     return false;
320 }
321 
// Case-insensitive comparison of two strings.
// Returns true when both have the same length and every pair of characters is
// equal after conversion to lower case.
bool
iequals(const string &a, const string &b)
{
    const size_t sz = a.size();
    if (b.size() != sz)
        return false;

    for (size_t i = 0; i < sz; ++i)
    {
        // tolower() is only defined for values representable as unsigned char
        // (or EOF), so plain char must not be passed directly: it may be
        // negative for characters outside of the ASCII range.
        const int lower_a = tolower(static_cast<unsigned char>(a[i]));
        const int lower_b = tolower(static_cast<unsigned char>(b[i]));
        if (lower_a != lower_b)
            return false;
    }

    return true;
}
333 
334 Token
335 get_expected_token(Token&& tok, TOKEN_TYPE type)
336 {
337     if (tok.type() != type)
338     {
339         throw "Not white space after identifier!\n";
340     }
341 
342     return tok;
343 }
344 
345 Token get_ws(Token&& tok)
346 {
347     int type = tok.type();
348     if (type != TOKEN_TYPE::WhiteSpace)
349     {
350         throw "Not white space after identifier!\n";
351     }
352 
353     return tok;
354 }
355 
356 Token get_ws_or_nl(Token&& tok)
357 {
358     int type = tok.type();
359     if ((type != TOKEN_TYPE::WhiteSpace) &&
360         (type != TOKEN_TYPE::NewLine))
361     {
362         throw "Not white space after identifier!\n";
363     }
364 
365     return tok;
366 }
367 
// Returns true when 'str' is equal to one of the entries of 'list'.
// Both arguments are taken by const reference: this is called for every
// identifier token, and passing by value copied the whole list each time.
bool is_string_in_list(const vector<string> &list, const string &str)
{
    for (const string &entry : list)
    {
        if (entry == str)
        {
            return true;
        }
    }

    return false;
}
380 
381 size_t
382 translate_token(TokenList& tokens, size_t index, const vector<string> &macro_params)
383 {
384     Token tok = tokens[index];
385     switch (tok.type())
386     {
387         case TOKEN_TYPE::Comment:
388             printf("//%s", tok.str().c_str() + 1);
389             break;
390 
391         case TOKEN_TYPE::DecNumber:
392         {
393             unsigned long long num = stoull(tok.str(), nullptr, 10);
394             printf("%llu", num);
395             break;
396         }
397 
398         case TOKEN_TYPE::HexNumber:
399         {
400             string number = tok.str();
401             printf("0x%s", number.substr(0, number.size() - 1).c_str());
402             break;
403         }
404 
405         case TOKEN_TYPE::Identifier:
406             if (is_string_in_list(macro_params, tok.str()))
407             {
408                 printf("\\");
409             }
410             printf("%s", tok.str().c_str());
411             break;
412 
413         // We migt want to improve these
414         case TOKEN_TYPE::BYTE_PTR:
415         case TOKEN_TYPE::WORD_PTR:
416         case TOKEN_TYPE::DWORD_PTR:
417         case TOKEN_TYPE::QWORD_PTR:
418         case TOKEN_TYPE::XMMWORD_PTR:
419 
420         // Check these. valid only in instructions?
421         case TOKEN_TYPE::Reg8:
422         case TOKEN_TYPE::Reg16:
423         case TOKEN_TYPE::Reg32:
424         case TOKEN_TYPE::Reg64:
425         case TOKEN_TYPE::RegXmm:
426         case TOKEN_TYPE::Instruction:
427 
428         case TOKEN_TYPE::WhiteSpace:
429         case TOKEN_TYPE::NewLine:
430         case TOKEN_TYPE::Operator:
431             printf("%s", tok.str().c_str());
432             break;
433 
434         default:
435             printf("%s", tok.str().c_str());
436             break;
437     }
438 
439     return index + 1;
440 }
441 
442 size_t complete_line(TokenList &tokens, size_t index, const vector<string> &macro_params)
443 {
444     while (index < tokens.size())
445     {
446         Token tok = tokens[index];
447         index = translate_token(tokens, index, macro_params);
448         if ((tok.type() == TOKEN_TYPE::NewLine) ||
449             (tok.type() == TOKEN_TYPE::Comment))
450         {
451             break;
452         }
453     }
454 
455     return index;
456 }
457 
458 size_t
459 translate_expression(TokenList &tokens, size_t index, const vector<string> &macro_params)
460 {
461     while (index < tokens.size())
462     {
463         Token tok = tokens[index];
464         switch (tok.type())
465         {
466             case TOKEN_TYPE::NewLine:
467             case TOKEN_TYPE::Comment:
468                 return index;
469 
470             case TOKEN_TYPE::KW_MASK:
471                 printf("MASK_");
472                 index += 2;
473                 break;
474 
475             case TOKEN_TYPE::Instruction:
476                 if (iequals(tok.str(), "and"))
477                 {
478                     printf("&");
479                     index += 1;
480                 }
481                 else if (iequals(tok.str(), "or"))
482                 {
483                     printf("|");
484                     index += 1;
485                 }
486                 else if (iequals(tok.str(), "shl"))
487                 {
488                     printf("<<");
489                     index += 1;
490                 }
491                 else if (iequals(tok.str(), "not"))
492                 {
493                     printf("!");
494                     index += 1;
495                 }
496                 else
497                 {
498                     throw "Invalid expression";
499                 }
500                 break;
501 
502             case TOKEN_TYPE::Operator:
503                 if (tok.str() == ",")
504                 {
505                     return index;
506                 }
507             case TOKEN_TYPE::WhiteSpace:
508             case TOKEN_TYPE::BraceOpen:
509             case TOKEN_TYPE::BraceClose:
510             case TOKEN_TYPE::DecNumber:
511             case TOKEN_TYPE::HexNumber:
512             case TOKEN_TYPE::Identifier:
513                 index = translate_token(tokens, index, macro_params);
514                 break;
515 
516             default:
517                 index = translate_token(tokens, index, macro_params);
518         }
519     }
520 
521     return index;
522 }
523 
524 size_t translate_mem_ref(TokenList& tokens, size_t index, const vector<string>& macro_params)
525 {
526     unsigned int offset = 0;
527 
528     Token tok = tokens[index];
529 
530     if ((tok.type() == TOKEN_TYPE::DecNumber) ||
531         (tok.type() == TOKEN_TYPE::HexNumber))
532     {
533         offset = stoi(tok.str(), nullptr, 0);
534         index += 2;
535     }
536 
537     index = translate_token(tokens, index, macro_params);
538 
539     while (index < tokens.size())
540     {
541         Token tok = tokens[index];
542         index = translate_token(tokens, index, macro_params);
543         if (tok.type() == TOKEN_TYPE::MemRefEnd)
544         {
545             if (offset != 0)
546             {
547                 printf(" + %u", offset);
548             }
549             return index;
550         }
551     }
552 
553     throw "Failed to translate memory ref";
554     return index;
555 }
556 
557 size_t translate_instruction_param(TokenList& tokens, size_t index, const vector<string>& macro_params)
558 {
559     switch (tokens[index].type())
560     {
561         case TOKEN_TYPE::BYTE_PTR:
562         case TOKEN_TYPE::WORD_PTR:
563         case TOKEN_TYPE::DWORD_PTR:
564         case TOKEN_TYPE::QWORD_PTR:
565         case TOKEN_TYPE::XMMWORD_PTR:
566             index = translate_token(tokens, index, macro_params);
567 
568             // Optional whitespace
569             if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
570             {
571                 index = translate_token(tokens, index, macro_params);
572             }
573     }
574 
575     while (index < tokens.size())
576     {
577         Token tok = tokens[index];
578         switch (tok.type())
579         {
580             case TOKEN_TYPE::MemRefStart:
581                 return translate_mem_ref(tokens, index, macro_params);
582 
583             case TOKEN_TYPE::NewLine:
584             case TOKEN_TYPE::Comment:
585                 return index;
586 
587             case TOKEN_TYPE::Operator:
588                 if (tok.str() == ",")
589                     return index;
590 
591             case TOKEN_TYPE::Identifier:
592                 index = translate_token(tokens, index, macro_params);
593                 if (is_mem_id(tok))
594                 {
595                     printf("[rip]");
596                 }
597                 break;
598 
599             default:
600                 index = translate_expression(tokens, index, macro_params);
601         }
602     }
603 
604     return index;
605 }
606 
607 size_t translate_instruction(TokenList& tokens, size_t index, const vector<string>& macro_params)
608 {
609     // Translate the instruction itself
610     index = translate_token(tokens, index, macro_params);
611 
612     // Handle instruction parameters
613     while (index < tokens.size())
614     {
615         // Optional whitespace
616         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
617         {
618             index = translate_token(tokens, index, macro_params);
619         }
620 
621         // Check for parameters
622         Token tok = tokens[index];
623         switch (tok.type())
624         {
625             case TOKEN_TYPE::Comment:
626             case TOKEN_TYPE::NewLine:
627                 return index;
628 
629             case TOKEN_TYPE::WhiteSpace:
630             case TOKEN_TYPE::Operator:
631                 index = translate_token(tokens, index, macro_params);
632                 break;
633 
634             default:
635                 index = translate_instruction_param(tokens, index, macro_params);
636                 break;
637         }
638     }
639 
640     return index;
641 }
642 
643 size_t translate_item(TokenList& tokens, size_t index, const vector<string> &macro_params)
644 {
645     switch (tokens[index].type())
646     {
647         case TOKEN_TYPE::DecNumber:
648         case TOKEN_TYPE::HexNumber:
649         case TOKEN_TYPE::String:
650         case TOKEN_TYPE::WhiteSpace:
651             return translate_token(tokens, index, macro_params);
652     }
653 
654     throw "Failed to translate item";
655     return -1;
656 }
657 
658 size_t translate_list(TokenList& tokens, size_t index, const vector<string> &macro_params)
659 {
660     while (index < tokens.size())
661     {
662         // The item itself
663         index = translate_item(tokens, index, macro_params);
664 
665         // Optional white space
666         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
667         {
668             index = translate_token(tokens, index, macro_params);
669         }
670 
671         // End of list?
672         if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
673             (tokens[index].type() == TOKEN_TYPE::NewLine))
674         {
675             return index;
676         }
677 
678         // We expect a comma here
679         if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
680             (tokens[index].str() != ","))
681         {
682             throw "Unexpected end of list";
683         }
684 
685         index = translate_token(tokens, index, macro_params);
686         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
687         {
688             index = translate_token(tokens, index, macro_params);
689         }
690     }
691 
692     throw "Failed to translate list";
693     return -1;
694 }
695 
696 size_t
697 translate_data_def(TokenList& tokens, size_t index, const vector<string>& macro_params)
698 {
699     Token tok = tokens[index];
700     Token tok1 = get_ws(tokens[index + 1]);
701     string directive, need, have ="";
702 
703     switch (tok.type())
704     {
705         case TOKEN_TYPE::KW_DB:
706             directive = ".byte";
707             break;
708 
709         case TOKEN_TYPE::KW_DW:
710             directive = ".short";
711             break;
712 
713         case TOKEN_TYPE::KW_DD:
714             directive = ".long";
715             break;
716 
717         case TOKEN_TYPE::KW_DQ:
718             directive = ".quad";
719             break;
720     }
721 
722     index += 2;
723 
724     while (index < tokens.size())
725     {
726         // Check if we need '.ascii' for ASCII strings
727         if (tokens[index].str()[0] == '\"')
728         {
729             need = ".ascii";
730         }
731         else
732         {
733             need = directive;
734         }
735 
736         // Output the directive we need (or a comma)
737         if (have == "")
738         {
739             printf("%s ", need.c_str());
740         }
741         else if (have != need)
742         {
743             printf("\n%s ", need.c_str());
744         }
745         else
746         {
747             printf(", ");
748         }
749 
750         have = need;
751 
752         // The item itself
753         index = translate_item(tokens, index, macro_params);
754 
755         // Optional white space
756         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
757         {
758             index = translate_token(tokens, index, macro_params);
759         }
760 
761         // End of list?
762         if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
763             (tokens[index].type() == TOKEN_TYPE::NewLine))
764         {
765             return index;
766         }
767 
768         // We expect a comma here
769         if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
770             (tokens[index].str() != ","))
771         {
772             throw "Unexpected end of list";
773         }
774 
775         // Skip comma and optional white-space
776         index++;
777         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
778         {
779             index++;
780         }
781     }
782 
783     throw "Failed to translate list";
784     return -1;
785 }
786 
787 size_t
788 translate_construct_one_param(string translated, TokenList& tokens, size_t index, const vector<string>& macro_params)
789 {
790     // The next token should be white space
791     Token tok1 = get_ws(tokens[index + 1]);
792 
793     printf("%s%s", translated.c_str(), tok1.str().c_str());
794     return translate_expression(tokens, index + 2, macro_params);
795 }
796 
797 size_t
798 translate_record(TokenList &tokens, size_t index, const vector<string> &macro_params)
799 {
800     unsigned int bits, bitpos = 0;
801     unsigned long long oldmask = 0, mask = 0;
802 
803     Token tok_name = get_expected_token(tokens[index], TOKEN_TYPE::Identifier);
804     index += 4;
805     while (index < tokens.size())
806     {
807         Token tok_member = get_expected_token(tokens[index++], TOKEN_TYPE::Identifier);
808 
809         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
810         {
811             index++;
812         }
813 
814         if (tokens[index++].str() != ":")
815         {
816             throw "Unexpected token";
817         }
818 
819         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
820         {
821             index++;
822         }
823 
824         Token tok_bits = tokens[index++];
825         if ((tok_bits.type() != TOKEN_TYPE::DecNumber) &&
826             (tok_bits.type() != TOKEN_TYPE::HexNumber))
827         {
828             throw "Unexpected token";
829         }
830 
831         bits = stoi(tok_bits.str(), nullptr, 0);
832 
833         printf("%s = %u\n", tok_member.str().c_str(), bitpos);
834 
835         oldmask = (1ULL << bitpos) - 1;
836         bitpos += bits;
837         mask = (1ULL << bitpos) - 1 - oldmask;
838         printf("MASK_%s = 0x%llx\n", tok_member.str().c_str(), mask);
839 
840         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
841         {
842             index++;
843         }
844 
845         if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
846             (tokens[index].type() == TOKEN_TYPE::Comment))
847         {
848             break;
849         }
850 
851         if (tokens[index].str() != ",")
852         {
853             throw "unexpected token";
854         }
855 
856         index++;
857         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
858         {
859             index++;
860         }
861 
862         if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
863             (tokens[index].type() == TOKEN_TYPE::Comment))
864         {
865             index++;
866         }
867 
868         if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
869         {
870             index++;
871         }
872     }
873 
874     return index;
875 }
876 
877 size_t
878 translate_identifier_construct(TokenList& tokens, size_t index, const vector<string> &macro_params)
879 {
880     Token tok = tokens[index];
881     Token tok1 = tokens[index + 1];
882 
883     if (tok1.type() == TOKEN_TYPE::Colon)
884     {
885         if (tok.str() == "@@")
886         {
887             g_label_number++;
888             printf("%u:", g_label_number);
889         }
890         else
891         {
892             printf("%s:", tok.str().c_str());
893         }
894         return index + 2;
895     }
896 
897     Token tok2 = tokens[index + 2];
898 
899     switch (tok2.type())
900     {
901         case TOKEN_TYPE::KW_MACRO:
902             throw "Cannot have a nested macro!";
903 
904         case TOKEN_TYPE::KW_DB:
905         case TOKEN_TYPE::KW_DW:
906         case TOKEN_TYPE::KW_DD:
907         case TOKEN_TYPE::KW_DQ:
908             printf("%s:%s", tok.str().c_str(), tok1.str().c_str());
909             add_mem_id(tok);
910             return translate_data_def(tokens, index + 2, macro_params);
911 
912         case TOKEN_TYPE::KW_EQU:
913             //printf("%s%s", tok.str().c_str(), tok1.str().c_str());
914             printf("#define %s ", tok.str().c_str());
915             return translate_expression(tokens, index + 3, macro_params);
916 
917         case TOKEN_TYPE::KW_TEXTEQU:
918         {
919             Token tok3 = get_ws(tokens[index + 3]);
920             Token tok4 = get_expected_token(tokens[index + 4], TOKEN_TYPE::StringDef);
921 
922             string textdef = tok4.str();
923             printf("#define %s %s", tok.str().c_str(), textdef.substr(1, textdef.size() - 2).c_str());
924             return index + 5;
925         }
926 
927         case TOKEN_TYPE::KW_PROC:
928         {
929             printf(".func %s\n", tok.str().c_str());
930             printf("%s:", tok.str().c_str());
931             index += 3;
932 
933             if ((tokens[index].type() == TOKEN_TYPE::WhiteSpace) &&
934                 (tokens[index + 1].type() == TOKEN_TYPE::KW_FRAME))
935             {
936 #ifdef TARGET_amd64
937                 printf("\n.seh_proc %s\n", tok.str().c_str());
938 #else
939                 printf("\n.cfi_startproc\n");
940 #endif
941                 index += 2;
942             }
943             break;
944         }
945 
946         case TOKEN_TYPE::KW_ENDP:
947         {
948             printf(".seh_endproc\n.endfunc");
949             index += 3;
950             break;
951         }
952 
953         case TOKEN_TYPE::KW_RECORD:
954             index = translate_record(tokens, index, macro_params);
955             break;
956 
957         default:
958             // We don't know what it is, assume it's a macro and treat it like an instruction
959             index = translate_instruction(tokens, index, macro_params);
960             break;
961     }
962 
963     return index;
964 }
965 
966 size_t
967 translate_construct(TokenList& tokens, size_t index, const vector<string> &macro_params)
968 {
969     Token tok = tokens[index];
970 
971     switch (tok.type())
972     {
973         case TOKEN_TYPE::WhiteSpace:
974         case TOKEN_TYPE::NewLine:
975         case TOKEN_TYPE::Comment:
976             return translate_token(tokens, index, macro_params);
977 
978         case TOKEN_TYPE::Identifier:
979             return translate_identifier_construct(tokens, index, macro_params);
980 
981         case TOKEN_TYPE::KW_ALIGN:
982             index = translate_construct_one_param(".align", tokens, index, macro_params);
983             break;
984 
985         case TOKEN_TYPE::KW_allocstack:
986             index = translate_construct_one_param(".seh_stackalloc", tokens, index, macro_params);
987             break;
988 
989         case TOKEN_TYPE::KW_code:
990 #ifdef TARGET_amd64
991             printf(".code64");
992 #else
993             printf(".code");
994 #endif
995             printf(" .intel_syntax noprefix");
996             index++;
997             break;
998 
999         case TOKEN_TYPE::KW_const:
1000             printf(".section .rdata");
1001             index++;
1002             break;
1003 
1004         case TOKEN_TYPE::KW_DB:
1005         case TOKEN_TYPE::KW_DW:
1006         case TOKEN_TYPE::KW_DD:
1007         case TOKEN_TYPE::KW_DQ:
1008             return translate_data_def(tokens, index, macro_params);
1009 
1010         case TOKEN_TYPE::KW_END:
1011             printf("// END\n");
1012             return tokens.size();
1013 
1014         case TOKEN_TYPE::KW_endprolog:
1015             printf(".seh_endprologue");
1016             index++;
1017             break;
1018 
1019         case TOKEN_TYPE::KW_EXTERN:
1020         {
1021             Token tok1 = get_ws_or_nl(tokens[index + 1]);
1022             Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Identifier);
1023             add_mem_id(tok2);
1024             printf("//");
1025             return complete_line(tokens, index, macro_params);
1026         }
1027 
1028         case TOKEN_TYPE::KW_if:
1029         case TOKEN_TYPE::KW_ifdef:
1030         case TOKEN_TYPE::KW_ifndef:
1031         case TOKEN_TYPE::KW_else:
1032         case TOKEN_TYPE::KW_endif:
1033             // TODO: handle parameter differences between "if" and ".if" etc.
1034             printf(".");
1035             return complete_line(tokens, index, macro_params);
1036 
1037         case TOKEN_TYPE::KW_include:
1038         {
1039             // The next token should be white space
1040             Token tok1 = get_ws_or_nl(tokens[index + 1]);
1041             Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Filename);
1042             printf("#include \"%s.h\"", tok2.str().c_str());
1043             index += 3;
1044             break;
1045         }
1046 
1047         case TOKEN_TYPE::KW_PUBLIC:
1048             index = translate_construct_one_param(".global", tokens, index, macro_params);
1049             break;
1050 
1051         case TOKEN_TYPE::KW_savereg:
1052             printf(".seh_savereg");
1053             return complete_line(tokens, index + 1, macro_params);
1054 
1055         case TOKEN_TYPE::KW_savexmm128:
1056             printf(".seh_savexmm");
1057             return complete_line(tokens, index + 1, macro_params);
1058 
1059         case TOKEN_TYPE::Instruction:
1060             index = translate_instruction(tokens, index, macro_params);
1061             break;
1062 
1063         case TOKEN_TYPE::KW_ERRDEF:
1064             printf("//");
1065             return complete_line(tokens, index, macro_params);
1066 
1067         default:
1068             throw "failed to translate construct";
1069     }
1070 
1071     // Skip optional white-space
1072     if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
1073     {
1074         index++;
1075     }
1076 
1077     // Line should end here!
1078     Token end = tokens[index];
1079     if ((end.type() != TOKEN_TYPE::Comment) &&
1080         (end.type() != TOKEN_TYPE::NewLine))
1081     {
1082         throw "unexpected tokens";
1083     }
1084 
1085     return index;
1086 }
1087 
1088 size_t
1089 translate_macro(TokenList& tokens, size_t index)
1090 {
1091     vector<string> macro_params;
1092 
1093     printf(".macro %s", tokens[index].str().c_str());
1094 
1095     // Parse marameters
1096     index += 3;
1097     while (index < tokens.size())
1098     {
1099         Token tok = tokens[index];
1100         switch (tok.type())
1101         {
1102             case TOKEN_TYPE::NewLine:
1103             case TOKEN_TYPE::Comment:
1104                 index = translate_token(tokens, index, macro_params);
1105                 break;
1106 
1107             case TOKEN_TYPE::Identifier:
1108                 macro_params.push_back(tok.str());
1109                 printf("%s", tok.str().c_str());
1110                 index++;
1111                 continue;
1112 
1113             case TOKEN_TYPE::WhiteSpace:
1114             case TOKEN_TYPE::Operator:
1115                 index = translate_token(tokens, index, macro_params);
1116                 continue;
1117         }
1118 
1119         break;
1120     }
1121 
1122     // Parse content
1123     while (index < tokens.size())
1124     {
1125         Token tok = tokens[index];
1126         switch (tok.type())
1127         {
1128             case TOKEN_TYPE::KW_ENDM:
1129                 printf(".endm");
1130                 return index + 1;
1131 
1132             default:
1133                 index = translate_construct(tokens, index, macro_params);
1134         }
1135     }
1136 
1137     throw "Failed to translate macro";
1138     return -1;
1139 }
1140 
1141 void
1142 translate(TokenList &tokens)
1143 {
1144     size_t index = 0;
1145     size_t size = tokens.size();
1146     vector<string> empty_macro_params;
1147 
1148     while (index < size)
1149     {
1150         // Macros are special
1151         if ((tokens[index].type() == TOKEN_TYPE::Identifier) &&
1152             (tokens[index + 1].type() == TOKEN_TYPE::WhiteSpace) &&
1153             (tokens[index + 2].type() == TOKEN_TYPE::KW_MACRO))
1154         {
1155             index = translate_macro(tokens, index);
1156         }
1157         else
1158         {
1159             index = translate_construct(tokens, index, empty_macro_params);
1160         }
1161     }
1162 }
1163 
1164 int main(int argc, char* argv[])
1165 {
1166     if (argc < 2)
1167     {
1168         fprintf(stderr, "Invalid parameter!\n");
1169         return -1;
1170     }
1171 
1172 #if PROFILING_ENABLED
1173     time_t start_time = time(NULL);
1174 #endif
1175 
1176     try
1177     {
1178         // Open and read the input file
1179         string filename(argv[1]);
1180         ifstream file(filename);
1181         stringstream buffer;
1182         buffer << file.rdbuf();
1183         string text = buffer.str();
1184 
1185         // Create the tokenizer
1186         Tokenizer tokenizer(g_TokenList);
1187 
1188         // Get a token list
1189         TokenList toklist(tokenizer, text);
1190 
1191         // Now translate the tokens
1192         translate(toklist);
1193     }
1194     catch (const char* message)
1195     {
1196         fprintf(stderr, "Exception caught: '%s'\n", message);
1197         return -2;
1198     }
1199 
1200 #if PROFILING_ENABLED
1201     time_t total_time = time(NULL) + 1 - start_time;
1202     fprintf(stderr, "total_time = %llu\n", total_time);
1203     fprintf(stderr, "search_time = %llu\n", search_time);
1204     fprintf(stderr, "search: %llu %%\n", search_time * 100 / total_time);
1205 #endif
1206 
1207     return 0;
1208 }
1209