1 /* 2 * PROJECT: ReactOS host tools 3 * LICENSE: MIT (https://spdx.org/licenses/MIT) 4 * PURPOSE: ASM preprocessor 5 * COPYRIGHT: Copyright 2021 Timo Kreuzer <timo.kreuzer@reactos.org> 6 */ 7 8 // Optimize even on debug builds, because otherwise it's ridiculously slow 9 #ifdef _MSC_VER 10 #pragma optimize("gst", on) 11 #pragma auto_inline(on) 12 #else 13 #pragma GCC optimize("O3,inline") 14 #endif 15 16 #include "tokenizer.hpp" 17 #include <cstdlib> 18 #include <cstdio> 19 #include <sstream> 20 #include <ctime> 21 22 #define PROFILING_ENABLED 0 23 24 using namespace std; 25 26 time_t search_time; 27 28 enum TOKEN_TYPE 29 { 30 Invalid = -1, 31 Eof, 32 WhiteSpace, 33 NewLine, 34 Comment, 35 DecNumber, 36 HexNumber, 37 String, 38 39 BraceOpen, 40 BraceClose, 41 MemRefStart, 42 MemRefEnd, 43 Colon, 44 Operator, 45 StringDef, 46 47 KW_include, 48 KW_const, 49 KW_code, 50 KW_endprolog, 51 KW_ALIGN, 52 KW_EXTERN, 53 KW_PUBLIC, 54 KW_ENDM, 55 KW_END, 56 KW_if, 57 KW_ifdef, 58 KW_ifndef, 59 KW_else, 60 KW_endif, 61 62 KW_allocstack, 63 KW_savereg, 64 KW_savexmm128, 65 66 KW_DB, 67 KW_DW, 68 KW_DD, 69 KW_DQ, 70 KW_EQU, 71 KW_TEXTEQU, 72 KW_MACRO, 73 KW_PROC, 74 KW_FRAME, 75 KW_ENDP, 76 KW_RECORD, 77 78 KW_MASK, 79 KW_ERRDEF, 80 81 Filename, 82 Instruction, 83 Reg8, 84 Reg16, 85 Reg32, 86 Reg64, 87 RegXmm, 88 BYTE_PTR, 89 WORD_PTR, 90 DWORD_PTR, 91 QWORD_PTR, 92 XMMWORD_PTR, 93 94 LabelName, 95 Identifier 96 }; 97 98 int fake_printf(const char* format, ...) 99 { 100 return 0; 101 } 102 103 //#define printf fake_printf 104 105 // Use a look-ahead for following characters, not included into the match 106 //#define FOLLOWED_BY(x) R"((?=)" x R"())" 107 #define FOLLOWED_BY(x) x 108 109 #define ANY_CHAR R"((?:.|\n))" 110 #define WHITESPACE R"((?:[ \t]++))" 111 #define NEWLINE R"([\n])" 112 #define WS_OR_NL R"((?:)" WHITESPACE "|" NEWLINE R"()+)" 113 #define SEPARATOR R"([\s,\=\+\-\*\/\:\~\[\]])" 114 115 #define INSTRUCTION \ 116 "AAA|AAD|AAM|AAS|ADC|ADCX|ADD|ADDPD|ADDPS|ADDSD|ADDSS|ADDSUBPD|ADDSUBPS|" \ 117 "ADOX|AESDEC|AESDECLAST|AESENC|AESENCLAST|AESIMC|AESKEYGENASSIST|AND|ANDN|" \ 118 "ANDNPD|ANDNPS|ANDPD|ANDPS|ARPL|BEXTR|BLENDPD|BLENDPS|BLENDVPD|BLENDVPS|" \ 119 "BLSI|BLSMSK|BLSR|BNDCL|BNDCN|BNDCU|BNDLDX|BNDMK|BNDMOV|BNDSTX|BOUND|BSF|" \ 120 "BSR|BSWAP|BT|BTC|BTR|BTS|BZHI|CALL|CBW|CDQ|CDQE|CLAC|CLC|CLD|CLDEMOTE|" \ 121 "CLFLUSH|CLFLUSHOPT|CLI|CLTS|CLWB|CMC|CMOVcc|CMP|CMPPD|CMPPS|CMPS|CMPSB|" \ 122 "CMPSD|CMPSQ|CMPSS|CMPSW|CMPXCHG|CMPXCHG16B|CMPXCHG8B|COMISD|COMISS|CPUID|" \ 123 "CQO|CRC32|CVTDQ2PD|CVTDQ2PS|CVTPD2DQ|CVTPD2PI|CVTPD2PS|CVTPI2PD|CVTPI2PS|" \ 124 "CVTPS2DQ|CVTPS2PD|CVTPS2PI|CVTSD2SI|CVTSD2SS|CVTSI2SD|CVTSI2SS|CVTSS2SD|" \ 125 "CVTSS2SI|CVTTPD2DQ|CVTTPD2PI|CVTTPS2DQ|CVTTPS2PI|CVTTSD2SI|CVTTSS2SI|CWD|" \ 126 "CWDE|DAA|DAS|DEC|DIV|DIVPD|DIVPS|DIVSD|DIVSS|DPPD|DPPS|EMMS|ENTER|" \ 127 "EXTRACTPS|F2XM1|FABS|FADD|FADDP|FBLD|FBSTP|FCHS|FCLEX|FCMOVcc|FCOM|FCOMI|" \ 128 "FCOMIP|FCOMP|FCOMPP|FCOS|FDECSTP|FDIV|FDIVP|FDIVR|FDIVRP|FFREE|FIADD|" \ 129 "FICOM|FICOMP|FIDIV|FIDIVR|FILD|FIMUL|FINCSTP|FINIT|FIST|FISTP|FISTTP|" \ 130 "FISUB|FISUBR|FLD|FLD1|FLDCW|FLDENV|FLDL2E|FLDL2T|FLDLG2|FLDLN2|FLDPI|" \ 131 "FLDZ|FMUL|FMULP|FNCLEX|FNINIT|FNOP|FNSAVE|FNSTCW|FNSTENV|FNSTSW|FPATAN|" \ 132 "FPREM|FPREM1|FPTAN|FRNDINT|FRSTOR|FSAVE|FSCALE|FSIN|FSINCOS|FSQRT|FST|" \ 133 "FSTCW|FSTENV|FSTP|FSTSW|FSUB|FSUBP|FSUBR|FSUBRP|FTST|FUCOM|FUCOMI|" \ 134 "FUCOMIP|FUCOMP|FUCOMPP|FWAIT|FXAM|FXCH|FXRSTOR|FXSAVE|FXTRACT|FYL2X|" \ 135 "FYL2XP1|GF2P8AFFINEINVQB|GF2P8AFFINEQB|GF2P8MULB|HADDPD|HADDPS|HLT|" \ 136 "HSUBPD|HSUBPS|IDIV|IMUL|IN|INC|INS|INSB|INSD|INSERTPS|INSW|INT|INT1|INT3|" \ 137 "INTO|INVD|INVLPG|INVPCID|IRET|IRETD|JMP|Jcc|KADDB|KADDD|KADDQ|KADDW|" \ 138 "KANDB|KANDD|KANDNB|KANDND|KANDNQ|KANDNW|KANDQ|KANDW|KMOVB|KMOVD|KMOVQ|" \ 139 "KMOVW|KNOTB|KNOTD|KNOTQ|KNOTW|KORB|KORD|KORQ|KORTESTB|KORTESTD|KORTESTQ|" \ 140 "KORTESTW|KORW|KSHIFTLB|KSHIFTLD|KSHIFTLQ|KSHIFTLW|KSHIFTRB|KSHIFTRD|" \ 141 "KSHIFTRQ|KSHIFTRW|KTESTB|KTESTD|KTESTQ|KTESTW|KUNPCKBW|KUNPCKDQ|KUNPCKWD|" \ 142 "KXNORB|KXNORD|KXNORQ|KXNORW|KXORB|KXORD|KXORQ|KXORW|LAHF|LAR|LDDQU|" \ 143 "LDMXCSR|LDS|LEA|LEAVE|LES|LFENCE|LFS|LGDT|LGS|LIDT|LLDT|LMSW|LOCK|LODS|" \ 144 "LODSB|LODSD|LODSQ|LODSW|LOOP|LOOPcc|LSL|LSS|LTR|LZCNT|MASKMOVDQU|MASKMOVQ|" \ 145 "MAXPD|MAXPS|MAXSD|MAXSS|MFENCE|MINPD|MINPS|MINSD|MINSS|MONITOR|MOV|MOVAPD|" \ 146 "MOVAPS|MOVBE|MOVD|MOVDDUP|MOVDIR64B|MOVDIRI|MOVDQ2Q|MOVDQA|MOVDQU|MOVHLPS|" \ 147 "MOVHPD|MOVHPS|MOVLHPS|MOVLPD|MOVLPS|MOVMSKPD|MOVMSKPS|MOVNTDQ|MOVNTDQA|" \ 148 "MOVNTI|MOVNTPD|MOVNTPS|MOVNTQ|MOVQ|MOVQ2DQ|MOVS|MOVSB|MOVSD|MOVSHDUP|" \ 149 "MOVSLDUP|MOVSQ|MOVSS|MOVSW|MOVSX|MOVSXD|MOVUPD|MOVUPS|MOVZX|MPSADBW|MUL|" \ 150 "MULPD|MULPS|MULSD|MULSS|MULX|MWAIT|NEG|NOP|NOT|OR|ORPD|ORPS|OUT|OUTS|" \ 151 "OUTSB|OUTSD|OUTSW|PABSB|PABSD|PABSQ|PABSW|PACKSSDW|PACKSSWB|PACKUSDW|" \ 152 "PACKUSWB|PADDB|PADDD|PADDQ|PADDSB|PADDSW|PADDUSB|PADDUSW|PADDW|PALIGNR|" \ 153 "PAND|PANDN|PAUSE|PAVGB|PAVGW|PBLENDVB|PBLENDW|PCLMULQDQ|PCMPEQB|PCMPEQD|" \ 154 "PCMPEQQ|PCMPEQW|PCMPESTRI|PCMPESTRM|PCMPGTB|PCMPGTD|PCMPGTQ|PCMPGTW|" \ 155 "PCMPISTRI|PCMPISTRM|PDEP|PEXT|PEXTRB|PEXTRD|PEXTRQ|PEXTRW|PHADDD|PHADDSW|" \ 156 "PHADDW|PHMINPOSUW|PHSUBD|PHSUBSW|PHSUBW|PINSRB|PINSRD|PINSRQ|PINSRW|" \ 157 "PMADDUBSW|PMADDWD|PMAXSB|PMAXSD|PMAXSQ|PMAXSW|PMAXUB|PMAXUD|PMAXUQ|PMAXUW|" \ 158 "PMINSB|PMINSD|PMINSQ|PMINSW|PMINUB|PMINUD|PMINUQ|PMINUW|PMOVMSKB|PMOVSX|" \ 159 "PMOVZX|PMULDQ|PMULHRSW|PMULHUW|PMULHW|PMULLD|PMULLQ|PMULLW|PMULUDQ|POP|" \ 160 "POPA|POPAD|POPCNT|POPF|POPFD|POPFQ|POR|PREFETCHW|PREFETCHh|PSADBW|PSHUFB|" \ 161 "PSHUFD|PSHUFHW|PSHUFLW|PSHUFW|PSIGNB|PSIGND|PSIGNW|PSLLD|PSLLDQ|PSLLQ|" \ 162 "PSLLW|PSRAD|PSRAQ|PSRAW|PSRLD|PSRLDQ|PSRLQ|PSRLW|PSUBB|PSUBD|PSUBQ|PSUBSB|" \ 163 "PSUBSW|PSUBUSB|PSUBUSW|PSUBW|PTEST|PTWRITE|PUNPCKHBW|PUNPCKHDQ|PUNPCKHQDQ|" \ 164 "PUNPCKHWD|PUNPCKLBW|PUNPCKLDQ|PUNPCKLQDQ|PUNPCKLWD|PUSH|PUSHA|PUSHAD|" \ 165 "PUSHF|PUSHFD|PUSHFQ|PXOR|RCL|RCPPS|RCPSS|RCR|RDFSBASE|RDGSBASE|RDMSR|" \ 166 "RDPID|RDPKRU|RDPMC|RDRAND|RDSEED|RDTSC|RDTSCP|REP|REPE|REPNE|REPNZ|REPZ|" \ 167 "RET|ROL|ROR|RORX|ROUNDPD|ROUNDPS|ROUNDSD|ROUNDSS|RSM|RSQRTPS|RSQRTSS|SAHF|" \ 168 "SAL|SAR|SARX|SBB|SCAS|SCASB|SCASD|SCASW|SETcc|SFENCE|SGDT|SHA1MSG1|" \ 169 "SHA1MSG2|SHA1NEXTE|SHA1RNDS4|SHA256MSG1|SHA256MSG2|SHA256RNDS2|SHL|SHLD|" \ 170 "SHLX|SHR|SHRD|SHRX|SHUFPD|SHUFPS|SIDT|SLDT|SMSW|SQRTPD|SQRTPS|SQRTSD|" \ 171 "SQRTSS|STAC|STC|STD|STI|STMXCSR|STOS|STOSB|STOSD|STOSQ|STOSW|STR|SUB|" \ 172 "SUBPD|SUBPS|SUBSD|SUBSS|SWAPGS|SYSCALL|SYSENTER|SYSEXIT|SYSRET|TEST|" \ 173 "TPAUSE|TZCNT|UCOMISD|UCOMISS|UD|UMONITOR|UMWAIT|UNPCKHPD|UNPCKHPS|" \ 174 "UNPCKLPD|UNPCKLPS|VALIGND|VALIGNQ|VBLENDMPD|VBLENDMPS|VBROADCAST|" \ 175 "VCOMPRESSPD|VCOMPRESSPS|VCVTPD2QQ|VCVTPD2UDQ|VCVTPD2UQQ|VCVTPH2PS|" \ 176 "VCVTPS2PH|VCVTPS2QQ|VCVTPS2UDQ|VCVTPS2UQQ|VCVTQQ2PD|VCVTQQ2PS|VCVTSD2USI|" \ 177 "VCVTSS2USI|VCVTTPD2QQ|VCVTTPD2UDQ|VCVTTPD2UQQ|VCVTTPS2QQ|VCVTTPS2UDQ|" \ 178 "VCVTTPS2UQQ|VCVTTSD2USI|VCVTTSS2USI|VCVTUDQ2PD|VCVTUDQ2PS|VCVTUQQ2PD|" \ 179 "VCVTUQQ2PS|VCVTUSI2SD|VCVTUSI2SS|VDBPSADBW|VERR|VERW|VEXPANDPD|VEXPANDPS|" \ 180 "VEXTRACTF128|VEXTRACTF32x4|VEXTRACTF32x8|VEXTRACTF64x2|VEXTRACTF64x4|" \ 181 "VEXTRACTI128|VEXTRACTI32x4|VEXTRACTI32x8|VEXTRACTI64x2|VEXTRACTI64x4|" \ 182 "VFIXUPIMMPD|VFIXUPIMMPS|VFIXUPIMMSD|VFIXUPIMMSS|VFMADD132PD|VFMADD132PS|" \ 183 "VFMADD132SD|VFMADD132SS|VFMADD213PD|VFMADD213PS|VFMADD213SD|VFMADD213SS|" \ 184 "VFMADD231PD|VFMADD231PS|VFMADD231SD|VFMADD231SS|VFMADDSUB132PD|" \ 185 "VFMADDSUB132PS|VFMADDSUB213PD|VFMADDSUB213PS|VFMADDSUB231PD|" \ 186 "VFMADDSUB231PS|VFMSUB132PD|VFMSUB132PS|VFMSUB132SD|VFMSUB132SS|" \ 187 "VFMSUB213PD|VFMSUB213PS|VFMSUB213SD|VFMSUB213SS|VFMSUB231PD|VFMSUB231PS|" \ 188 "VFMSUB231SD|VFMSUB231SS|VFMSUBADD132PD|VFMSUBADD132PS|VFMSUBADD213PD|" \ 189 "VFMSUBADD213PS|VFMSUBADD231PD|VFMSUBADD231PS|VFNMADD132PD|VFNMADD132PS|" \ 190 "VFNMADD132SD|VFNMADD132SS|VFNMADD213PD|VFNMADD213PS|VFNMADD213SD|" \ 191 "VFNMADD213SS|VFNMADD231PD|VFNMADD231PS|VFNMADD231SD|VFNMADD231SS|" \ 192 "VFNMSUB132PD|VFNMSUB132PS|VFNMSUB132SD|VFNMSUB132SS|VFNMSUB213PD|" \ 193 "VFNMSUB213PS|VFNMSUB213SD|VFNMSUB213SS|VFNMSUB231PD|VFNMSUB231PS|" \ 194 "VFNMSUB231SD|VFNMSUB231SS|VFPCLASSPD|VFPCLASSPS|VFPCLASSSD|VFPCLASSSS|" \ 195 "VGATHERDPD|VGATHERDPS|VGATHERQPD|VGATHERQPS|VGETEXPPD|VGETEXPPS|VGETEXPSD|" \ 196 "VGETEXPSS|VGETMANTPD|VGETMANTPS|VGETMANTSD|VGETMANTSS|VINSERTF128|" \ 197 "VINSERTF32x4|VINSERTF32x8|VINSERTF64x2|VINSERTF64x4|VINSERTI128|" \ 198 "VINSERTI32x4|VINSERTI32x8|VINSERTI64x2|VINSERTI64x4|VMASKMOV|VMOVDQA32|" \ 199 "VMOVDQA64|VMOVDQU16|VMOVDQU32|VMOVDQU64|VMOVDQU8|VPBLENDD|VPBLENDMB|" \ 200 "VPBLENDMD|VPBLENDMQ|VPBLENDMW|VPBROADCAST|VPBROADCASTB|VPBROADCASTD|" \ 201 "VPBROADCASTM|VPBROADCASTQ|VPBROADCASTW|VPCMPB|VPCMPD|VPCMPQ|VPCMPUB|" \ 202 "VPCMPUD|VPCMPUQ|VPCMPUW|VPCMPW|VPCOMPRESSD|VPCOMPRESSQ|VPCONFLICTD|" \ 203 "VPCONFLICTQ|VPERM2F128|VPERM2I128|VPERMB|VPERMD|VPERMI2B|VPERMI2D|" \ 204 "VPERMI2PD|VPERMI2PS|VPERMI2Q|VPERMI2W|VPERMILPD|VPERMILPS|VPERMPD|VPERMPS|" \ 205 "VPERMQ|VPERMT2B|VPERMT2D|VPERMT2PD|VPERMT2PS|VPERMT2Q|VPERMT2W|VPERMW|" \ 206 "VPEXPANDD|VPEXPANDQ|VPGATHERDD|VPGATHERDQ|VPGATHERQD|VPGATHERQQ|VPLZCNTD|" \ 207 "VPLZCNTQ|VPMADD52HUQ|VPMADD52LUQ|VPMASKMOV|VPMOVB2M|VPMOVD2M|VPMOVDB|" \ 208 "VPMOVDW|VPMOVM2B|VPMOVM2D|VPMOVM2Q|VPMOVM2W|VPMOVQ2M|VPMOVQB|VPMOVQD|" \ 209 "VPMOVQW|VPMOVSDB|VPMOVSDW|VPMOVSQB|VPMOVSQD|VPMOVSQW|VPMOVSWB|VPMOVUSDB|" \ 210 "VPMOVUSDW|VPMOVUSQB|VPMOVUSQD|VPMOVUSQW|VPMOVUSWB|VPMOVW2M|VPMOVWB|" \ 211 "VPMULTISHIFTQB|VPROLD|VPROLQ|VPROLVD|VPROLVQ|VPRORD|VPRORQ|VPRORVD|" \ 212 "VPRORVQ|VPSCATTERDD|VPSCATTERDQ|VPSCATTERQD|VPSCATTERQQ|VPSLLVD|VPSLLVQ|" \ 213 "VPSLLVW|VPSRAVD|VPSRAVQ|VPSRAVW|VPSRLVD|VPSRLVQ|VPSRLVW|VPTERNLOGD|" \ 214 "VPTERNLOGQ|VPTESTMB|VPTESTMD|VPTESTMQ|VPTESTMW|VPTESTNMB|VPTESTNMD|" \ 215 "VPTESTNMQ|VPTESTNMW|VRANGEPD|VRANGEPS|VRANGESD|VRANGESS|VRCP14PD|VRCP14PS|" \ 216 "VRCP14SD|VRCP14SS|VREDUCEPD|VREDUCEPS|VREDUCESD|VREDUCESS|VRNDSCALEPD|" \ 217 "VRNDSCALEPS|VRNDSCALESD|VRNDSCALESS|VRSQRT14PD|VRSQRT14PS|VRSQRT14SD|" \ 218 "VRSQRT14SS|VSCALEFPD|VSCALEFPS|VSCALEFSD|VSCALEFSS|VSCATTERDPD|" \ 219 "VSCATTERDPS|VSCATTERQPD|VSCATTERQPS|VSHUFF32x4|VSHUFF64x2|VSHUFI32x4|" \ 220 "VSHUFI64x2|VTESTPD|VTESTPS|VZEROALL|VZEROUPPER|WAIT|WBINVD|WRFSBASE|" \ 221 "WRGSBASE|WRMSR|WRPKRU|XABORT|XACQUIRE|XADD|XBEGIN|XCHG|XEND|XGETBV|XLAT|" \ 222 "XLATB|XOR|XORPD|XORPS|XRELEASE|XRSTOR|XRSTORS|XSAVE|XSAVEC|XSAVEOPT|" \ 223 "XSAVES|XSETBV|XTEST" 224 225 vector<TOKEN_DEF> g_TokenList = 226 { 227 //{ TOKEN_TYPE::WhiteSpace, R"((\s+))" }, 228 { TOKEN_TYPE::WhiteSpace, R"(([ \t]+))" }, 229 { TOKEN_TYPE::NewLine, R"((\n))" }, 230 { TOKEN_TYPE::Comment, R"((;.*\n))" }, 231 { TOKEN_TYPE::HexNumber, R"(([0-9][0-9a-f]*h))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") }, 232 { TOKEN_TYPE::DecNumber, R"(([0-9]+))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") }, 233 { TOKEN_TYPE::String, R"((\".*\"))" }, 234 235 { TOKEN_TYPE::BraceOpen, R"((\())"}, 236 { TOKEN_TYPE::BraceClose, R"((\)))"}, 237 { TOKEN_TYPE::MemRefStart, R"((\[))"}, 238 { TOKEN_TYPE::MemRefEnd, R"((\]))"}, 239 { TOKEN_TYPE::Colon, R"((\:))"}, 240 { TOKEN_TYPE::Operator, R"(([,\+\-\*\/\:]))"}, 241 { TOKEN_TYPE::StringDef, R"((<.+>))" }, 242 243 { TOKEN_TYPE::KW_include, R"((include))" FOLLOWED_BY(R"([\s])") }, 244 { TOKEN_TYPE::KW_const, R"((\.const))" FOLLOWED_BY(R"([\s])") }, 245 { TOKEN_TYPE::KW_code, R"((\.code))" FOLLOWED_BY(R"([\s])") }, 246 { TOKEN_TYPE::KW_endprolog, R"((\.endprolog))" FOLLOWED_BY(R"([\s])") }, 247 { TOKEN_TYPE::KW_ALIGN, R"((ALIGN))" FOLLOWED_BY(R"([\s])") }, 248 { TOKEN_TYPE::KW_EXTERN, R"((EXTERN))" FOLLOWED_BY(R"([\s])") }, 249 { TOKEN_TYPE::KW_EXTERN, R"((EXTRN))" FOLLOWED_BY(R"([\s])") }, 250 { TOKEN_TYPE::KW_PUBLIC, R"((PUBLIC))" FOLLOWED_BY(R"([\s])") }, 251 { TOKEN_TYPE::KW_ENDM, R"((ENDM))" FOLLOWED_BY(R"([\s\;])") }, 252 { TOKEN_TYPE::KW_END, R"((END))" FOLLOWED_BY(R"([\s])") }, 253 { TOKEN_TYPE::KW_if, R"((if))" FOLLOWED_BY(R"([\s])") }, 254 { TOKEN_TYPE::KW_ifdef, R"((ifdef))" FOLLOWED_BY(R"([\s])")}, 255 { TOKEN_TYPE::KW_ifndef, R"((ifndef))" FOLLOWED_BY(R"([\s])")}, 256 { TOKEN_TYPE::KW_else, R"((else))" FOLLOWED_BY(R"([\s])")}, 257 { TOKEN_TYPE::KW_endif, R"((endif))" FOLLOWED_BY(R"([\s])")}, 258 259 { TOKEN_TYPE::KW_allocstack, R"((.allocstack))" FOLLOWED_BY(R"([\s])") }, 260 { TOKEN_TYPE::KW_savereg, R"((.savereg))" FOLLOWED_BY(R"([\s])") }, 261 { TOKEN_TYPE::KW_savexmm128, R"((.savexmm128))" FOLLOWED_BY(R"([\s])") }, 262 263 { TOKEN_TYPE::KW_DB, R"((DB))" FOLLOWED_BY(R"([\s])") }, 264 { TOKEN_TYPE::KW_DW, R"((DW))" FOLLOWED_BY(R"([\s])") }, 265 { TOKEN_TYPE::KW_DD, R"((DD))" FOLLOWED_BY(R"([\s])") }, 266 { TOKEN_TYPE::KW_DQ, R"((DQ))" FOLLOWED_BY(R"([\s])") }, 267 { TOKEN_TYPE::KW_EQU, R"((EQU))" FOLLOWED_BY(R"([\s])") }, 268 { TOKEN_TYPE::KW_TEXTEQU, R"((TEXTEQU))" FOLLOWED_BY(R"([\s])") }, 269 { TOKEN_TYPE::KW_MACRO, R"((MACRO))" FOLLOWED_BY(R"([\s\;])") }, 270 { TOKEN_TYPE::KW_PROC, R"((PROC))" FOLLOWED_BY(R"([\s\;])") }, 271 { TOKEN_TYPE::KW_FRAME, R"((FRAME))" FOLLOWED_BY(R"([\s\;])") }, 272 { TOKEN_TYPE::KW_ENDP, R"((ENDP))" FOLLOWED_BY(R"([\s\;])") }, 273 { TOKEN_TYPE::KW_RECORD, R"((RECORD))" FOLLOWED_BY(R"([\s\;])") }, 274 { TOKEN_TYPE::KW_MASK, R"((MASK))" FOLLOWED_BY(R"([\s\;])")}, 275 { TOKEN_TYPE::KW_ERRDEF, R"((\.ERRDEF))" FOLLOWED_BY(R"([\s\;])")}, 276 277 { TOKEN_TYPE::Filename, R"(([a-z_][a-z0-9_]*\.inc))" FOLLOWED_BY(R"([\s])") }, 278 { TOKEN_TYPE::Instruction, "(" INSTRUCTION ")" FOLLOWED_BY(R"([\s])") }, 279 { TOKEN_TYPE::Reg8, R"((al|ah|bl|bh|cl|ch|dl|dh|sil|dil|bpl|spl|r8b|r9b|r10b|r11b|r12b|r13b|r14b|r15b))" FOLLOWED_BY(R"([\s\,])") }, 280 { TOKEN_TYPE::Reg16, R"((ax|bx|cx|dx|si|di|bp|sp|r8w|r9w|r10w|r11w|r12w|r13w|r14w|r15w))" FOLLOWED_BY(R"([\s\,])") }, 281 { TOKEN_TYPE::Reg32, R"((eax|ebx|ecx|edx|esi|edi|ebp|esp|r8d|r9d|r10d|r11d|r12d|r13d|r14d|r15d))" FOLLOWED_BY(R"([\s\,])") }, 282 { TOKEN_TYPE::Reg64, R"((rax|rbx|rcx|rdx|rsi|rdi|rbp|rsp|r8|r9|r10|r11|r12|r13|r14|r15))" FOLLOWED_BY(R"([\s\,])") }, 283 { TOKEN_TYPE::RegXmm, R"((xmm0|xmm1|xmm2|xmm3|xmm4|xmm5|xmm6|xmm7|xmm8|xmm9|xmm10|xmm11|xmm12|xmm13|xmm14|xmm15))" FOLLOWED_BY(R"([\s\,])") }, 284 { TOKEN_TYPE::BYTE_PTR, R"((BYTE[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 285 { TOKEN_TYPE::WORD_PTR, R"((WORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 286 { TOKEN_TYPE::DWORD_PTR, R"((DWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 287 { TOKEN_TYPE::QWORD_PTR, R"((QWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 288 { TOKEN_TYPE::XMMWORD_PTR, R"((XMMWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 289 290 { TOKEN_TYPE::Identifier, R"((@@))" FOLLOWED_BY(SEPARATOR)}, 291 { TOKEN_TYPE::Identifier, R"((@[a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)}, 292 { TOKEN_TYPE::Identifier, R"(([a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)}, 293 294 }; 295 296 // FIXME: use context? 297 unsigned int g_label_number = 0; 298 299 vector<string> g_identifiers; 300 301 void 302 add_mem_id(Token& tok) 303 { 304 g_identifiers.push_back(tok.str()); 305 //fprintf(stderr, "Added mem id: '%s'\n", tok.str().c_str()); 306 } 307 308 bool 309 is_mem_id(Token& tok) 310 { 311 for (auto id : g_identifiers) 312 { 313 if (id == tok.str()) 314 { 315 return true; 316 } 317 } 318 319 return false; 320 } 321 322 bool 323 iequals(const string &a, const string &b) 324 { 325 size_t sz = a.size(); 326 if (b.size() != sz) 327 return false; 328 for (unsigned int i = 0; i < sz; ++i) 329 if (tolower(a[i]) != tolower(b[i])) 330 return false; 331 return true; 332 } 333 334 Token 335 get_expected_token(Token&& tok, TOKEN_TYPE type) 336 { 337 if (tok.type() != type) 338 { 339 throw "Not white space after identifier!\n"; 340 } 341 342 return tok; 343 } 344 345 Token get_ws(Token&& tok) 346 { 347 int type = tok.type(); 348 if (type != TOKEN_TYPE::WhiteSpace) 349 { 350 throw "Not white space after identifier!\n"; 351 } 352 353 return tok; 354 } 355 356 Token get_ws_or_nl(Token&& tok) 357 { 358 int type = tok.type(); 359 if ((type != TOKEN_TYPE::WhiteSpace) && 360 (type != TOKEN_TYPE::NewLine)) 361 { 362 throw "Not white space after identifier!\n"; 363 } 364 365 return tok; 366 } 367 368 bool is_string_in_list(vector<string> list, string str) 369 { 370 for (string &s : list) 371 { 372 if (s == str) 373 { 374 return true; 375 } 376 } 377 378 return false; 379 } 380 381 size_t 382 translate_token(TokenList& tokens, size_t index, const vector<string> ¯o_params) 383 { 384 Token tok = tokens[index]; 385 switch (tok.type()) 386 { 387 case TOKEN_TYPE::Comment: 388 printf("//%s", tok.str().c_str() + 1); 389 break; 390 391 case TOKEN_TYPE::DecNumber: 392 { 393 unsigned long long num = stoull(tok.str(), nullptr, 10); 394 printf("%llu", num); 395 break; 396 } 397 398 case TOKEN_TYPE::HexNumber: 399 { 400 string number = tok.str(); 401 printf("0x%s", number.substr(0, number.size() - 1).c_str()); 402 break; 403 } 404 405 case TOKEN_TYPE::Identifier: 406 if (is_string_in_list(macro_params, tok.str())) 407 { 408 printf("\\"); 409 } 410 printf("%s", tok.str().c_str()); 411 break; 412 413 // We migt want to improve these 414 case TOKEN_TYPE::BYTE_PTR: 415 case TOKEN_TYPE::WORD_PTR: 416 case TOKEN_TYPE::DWORD_PTR: 417 case TOKEN_TYPE::QWORD_PTR: 418 case TOKEN_TYPE::XMMWORD_PTR: 419 420 // Check these. valid only in instructions? 421 case TOKEN_TYPE::Reg8: 422 case TOKEN_TYPE::Reg16: 423 case TOKEN_TYPE::Reg32: 424 case TOKEN_TYPE::Reg64: 425 case TOKEN_TYPE::RegXmm: 426 case TOKEN_TYPE::Instruction: 427 428 case TOKEN_TYPE::WhiteSpace: 429 case TOKEN_TYPE::NewLine: 430 case TOKEN_TYPE::Operator: 431 printf("%s", tok.str().c_str()); 432 break; 433 434 default: 435 printf("%s", tok.str().c_str()); 436 break; 437 } 438 439 return index + 1; 440 } 441 442 size_t complete_line(TokenList &tokens, size_t index, const vector<string> ¯o_params) 443 { 444 while (index < tokens.size()) 445 { 446 Token tok = tokens[index]; 447 index = translate_token(tokens, index, macro_params); 448 if ((tok.type() == TOKEN_TYPE::NewLine) || 449 (tok.type() == TOKEN_TYPE::Comment)) 450 { 451 break; 452 } 453 } 454 455 return index; 456 } 457 458 size_t 459 translate_expression(TokenList &tokens, size_t index, const vector<string> ¯o_params) 460 { 461 while (index < tokens.size()) 462 { 463 Token tok = tokens[index]; 464 switch (tok.type()) 465 { 466 case TOKEN_TYPE::NewLine: 467 case TOKEN_TYPE::Comment: 468 return index; 469 470 case TOKEN_TYPE::KW_MASK: 471 printf("MASK_"); 472 index += 2; 473 break; 474 475 case TOKEN_TYPE::Instruction: 476 if (iequals(tok.str(), "and")) 477 { 478 printf("&"); 479 index += 1; 480 } 481 else if (iequals(tok.str(), "or")) 482 { 483 printf("|"); 484 index += 1; 485 } 486 else if (iequals(tok.str(), "shl")) 487 { 488 printf("<<"); 489 index += 1; 490 } 491 else if (iequals(tok.str(), "not")) 492 { 493 printf("!"); 494 index += 1; 495 } 496 else 497 { 498 throw "Invalid expression"; 499 } 500 break; 501 502 case TOKEN_TYPE::Operator: 503 if (tok.str() == ",") 504 { 505 return index; 506 } 507 case TOKEN_TYPE::WhiteSpace: 508 case TOKEN_TYPE::BraceOpen: 509 case TOKEN_TYPE::BraceClose: 510 case TOKEN_TYPE::DecNumber: 511 case TOKEN_TYPE::HexNumber: 512 case TOKEN_TYPE::Identifier: 513 index = translate_token(tokens, index, macro_params); 514 break; 515 516 default: 517 index = translate_token(tokens, index, macro_params); 518 } 519 } 520 521 return index; 522 } 523 524 size_t translate_mem_ref(TokenList& tokens, size_t index, const vector<string>& macro_params) 525 { 526 unsigned int offset = 0; 527 528 Token tok = tokens[index]; 529 530 if ((tok.type() == TOKEN_TYPE::DecNumber) || 531 (tok.type() == TOKEN_TYPE::HexNumber)) 532 { 533 offset = stoi(tok.str(), nullptr, 0); 534 index += 2; 535 } 536 537 index = translate_token(tokens, index, macro_params); 538 539 while (index < tokens.size()) 540 { 541 Token tok = tokens[index]; 542 index = translate_token(tokens, index, macro_params); 543 if (tok.type() == TOKEN_TYPE::MemRefEnd) 544 { 545 if (offset != 0) 546 { 547 printf(" + %u", offset); 548 } 549 return index; 550 } 551 } 552 553 throw "Failed to translate memory ref"; 554 return index; 555 } 556 557 size_t translate_instruction_param(TokenList& tokens, size_t index, const vector<string>& macro_params) 558 { 559 switch (tokens[index].type()) 560 { 561 case TOKEN_TYPE::BYTE_PTR: 562 case TOKEN_TYPE::WORD_PTR: 563 case TOKEN_TYPE::DWORD_PTR: 564 case TOKEN_TYPE::QWORD_PTR: 565 case TOKEN_TYPE::XMMWORD_PTR: 566 index = translate_token(tokens, index, macro_params); 567 568 // Optional whitespace 569 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 570 { 571 index = translate_token(tokens, index, macro_params); 572 } 573 } 574 575 while (index < tokens.size()) 576 { 577 Token tok = tokens[index]; 578 switch (tok.type()) 579 { 580 case TOKEN_TYPE::MemRefStart: 581 return translate_mem_ref(tokens, index, macro_params); 582 583 case TOKEN_TYPE::NewLine: 584 case TOKEN_TYPE::Comment: 585 return index; 586 587 case TOKEN_TYPE::Operator: 588 if (tok.str() == ",") 589 return index; 590 591 case TOKEN_TYPE::Identifier: 592 index = translate_token(tokens, index, macro_params); 593 if (is_mem_id(tok)) 594 { 595 printf("[rip]"); 596 } 597 break; 598 599 default: 600 index = translate_expression(tokens, index, macro_params); 601 } 602 } 603 604 return index; 605 } 606 607 size_t translate_instruction(TokenList& tokens, size_t index, const vector<string>& macro_params) 608 { 609 // Translate the instruction itself 610 index = translate_token(tokens, index, macro_params); 611 612 // Handle instruction parameters 613 while (index < tokens.size()) 614 { 615 // Optional whitespace 616 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 617 { 618 index = translate_token(tokens, index, macro_params); 619 } 620 621 // Check for parameters 622 Token tok = tokens[index]; 623 switch (tok.type()) 624 { 625 case TOKEN_TYPE::Comment: 626 case TOKEN_TYPE::NewLine: 627 return index; 628 629 case TOKEN_TYPE::WhiteSpace: 630 case TOKEN_TYPE::Operator: 631 index = translate_token(tokens, index, macro_params); 632 break; 633 634 default: 635 index = translate_instruction_param(tokens, index, macro_params); 636 break; 637 } 638 } 639 640 return index; 641 } 642 643 size_t translate_item(TokenList& tokens, size_t index, const vector<string> ¯o_params) 644 { 645 switch (tokens[index].type()) 646 { 647 case TOKEN_TYPE::DecNumber: 648 case TOKEN_TYPE::HexNumber: 649 case TOKEN_TYPE::String: 650 case TOKEN_TYPE::WhiteSpace: 651 return translate_token(tokens, index, macro_params); 652 } 653 654 throw "Failed to translate item"; 655 return -1; 656 } 657 658 size_t translate_list(TokenList& tokens, size_t index, const vector<string> ¯o_params) 659 { 660 while (index < tokens.size()) 661 { 662 // The item itself 663 index = translate_item(tokens, index, macro_params); 664 665 // Optional white space 666 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 667 { 668 index = translate_token(tokens, index, macro_params); 669 } 670 671 // End of list? 672 if ((tokens[index].type() == TOKEN_TYPE::Comment) || 673 (tokens[index].type() == TOKEN_TYPE::NewLine)) 674 { 675 return index; 676 } 677 678 // We expect a comma here 679 if ((tokens[index].type() != TOKEN_TYPE::Operator) || 680 (tokens[index].str() != ",")) 681 { 682 throw "Unexpected end of list"; 683 } 684 685 index = translate_token(tokens, index, macro_params); 686 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 687 { 688 index = translate_token(tokens, index, macro_params); 689 } 690 } 691 692 throw "Failed to translate list"; 693 return -1; 694 } 695 696 size_t 697 translate_data_def(TokenList& tokens, size_t index, const vector<string>& macro_params) 698 { 699 Token tok = tokens[index]; 700 Token tok1 = get_ws(tokens[index + 1]); 701 string directive, need, have =""; 702 703 switch (tok.type()) 704 { 705 case TOKEN_TYPE::KW_DB: 706 directive = ".byte"; 707 break; 708 709 case TOKEN_TYPE::KW_DW: 710 directive = ".short"; 711 break; 712 713 case TOKEN_TYPE::KW_DD: 714 directive = ".long"; 715 break; 716 717 case TOKEN_TYPE::KW_DQ: 718 directive = ".quad"; 719 break; 720 } 721 722 index += 2; 723 724 while (index < tokens.size()) 725 { 726 // Check if we need '.ascii' for ASCII strings 727 if (tokens[index].str()[0] == '\"') 728 { 729 need = ".ascii"; 730 } 731 else 732 { 733 need = directive; 734 } 735 736 // Output the directive we need (or a comma) 737 if (have == "") 738 { 739 printf("%s ", need.c_str()); 740 } 741 else if (have != need) 742 { 743 printf("\n%s ", need.c_str()); 744 } 745 else 746 { 747 printf(", "); 748 } 749 750 have = need; 751 752 // The item itself 753 index = translate_item(tokens, index, macro_params); 754 755 // Optional white space 756 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 757 { 758 index = translate_token(tokens, index, macro_params); 759 } 760 761 // End of list? 762 if ((tokens[index].type() == TOKEN_TYPE::Comment) || 763 (tokens[index].type() == TOKEN_TYPE::NewLine)) 764 { 765 return index; 766 } 767 768 // We expect a comma here 769 if ((tokens[index].type() != TOKEN_TYPE::Operator) || 770 (tokens[index].str() != ",")) 771 { 772 throw "Unexpected end of list"; 773 } 774 775 // Skip comma and optional white-space 776 index++; 777 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 778 { 779 index++; 780 } 781 } 782 783 throw "Failed to translate list"; 784 return -1; 785 } 786 787 size_t 788 translate_construct_one_param(string translated, TokenList& tokens, size_t index, const vector<string>& macro_params) 789 { 790 // The next token should be white space 791 Token tok1 = get_ws(tokens[index + 1]); 792 793 printf("%s%s", translated.c_str(), tok1.str().c_str()); 794 return translate_expression(tokens, index + 2, macro_params); 795 } 796 797 size_t 798 translate_record(TokenList &tokens, size_t index, const vector<string> ¯o_params) 799 { 800 unsigned int bits, bitpos = 0; 801 unsigned long long oldmask = 0, mask = 0; 802 803 Token tok_name = get_expected_token(tokens[index], TOKEN_TYPE::Identifier); 804 index += 4; 805 while (index < tokens.size()) 806 { 807 Token tok_member = get_expected_token(tokens[index++], TOKEN_TYPE::Identifier); 808 809 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 810 { 811 index++; 812 } 813 814 if (tokens[index++].str() != ":") 815 { 816 throw "Unexpected token"; 817 } 818 819 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 820 { 821 index++; 822 } 823 824 Token tok_bits = tokens[index++]; 825 if ((tok_bits.type() != TOKEN_TYPE::DecNumber) && 826 (tok_bits.type() != TOKEN_TYPE::HexNumber)) 827 { 828 throw "Unexpected token"; 829 } 830 831 bits = stoi(tok_bits.str(), nullptr, 0); 832 833 printf("%s = %u\n", tok_member.str().c_str(), bitpos); 834 835 oldmask = (1ULL << bitpos) - 1; 836 bitpos += bits; 837 mask = (1ULL << bitpos) - 1 - oldmask; 838 printf("MASK_%s = 0x%llx\n", tok_member.str().c_str(), mask); 839 840 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 841 { 842 index++; 843 } 844 845 if ((tokens[index].type() == TOKEN_TYPE::NewLine) || 846 (tokens[index].type() == TOKEN_TYPE::Comment)) 847 { 848 break; 849 } 850 851 if (tokens[index].str() != ",") 852 { 853 throw "unexpected token"; 854 } 855 856 index++; 857 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 858 { 859 index++; 860 } 861 862 if ((tokens[index].type() == TOKEN_TYPE::NewLine) || 863 (tokens[index].type() == TOKEN_TYPE::Comment)) 864 { 865 index++; 866 } 867 868 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 869 { 870 index++; 871 } 872 } 873 874 return index; 875 } 876 877 size_t 878 translate_identifier_construct(TokenList& tokens, size_t index, const vector<string> ¯o_params) 879 { 880 Token tok = tokens[index]; 881 Token tok1 = tokens[index + 1]; 882 883 if (tok1.type() == TOKEN_TYPE::Colon) 884 { 885 if (tok.str() == "@@") 886 { 887 g_label_number++; 888 printf("%u:", g_label_number); 889 } 890 else 891 { 892 printf("%s:", tok.str().c_str()); 893 } 894 return index + 2; 895 } 896 897 Token tok2 = tokens[index + 2]; 898 899 switch (tok2.type()) 900 { 901 case TOKEN_TYPE::KW_MACRO: 902 throw "Cannot have a nested macro!"; 903 904 case TOKEN_TYPE::KW_DB: 905 case TOKEN_TYPE::KW_DW: 906 case TOKEN_TYPE::KW_DD: 907 case TOKEN_TYPE::KW_DQ: 908 printf("%s:%s", tok.str().c_str(), tok1.str().c_str()); 909 add_mem_id(tok); 910 return translate_data_def(tokens, index + 2, macro_params); 911 912 case TOKEN_TYPE::KW_EQU: 913 //printf("%s%s", tok.str().c_str(), tok1.str().c_str()); 914 printf("#define %s ", tok.str().c_str()); 915 return translate_expression(tokens, index + 3, macro_params); 916 917 case TOKEN_TYPE::KW_TEXTEQU: 918 { 919 Token tok3 = get_ws(tokens[index + 3]); 920 Token tok4 = get_expected_token(tokens[index + 4], TOKEN_TYPE::StringDef); 921 922 string textdef = tok4.str(); 923 printf("#define %s %s", tok.str().c_str(), textdef.substr(1, textdef.size() - 2).c_str()); 924 return index + 5; 925 } 926 927 case TOKEN_TYPE::KW_PROC: 928 { 929 printf(".func %s\n", tok.str().c_str()); 930 printf("%s:", tok.str().c_str()); 931 index += 3; 932 933 if ((tokens[index].type() == TOKEN_TYPE::WhiteSpace) && 934 (tokens[index + 1].type() == TOKEN_TYPE::KW_FRAME)) 935 { 936 #ifdef TARGET_amd64 937 printf("\n.seh_proc %s\n", tok.str().c_str()); 938 #else 939 printf("\n.cfi_startproc\n"); 940 #endif 941 index += 2; 942 } 943 break; 944 } 945 946 case TOKEN_TYPE::KW_ENDP: 947 { 948 printf(".seh_endproc\n.endfunc"); 949 index += 3; 950 break; 951 } 952 953 case TOKEN_TYPE::KW_RECORD: 954 index = translate_record(tokens, index, macro_params); 955 break; 956 957 default: 958 // We don't know what it is, assume it's a macro and treat it like an instruction 959 index = translate_instruction(tokens, index, macro_params); 960 break; 961 } 962 963 return index; 964 } 965 966 size_t 967 translate_construct(TokenList& tokens, size_t index, const vector<string> ¯o_params) 968 { 969 Token tok = tokens[index]; 970 971 switch (tok.type()) 972 { 973 case TOKEN_TYPE::WhiteSpace: 974 case TOKEN_TYPE::NewLine: 975 case TOKEN_TYPE::Comment: 976 return translate_token(tokens, index, macro_params); 977 978 case TOKEN_TYPE::Identifier: 979 return translate_identifier_construct(tokens, index, macro_params); 980 981 case TOKEN_TYPE::KW_ALIGN: 982 index = translate_construct_one_param(".align", tokens, index, macro_params); 983 break; 984 985 case TOKEN_TYPE::KW_allocstack: 986 index = translate_construct_one_param(".seh_stackalloc", tokens, index, macro_params); 987 break; 988 989 case TOKEN_TYPE::KW_code: 990 #ifdef TARGET_amd64 991 printf(".code64"); 992 #else 993 printf(".code"); 994 #endif 995 printf(" .intel_syntax noprefix"); 996 index++; 997 break; 998 999 case TOKEN_TYPE::KW_const: 1000 printf(".section .rdata"); 1001 index++; 1002 break; 1003 1004 case TOKEN_TYPE::KW_DB: 1005 case TOKEN_TYPE::KW_DW: 1006 case TOKEN_TYPE::KW_DD: 1007 case TOKEN_TYPE::KW_DQ: 1008 return translate_data_def(tokens, index, macro_params); 1009 1010 case TOKEN_TYPE::KW_END: 1011 printf("// END\n"); 1012 return tokens.size(); 1013 1014 case TOKEN_TYPE::KW_endprolog: 1015 printf(".seh_endprologue"); 1016 index++; 1017 break; 1018 1019 case TOKEN_TYPE::KW_EXTERN: 1020 { 1021 Token tok1 = get_ws_or_nl(tokens[index + 1]); 1022 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Identifier); 1023 add_mem_id(tok2); 1024 printf("//"); 1025 return complete_line(tokens, index, macro_params); 1026 } 1027 1028 case TOKEN_TYPE::KW_if: 1029 case TOKEN_TYPE::KW_ifdef: 1030 case TOKEN_TYPE::KW_ifndef: 1031 case TOKEN_TYPE::KW_else: 1032 case TOKEN_TYPE::KW_endif: 1033 // TODO: handle parameter differences between "if" and ".if" etc. 1034 printf("."); 1035 return complete_line(tokens, index, macro_params); 1036 1037 case TOKEN_TYPE::KW_include: 1038 { 1039 // The next token should be white space 1040 Token tok1 = get_ws_or_nl(tokens[index + 1]); 1041 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Filename); 1042 printf("#include \"%s.h\"", tok2.str().c_str()); 1043 index += 3; 1044 break; 1045 } 1046 1047 case TOKEN_TYPE::KW_PUBLIC: 1048 index = translate_construct_one_param(".global", tokens, index, macro_params); 1049 break; 1050 1051 case TOKEN_TYPE::KW_savereg: 1052 printf(".seh_savereg"); 1053 return complete_line(tokens, index + 1, macro_params); 1054 1055 case TOKEN_TYPE::KW_savexmm128: 1056 printf(".seh_savexmm"); 1057 return complete_line(tokens, index + 1, macro_params); 1058 1059 case TOKEN_TYPE::Instruction: 1060 index = translate_instruction(tokens, index, macro_params); 1061 break; 1062 1063 case TOKEN_TYPE::KW_ERRDEF: 1064 printf("//"); 1065 return complete_line(tokens, index, macro_params); 1066 1067 default: 1068 throw "failed to translate construct"; 1069 } 1070 1071 // Skip optional white-space 1072 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 1073 { 1074 index++; 1075 } 1076 1077 // Line should end here! 1078 Token end = tokens[index]; 1079 if ((end.type() != TOKEN_TYPE::Comment) && 1080 (end.type() != TOKEN_TYPE::NewLine)) 1081 { 1082 throw "unexpected tokens"; 1083 } 1084 1085 return index; 1086 } 1087 1088 size_t 1089 translate_macro(TokenList& tokens, size_t index) 1090 { 1091 vector<string> macro_params; 1092 1093 printf(".macro %s", tokens[index].str().c_str()); 1094 1095 // Parse marameters 1096 index += 3; 1097 while (index < tokens.size()) 1098 { 1099 Token tok = tokens[index]; 1100 switch (tok.type()) 1101 { 1102 case TOKEN_TYPE::NewLine: 1103 case TOKEN_TYPE::Comment: 1104 index = translate_token(tokens, index, macro_params); 1105 break; 1106 1107 case TOKEN_TYPE::Identifier: 1108 macro_params.push_back(tok.str()); 1109 printf("%s", tok.str().c_str()); 1110 index++; 1111 continue; 1112 1113 case TOKEN_TYPE::WhiteSpace: 1114 case TOKEN_TYPE::Operator: 1115 index = translate_token(tokens, index, macro_params); 1116 continue; 1117 } 1118 1119 break; 1120 } 1121 1122 // Parse content 1123 while (index < tokens.size()) 1124 { 1125 Token tok = tokens[index]; 1126 switch (tok.type()) 1127 { 1128 case TOKEN_TYPE::KW_ENDM: 1129 printf(".endm"); 1130 return index + 1; 1131 1132 default: 1133 index = translate_construct(tokens, index, macro_params); 1134 } 1135 } 1136 1137 throw "Failed to translate macro"; 1138 return -1; 1139 } 1140 1141 void 1142 translate(TokenList &tokens) 1143 { 1144 size_t index = 0; 1145 size_t size = tokens.size(); 1146 vector<string> empty_macro_params; 1147 1148 while (index < size) 1149 { 1150 // Macros are special 1151 if ((tokens[index].type() == TOKEN_TYPE::Identifier) && 1152 (tokens[index + 1].type() == TOKEN_TYPE::WhiteSpace) && 1153 (tokens[index + 2].type() == TOKEN_TYPE::KW_MACRO)) 1154 { 1155 index = translate_macro(tokens, index); 1156 } 1157 else 1158 { 1159 index = translate_construct(tokens, index, empty_macro_params); 1160 } 1161 } 1162 } 1163 1164 int main(int argc, char* argv[]) 1165 { 1166 if (argc < 2) 1167 { 1168 fprintf(stderr, "Invalid parameter!\n"); 1169 return -1; 1170 } 1171 1172 #if PROFILING_ENABLED 1173 time_t start_time = time(NULL); 1174 #endif 1175 1176 try 1177 { 1178 // Open and read the input file 1179 string filename(argv[1]); 1180 ifstream file(filename); 1181 stringstream buffer; 1182 buffer << file.rdbuf(); 1183 string text = buffer.str(); 1184 1185 // Create the tokenizer 1186 Tokenizer tokenizer(g_TokenList); 1187 1188 // Get a token list 1189 TokenList toklist(tokenizer, text); 1190 1191 // Now translate the tokens 1192 translate(toklist); 1193 } 1194 catch (const char* message) 1195 { 1196 fprintf(stderr, "Exception caught: '%s'\n", message); 1197 return -2; 1198 } 1199 1200 #if PROFILING_ENABLED 1201 time_t total_time = time(NULL) + 1 - start_time; 1202 fprintf(stderr, "total_time = %llu\n", total_time); 1203 fprintf(stderr, "search_time = %llu\n", search_time); 1204 fprintf(stderr, "search: %llu %%\n", search_time * 100 / total_time); 1205 #endif 1206 1207 return 0; 1208 } 1209