1 /* 2 * PROJECT: ReactOS host tools 3 * LICENSE: MIT (https://spdx.org/licenses/MIT) 4 * PURPOSE: ASM preprocessor 5 * COPYRIGHT: Copyright 2021 Timo Kreuzer <timo.kreuzer@reactos.org> 6 */ 7 8 // Optimize even on debug builds, because otherwise it's ridiculously slow 9 #ifdef _MSC_VER 10 #pragma optimize("gst", on) 11 #pragma auto_inline(on) 12 #else 13 #pragma GCC optimize("O3,inline") 14 #endif 15 16 #include "tokenizer.hpp" 17 #include <cstdlib> 18 #include <cstdio> 19 #include <sstream> 20 #include <ctime> 21 22 #define PROFILING_ENABLED 0 23 24 using namespace std; 25 26 time_t search_time; 27 28 enum TOKEN_TYPE 29 { 30 Invalid = -1, 31 Eof, 32 WhiteSpace, 33 NewLine, 34 Comment, 35 DecNumber, 36 HexNumber, 37 String, 38 39 BraceOpen, 40 BraceClose, 41 MemRefStart, 42 MemRefEnd, 43 Colon, 44 Operator, 45 StringDef, 46 47 KW_include, 48 KW_const, 49 KW_code, 50 KW_endprolog, 51 KW_ALIGN, 52 KW_EXTERN, 53 KW_PUBLIC, 54 KW_ENDM, 55 KW_END, 56 KW_if, 57 KW_ifdef, 58 KW_ifndef, 59 KW_else, 60 KW_endif, 61 62 KW_allocstack, 63 KW_savereg, 64 KW_savexmm128, 65 66 KW_DB, 67 KW_DW, 68 KW_DD, 69 KW_DQ, 70 KW_EQU, 71 KW_TEXTEQU, 72 KW_MACRO, 73 KW_PROC, 74 KW_FRAME, 75 KW_ENDP, 76 KW_RECORD, 77 78 KW_MASK, 79 KW_ERRDEF, 80 81 Filename, 82 Instruction, 83 Reg8, 84 Reg16, 85 Reg32, 86 Reg64, 87 RegXmm, 88 BYTE_PTR, 89 WORD_PTR, 90 DWORD_PTR, 91 QWORD_PTR, 92 XMMWORD_PTR, 93 94 LabelName, 95 Identifier 96 }; 97 98 int fake_printf(const char* format, ...) 99 { 100 return 0; 101 } 102 103 //#define printf fake_printf 104 105 // Use a look-ahead for following characters, not included into the match 106 //#define FOLLOWED_BY(x) R"((?=)" x R"())" 107 #define FOLLOWED_BY(x) x 108 109 #define ANY_CHAR R"((?:.|\n))" 110 #define WHITESPACE R"((?:[ \t]++))" 111 #define NEWLINE R"([\n])" 112 #define WS_OR_NL R"((?:)" WHITESPACE "|" NEWLINE R"()+)" 113 #define SEPARATOR R"([\s,\=\+\-\*\/\:\~\[\]])" 114 115 #define INSTRUCTION \ 116 "AAA|AAD|AAM|AAS|ADC|ADCX|ADD|ADDPD|ADDPS|ADDSD|ADDSS|ADDSUBPD|ADDSUBPS|" \ 117 "ADOX|AESDEC|AESDECLAST|AESENC|AESENCLAST|AESIMC|AESKEYGENASSIST|AND|ANDN|" \ 118 "ANDNPD|ANDNPS|ANDPD|ANDPS|ARPL|BEXTR|BLENDPD|BLENDPS|BLENDVPD|BLENDVPS|" \ 119 "BLSI|BLSMSK|BLSR|BNDCL|BNDCN|BNDCU|BNDLDX|BNDMK|BNDMOV|BNDSTX|BOUND|BSF|" \ 120 "BSR|BSWAP|BT|BTC|BTR|BTS|BZHI|CALL|CBW|CDQ|CDQE|CLAC|CLC|CLD|CLDEMOTE|" \ 121 "CLFLUSH|CLFLUSHOPT|CLI|CLTS|CLWB|CMC|CMOVcc|CMP|CMPPD|CMPPS|CMPS|CMPSB|" \ 122 "CMPSD|CMPSQ|CMPSS|CMPSW|CMPXCHG|CMPXCHG16B|CMPXCHG8B|COMISD|COMISS|CPUID|" \ 123 "CQO|CRC32|CVTDQ2PD|CVTDQ2PS|CVTPD2DQ|CVTPD2PI|CVTPD2PS|CVTPI2PD|CVTPI2PS|" \ 124 "CVTPS2DQ|CVTPS2PD|CVTPS2PI|CVTSD2SI|CVTSD2SS|CVTSI2SD|CVTSI2SS|CVTSS2SD|" \ 125 "CVTSS2SI|CVTTPD2DQ|CVTTPD2PI|CVTTPS2DQ|CVTTPS2PI|CVTTSD2SI|CVTTSS2SI|CWD|" \ 126 "CWDE|DAA|DAS|DEC|DIV|DIVPD|DIVPS|DIVSD|DIVSS|DPPD|DPPS|EMMS|ENTER|" \ 127 "EXTRACTPS|F2XM1|FABS|FADD|FADDP|FBLD|FBSTP|FCHS|FCLEX|FCMOVcc|FCOM|FCOMI|" \ 128 "FCOMIP|FCOMP|FCOMPP|FCOS|FDECSTP|FDIV|FDIVP|FDIVR|FDIVRP|FFREE|FIADD|" \ 129 "FICOM|FICOMP|FIDIV|FIDIVR|FILD|FIMUL|FINCSTP|FINIT|FIST|FISTP|FISTTP|" \ 130 "FISUB|FISUBR|FLD|FLD1|FLDCW|FLDENV|FLDL2E|FLDL2T|FLDLG2|FLDLN2|FLDPI|" \ 131 "FLDZ|FMUL|FMULP|FNCLEX|FNINIT|FNOP|FNSAVE|FNSTCW|FNSTENV|FNSTSW|FPATAN|" \ 132 "FPREM|FPREM1|FPTAN|FRNDINT|FRSTOR|FSAVE|FSCALE|FSIN|FSINCOS|FSQRT|FST|" \ 133 "FSTCW|FSTENV|FSTP|FSTSW|FSUB|FSUBP|FSUBR|FSUBRP|FTST|FUCOM|FUCOMI|" \ 134 "FUCOMIP|FUCOMP|FUCOMPP|FWAIT|FXAM|FXCH|FXRSTOR|FXSAVE|FXTRACT|FYL2X|" \ 135 "FYL2XP1|GF2P8AFFINEINVQB|GF2P8AFFINEQB|GF2P8MULB|HADDPD|HADDPS|HLT|" \ 136 "HSUBPD|HSUBPS|IDIV|IMUL|IN|INC|INS|INSB|INSD|INSERTPS|INSW|INT|INT1|INT3|" \ 137 "INTO|INVD|INVLPG|INVPCID|IRET|IRETD|JMP|Jcc|KADDB|KADDD|KADDQ|KADDW|" \ 138 "KANDB|KANDD|KANDNB|KANDND|KANDNQ|KANDNW|KANDQ|KANDW|KMOVB|KMOVD|KMOVQ|" \ 139 "KMOVW|KNOTB|KNOTD|KNOTQ|KNOTW|KORB|KORD|KORQ|KORTESTB|KORTESTD|KORTESTQ|" \ 140 "KORTESTW|KORW|KSHIFTLB|KSHIFTLD|KSHIFTLQ|KSHIFTLW|KSHIFTRB|KSHIFTRD|" \ 141 "KSHIFTRQ|KSHIFTRW|KTESTB|KTESTD|KTESTQ|KTESTW|KUNPCKBW|KUNPCKDQ|KUNPCKWD|" \ 142 "KXNORB|KXNORD|KXNORQ|KXNORW|KXORB|KXORD|KXORQ|KXORW|LAHF|LAR|LDDQU|" \ 143 "LDMXCSR|LDS|LEA|LEAVE|LES|LFENCE|LFS|LGDT|LGS|LIDT|LLDT|LMSW|LOCK|LODS|" \ 144 "LODSB|LODSD|LODSQ|LODSW|LOOP|LOOPcc|LSL|LSS|LTR|LZCNT|MASKMOVDQU|MASKMOVQ|" \ 145 "MAXPD|MAXPS|MAXSD|MAXSS|MFENCE|MINPD|MINPS|MINSD|MINSS|MONITOR|MOV|MOVAPD|" \ 146 "MOVAPS|MOVBE|MOVD|MOVDDUP|MOVDIR64B|MOVDIRI|MOVDQ2Q|MOVDQA|MOVDQU|MOVHLPS|" \ 147 "MOVHPD|MOVHPS|MOVLHPS|MOVLPD|MOVLPS|MOVMSKPD|MOVMSKPS|MOVNTDQ|MOVNTDQA|" \ 148 "MOVNTI|MOVNTPD|MOVNTPS|MOVNTQ|MOVQ|MOVQ2DQ|MOVS|MOVSB|MOVSD|MOVSHDUP|" \ 149 "MOVSLDUP|MOVSQ|MOVSS|MOVSW|MOVSX|MOVSXD|MOVUPD|MOVUPS|MOVZX|MPSADBW|MUL|" \ 150 "MULPD|MULPS|MULSD|MULSS|MULX|MWAIT|NEG|NOP|NOT|OR|ORPD|ORPS|OUT|OUTS|" \ 151 "OUTSB|OUTSD|OUTSW|PABSB|PABSD|PABSQ|PABSW|PACKSSDW|PACKSSWB|PACKUSDW|" \ 152 "PACKUSWB|PADDB|PADDD|PADDQ|PADDSB|PADDSW|PADDUSB|PADDUSW|PADDW|PALIGNR|" \ 153 "PAND|PANDN|PAUSE|PAVGB|PAVGW|PBLENDVB|PBLENDW|PCLMULQDQ|PCMPEQB|PCMPEQD|" \ 154 "PCMPEQQ|PCMPEQW|PCMPESTRI|PCMPESTRM|PCMPGTB|PCMPGTD|PCMPGTQ|PCMPGTW|" \ 155 "PCMPISTRI|PCMPISTRM|PDEP|PEXT|PEXTRB|PEXTRD|PEXTRQ|PEXTRW|PHADDD|PHADDSW|" \ 156 "PHADDW|PHMINPOSUW|PHSUBD|PHSUBSW|PHSUBW|PINSRB|PINSRD|PINSRQ|PINSRW|" \ 157 "PMADDUBSW|PMADDWD|PMAXSB|PMAXSD|PMAXSQ|PMAXSW|PMAXUB|PMAXUD|PMAXUQ|PMAXUW|" \ 158 "PMINSB|PMINSD|PMINSQ|PMINSW|PMINUB|PMINUD|PMINUQ|PMINUW|PMOVMSKB|PMOVSX|" \ 159 "PMOVZX|PMULDQ|PMULHRSW|PMULHUW|PMULHW|PMULLD|PMULLQ|PMULLW|PMULUDQ|POP|" \ 160 "POPA|POPAD|POPCNT|POPF|POPFD|POPFQ|POR|PREFETCHW|PREFETCHh|PSADBW|PSHUFB|" \ 161 "PSHUFD|PSHUFHW|PSHUFLW|PSHUFW|PSIGNB|PSIGND|PSIGNW|PSLLD|PSLLDQ|PSLLQ|" \ 162 "PSLLW|PSRAD|PSRAQ|PSRAW|PSRLD|PSRLDQ|PSRLQ|PSRLW|PSUBB|PSUBD|PSUBQ|PSUBSB|" \ 163 "PSUBSW|PSUBUSB|PSUBUSW|PSUBW|PTEST|PTWRITE|PUNPCKHBW|PUNPCKHDQ|PUNPCKHQDQ|" \ 164 "PUNPCKHWD|PUNPCKLBW|PUNPCKLDQ|PUNPCKLQDQ|PUNPCKLWD|PUSH|PUSHA|PUSHAD|" \ 165 "PUSHF|PUSHFD|PUSHFQ|PXOR|RCL|RCPPS|RCPSS|RCR|RDFSBASE|RDGSBASE|RDMSR|" \ 166 "RDPID|RDPKRU|RDPMC|RDRAND|RDSEED|RDTSC|RDTSCP|REP|REPE|REPNE|REPNZ|REPZ|" \ 167 "RET|ROL|ROR|RORX|ROUNDPD|ROUNDPS|ROUNDSD|ROUNDSS|RSM|RSQRTPS|RSQRTSS|SAHF|" \ 168 "SAL|SAR|SARX|SBB|SCAS|SCASB|SCASD|SCASW|SETcc|SFENCE|SGDT|SHA1MSG1|" \ 169 "SHA1MSG2|SHA1NEXTE|SHA1RNDS4|SHA256MSG1|SHA256MSG2|SHA256RNDS2|SHL|SHLD|" \ 170 "SHLX|SHR|SHRD|SHRX|SHUFPD|SHUFPS|SIDT|SLDT|SMSW|SQRTPD|SQRTPS|SQRTSD|" \ 171 "SQRTSS|STAC|STC|STD|STI|STMXCSR|STOS|STOSB|STOSD|STOSQ|STOSW|STR|SUB|" \ 172 "SUBPD|SUBPS|SUBSD|SUBSS|SWAPGS|SYSCALL|SYSENTER|SYSEXIT|SYSRET|TEST|" \ 173 "TPAUSE|TZCNT|UCOMISD|UCOMISS|UD|UMONITOR|UMWAIT|UNPCKHPD|UNPCKHPS|" \ 174 "UNPCKLPD|UNPCKLPS|VALIGND|VALIGNQ|VBLENDMPD|VBLENDMPS|VBROADCAST|" \ 175 "VCOMPRESSPD|VCOMPRESSPS|VCVTPD2QQ|VCVTPD2UDQ|VCVTPD2UQQ|VCVTPH2PS|" \ 176 "VCVTPS2PH|VCVTPS2QQ|VCVTPS2UDQ|VCVTPS2UQQ|VCVTQQ2PD|VCVTQQ2PS|VCVTSD2USI|" \ 177 "VCVTSS2USI|VCVTTPD2QQ|VCVTTPD2UDQ|VCVTTPD2UQQ|VCVTTPS2QQ|VCVTTPS2UDQ|" \ 178 "VCVTTPS2UQQ|VCVTTSD2USI|VCVTTSS2USI|VCVTUDQ2PD|VCVTUDQ2PS|VCVTUQQ2PD|" \ 179 "VCVTUQQ2PS|VCVTUSI2SD|VCVTUSI2SS|VDBPSADBW|VERR|VERW|VEXPANDPD|VEXPANDPS|" \ 180 "VEXTRACTF128|VEXTRACTF32x4|VEXTRACTF32x8|VEXTRACTF64x2|VEXTRACTF64x4|" \ 181 "VEXTRACTI128|VEXTRACTI32x4|VEXTRACTI32x8|VEXTRACTI64x2|VEXTRACTI64x4|" \ 182 "VFIXUPIMMPD|VFIXUPIMMPS|VFIXUPIMMSD|VFIXUPIMMSS|VFMADD132PD|VFMADD132PS|" \ 183 "VFMADD132SD|VFMADD132SS|VFMADD213PD|VFMADD213PS|VFMADD213SD|VFMADD213SS|" \ 184 "VFMADD231PD|VFMADD231PS|VFMADD231SD|VFMADD231SS|VFMADDSUB132PD|" \ 185 "VFMADDSUB132PS|VFMADDSUB213PD|VFMADDSUB213PS|VFMADDSUB231PD|" \ 186 "VFMADDSUB231PS|VFMSUB132PD|VFMSUB132PS|VFMSUB132SD|VFMSUB132SS|" \ 187 "VFMSUB213PD|VFMSUB213PS|VFMSUB213SD|VFMSUB213SS|VFMSUB231PD|VFMSUB231PS|" \ 188 "VFMSUB231SD|VFMSUB231SS|VFMSUBADD132PD|VFMSUBADD132PS|VFMSUBADD213PD|" \ 189 "VFMSUBADD213PS|VFMSUBADD231PD|VFMSUBADD231PS|VFNMADD132PD|VFNMADD132PS|" \ 190 "VFNMADD132SD|VFNMADD132SS|VFNMADD213PD|VFNMADD213PS|VFNMADD213SD|" \ 191 "VFNMADD213SS|VFNMADD231PD|VFNMADD231PS|VFNMADD231SD|VFNMADD231SS|" \ 192 "VFNMSUB132PD|VFNMSUB132PS|VFNMSUB132SD|VFNMSUB132SS|VFNMSUB213PD|" \ 193 "VFNMSUB213PS|VFNMSUB213SD|VFNMSUB213SS|VFNMSUB231PD|VFNMSUB231PS|" \ 194 "VFNMSUB231SD|VFNMSUB231SS|VFPCLASSPD|VFPCLASSPS|VFPCLASSSD|VFPCLASSSS|" \ 195 "VGATHERDPD|VGATHERDPS|VGATHERQPD|VGATHERQPS|VGETEXPPD|VGETEXPPS|VGETEXPSD|" \ 196 "VGETEXPSS|VGETMANTPD|VGETMANTPS|VGETMANTSD|VGETMANTSS|VINSERTF128|" \ 197 "VINSERTF32x4|VINSERTF32x8|VINSERTF64x2|VINSERTF64x4|VINSERTI128|" \ 198 "VINSERTI32x4|VINSERTI32x8|VINSERTI64x2|VINSERTI64x4|VMASKMOV|VMOVDQA32|" \ 199 "VMOVDQA64|VMOVDQU16|VMOVDQU32|VMOVDQU64|VMOVDQU8|VPBLENDD|VPBLENDMB|" \ 200 "VPBLENDMD|VPBLENDMQ|VPBLENDMW|VPBROADCAST|VPBROADCASTB|VPBROADCASTD|" \ 201 "VPBROADCASTM|VPBROADCASTQ|VPBROADCASTW|VPCMPB|VPCMPD|VPCMPQ|VPCMPUB|" \ 202 "VPCMPUD|VPCMPUQ|VPCMPUW|VPCMPW|VPCOMPRESSD|VPCOMPRESSQ|VPCONFLICTD|" \ 203 "VPCONFLICTQ|VPERM2F128|VPERM2I128|VPERMB|VPERMD|VPERMI2B|VPERMI2D|" \ 204 "VPERMI2PD|VPERMI2PS|VPERMI2Q|VPERMI2W|VPERMILPD|VPERMILPS|VPERMPD|VPERMPS|" \ 205 "VPERMQ|VPERMT2B|VPERMT2D|VPERMT2PD|VPERMT2PS|VPERMT2Q|VPERMT2W|VPERMW|" \ 206 "VPEXPANDD|VPEXPANDQ|VPGATHERDD|VPGATHERDQ|VPGATHERQD|VPGATHERQQ|VPLZCNTD|" \ 207 "VPLZCNTQ|VPMADD52HUQ|VPMADD52LUQ|VPMASKMOV|VPMOVB2M|VPMOVD2M|VPMOVDB|" \ 208 "VPMOVDW|VPMOVM2B|VPMOVM2D|VPMOVM2Q|VPMOVM2W|VPMOVQ2M|VPMOVQB|VPMOVQD|" \ 209 "VPMOVQW|VPMOVSDB|VPMOVSDW|VPMOVSQB|VPMOVSQD|VPMOVSQW|VPMOVSWB|VPMOVUSDB|" \ 210 "VPMOVUSDW|VPMOVUSQB|VPMOVUSQD|VPMOVUSQW|VPMOVUSWB|VPMOVW2M|VPMOVWB|" \ 211 "VPMULTISHIFTQB|VPROLD|VPROLQ|VPROLVD|VPROLVQ|VPRORD|VPRORQ|VPRORVD|" \ 212 "VPRORVQ|VPSCATTERDD|VPSCATTERDQ|VPSCATTERQD|VPSCATTERQQ|VPSLLVD|VPSLLVQ|" \ 213 "VPSLLVW|VPSRAVD|VPSRAVQ|VPSRAVW|VPSRLVD|VPSRLVQ|VPSRLVW|VPTERNLOGD|" \ 214 "VPTERNLOGQ|VPTESTMB|VPTESTMD|VPTESTMQ|VPTESTMW|VPTESTNMB|VPTESTNMD|" \ 215 "VPTESTNMQ|VPTESTNMW|VRANGEPD|VRANGEPS|VRANGESD|VRANGESS|VRCP14PD|VRCP14PS|" \ 216 "VRCP14SD|VRCP14SS|VREDUCEPD|VREDUCEPS|VREDUCESD|VREDUCESS|VRNDSCALEPD|" \ 217 "VRNDSCALEPS|VRNDSCALESD|VRNDSCALESS|VRSQRT14PD|VRSQRT14PS|VRSQRT14SD|" \ 218 "VRSQRT14SS|VSCALEFPD|VSCALEFPS|VSCALEFSD|VSCALEFSS|VSCATTERDPD|" \ 219 "VSCATTERDPS|VSCATTERQPD|VSCATTERQPS|VSHUFF32x4|VSHUFF64x2|VSHUFI32x4|" \ 220 "VSHUFI64x2|VTESTPD|VTESTPS|VZEROALL|VZEROUPPER|WAIT|WBINVD|WRFSBASE|" \ 221 "WRGSBASE|WRMSR|WRPKRU|XABORT|XACQUIRE|XADD|XBEGIN|XCHG|XEND|XGETBV|XLAT|" \ 222 "XLATB|XOR|XORPD|XORPS|XRELEASE|XRSTOR|XRSTORS|XSAVE|XSAVEC|XSAVEOPT|" \ 223 "XSAVES|XSETBV|XTEST" 224 225 vector<TOKEN_DEF> g_TokenList = 226 { 227 //{ TOKEN_TYPE::WhiteSpace, R"((\s+))" }, 228 { TOKEN_TYPE::WhiteSpace, R"(([ \t]+))" }, 229 { TOKEN_TYPE::NewLine, R"((\n))" }, 230 { TOKEN_TYPE::Comment, R"((;.*\n))" }, 231 { TOKEN_TYPE::HexNumber, R"(([0-9][0-9a-f]*h))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") }, 232 { TOKEN_TYPE::DecNumber, R"(([0-9]+))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\(\)])") }, 233 { TOKEN_TYPE::String, R"((\".*\"))" }, 234 235 { TOKEN_TYPE::BraceOpen, R"((\())"}, 236 { TOKEN_TYPE::BraceClose, R"((\)))"}, 237 { TOKEN_TYPE::MemRefStart, R"((\[))"}, 238 { TOKEN_TYPE::MemRefEnd, R"((\]))"}, 239 { TOKEN_TYPE::Colon, R"((\:))"}, 240 { TOKEN_TYPE::Operator, R"(([,\+\-\*\/\:]))"}, 241 { TOKEN_TYPE::StringDef, R"((<.+>))" }, 242 243 { TOKEN_TYPE::KW_include, R"((include))" FOLLOWED_BY(R"([\s])") }, 244 { TOKEN_TYPE::KW_const, R"((\.const))" FOLLOWED_BY(R"([\s])") }, 245 { TOKEN_TYPE::KW_code, R"((\.code))" FOLLOWED_BY(R"([\s])") }, 246 { TOKEN_TYPE::KW_endprolog, R"((\.endprolog))" FOLLOWED_BY(R"([\s])") }, 247 { TOKEN_TYPE::KW_ALIGN, R"((ALIGN))" FOLLOWED_BY(R"([\s])") }, 248 { TOKEN_TYPE::KW_EXTERN, R"((EXTERN))" FOLLOWED_BY(R"([\s])") }, 249 { TOKEN_TYPE::KW_EXTERN, R"((EXTRN))" FOLLOWED_BY(R"([\s])") }, 250 { TOKEN_TYPE::KW_PUBLIC, R"((PUBLIC))" FOLLOWED_BY(R"([\s])") }, 251 { TOKEN_TYPE::KW_ENDM, R"((ENDM))" FOLLOWED_BY(R"([\s\;])") }, 252 { TOKEN_TYPE::KW_END, R"((END))" FOLLOWED_BY(R"([\s])") }, 253 { TOKEN_TYPE::KW_if, R"((if))" FOLLOWED_BY(R"([\s])") }, 254 { TOKEN_TYPE::KW_ifdef, R"((ifdef))" FOLLOWED_BY(R"([\s])")}, 255 { TOKEN_TYPE::KW_ifndef, R"((ifndef))" FOLLOWED_BY(R"([\s])")}, 256 { TOKEN_TYPE::KW_else, R"((else))" FOLLOWED_BY(R"([\s])")}, 257 { TOKEN_TYPE::KW_endif, R"((endif))" FOLLOWED_BY(R"([\s])")}, 258 259 { TOKEN_TYPE::KW_allocstack, R"((.allocstack))" FOLLOWED_BY(R"([\s])") }, 260 { TOKEN_TYPE::KW_savereg, R"((.savereg))" FOLLOWED_BY(R"([\s])") }, 261 { TOKEN_TYPE::KW_savexmm128, R"((.savexmm128))" FOLLOWED_BY(R"([\s])") }, 262 263 { TOKEN_TYPE::KW_DB, R"((DB))" FOLLOWED_BY(R"([\s])") }, 264 { TOKEN_TYPE::KW_DW, R"((DW))" FOLLOWED_BY(R"([\s])") }, 265 { TOKEN_TYPE::KW_DD, R"((DD))" FOLLOWED_BY(R"([\s])") }, 266 { TOKEN_TYPE::KW_DQ, R"((DQ))" FOLLOWED_BY(R"([\s])") }, 267 { TOKEN_TYPE::KW_EQU, R"((EQU))" FOLLOWED_BY(R"([\s])") }, 268 { TOKEN_TYPE::KW_TEXTEQU, R"((TEXTEQU))" FOLLOWED_BY(R"([\s])") }, 269 { TOKEN_TYPE::KW_MACRO, R"((MACRO))" FOLLOWED_BY(R"([\s\;])") }, 270 { TOKEN_TYPE::KW_PROC, R"((PROC))" FOLLOWED_BY(R"([\s\;])") }, 271 { TOKEN_TYPE::KW_FRAME, R"((FRAME))" FOLLOWED_BY(R"([\s\;])") }, 272 { TOKEN_TYPE::KW_ENDP, R"((ENDP))" FOLLOWED_BY(R"([\s\;])") }, 273 { TOKEN_TYPE::KW_RECORD, R"((RECORD))" FOLLOWED_BY(R"([\s\;])") }, 274 { TOKEN_TYPE::KW_MASK, R"((MASK))" FOLLOWED_BY(R"([\s\;])")}, 275 { TOKEN_TYPE::KW_ERRDEF, R"((\.ERRDEF))" FOLLOWED_BY(R"([\s\;])")}, 276 277 { TOKEN_TYPE::Filename, R"(([a-z_][a-z0-9_]*\.inc))" FOLLOWED_BY(R"([\s])") }, 278 { TOKEN_TYPE::Instruction, "(" INSTRUCTION ")" FOLLOWED_BY(R"([\s])") }, 279 { TOKEN_TYPE::Reg8, R"((al|ah|bl|bh|cl|ch|dl|dh|sil|dil|bpl|spl|r8b|r9b|r10b|r11b|r12b|r13b|r14b|r15b))" FOLLOWED_BY(R"([\s\,])") }, 280 { TOKEN_TYPE::Reg16, R"((ax|bx|cx|dx|si|di|bp|sp|r8w|r9w|r10w|r11w|r12w|r13w|r14w|r15w))" FOLLOWED_BY(R"([\s\,])") }, 281 { TOKEN_TYPE::Reg32, R"((eax|ebx|ecx|edx|esi|edi|ebp|esp|r8d|r9d|r10d|r11d|r12d|r13d|r14d|r15d))" FOLLOWED_BY(R"([\s\,])") }, 282 { TOKEN_TYPE::Reg64, R"((rax|rbx|rcx|rdx|rsi|rdi|rbp|rsp|r8|r9|r10|r11|r12|r13|r14|r15))" FOLLOWED_BY(R"([\s\,])") }, 283 { TOKEN_TYPE::RegXmm, R"((xmm0|xmm1|xmm2|xmm3|xmm4|xmm5|xmm6|xmm7|xmm8|xmm9|xmm10|xmm11|xmm12|xmm13|xmm14|xmm15))" FOLLOWED_BY(R"([\s\,])") }, 284 { TOKEN_TYPE::BYTE_PTR, R"((BYTE[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 285 { TOKEN_TYPE::WORD_PTR, R"((WORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 286 { TOKEN_TYPE::DWORD_PTR, R"((DWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 287 { TOKEN_TYPE::QWORD_PTR, R"((QWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 288 { TOKEN_TYPE::XMMWORD_PTR, R"((XMMWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") }, 289 290 { TOKEN_TYPE::Identifier, R"((@@))" FOLLOWED_BY(SEPARATOR)}, 291 { TOKEN_TYPE::Identifier, R"((@[a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)}, 292 { TOKEN_TYPE::Identifier, R"(([a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)}, 293 294 }; 295 296 // FIXME: use context? 297 unsigned int g_label_number = 0; 298 299 bool g_processing_jmp = false; 300 301 enum class IDTYPE 302 { 303 Memory, 304 Register, 305 Label, 306 Constant, 307 Macro, 308 Instruction, 309 String, 310 Unknown 311 }; 312 313 struct IDENTIFIER 314 { 315 string Name; 316 IDTYPE Type; 317 }; 318 319 vector<IDENTIFIER> g_identifiers; 320 321 static 322 void 323 add_identifier(Token& tok, IDTYPE type) 324 { 325 g_identifiers.push_back(IDENTIFIER{ tok.str(), type }); 326 //fprintf(stderr, "Added id: '%s'\n", tok.str().c_str()); 327 } 328 329 void 330 add_mem_id(Token& tok) 331 { 332 add_identifier(tok, IDTYPE::Memory); 333 } 334 335 bool 336 is_mem_id(Token& tok) 337 { 338 for (IDENTIFIER& identifier : g_identifiers) 339 { 340 if (identifier.Name == tok.str()) 341 { 342 return identifier.Type == IDTYPE::Memory; 343 } 344 } 345 346 return true; 347 } 348 349 bool 350 iequals(const string &a, const string &b) 351 { 352 size_t sz = a.size(); 353 if (b.size() != sz) 354 return false; 355 for (unsigned int i = 0; i < sz; ++i) 356 if (tolower(a[i]) != tolower(b[i])) 357 return false; 358 return true; 359 } 360 361 Token 362 get_expected_token(Token&& tok, TOKEN_TYPE type) 363 { 364 if (tok.type() != type) 365 { 366 throw "Not white space after identifier!\n"; 367 } 368 369 return tok; 370 } 371 372 Token get_ws(Token&& tok) 373 { 374 int type = tok.type(); 375 if (type != TOKEN_TYPE::WhiteSpace) 376 { 377 throw "Not white space after identifier!\n"; 378 } 379 380 return tok; 381 } 382 383 Token get_ws_or_nl(Token&& tok) 384 { 385 int type = tok.type(); 386 if ((type != TOKEN_TYPE::WhiteSpace) && 387 (type != TOKEN_TYPE::NewLine)) 388 { 389 throw "Not white space after identifier!\n"; 390 } 391 392 return tok; 393 } 394 395 bool is_string_in_list(vector<string> list, string str) 396 { 397 for (string &s : list) 398 { 399 if (s == str) 400 { 401 return true; 402 } 403 } 404 405 return false; 406 } 407 408 size_t 409 translate_token(TokenList& tokens, size_t index, const vector<string> ¯o_params) 410 { 411 Token tok = tokens[index]; 412 switch (tok.type()) 413 { 414 case TOKEN_TYPE::Comment: 415 printf("//%s", tok.str().c_str() + 1); 416 break; 417 418 case TOKEN_TYPE::DecNumber: 419 { 420 unsigned long long num = stoull(tok.str(), nullptr, 10); 421 printf("%llu", num); 422 break; 423 } 424 425 case TOKEN_TYPE::HexNumber: 426 { 427 string number = tok.str(); 428 printf("0x%s", number.substr(0, number.size() - 1).c_str()); 429 break; 430 } 431 432 case TOKEN_TYPE::Identifier: 433 if (is_string_in_list(macro_params, tok.str())) 434 { 435 printf("\\"); 436 } 437 printf("%s", tok.str().c_str()); 438 break; 439 440 // We migt want to improve these 441 case TOKEN_TYPE::BYTE_PTR: 442 case TOKEN_TYPE::WORD_PTR: 443 case TOKEN_TYPE::DWORD_PTR: 444 case TOKEN_TYPE::QWORD_PTR: 445 case TOKEN_TYPE::XMMWORD_PTR: 446 447 // Check these. valid only in instructions? 448 case TOKEN_TYPE::Reg8: 449 case TOKEN_TYPE::Reg16: 450 case TOKEN_TYPE::Reg32: 451 case TOKEN_TYPE::Reg64: 452 case TOKEN_TYPE::RegXmm: 453 case TOKEN_TYPE::Instruction: 454 455 case TOKEN_TYPE::WhiteSpace: 456 case TOKEN_TYPE::NewLine: 457 case TOKEN_TYPE::Operator: 458 printf("%s", tok.str().c_str()); 459 break; 460 461 default: 462 printf("%s", tok.str().c_str()); 463 break; 464 } 465 466 return index + 1; 467 } 468 469 size_t complete_line(TokenList &tokens, size_t index, const vector<string> ¯o_params) 470 { 471 while (index < tokens.size()) 472 { 473 Token tok = tokens[index]; 474 index = translate_token(tokens, index, macro_params); 475 if ((tok.type() == TOKEN_TYPE::NewLine) || 476 (tok.type() == TOKEN_TYPE::Comment)) 477 { 478 break; 479 } 480 } 481 482 return index; 483 } 484 485 size_t 486 translate_expression(TokenList &tokens, size_t index, const vector<string> ¯o_params) 487 { 488 while (index < tokens.size()) 489 { 490 Token tok = tokens[index]; 491 switch (tok.type()) 492 { 493 case TOKEN_TYPE::NewLine: 494 case TOKEN_TYPE::Comment: 495 return index; 496 497 case TOKEN_TYPE::KW_MASK: 498 printf("MASK_"); 499 index += 2; 500 break; 501 502 case TOKEN_TYPE::Instruction: 503 if (iequals(tok.str(), "and")) 504 { 505 printf("&"); 506 index += 1; 507 } 508 else if (iequals(tok.str(), "or")) 509 { 510 printf("|"); 511 index += 1; 512 } 513 else if (iequals(tok.str(), "shl")) 514 { 515 printf("<<"); 516 index += 1; 517 } 518 else if (iequals(tok.str(), "not")) 519 { 520 printf("!"); 521 index += 1; 522 } 523 else 524 { 525 throw "Invalid expression"; 526 } 527 break; 528 529 case TOKEN_TYPE::Operator: 530 if (tok.str() == ",") 531 { 532 return index; 533 } 534 case TOKEN_TYPE::WhiteSpace: 535 case TOKEN_TYPE::BraceOpen: 536 case TOKEN_TYPE::BraceClose: 537 case TOKEN_TYPE::DecNumber: 538 case TOKEN_TYPE::HexNumber: 539 case TOKEN_TYPE::Identifier: 540 index = translate_token(tokens, index, macro_params); 541 break; 542 543 default: 544 index = translate_token(tokens, index, macro_params); 545 } 546 } 547 548 return index; 549 } 550 551 size_t translate_mem_ref(TokenList& tokens, size_t index, const vector<string>& macro_params) 552 { 553 unsigned int offset = 0; 554 555 Token tok = tokens[index]; 556 557 if ((tok.type() == TOKEN_TYPE::DecNumber) || 558 (tok.type() == TOKEN_TYPE::HexNumber)) 559 { 560 offset = stoi(tok.str(), nullptr, 0); 561 index += 2; 562 } 563 564 index = translate_token(tokens, index, macro_params); 565 566 while (index < tokens.size()) 567 { 568 Token tok = tokens[index]; 569 index = translate_token(tokens, index, macro_params); 570 if (tok.type() == TOKEN_TYPE::MemRefEnd) 571 { 572 if (offset != 0) 573 { 574 printf(" + %u", offset); 575 } 576 return index; 577 } 578 } 579 580 throw "Failed to translate memory ref"; 581 return index; 582 } 583 584 size_t translate_instruction_param(TokenList& tokens, size_t index, const vector<string>& macro_params) 585 { 586 switch (tokens[index].type()) 587 { 588 case TOKEN_TYPE::BYTE_PTR: 589 case TOKEN_TYPE::WORD_PTR: 590 case TOKEN_TYPE::DWORD_PTR: 591 case TOKEN_TYPE::QWORD_PTR: 592 case TOKEN_TYPE::XMMWORD_PTR: 593 index = translate_token(tokens, index, macro_params); 594 595 // Optional whitespace 596 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 597 { 598 index = translate_token(tokens, index, macro_params); 599 } 600 } 601 602 while (index < tokens.size()) 603 { 604 Token tok = tokens[index]; 605 switch (tok.type()) 606 { 607 case TOKEN_TYPE::MemRefStart: 608 return translate_mem_ref(tokens, index, macro_params); 609 610 case TOKEN_TYPE::NewLine: 611 case TOKEN_TYPE::Comment: 612 return index; 613 614 case TOKEN_TYPE::Operator: 615 if (tok.str() == ",") 616 return index; 617 return translate_token(tokens, index, macro_params); 618 619 case TOKEN_TYPE::Identifier: 620 index = translate_token(tokens, index, macro_params); 621 if (is_mem_id(tok) && 622 !is_string_in_list(macro_params, tok.str()) && 623 !g_processing_jmp) 624 { 625 printf("[rip]"); 626 } 627 break; 628 629 default: 630 index = translate_expression(tokens, index, macro_params); 631 } 632 } 633 634 return index; 635 } 636 637 static 638 bool 639 is_jmp_or_call(const Token& tok) 640 { 641 const char* inst_list[] = { 642 "jmp", "call", "ja", "jae", "jb", "jbe", "jc", "jcxz", "je", "jecxz", "jg", "jge", 643 "jl", "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge", "jnl", "jnle", 644 "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo", "jrcxz", "js", "jz", "loop", "loope", 645 "loopne", "loopnz", "loopz" 646 }; 647 648 for (const char* inst : inst_list) 649 { 650 if (iequals(tok.str(), inst)) 651 { 652 return true; 653 } 654 } 655 656 return false; 657 } 658 659 size_t translate_instruction(TokenList& tokens, size_t index, const vector<string>& macro_params) 660 { 661 // Check for jump/call instructions 662 if (is_jmp_or_call(tokens[index])) 663 { 664 g_processing_jmp = true; 665 } 666 667 // Translate the instruction itself 668 index = translate_token(tokens, index, macro_params); 669 670 // Handle instruction parameters 671 while (index < tokens.size()) 672 { 673 // Optional whitespace 674 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 675 { 676 index = translate_token(tokens, index, macro_params); 677 } 678 679 // Check for parameters 680 Token tok = tokens[index]; 681 switch (tok.type()) 682 { 683 case TOKEN_TYPE::Comment: 684 case TOKEN_TYPE::NewLine: 685 g_processing_jmp = false; 686 return index; 687 688 case TOKEN_TYPE::WhiteSpace: 689 case TOKEN_TYPE::Operator: 690 index = translate_token(tokens, index, macro_params); 691 break; 692 693 default: 694 index = translate_instruction_param(tokens, index, macro_params); 695 break; 696 } 697 } 698 699 g_processing_jmp = false; 700 return index; 701 } 702 703 size_t translate_item(TokenList& tokens, size_t index, const vector<string> ¯o_params) 704 { 705 switch (tokens[index].type()) 706 { 707 case TOKEN_TYPE::DecNumber: 708 case TOKEN_TYPE::HexNumber: 709 case TOKEN_TYPE::String: 710 case TOKEN_TYPE::WhiteSpace: 711 return translate_token(tokens, index, macro_params); 712 } 713 714 throw "Failed to translate item"; 715 return -1; 716 } 717 718 size_t translate_list(TokenList& tokens, size_t index, const vector<string> ¯o_params) 719 { 720 while (index < tokens.size()) 721 { 722 // The item itself 723 index = translate_item(tokens, index, macro_params); 724 725 // Optional white space 726 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 727 { 728 index = translate_token(tokens, index, macro_params); 729 } 730 731 // End of list? 732 if ((tokens[index].type() == TOKEN_TYPE::Comment) || 733 (tokens[index].type() == TOKEN_TYPE::NewLine)) 734 { 735 return index; 736 } 737 738 // We expect a comma here 739 if ((tokens[index].type() != TOKEN_TYPE::Operator) || 740 (tokens[index].str() != ",")) 741 { 742 throw "Unexpected end of list"; 743 } 744 745 index = translate_token(tokens, index, macro_params); 746 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 747 { 748 index = translate_token(tokens, index, macro_params); 749 } 750 } 751 752 throw "Failed to translate list"; 753 return -1; 754 } 755 756 size_t 757 translate_data_def(TokenList& tokens, size_t index, const vector<string>& macro_params) 758 { 759 Token tok = tokens[index]; 760 Token tok1 = get_ws(tokens[index + 1]); 761 string directive, need, have =""; 762 763 switch (tok.type()) 764 { 765 case TOKEN_TYPE::KW_DB: 766 directive = ".byte"; 767 break; 768 769 case TOKEN_TYPE::KW_DW: 770 directive = ".short"; 771 break; 772 773 case TOKEN_TYPE::KW_DD: 774 directive = ".long"; 775 break; 776 777 case TOKEN_TYPE::KW_DQ: 778 directive = ".quad"; 779 break; 780 } 781 782 index += 2; 783 784 while (index < tokens.size()) 785 { 786 // Check if we need '.ascii' for ASCII strings 787 if (tokens[index].str()[0] == '\"') 788 { 789 need = ".ascii"; 790 } 791 else 792 { 793 need = directive; 794 } 795 796 // Output the directive we need (or a comma) 797 if (have == "") 798 { 799 printf("%s ", need.c_str()); 800 } 801 else if (have != need) 802 { 803 printf("\n%s ", need.c_str()); 804 } 805 else 806 { 807 printf(", "); 808 } 809 810 have = need; 811 812 // The item itself 813 index = translate_item(tokens, index, macro_params); 814 815 // Optional white space 816 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 817 { 818 index = translate_token(tokens, index, macro_params); 819 } 820 821 // End of list? 822 if ((tokens[index].type() == TOKEN_TYPE::Comment) || 823 (tokens[index].type() == TOKEN_TYPE::NewLine)) 824 { 825 return index; 826 } 827 828 // We expect a comma here 829 if ((tokens[index].type() != TOKEN_TYPE::Operator) || 830 (tokens[index].str() != ",")) 831 { 832 throw "Unexpected end of list"; 833 } 834 835 // Skip comma and optional white-space 836 index++; 837 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 838 { 839 index++; 840 } 841 } 842 843 throw "Failed to translate list"; 844 return -1; 845 } 846 847 size_t 848 translate_construct_one_param(string translated, TokenList& tokens, size_t index, const vector<string>& macro_params) 849 { 850 // The next token should be white space 851 Token tok1 = get_ws(tokens[index + 1]); 852 853 printf("%s%s", translated.c_str(), tok1.str().c_str()); 854 return translate_expression(tokens, index + 2, macro_params); 855 } 856 857 size_t 858 translate_record(TokenList &tokens, size_t index, const vector<string> ¯o_params) 859 { 860 unsigned int bits, bitpos = 0; 861 unsigned long long oldmask = 0, mask = 0; 862 863 Token tok_name = get_expected_token(tokens[index], TOKEN_TYPE::Identifier); 864 index += 4; 865 while (index < tokens.size()) 866 { 867 Token tok_member = get_expected_token(tokens[index++], TOKEN_TYPE::Identifier); 868 869 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 870 { 871 index++; 872 } 873 874 if (tokens[index++].str() != ":") 875 { 876 throw "Unexpected token"; 877 } 878 879 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 880 { 881 index++; 882 } 883 884 Token tok_bits = tokens[index++]; 885 if ((tok_bits.type() != TOKEN_TYPE::DecNumber) && 886 (tok_bits.type() != TOKEN_TYPE::HexNumber)) 887 { 888 throw "Unexpected token"; 889 } 890 891 bits = stoi(tok_bits.str(), nullptr, 0); 892 893 printf("%s = %u\n", tok_member.str().c_str(), bitpos); 894 895 oldmask = (1ULL << bitpos) - 1; 896 bitpos += bits; 897 mask = (1ULL << bitpos) - 1 - oldmask; 898 printf("MASK_%s = 0x%llx\n", tok_member.str().c_str(), mask); 899 900 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 901 { 902 index++; 903 } 904 905 if ((tokens[index].type() == TOKEN_TYPE::NewLine) || 906 (tokens[index].type() == TOKEN_TYPE::Comment)) 907 { 908 break; 909 } 910 911 if (tokens[index].str() != ",") 912 { 913 throw "unexpected token"; 914 } 915 916 index++; 917 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 918 { 919 index++; 920 } 921 922 if ((tokens[index].type() == TOKEN_TYPE::NewLine) || 923 (tokens[index].type() == TOKEN_TYPE::Comment)) 924 { 925 index++; 926 } 927 928 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 929 { 930 index++; 931 } 932 } 933 934 return index; 935 } 936 937 size_t 938 translate_identifier_construct(TokenList& tokens, size_t index, const vector<string> ¯o_params) 939 { 940 Token tok = tokens[index]; 941 Token tok1 = tokens[index + 1]; 942 943 if (tok1.type() == TOKEN_TYPE::Colon) 944 { 945 if (tok.str() == "@@") 946 { 947 g_label_number++; 948 printf("%u:", g_label_number); 949 } 950 else 951 { 952 printf("%s:", tok.str().c_str()); 953 } 954 add_identifier(tok, IDTYPE::Label); 955 return index + 2; 956 } 957 958 Token tok2 = tokens[index + 2]; 959 960 switch (tok2.type()) 961 { 962 case TOKEN_TYPE::KW_MACRO: 963 throw "Cannot have a nested macro!"; 964 965 case TOKEN_TYPE::KW_DB: 966 case TOKEN_TYPE::KW_DW: 967 case TOKEN_TYPE::KW_DD: 968 case TOKEN_TYPE::KW_DQ: 969 printf("%s:%s", tok.str().c_str(), tok1.str().c_str()); 970 add_mem_id(tok); 971 return translate_data_def(tokens, index + 2, macro_params); 972 973 case TOKEN_TYPE::KW_EQU: 974 //printf("%s%s", tok.str().c_str(), tok1.str().c_str()); 975 printf("#define %s ", tok.str().c_str()); 976 add_identifier(tok, IDTYPE::Constant); 977 return translate_expression(tokens, index + 3, macro_params); 978 979 case TOKEN_TYPE::KW_TEXTEQU: 980 { 981 Token tok3 = get_ws(tokens[index + 3]); 982 Token tok4 = get_expected_token(tokens[index + 4], TOKEN_TYPE::StringDef); 983 984 string textdef = tok4.str(); 985 printf("#define %s %s", tok.str().c_str(), textdef.substr(1, textdef.size() - 2).c_str()); 986 add_identifier(tok, IDTYPE::Constant); 987 return index + 5; 988 } 989 990 case TOKEN_TYPE::KW_PROC: 991 { 992 printf(".func %s\n", tok.str().c_str()); 993 printf("%s:", tok.str().c_str()); 994 index += 3; 995 996 if ((tokens[index].type() == TOKEN_TYPE::WhiteSpace) && 997 (tokens[index + 1].type() == TOKEN_TYPE::KW_FRAME)) 998 { 999 #ifdef TARGET_amd64 1000 printf("\n.seh_proc %s\n", tok.str().c_str()); 1001 #else 1002 printf("\n.cfi_startproc\n"); 1003 #endif 1004 index += 2; 1005 } 1006 add_identifier(tok, IDTYPE::Label); 1007 break; 1008 } 1009 1010 case TOKEN_TYPE::KW_ENDP: 1011 { 1012 printf(".seh_endproc\n.endfunc"); 1013 index += 3; 1014 break; 1015 } 1016 1017 case TOKEN_TYPE::KW_RECORD: 1018 index = translate_record(tokens, index, macro_params); 1019 break; 1020 1021 default: 1022 // We don't know what it is, assume it's a macro and treat it like an instruction 1023 index = translate_instruction(tokens, index, macro_params); 1024 break; 1025 } 1026 1027 return index; 1028 } 1029 1030 size_t 1031 translate_construct(TokenList& tokens, size_t index, const vector<string> ¯o_params) 1032 { 1033 Token tok = tokens[index]; 1034 1035 switch (tok.type()) 1036 { 1037 case TOKEN_TYPE::WhiteSpace: 1038 case TOKEN_TYPE::NewLine: 1039 case TOKEN_TYPE::Comment: 1040 return translate_token(tokens, index, macro_params); 1041 1042 case TOKEN_TYPE::Identifier: 1043 return translate_identifier_construct(tokens, index, macro_params); 1044 1045 case TOKEN_TYPE::KW_ALIGN: 1046 index = translate_construct_one_param(".align", tokens, index, macro_params); 1047 break; 1048 1049 case TOKEN_TYPE::KW_allocstack: 1050 index = translate_construct_one_param(".seh_stackalloc", tokens, index, macro_params); 1051 break; 1052 1053 case TOKEN_TYPE::KW_code: 1054 #ifdef TARGET_amd64 1055 printf(".code64"); 1056 #else 1057 printf(".code"); 1058 #endif 1059 printf(" .intel_syntax noprefix"); 1060 index++; 1061 break; 1062 1063 case TOKEN_TYPE::KW_const: 1064 printf(".section .rdata"); 1065 index++; 1066 break; 1067 1068 case TOKEN_TYPE::KW_DB: 1069 case TOKEN_TYPE::KW_DW: 1070 case TOKEN_TYPE::KW_DD: 1071 case TOKEN_TYPE::KW_DQ: 1072 return translate_data_def(tokens, index, macro_params); 1073 1074 case TOKEN_TYPE::KW_END: 1075 printf("// END\n"); 1076 return tokens.size(); 1077 1078 case TOKEN_TYPE::KW_endprolog: 1079 printf(".seh_endprologue"); 1080 index++; 1081 break; 1082 1083 case TOKEN_TYPE::KW_EXTERN: 1084 { 1085 Token tok1 = get_ws_or_nl(tokens[index + 1]); 1086 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Identifier); 1087 add_mem_id(tok2); 1088 printf("//"); 1089 return complete_line(tokens, index, macro_params); 1090 } 1091 1092 case TOKEN_TYPE::KW_if: 1093 case TOKEN_TYPE::KW_ifdef: 1094 case TOKEN_TYPE::KW_ifndef: 1095 case TOKEN_TYPE::KW_else: 1096 case TOKEN_TYPE::KW_endif: 1097 // TODO: handle parameter differences between "if" and ".if" etc. 1098 printf("."); 1099 return complete_line(tokens, index, macro_params); 1100 1101 case TOKEN_TYPE::KW_include: 1102 { 1103 // The next token should be white space 1104 Token tok1 = get_ws_or_nl(tokens[index + 1]); 1105 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Filename); 1106 printf("#include \"%s.h\"", tok2.str().c_str()); 1107 index += 3; 1108 break; 1109 } 1110 1111 case TOKEN_TYPE::KW_PUBLIC: 1112 index = translate_construct_one_param(".global", tokens, index, macro_params); 1113 break; 1114 1115 case TOKEN_TYPE::KW_savereg: 1116 printf(".seh_savereg"); 1117 return complete_line(tokens, index + 1, macro_params); 1118 1119 case TOKEN_TYPE::KW_savexmm128: 1120 printf(".seh_savexmm"); 1121 return complete_line(tokens, index + 1, macro_params); 1122 1123 case TOKEN_TYPE::Instruction: 1124 index = translate_instruction(tokens, index, macro_params); 1125 break; 1126 1127 case TOKEN_TYPE::KW_ERRDEF: 1128 printf("//"); 1129 return complete_line(tokens, index, macro_params); 1130 1131 default: 1132 throw "failed to translate construct"; 1133 } 1134 1135 // Skip optional white-space 1136 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace) 1137 { 1138 index++; 1139 } 1140 1141 // Line should end here! 1142 Token end = tokens[index]; 1143 if ((end.type() != TOKEN_TYPE::Comment) && 1144 (end.type() != TOKEN_TYPE::NewLine)) 1145 { 1146 throw "unexpected tokens"; 1147 } 1148 1149 return index; 1150 } 1151 1152 size_t 1153 translate_macro(TokenList& tokens, size_t index) 1154 { 1155 vector<string> macro_params; 1156 1157 printf(".macro %s", tokens[index].str().c_str()); 1158 1159 // Parse marameters 1160 index += 3; 1161 while (index < tokens.size()) 1162 { 1163 Token tok = tokens[index]; 1164 switch (tok.type()) 1165 { 1166 case TOKEN_TYPE::NewLine: 1167 case TOKEN_TYPE::Comment: 1168 index = translate_token(tokens, index, macro_params); 1169 break; 1170 1171 case TOKEN_TYPE::Identifier: 1172 macro_params.push_back(tok.str()); 1173 printf("%s", tok.str().c_str()); 1174 index++; 1175 continue; 1176 1177 case TOKEN_TYPE::WhiteSpace: 1178 case TOKEN_TYPE::Operator: 1179 index = translate_token(tokens, index, macro_params); 1180 continue; 1181 } 1182 1183 break; 1184 } 1185 1186 // Parse content 1187 while (index < tokens.size()) 1188 { 1189 Token tok = tokens[index]; 1190 switch (tok.type()) 1191 { 1192 case TOKEN_TYPE::KW_ENDM: 1193 printf(".endm"); 1194 return index + 1; 1195 1196 default: 1197 index = translate_construct(tokens, index, macro_params); 1198 } 1199 } 1200 1201 throw "Failed to translate macro"; 1202 return -1; 1203 } 1204 1205 void 1206 translate(TokenList &tokens) 1207 { 1208 size_t index = 0; 1209 size_t size = tokens.size(); 1210 vector<string> empty_macro_params; 1211 1212 while (index < size) 1213 { 1214 // Macros are special 1215 if ((tokens[index].type() == TOKEN_TYPE::Identifier) && 1216 (tokens[index + 1].type() == TOKEN_TYPE::WhiteSpace) && 1217 (tokens[index + 2].type() == TOKEN_TYPE::KW_MACRO)) 1218 { 1219 index = translate_macro(tokens, index); 1220 } 1221 else 1222 { 1223 index = translate_construct(tokens, index, empty_macro_params); 1224 } 1225 } 1226 } 1227 1228 int main(int argc, char* argv[]) 1229 { 1230 if (argc < 2) 1231 { 1232 fprintf(stderr, "Invalid parameter!\n"); 1233 return -1; 1234 } 1235 1236 #if PROFILING_ENABLED 1237 time_t start_time = time(NULL); 1238 #endif 1239 1240 try 1241 { 1242 // Open and read the input file 1243 string filename(argv[1]); 1244 ifstream file(filename); 1245 stringstream buffer; 1246 buffer << file.rdbuf(); 1247 string text = buffer.str(); 1248 1249 // Create the tokenizer 1250 Tokenizer tokenizer(g_TokenList); 1251 1252 // Get a token list 1253 TokenList toklist(tokenizer, text); 1254 1255 // Now translate the tokens 1256 translate(toklist); 1257 } 1258 catch (const char* message) 1259 { 1260 fprintf(stderr, "Exception caught: '%s'\n", message); 1261 return -2; 1262 } 1263 1264 #if PROFILING_ENABLED 1265 time_t total_time = time(NULL) + 1 - start_time; 1266 fprintf(stderr, "total_time = %llu\n", total_time); 1267 fprintf(stderr, "search_time = %llu\n", search_time); 1268 fprintf(stderr, "search: %llu %%\n", search_time * 100 / total_time); 1269 #endif 1270 1271 return 0; 1272 } 1273