/* Scanner for INTVPC .CFG files */
%option case-insensitive
%option never-interactive
%option noyywrap
%option noinput
%option batch
%option 8bit
%option nounput
%option prefix="bc_"
%option outfile="bincfg/bincfg_lex.c"

/* Start conditions.  %x conditions are exclusive: while one is active,   */
/* only rules that name it explicitly can match.  %s conditions are       */
/* inclusive: rules without a start-condition prefix stay active too.     */
%x SEC_IGN
%s SEC_VAR
%s SEC_BSW
%s SEC_MAP
%s SEC_EBK
%s SEC_ATR
%s SEC_PRL
%x SEC_MC0
%x SEC_MC1
%x SEC_MC2

%{
/* Clang doesn't like the unreachable code in Flex's generated output. */
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunreachable-code"
#endif

/* put any #includes here */
#include "config.h"
#include "lzoe/lzoe.h"
#include "bincfg/bincfg.h"
#include "bincfg/bincfg_grmr.tab.h"
#include "misc/types.h"

extern int bc_lex(void);

/* Values published by the scanner actions alongside each token. */
int       bc_line_no = 1;           /* incremented once per '\n' scanned    */
int       bc_dec;                   /* token text converted as decimal      */
uint32_t  bc_hex;                   /* token text converted as hexadecimal  */
char     *bc_txt = NULL;            /* copy of the token text (heap buffer) */
static unsigned bc_txt_alloc = 0;   /* bytes currently allocated at bc_txt  */

/* States of the newline / end-of-file state machine in nlsm_next_char(). */
typedef enum nlsm_t
{
    NLSM_START = 0,     /* ordinary input                                   */
    NLSM_SAWCR,         /* previous byte was CR (already reported as LF)    */
    NLSM_SAWEOF,        /* input hit EOF; first synthetic LF was returned   */
    NLSM_SAWEOF2,       /* second synthetic LF was returned                 */
    NLSM_DIDEOF         /* EOF has been reported to the caller              */
} nlsm_t;

static nlsm_t nlsm_state = NLSM_START;

/*  Simple character-at-a-time state machine for reading input.  It serves
    three purposes:

     1. Convert Mac (CR) and Windows (CR LF) line endings to UNIX (LF).
     2. Append newlines at EOF so the last line is always NL terminated.
        As written, two LFs are returned before EOF itself is reported.
     3. Read from an LZFILE* (bc_in is cast to one) instead of a FILE*.

    Returns the next input byte, 10 (LF) for any line ending, or EOF.
    Once EOF has been reported, each further call polls the input again;
    it keeps returning EOF until a byte becomes available, at which point
    normal processing resumes.
*/
static int nlsm_next_char(void)
{
    enum { NLSM_LF = 10, NLSM_CR = 13 };
    int next_char;

    /* Finish the end-of-input sequence begun on an earlier call. */
    if (nlsm_state == NLSM_SAWEOF)
    {
        nlsm_state = NLSM_SAWEOF2;
        return NLSM_LF;
    }
    if (nlsm_state == NLSM_SAWEOF2)
    {
        nlsm_state = NLSM_DIDEOF;
        return EOF;
    }

    for (;;)
    {
        next_char = lzoe_fgetc( (LZFILE *)bc_in );

        if (nlsm_state == NLSM_DIDEOF)
        {
            if (next_char == EOF)
                return EOF;

            nlsm_state = NLSM_START;    /* input is readable again */
        }

        if (next_char == EOF)
        {
            nlsm_state = NLSM_SAWEOF;
            return NLSM_LF;             /* extra LF at EOF */
        }

        if (next_char == NLSM_CR)
        {
            nlsm_state = NLSM_SAWCR;
            return NLSM_LF;             /* CR is reported as LF */
        }

        if (next_char == NLSM_LF && nlsm_state == NLSM_SAWCR)
        {
            /* eat LF after CR, since we converted CR to LF */
            nlsm_state = NLSM_START;
            continue;
        }

        nlsm_state = NLSM_START;
        return next_char;
    }
}


/*  Copy the current token text (yytext) into the global bc_txt buffer,
    growing the buffer by doubling (starting at 256 bytes) until it is
    larger than yyleng, which leaves room for the terminating NUL.

    unquote:  when non-zero and the token is at least two characters long,
              the text is passed through cfg_unquote_str() before copying.
              NOTE(review): this assumes cfg_unquote_str() never returns a
              string longer than its input -- confirm against its definition.

    On allocation failure the macro executes 'return TOK_ERROR_OOM' in the
    enclosing function.  The result of realloc() is held in a temporary, so
    in that case the previous buffer and its recorded size are left intact:
    nothing leaks and bc_txt / bc_txt_alloc stay consistent for later
    tokens.
*/
#define YYTEXT_TO_BC_TXT(unquote)                                           \
    do {                                                                    \
        size_t uyyleng = yyleng > 0 ? yyleng : 0;                           \
        if (uyyleng >= bc_txt_alloc)                                        \
        {                                                                   \
            unsigned new_alloc_ = bc_txt_alloc ? bc_txt_alloc : 256;        \
            char *new_txt_;                                                 \
                                                                            \
            while (new_alloc_ <= uyyleng)                                   \
                new_alloc_ <<= 1;                                           \
                                                                            \
            new_txt_ = (char *)realloc(bc_txt, new_alloc_);                 \
                                                                            \
            if (!new_txt_)                                                  \
                return TOK_ERROR_OOM;                                       \
                                                                            \
            bc_txt       = new_txt_;                                        \
            bc_txt_alloc = new_alloc_;                                      \
        }                                                                   \
                                                                            \
        if (!(unquote) || uyyleng < 2)                                      \
            strcpy(bc_txt, yytext);                                         \
        else                                                                \
            strcpy(bc_txt, cfg_unquote_str(yytext));                        \
    } while (0)

/*  Input hook for the generated scanner: fill buf with up to max_size
    bytes obtained from nlsm_next_char() and store the count in result,
    or YY_NULL when no bytes were available.  There is deliberately no
    trailing semicolon; the call site supplies it.
*/
#define YY_INPUT(buf,result,max_size)                                       \
    do {                                                                    \
        int c_;                                                             \
        unsigned i_ = 0;                                                    \
        unsigned ms_ = (max_size) > 0 ? (max_size) : 0;                     \
                                                                            \
        while (i_ < ms_ && (c_ = nlsm_next_char()) != EOF)                  \
            (buf)[i_++] = c_;                                               \
                                                                            \
        (result) = i_ > 0 ? i_ : YY_NULL;                                   \
    } while (0)


%}

HNUM    [0-9A-Fa-f]+
NAME    [A-Za-z_0-9\{\}"']+

%%

%{
    /* -------------------------------------------------------------------- */
    /*  Rules for identifying section headers.
*/
    /*  These rules carry no start-condition prefix, so they are active in  */
    /*  INITIAL and in every inclusive (%s) section state.  Each one        */
    /*  switches the scanner to the state for the section it introduces.    */
    /* -------------------------------------------------------------------- */
%}
"[bankswitch]"          { BEGIN(SEC_BSW); return TOK_SEC_BANKSWITCH;        }
"[mapping]"             { BEGIN(SEC_MAP); return TOK_SEC_MAPPING;           }
"[ecsbank]"             { BEGIN(SEC_EBK); return TOK_SEC_ECSBANK;           }
"[memattr]"             { BEGIN(SEC_ATR); return TOK_SEC_MEMATTR;           }
"[preload]"             { BEGIN(SEC_PRL); return TOK_SEC_PRELOAD;           }

"[macro]"               { BEGIN(SEC_MC0); return TOK_SEC_MACRO;             }

"[vars]"                { BEGIN(SEC_VAR); return TOK_SEC_VARS;              }
"[joystick]"            { BEGIN(SEC_VAR); return TOK_SEC_JOYSTICK;          }
"[keys]"                { BEGIN(SEC_VAR); return TOK_SEC_KEYS;              }
"[capslock]"            { BEGIN(SEC_VAR); return TOK_SEC_CAPSLOCK;          }
"[numlock]"             { BEGIN(SEC_VAR); return TOK_SEC_NUMLOCK;           }
"[scrolllock]"          { BEGIN(SEC_VAR); return TOK_SEC_SCROLLLOCK;        }

"[disasm]"              { BEGIN(SEC_IGN); return TOK_SEC_DISASM;            }
"[voices]"              { BEGIN(SEC_IGN); return TOK_SEC_VOICES;            }

"["[^\]]*"]"            { BEGIN(SEC_IGN); return TOK_SEC_UNKNOWN;           }

%{
    /* -------------------------------------------------------------------- */
    /*  Ignored sections (SEC_IGN): discard everything up to the next '['   */
    /*  or end of input, counting lines along the way.  The '[' is pushed   */
    /*  back with yyless(0) so the header rules can rescan it from INITIAL, */
    /*  and a single '\n' token is returned to close the ignored section.   */
    /* -------------------------------------------------------------------- */
%}
<SEC_IGN><<EOF>>        { BEGIN(INITIAL); return '\n';                      }
<SEC_IGN>\n             { bc_line_no++;   /* swallow the newline */         }
<SEC_IGN>;.*            { /* swallow a comment through end of line */       }
<SEC_IGN>[^\[;\n]+      { /* swallow anything that cannot open a section */ }
<SEC_IGN>"["            { yyless(0); BEGIN(INITIAL); return '\n';           }

%{
    /* -------------------------------------------------------------------- */
    /*  Keywords recognized only inside the memory-attribute and mapping    */
    /*  sections.  Both sections accept the same four keywords.             */
    /* -------------------------------------------------------------------- */
%}
<SEC_ATR,SEC_MAP>"RAM"  { return TOK_RAM;                                   }
<SEC_ATR,SEC_MAP>"ROM"  { return TOK_ROM;                                   }
<SEC_ATR,SEC_MAP>"WOM"  { return TOK_WOM;                                   }
<SEC_ATR,SEC_MAP>"PAGE" { return TOK_PAGE;                                  }

%{
    /* -------------------------------------------------------------------- */
    /*  Punctuation recognized only inside the ecsbank section.             */
    /* -------------------------------------------------------------------- */
%}
<SEC_EBK>":"            { return ':';                                       }

%{
    /* -------------------------------------------------------------------- */
    /*  Keywords that are only valid in the macro section.
*/
    /*                                                                      */
    /*  SEC_MC0 is the state at the start of every line in a [macro]        */
    /*  section.  The first significant character selects the command and   */
    /*  the state used to scan the rest of the line:                        */
    /*                                                                      */
    /*    SEC_MC1  operands may include names and quoted strings (L, W).    */
    /*    SEC_MC2  operands are numbers and punctuation only.               */
    /*                                                                      */
    /*  All eight register commands (0-7) move to SEC_MC2, so what follows  */
    /*  a register number is never scanned as a name or string.  Register   */
    /*  commands also store the register number in bc_hex.                  */
    /*                                                                      */
    /*  A '[' is pushed back with yyless(0) and rescanned from INITIAL,     */
    /*  because the section-header rules are not active in this exclusive   */
    /*  state.                                                              */
    /* -------------------------------------------------------------------- */
%}
<SEC_MC0>"["            { yyless(0); BEGIN(INITIAL); /*]*/                  }
<SEC_MC0>"@"            {                 return TOK_MAC_QUIET;             }
<SEC_MC0>"0"            { BEGIN(SEC_MC2); bc_hex=0; return TOK_MAC_REG;     }
<SEC_MC0>"1"            { BEGIN(SEC_MC2); bc_hex=1; return TOK_MAC_REG;     }
<SEC_MC0>"2"            { BEGIN(SEC_MC2); bc_hex=2; return TOK_MAC_REG;     }
<SEC_MC0>"3"            { BEGIN(SEC_MC2); bc_hex=3; return TOK_MAC_REG;     }
<SEC_MC0>"4"            { BEGIN(SEC_MC2); bc_hex=4; return TOK_MAC_REG;     }
<SEC_MC0>"5"            { BEGIN(SEC_MC2); bc_hex=5; return TOK_MAC_REG;     }
<SEC_MC0>"6"            { BEGIN(SEC_MC2); bc_hex=6; return TOK_MAC_REG;     }
<SEC_MC0>"7"            { BEGIN(SEC_MC2); bc_hex=7; return TOK_MAC_REG;     }
<SEC_MC0>"A"            { BEGIN(SEC_MC2); return TOK_MAC_AHEAD;             }
<SEC_MC0>"B"            { BEGIN(SEC_MC2); return TOK_MAC_BLANK;             }
<SEC_MC0>"I"            { BEGIN(SEC_MC2); return TOK_MAC_INSPECT;           }
<SEC_MC0>"L"            { BEGIN(SEC_MC1); return TOK_MAC_LOAD;              }
<SEC_MC0>"O"            { BEGIN(SEC_MC2); return TOK_MAC_RUNTO;             }
<SEC_MC0>"P"            { BEGIN(SEC_MC2); return TOK_MAC_POKE;              }
<SEC_MC0>"R"            { BEGIN(SEC_MC2); return TOK_MAC_RUN;               }
<SEC_MC0>"T"            { BEGIN(SEC_MC2); return TOK_MAC_TRACE;             }
<SEC_MC0>"V"            { BEGIN(SEC_MC2); return TOK_MAC_VIEW;              }
<SEC_MC0>"W"            { BEGIN(SEC_MC1); return TOK_MAC_WATCH;             }
<SEC_MC0>;.*            { /* ignore comments.                           */  }
<SEC_MC0>[ \t\r]+       { /* ignore whitespace.                         */  }
<SEC_MC0>\n             { bc_line_no++; return '\n';                        }
<SEC_MC0>.              { /* ignore any other character, one at a time. */  }

%{
    /* -------------------------------------------------------------------- */
    /*  Special secondary, tertiary states for macro processing.  Grrr...   */
    /*  We enter this state after parsing the first 'name' on a line.       */
    /*  This keeps hex ranges that aren't $ adorned from turning into       */
    /*  TOK_NAMEs.  Too much of the grammar comes into the lexer.
:-P */
    /*                                                                      */
    /*  Numeric tokens publish three views of their text: bc_dec (text      */
    /*  read as base 10), bc_hex (text read as base 16, skipping any        */
    /*  leading '$') and bc_txt (a copy of the raw text).  strtol() and     */
    /*  strtoul() are used for the conversions: unlike atoi() they have     */
    /*  defined behaviour for over-long digit strings, and unlike           */
    /*  sscanf("%x") they do not depend on uint32_t having the same type    */
    /*  as unsigned int.                                                    */
    /*                                                                      */
    /*  Every token rule moves to SEC_MC2, so names and quoted strings      */
    /*  (SEC_MC1 only) are accepted solely as the first operand.  A         */
    /*  newline returns to SEC_MC0 for the next macro line.                 */
    /* -------------------------------------------------------------------- */
%}

<SEC_MC1,SEC_MC2>[0-9]+ {
                            BEGIN(SEC_MC2);
                            bc_dec = (int)strtol(yytext, NULL, 10);
                            bc_hex = (uint32_t)strtoul(yytext, NULL, 16);
                            YYTEXT_TO_BC_TXT(0);
                            return TOK_DEC;
                        }
<SEC_MC1,SEC_MC2>[A-F0-9]+ {
                            BEGIN(SEC_MC2);
                            bc_dec = (int)strtol(yytext, NULL, 10);
                            bc_hex = (uint32_t)strtoul(yytext, NULL, 16);
                            YYTEXT_TO_BC_TXT(0);
                            return TOK_HEX;
                        }
<SEC_MC1,SEC_MC2>\$[A-F0-9]+ {
                            BEGIN(SEC_MC2);
                            /* skip the leading '$' for both conversions */
                            bc_dec = (int)strtol(yytext + 1, NULL, 10);
                            bc_hex = (uint32_t)strtoul(yytext + 1, NULL, 16);
                            YYTEXT_TO_BC_TXT(0);
                            return TOK_HEX;
                        }
<SEC_MC1,SEC_MC2>"-"    { BEGIN(SEC_MC2); return '-';                       }
<SEC_MC1,SEC_MC2>","    { BEGIN(SEC_MC2); return ',';                       }
<SEC_MC1,SEC_MC2>":"    { BEGIN(SEC_MC2); return ':';                       }
<SEC_MC1,SEC_MC2>"PAGE" { BEGIN(SEC_MC2); return TOK_PAGE;                  }
<SEC_MC1,SEC_MC2>;.*    { BEGIN(SEC_MC2); /* eat comments. */               }
<SEC_MC1,SEC_MC2>\n     { bc_line_no++; BEGIN(SEC_MC0); return '\n';        }
<SEC_MC1,SEC_MC2>"["    { yyless(0); BEGIN(INITIAL); /*]*/                  }

<SEC_MC1>\"([^\n\r"]*|\\\")*\" {
                            BEGIN(SEC_MC2);
                            YYTEXT_TO_BC_TXT(1);    /* store it unquoted */
                            return TOK_STRING;
                        }

<SEC_MC1>[^ \t\n\r;\[\]\$\=\-\,][^ \t\n\r;\[\]\$]* {
                            BEGIN(SEC_MC2);
                            YYTEXT_TO_BC_TXT(0);
                            return TOK_NAME;
                        }

<SEC_MC1,SEC_MC2>[ \t\r]+ { /* ignore whitespace. */                        }
<SEC_MC1,SEC_MC2>.      { YYTEXT_TO_BC_TXT(0); return TOK_ERROR_BAD;        }

%{
    /* -------------------------------------------------------------------- */
    /*  Main scanner with common rules across most sections.
*/
    /*                                                                      */
    /*  These rules have no start-condition prefix (apart from the two      */
    /*  SEC_VAR rules), so they apply in INITIAL and in the inclusive       */
    /*  section states.  They are never active in the exclusive states      */
    /*  (SEC_IGN, SEC_MC0, SEC_MC1, SEC_MC2), which is why the newline      */
    /*  rule below needs no macro-state handling.                           */
    /*                                                                      */
    /*  Numeric tokens publish bc_dec (text read as base 10), bc_hex (text  */
    /*  read as base 16, skipping a leading '-' or '$') and bc_txt (a copy  */
    /*  of the raw text).  strtol() and strtoul() are used because they     */
    /*  have defined behaviour for over-long digit strings and do not       */
    /*  depend on uint32_t having the same type as unsigned int.            */
    /*                                                                      */
    /*  Rule order matters: when two rules match text of the same length,   */
    /*  the earlier rule wins, so the numeric rules must stay ahead of the  */
    /*  SEC_VAR name rule.                                                  */
    /* -------------------------------------------------------------------- */
%}
-[0-9]+                 {
                            bc_dec = (int)strtol(yytext, NULL, 10);
                            /* hex view ignores the sign */
                            bc_hex = (uint32_t)strtoul(yytext + 1, NULL, 16);
                            YYTEXT_TO_BC_TXT(0);
                            return TOK_DECONLY;
                        }
[0-9]+                  {
                            bc_dec = (int)strtol(yytext, NULL, 10);
                            bc_hex = (uint32_t)strtoul(yytext, NULL, 16);
                            YYTEXT_TO_BC_TXT(0);
                            return TOK_DEC;
                        }
[A-F0-9]+               {
                            bc_dec = (int)strtol(yytext, NULL, 10);
                            bc_hex = (uint32_t)strtoul(yytext, NULL, 16);
                            YYTEXT_TO_BC_TXT(0);
                            return TOK_HEX;
                        }
\$[A-F0-9]+             {
                            /* skip the leading '$' for both conversions */
                            bc_dec = (int)strtol(yytext + 1, NULL, 10);
                            bc_hex = (uint32_t)strtoul(yytext + 1, NULL, 16);
                            YYTEXT_TO_BC_TXT(0);
                            return TOK_HEX;
                        }
"="                     { return '=';                                       }
"-"                     { return '-';                                       }
","                     { return ',';                                       }
;.*                     { /* eat comments. */                               }
\n                      {
                            bc_line_no++;
                            return '\n'; /* comments and newlines are same */
                        }

<SEC_VAR>[^ \t\n\r;\[\]\$\=\-\,\"]+ {
                            YYTEXT_TO_BC_TXT(0);
                            return TOK_NAME;
                        }

<SEC_VAR>\"([^\n\r"]*|\\\")*\" {
                            YYTEXT_TO_BC_TXT(1);    /* store it unquoted */
                            return TOK_STRING;
                        }

[ \t\r]+                { /* eat whitespace */                              }
[^ \t\n\r\=\-\,\[\]A-Z0-9;]+ {
                            /* a run of characters no other rule accepts */
                            YYTEXT_TO_BC_TXT(0);
#ifdef DEBUG
                            fprintf(stderr, "BAD2: %d\n", yytext[0]);
#endif
                            return TOK_ERROR_BAD;
                        }
.                       {
                            /* last resort: any single leftover character */
                            YYTEXT_TO_BC_TXT(0);
#ifdef DEBUG
                            fprintf(stderr, "BAD3: %d\n", yytext[0]);
#endif
                            return TOK_ERROR_BAD;
                        }

%%

/* ======================================================================== */
/*  This program is free software; you can redistribute it and/or modify    */
/*  it under the terms of the GNU General Public License as published by    */
/*  the Free Software Foundation; either version 2 of the License, or       */
/*  (at your option) any later version.                                     */
/*                                                                          */
/*  This program is distributed in the hope that it will be useful,         */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of          */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU       */
/*  General Public License for more details.                                */
/*                                                                          */
/*  You should have received a copy of the GNU General Public License along */
/*  with this program; if not, write to the Free Software Foundation, Inc., */
/*  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.             */
/* ======================================================================== */
/*                 Copyright (c) 2003-+Inf, Joseph Zbiciak                  */
/* ======================================================================== */