1 /* Scanner for INTVPC .CFG files */
2 %option case-insensitive
3 %option never-interactive
4 %option noyywrap
5 %option noinput
6 %option batch
7 %option 8bit
8 %option nounput
9 %option prefix="bc_"
10 %option outfile="bincfg/bincfg_lex.c"
11 
12 %x SEC_IGN
13 %s SEC_VAR
14 %s SEC_BSW
15 %s SEC_MAP
16 %s SEC_EBK
17 %s SEC_ATR
18 %s SEC_PRL
19 %x SEC_MC0
20 %x SEC_MC1
21 %x SEC_MC2
22 
23 %{
24 /* Clang doesn't like the unreachable code in Flex's generated output. */
25 #ifdef __clang__
26 #pragma clang diagnostic ignored "-Wunreachable-code"
27 #endif
28 
29 /* put any #includes here */
30 #include "config.h"
31 #include "lzoe/lzoe.h"
32 #include "bincfg/bincfg.h"
33 #include "bincfg/bincfg_grmr.tab.h"
34 #include "misc/types.h"
35 
/* Scanner state shared with the rest of the bincfg code.  The rules below  */
/* fill these in before returning a token.                                  */
36 extern int      bc_lex(void);
37 int             bc_line_no = 1;      /* starts at 1; the '\n' rules increment it */
38 int             bc_dec;              /* numeric token text parsed as decimal      */
39 uint32_t         bc_hex;             /* numeric token text parsed as hex; also the register digit for TOK_MAC_REG */
40 char           *bc_txt = NULL;       /* copy of the token text, written by YYTEXT_TO_BC_TXT */
41 static unsigned bc_txt_alloc = 0;    /* bytes currently allocated for bc_txt      */
42 
/* States of the character reader nlsm_next_char(). */
43 typedef enum nlsm_t
44 {
45     NLSM_START = 0,     /* nothing pending                                      */
46     NLSM_SAWCR,         /* last byte read was CR (13); an LF right after it is dropped */
47     NLSM_SAWEOF,        /* stream hit EOF and one LF was returned; a second LF is due  */
48     NLSM_SAWEOF2,       /* second LF was returned; the next call returns EOF           */
49     NLSM_DIDEOF         /* EOF was returned; left only if a later read yields a byte   */
50 } nlsm_t;
51 
52 static nlsm_t nlsm_state = NLSM_START;   /* current reader state */
53 
54 /* Simple character-at-a-time state machine for reading input.  It serves
55    three purposes:
56 
57    1.  Convert Mac and Windows style NL to UNIX style.
58    2.  Insert an extra NL at EOF to ensure the last line is NL terminated
59    3.  Reads from an LZFILE* instead of a FILE*.
60 */
61 static int nlsm_next_char(void)
62 {
63     int next_char = EOF;
64 
65 again:
66     if (nlsm_state == NLSM_SAWEOF)
67     {
68         nlsm_state = NLSM_SAWEOF2;
69         return 10;
70     }
71     if (nlsm_state == NLSM_SAWEOF2)
72     {
73         nlsm_state = NLSM_DIDEOF;
74         return EOF;
75     }
76 
77     next_char = lzoe_fgetc( (LZFILE *)bc_in );
78 
79     if (nlsm_state == NLSM_DIDEOF)
80     {
81         if (next_char != EOF)
82             nlsm_state = NLSM_START;
83         else
84             return EOF;
85     }
86 
87     if (next_char == EOF)
88     {
89         nlsm_state = NLSM_SAWEOF;
90         return 10; /* extra CR at EOF */
91     }
92 
93     if (next_char == 13)
94     {
95         nlsm_state = NLSM_SAWCR;
96         return 10;
97     }
98 
99     if (next_char == 10 && nlsm_state == NLSM_SAWCR)
100     {
101         nlsm_state = NLSM_START;
102         goto again; /* eat LF after CR, since we converted CR to LF */
103     }
104 
105     nlsm_state = NLSM_START;
106     return next_char;
107 }
108 
109 
/* YYTEXT_TO_BC_TXT(unquote) -- copy the current token text into bc_txt.

   bc_txt is grown (starting at 256 bytes, then doubling) until it can hold
   yyleng characters plus the terminator.  When `unquote` is non-zero and
   the token is at least two characters long, the text is first passed
   through cfg_unquote_str(); otherwise yytext is copied as-is.
   NOTE(review): the unquoted copy assumes cfg_unquote_str() never returns
   more than yyleng characters -- confirm against its definition.

   Must be used inside the scanner action code: on allocation failure it
   executes `return TOK_ERROR_OOM;`.  In that case bc_txt and bc_txt_alloc
   are left untouched, so the old buffer is neither leaked nor left
   dangling behind an enlarged size.                                       */
#define YYTEXT_TO_BC_TXT(unquote) \
        do {                                                            \
            size_t uyyleng = yyleng > 0 ? (size_t)yyleng : 0;           \
            if (uyyleng >= bc_txt_alloc)                                \
            {                                                           \
                unsigned new_alloc_ = bc_txt_alloc ? bc_txt_alloc : 256;\
                char    *new_txt_;                                      \
                                                                        \
                while (new_alloc_ <= uyyleng)                           \
                    new_alloc_ <<= 1;                                   \
                                                                        \
                /* Commit pointer and size only once realloc worked. */ \
                new_txt_ = (char *)realloc(bc_txt, new_alloc_);         \
                if (!new_txt_)                                          \
                    return TOK_ERROR_OOM;                               \
                                                                        \
                bc_txt       = new_txt_;                                \
                bc_txt_alloc = new_alloc_;                              \
            }                                                           \
                                                                        \
            if (!(unquote) || uyyleng < 2)                              \
                strcpy(bc_txt, yytext);                                 \
            else                                                        \
                strcpy(bc_txt, cfg_unquote_str(yytext));                \
        } while (0)
132 
/* YY_INPUT(buf, result, max_size) -- flex's buffer-refill hook.

   Stores up to max_size characters from nlsm_next_char() into buf and
   stops early when that function returns EOF.  `result` receives the
   number of characters stored, or YY_NULL when none were.  A max_size
   that is not positive is treated as zero.

   The expansion is a single statement with no trailing semicolon, so the
   macro can be followed by `;` in any statement context, including the
   body of an if/else.                                                     */
#define YY_INPUT(buf,result,max_size)                                   \
        do {                                                            \
            int      c_;                                                \
            unsigned i_  = 0;                                           \
            unsigned ms_ = (max_size) > 0 ? (max_size) : 0;             \
                                                                        \
            /* Test the room first so no character is read and lost. */ \
            while (i_ < ms_ && (c_ = nlsm_next_char()) != EOF)          \
                (buf)[i_++] = c_;                                       \
                                                                        \
            (result) = i_ > 0 ? i_ : YY_NULL;                           \
        } while (0)
146 
147 
148 %}
149 
/* Named patterns: a run of hex digits, and a run of name characters.      */
/* NOTE(review): no rule in this file refers to {HNUM} or {NAME}; the      */
/* rules spell their character classes out inline.                         */
150 HNUM    [0-9A-Fa-f]+
151 NAME    [A-Za-z_0-9\{\}"']+
152 
153 %%
154 
155 %{
156 /* -------------------------------------------------------------------- */
157 /*  Rules for identifying section headers.                              */
/*                                                                      */
/*  Each header selects the start condition used to scan that section's */
/*  body and returns the matching TOK_SEC_* token.  The rules have no   */
/*  start condition of their own, so they are active in INITIAL and in  */
/*  the inclusive (%s) states only; the exclusive (%x) states push a    */
/*  '[' back with yyless(0) and return to INITIAL so these rules see it.*/
/*                                                                      */
/*  [vars], [joystick], [keys] and the three lock sections share        */
/*  SEC_VAR.  [disasm], [voices] and any other bracketed name go to     */
/*  SEC_IGN.  The catch-all pattern must stay last: for matches of      */
/*  equal length flex picks the rule listed first.                      */
158 /* -------------------------------------------------------------------- */
159 %}
160 "[bankswitch]"      {   BEGIN(SEC_BSW); return  TOK_SEC_BANKSWITCH;     }
161 "[mapping]"         {   BEGIN(SEC_MAP); return  TOK_SEC_MAPPING;        }
162 "[ecsbank]"         {   BEGIN(SEC_EBK); return  TOK_SEC_ECSBANK;        }
163 "[memattr]"         {   BEGIN(SEC_ATR); return  TOK_SEC_MEMATTR;        }
164 "[preload]"         {   BEGIN(SEC_PRL); return  TOK_SEC_PRELOAD;        }
165 
166 "[macro]"           {   BEGIN(SEC_MC0); return  TOK_SEC_MACRO;          }
167 
168 "[vars]"            {   BEGIN(SEC_VAR); return  TOK_SEC_VARS;           }
169 "[joystick]"        {   BEGIN(SEC_VAR); return  TOK_SEC_JOYSTICK;       }
170 "[keys]"            {   BEGIN(SEC_VAR); return  TOK_SEC_KEYS;           }
171 "[capslock]"        {   BEGIN(SEC_VAR); return  TOK_SEC_CAPSLOCK;       }
172 "[numlock]"         {   BEGIN(SEC_VAR); return  TOK_SEC_NUMLOCK;        }
173 "[scrolllock]"      {   BEGIN(SEC_VAR); return  TOK_SEC_SCROLLLOCK;     }
174 
175 "[disasm]"          {   BEGIN(SEC_IGN); return  TOK_SEC_DISASM;         }
176 "[voices]"          {   BEGIN(SEC_IGN); return  TOK_SEC_VOICES;         }
177 
178 "["[^\]]*"]"        {   BEGIN(SEC_IGN); return  TOK_SEC_UNKNOWN;        }
179 
180 %{
181 /* -------------------------------------------------------------------- */
182 /*  Mini-scanner for ignored sections:  Just eat them right up.         */
/*                                                                      */
/*  SEC_IGN is exclusive, so only the rules below apply inside it.      */
/*  Everything up to the next '[' is discarded; only newlines are       */
/*  counted.  On '[' the character is pushed back with yyless(0), the   */
/*  scanner returns to INITIAL and a '\n' token is returned.  End of    */
/*  file is handled the same way, minus the push-back.                  */
183 /* -------------------------------------------------------------------- */
184 %}
185 <SEC_IGN><<EOF>>    {   BEGIN(INITIAL); return '\n';                    }
186 <SEC_IGN>\n         {   bc_line_no++; /* eat newlines */                }
187 <SEC_IGN>;.*        {   /* eat comments */                              }
188 <SEC_IGN>[^\[;\n]+  {   /* eat non-section openers. */                  }
189 <SEC_IGN>"["        {   yyless(0);  BEGIN(INITIAL); return '\n';        }
190 
191 %{
192 /* -------------------------------------------------------------------- */
193 /*  Keywords that are only valid in the memory-attribute section.       */
/*  Matching ignores case (%option case-insensitive).                   */
194 /* -------------------------------------------------------------------- */
195 %}
196 <SEC_ATR>"RAM"      {   return TOK_RAM;                                 }
197 <SEC_ATR>"ROM"      {   return TOK_ROM;                                 }
198 <SEC_ATR>"WOM"      {   return TOK_WOM;                                 }
199 <SEC_ATR>"PAGE"     {   return TOK_PAGE;                                }
200 
201 %{
202 /* -------------------------------------------------------------------- */
203 /*  Keywords that are only valid in the mapping section.  These are     */
/*  the same four keywords, returning the same tokens, as above.        */
204 /* -------------------------------------------------------------------- */
205 %}
206 <SEC_MAP>"RAM"      {   return TOK_RAM;                                 }
207 <SEC_MAP>"ROM"      {   return TOK_ROM;                                 }
208 <SEC_MAP>"WOM"      {   return TOK_WOM;                                 }
209 <SEC_MAP>"PAGE"     {   return TOK_PAGE;                                }
210 
211 %{
212 /* -------------------------------------------------------------------- */
213 /*  Punctuation that is only valid in the ecsbank section:  ':'.        */
214 /* -------------------------------------------------------------------- */
215 %}
216 <SEC_EBK>":"        {   return ':';                                     }
217 
%{
/* -------------------------------------------------------------------- */
/*  Keywords that are only valid in the macro section.                  */
/*                                                                      */
/*  SEC_MC0 scans the start of a macro line.  The command character     */
/*  chooses how the rest of the line is scanned:                        */
/*                                                                      */
/*   -- 'L' and 'W' enter SEC_MC1, where the next token may be a        */
/*      quoted string or a bare name.                                   */
/*   -- Every other command, including all eight register digits        */
/*      '0'..'7', enters SEC_MC2, where operands are numeric only.      */
/*      Register digits also leave the register number in bc_hex.       */
/*                                                                      */
/*  '@' returns TOK_MAC_QUIET without leaving SEC_MC0.  A '[' is pushed */
/*  back with yyless(0) and the scanner returns to INITIAL so the       */
/*  section-header rules can match it.  Comments, blanks and any other  */
/*  character are skipped.                                              */
/* -------------------------------------------------------------------- */
%}
<SEC_MC0>"["        {   yyless(0); BEGIN(INITIAL);   /*]*/              }
<SEC_MC0>"@"        {   return TOK_MAC_QUIET;                           }
<SEC_MC0>"0"        {   BEGIN(SEC_MC2); bc_hex=0; return TOK_MAC_REG;   }
<SEC_MC0>"1"        {   BEGIN(SEC_MC2); bc_hex=1; return TOK_MAC_REG;   }
<SEC_MC0>"2"        {   BEGIN(SEC_MC2); bc_hex=2; return TOK_MAC_REG;   }
<SEC_MC0>"3"        {   BEGIN(SEC_MC2); bc_hex=3; return TOK_MAC_REG;   }
<SEC_MC0>"4"        {   BEGIN(SEC_MC2); bc_hex=4; return TOK_MAC_REG;   }
<SEC_MC0>"5"        {   BEGIN(SEC_MC2); bc_hex=5; return TOK_MAC_REG;   }
<SEC_MC0>"6"        {   BEGIN(SEC_MC2); bc_hex=6; return TOK_MAC_REG;   }
<SEC_MC0>"7"        {   BEGIN(SEC_MC2); bc_hex=7; return TOK_MAC_REG;   }
<SEC_MC0>"A"        {   BEGIN(SEC_MC2); return TOK_MAC_AHEAD;           }
<SEC_MC0>"B"        {   BEGIN(SEC_MC2); return TOK_MAC_BLANK;           }
<SEC_MC0>"I"        {   BEGIN(SEC_MC2); return TOK_MAC_INSPECT;         }
<SEC_MC0>"L"        {   BEGIN(SEC_MC1); return TOK_MAC_LOAD;            }
<SEC_MC0>"O"        {   BEGIN(SEC_MC2); return TOK_MAC_RUNTO;           }
<SEC_MC0>"P"        {   BEGIN(SEC_MC2); return TOK_MAC_POKE;            }
<SEC_MC0>"R"        {   BEGIN(SEC_MC2); return TOK_MAC_RUN;             }
<SEC_MC0>"T"        {   BEGIN(SEC_MC2); return TOK_MAC_TRACE;           }
<SEC_MC0>"V"        {   BEGIN(SEC_MC2); return TOK_MAC_VIEW;            }
<SEC_MC0>"W"        {   BEGIN(SEC_MC1); return TOK_MAC_WATCH;           }
<SEC_MC0>;.*        {   /* ignore comments.       */                    }
<SEC_MC0>[ \t\r]*   {   /* ignore whitespace.     */                    }
<SEC_MC0>\n         {   bc_line_no++; return '\n';                      }
<SEC_MC0>.          {   /* ignore unknown lines.  */                    }
247 
%{
/* -------------------------------------------------------------------- */
/*  Special secondary, tertiary states for macro processing.  Grrr...   */
/*  SEC_MC1 is entered after a command whose first argument may be a    */
/*  name or string; SEC_MC2 is entered after any other command and      */
/*  after the first argument token has been scanned.                    */
/*  This keeps hex ranges that aren't $ adorned from turning into       */
/*  TOK_NAMEs.  Too much of the grammar comes into the lexer.  :-P      */
/*                                                                      */
/*  Numeric tokens set bc_dec (text read as decimal) and bc_hex (text   */
/*  read as hex) and copy the text to bc_txt.  strtol/strtoul are used  */
/*  rather than atoi/sscanf("%x"): they are defined for out-of-range    */
/*  input, and the result is converted to bc_hex's type explicitly      */
/*  instead of relying on uint32_t being 'unsigned int'.  For the '$'   */
/*  form, parsing starts after the '$'.                                 */
/* -------------------------------------------------------------------- */
%}

<SEC_MC1,SEC_MC2>[0-9]+         {
                                    BEGIN(SEC_MC2);
                                    bc_dec = (int)strtol(yytext, NULL, 10);
                                    bc_hex = (uint32_t)strtoul(yytext, NULL, 16);
                                    YYTEXT_TO_BC_TXT(0);
                                    return  TOK_DEC;
                                }
<SEC_MC1,SEC_MC2>[A-F0-9]+      {
                                    BEGIN(SEC_MC2);
                                    bc_dec = (int)strtol(yytext, NULL, 10);
                                    bc_hex = (uint32_t)strtoul(yytext, NULL, 16);
                                    YYTEXT_TO_BC_TXT(0);
                                    return  TOK_HEX;
                                }
<SEC_MC1,SEC_MC2>\$[A-F0-9]+    {
                                    BEGIN(SEC_MC2);
                                    bc_dec = (int)strtol(yytext + 1, NULL, 10);
                                    bc_hex = (uint32_t)strtoul(yytext + 1, NULL, 16);
                                    YYTEXT_TO_BC_TXT(0);
                                    return  TOK_HEX;
                                }
278 <SEC_MC1,SEC_MC2>"-"    {   BEGIN(SEC_MC2); return '-';                 }
279 <SEC_MC1,SEC_MC2>","    {   BEGIN(SEC_MC2); return ',';                 }
280 <SEC_MC1,SEC_MC2>":"    {   BEGIN(SEC_MC2); return ':';                 }
281 <SEC_MC1,SEC_MC2>"PAGE" {   BEGIN(SEC_MC2); return TOK_PAGE;            }
282 <SEC_MC1,SEC_MC2>;.*    {   BEGIN(SEC_MC2); /* eat comments.  */        }
283 <SEC_MC1,SEC_MC2>\n     {   bc_line_no++; BEGIN(SEC_MC0); return '\n';  }
284 <SEC_MC1,SEC_MC2>"["    {   yyless(0); BEGIN(INITIAL);   /*]*/          }
285 
286 <SEC_MC1>\"([^\n\r"]*|\\\")*\"  {
                                    /* Quoted string: recognised only in   */
                                    /* SEC_MC1.  The text goes to bc_txt   */
                                    /* by way of cfg_unquote_str().        */
287                                     BEGIN(SEC_MC2);
288                                     YYTEXT_TO_BC_TXT(1);
289                                     return TOK_STRING;
290                                 }
291 
292 <SEC_MC1>[^ \t\n\r;\[\]\$\=\-\,][^ \t\n\r;\[\]\$]* {
                                    /* Bare name: recognised only in       */
                                    /* SEC_MC1.  Like the other token      */
                                    /* rules it moves on to SEC_MC2, so    */
                                    /* text later on the line is not       */
                                    /* scanned as a name.                  */
293                                     BEGIN(SEC_MC2);
294                                     YYTEXT_TO_BC_TXT(0);
295                                     return  TOK_NAME;
296                                 }
297 
298 <SEC_MC1,SEC_MC2>[ \t\r]* { /* ignore whitespace. */                    }
299 <SEC_MC1,SEC_MC2>.        { /* anything else */ YYTEXT_TO_BC_TXT(0); return TOK_ERROR_BAD;  }
300 
%{
/* -------------------------------------------------------------------- */
/*  Main scanner with common rules across most sections.                */
/*                                                                      */
/*  Numeric tokens.  Each one sets bc_dec (text read as decimal) and    */
/*  bc_hex (text read as hex) and copies the text to bc_txt:            */
/*                                                                      */
/*   -- "-digits" returns TOK_DECONLY.  bc_dec includes the sign;       */
/*      bc_hex is taken from the digits after the '-'.                  */
/*   -- "digits" returns TOK_DEC.                                       */
/*   -- hex digits, with or without a leading '$', return TOK_HEX.      */
/*      For the '$' form, parsing starts after the '$'.                 */
/*                                                                      */
/*  strtol/strtoul are used rather than atoi/sscanf("%x"): they are     */
/*  defined for out-of-range input, and the result is converted to      */
/*  bc_hex's type explicitly instead of relying on uint32_t being       */
/*  'unsigned int'.                                                     */
/* -------------------------------------------------------------------- */
%}
-[0-9]+     {
                bc_dec = (int)strtol(yytext, NULL, 10);
                bc_hex = (uint32_t)strtoul(yytext + 1, NULL, 16);
                YYTEXT_TO_BC_TXT(0);
                return  TOK_DECONLY;
            }
[0-9]+      {
                bc_dec = (int)strtol(yytext, NULL, 10);
                bc_hex = (uint32_t)strtoul(yytext, NULL, 16);
                YYTEXT_TO_BC_TXT(0);
                return  TOK_DEC;
            }
[A-F0-9]+   {
                bc_dec = (int)strtol(yytext, NULL, 10);
                bc_hex = (uint32_t)strtoul(yytext, NULL, 16);
                YYTEXT_TO_BC_TXT(0);
                return  TOK_HEX;
            }
\$[A-F0-9]+ {
                bc_dec = (int)strtol(yytext + 1, NULL, 10);
                bc_hex = (uint32_t)strtoul(yytext + 1, NULL, 16);
                YYTEXT_TO_BC_TXT(0);
                return  TOK_HEX;
            }
330 "="         {   return '=';                                             }
331 "-"         {   return '-';                                             }
332 ","         {   return ',';                                             }
333 ;.*         {   /* eat comments.  */                                    }
334 \n          {   bc_line_no++;
                /* NOTE(review): SEC_MC1 is declared %x, and this rule has */
                /* no start condition, so it should never run while in     */
                /* SEC_MC1; the check below looks unreachable -- confirm.   */
335                 if (YY_START == SEC_MC1) BEGIN(SEC_MC0);
336                 return '\n';        /* comments and newlines are same */
337             }
338 
339 <SEC_VAR>[^ \t\n\r;\[\]\$\=\-\,\"]+ {
                /* Bare word in a SEC_VAR section.  Text that a numeric    */
                /* rule above matches at the same length goes to that rule */
                /* instead, because it is listed first.                    */
340                 YYTEXT_TO_BC_TXT(0);
341                 return TOK_NAME;
342             }
343 
344 <SEC_VAR>\"([^\n\r"]*|\\\")*\" {
                /* Quoted string in a SEC_VAR section; the text goes to    */
                /* bc_txt by way of cfg_unquote_str().                     */
345                 YYTEXT_TO_BC_TXT(1);
346                 return TOK_STRING;
347             }
348 
349 [ \t\r]+    {   /* eat whitespace */                                    }
350 [^ \t\n\r\=\-\,\[\]A-Z0-9;]+ {
    /* A run of characters that none of the rules above accept is reported */
    /* as one bad token, with the offending text left in bc_txt.           */
351     YYTEXT_TO_BC_TXT(0);
352 #ifdef DEBUG
353     fprintf(stderr, "BAD2: %d\n", yytext[0]);
354 #endif
355     return TOK_ERROR_BAD;
356 }
357 .           {
    /* Last resort: any single character that nothing else matched.        */
358     YYTEXT_TO_BC_TXT(0);
359 #ifdef DEBUG
360     fprintf(stderr, "BAD3: %d\n", yytext[0]);
361 #endif
362     return TOK_ERROR_BAD;
363 }
364 
365 %%
366 
367 /* ======================================================================== */
368 /*  This program is free software; you can redistribute it and/or modify    */
369 /*  it under the terms of the GNU General Public License as published by    */
370 /*  the Free Software Foundation; either version 2 of the License, or       */
371 /*  (at your option) any later version.                                     */
372 /*                                                                          */
373 /*  This program is distributed in the hope that it will be useful,         */
374 /*  but WITHOUT ANY WARRANTY; without even the implied warranty of          */
375 /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU       */
376 /*  General Public License for more details.                                */
377 /*                                                                          */
378 /*  You should have received a copy of the GNU General Public License along */
379 /*  with this program; if not, write to the Free Software Foundation, Inc., */
380 /*  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.             */
381 /* ======================================================================== */
382 /*                 Copyright (c) 2003-+Inf, Joseph Zbiciak                  */
383 /* ======================================================================== */
384