1 /* Generated by re2c */
2 #line 1 "push_fg.re"
3 // re2c $INPUT -o $OUTPUT -fg
4 /*
5 * A push-model scanner example for re2c -f
6 * Written Mon Apr 11 2005 by mgix@mgix.com
7 * This file is in the public domain.
8 *
9 */
10
11 // ----------------------------------------------------------------------
12
13 #include <fcntl.h>
14 #include <stdio.h>
15 #include <stddef.h>
16 #include <stdlib.h>
17 #include <string.h>
18
19 #if defined(WIN32)
20
21 typedef signed char int8_t;
22 typedef signed short int16_t;
23 typedef signed int int32_t;
24
25 typedef unsigned char uint8_t;
26 typedef unsigned short uint16_t;
27 typedef unsigned int uint32_t;
28
29 #else
30
31 #include <stdint.h>
32 #include <unistd.h>
33
34 #ifndef O_BINARY
35 #define O_BINARY 0
36 #endif
37
38 #endif
39
40 // ----------------------------------------------------------------------
/*
 * X-macro listing every token kind the scanner can emit, in enum order.
 * Define TOK(x) before expanding TOKENS and #undef it afterwards; each
 * expansion site decides what one entry turns into (an enumerator, a
 * string, ...), so all views of the list stay in sync.
 */
#define TOKENS              \
                            \
    TOK(kEOF)               \
    TOK(kEOL)               \
    TOK(kUnknown)           \
    TOK(kIdentifier)        \
    TOK(kDecimalConstant)   \
                            \
    TOK(kEqual)             \
    TOK(kLeftParen)         \
    TOK(kRightParen)        \
    TOK(kMinus)             \
    TOK(kPlus)              \
    TOK(kStar)              \
    TOK(kSlash)             \
                            \
    TOK(kIf)                \
    TOK(kFor)               \
    TOK(kElse)              \
    TOK(kGoto)              \
    TOK(kBreak)             \
    TOK(kWhile)             \
    TOK(kReturn)

// ----------------------------------------------------------------------
/*
 * Printable name of each token, indexed by its position in TOKENS
 * (entry 0 is "kEOF"). Both the strings and the table itself are
 * read-only.
 */
static const char *const tokenNames[] =
{
    #define TOK(x) #x,
        TOKENS
    #undef TOK
};
73
74 // ----------------------------------------------------------------------
75 class PushScanner
76 {
77 public:
78
79 enum Token
80 {
81 #define TOK(x) x,
82 TOKENS
83 #undef TOK
84 };
85
86 private:
87
88 bool eof;
89 int32_t state;
90
91 uint8_t *limit;
92 uint8_t *start;
93 uint8_t *cursor;
94 uint8_t *marker;
95
96 uint8_t *buffer;
97 uint8_t *bufferEnd;
98
99 uint8_t yych;
100 uint32_t yyaccept;
101
102 public:
103
104 // ----------------------------------------------------------------------
PushScanner()105 PushScanner()
106 {
107 limit = 0;
108 start = 0;
109 state = -1;
110 cursor = 0;
111 marker = 0;
112 buffer = 0;
113 eof = false;
114 bufferEnd = 0;
115 }
116
117 // ----------------------------------------------------------------------
~PushScanner()118 ~PushScanner()
119 {
120 }
121
122 // ----------------------------------------------------------------------
send(Token token)123 void send(
124 Token token
125 )
126 {
127 size_t tokenSize = cursor-start;
128 const char *tokenName = tokenNames[token];
129 printf(
130 "scanner is pushing out a token of type %d (%s)",
131 token,
132 tokenName
133 );
134
135 if(token==kEOF) putchar('\n');
136 else
137 {
138 size_t tokenNameSize = strlen(tokenNames[token]);
139 size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize);
140 for(size_t i=0; i<padSize; ++i) putchar(' ');
141 printf(" : ---->");
142
143 fwrite(
144 start,
145 tokenSize,
146 1,
147 stdout
148 );
149
150 printf("<----\n");
151 }
152 }
153
154 // ----------------------------------------------------------------------
push(const void * input,ssize_t inputSize)155 uint32_t push(
156 const void *input,
157 ssize_t inputSize
158 )
159 {
160 printf(
161 "scanner is receiving a new data batch of length %d\n"
162 "scanner continues with saved state = %d\n",
163 inputSize,
164 state
165 );
166
167 /*
168 * Data source is signaling end of file when batch size
169 * is less than maxFill. This is slightly annoying because
170 * maxFill is a value that can only be known after re2c does
171 * its thing. Practically though, maxFill is never bigger than
172 * the longest keyword, so given our grammar, 32 is a safe bet.
173 */
174 uint8_t null[64];
175 const ssize_t maxFill = 32;
176 if(inputSize<maxFill)
177 {
178 eof = true;
179 input = null;
180 inputSize = sizeof(null);
181 memset(null, 0, sizeof(null));
182 }
183
184 /*
185 * When we get here, we have a partially
186 * consumed buffer which is in the following state:
187 * last valid char last valid buffer spot
188 * v v
189 * +-------------------+-------------+---------------+-------------+----------------------+
190 * ^ ^ ^ ^ ^ ^
191 * buffer start marker cursor limit bufferEnd
192 *
193 * We need to stretch the buffer and concatenate the new chunk of input to it
194 *
195 */
196 size_t used = limit-buffer;
197 size_t needed = used+inputSize;
198 size_t allocated = bufferEnd-buffer;
199 if(allocated<needed)
200 {
201 size_t limitOffset = limit-buffer;
202 size_t startOffset = start-buffer;
203 size_t markerOffset = marker-buffer;
204 size_t cursorOffset = cursor-buffer;
205
206 buffer = (uint8_t*)realloc(buffer, needed);
207 bufferEnd = needed+buffer;
208
209 marker = markerOffset + buffer;
210 cursor = cursorOffset + buffer;
211 start = buffer + startOffset;
212 limit = limitOffset + buffer;
213 }
214 memcpy(limit, input, inputSize);
215 limit += inputSize;
216
217 // The scanner starts here
218 #define YYLIMIT limit
219 #define YYCURSOR cursor
220 #define YYMARKER marker
221 #define YYCTYPE uint8_t
222
223 #define SKIP(x) { start = cursor; goto yy0; }
224 #define SEND(x) { send(x); SKIP(); }
225 #define YYFILL(n) { goto fill; }
226
227 #define YYGETSTATE() state
228 #define YYSETSTATE(x) { state = (x); }
229
230 start:
231
232
233 #line 234 "push_fg.c"
234 {
235
236 static const unsigned char yybm[] = {
237 0, 0, 0, 0, 0, 0, 0, 0,
238 0, 0, 0, 0, 0, 0, 0, 0,
239 0, 0, 0, 0, 0, 0, 0, 0,
240 0, 0, 0, 0, 0, 0, 0, 0,
241 0, 0, 0, 0, 0, 0, 0, 0,
242 0, 0, 0, 0, 0, 0, 0, 0,
243 192, 192, 192, 192, 192, 192, 192, 192,
244 192, 192, 0, 0, 0, 0, 0, 0,
245 0, 128, 128, 128, 128, 128, 128, 128,
246 128, 128, 128, 128, 128, 128, 128, 128,
247 128, 128, 128, 128, 128, 128, 128, 128,
248 128, 128, 128, 0, 0, 0, 0, 128,
249 0, 128, 128, 128, 128, 128, 128, 128,
250 128, 128, 128, 128, 128, 128, 128, 128,
251 128, 128, 128, 128, 128, 128, 128, 128,
252 128, 128, 128, 0, 0, 0, 0, 0,
253 0, 0, 0, 0, 0, 0, 0, 0,
254 0, 0, 0, 0, 0, 0, 0, 0,
255 0, 0, 0, 0, 0, 0, 0, 0,
256 0, 0, 0, 0, 0, 0, 0, 0,
257 0, 0, 0, 0, 0, 0, 0, 0,
258 0, 0, 0, 0, 0, 0, 0, 0,
259 0, 0, 0, 0, 0, 0, 0, 0,
260 0, 0, 0, 0, 0, 0, 0, 0,
261 0, 0, 0, 0, 0, 0, 0, 0,
262 0, 0, 0, 0, 0, 0, 0, 0,
263 0, 0, 0, 0, 0, 0, 0, 0,
264 0, 0, 0, 0, 0, 0, 0, 0,
265 0, 0, 0, 0, 0, 0, 0, 0,
266 0, 0, 0, 0, 0, 0, 0, 0,
267 0, 0, 0, 0, 0, 0, 0, 0,
268 0, 0, 0, 0, 0, 0, 0, 0,
269 };
270 switch (YYGETSTATE()) {
271 default:
272 goto yy0;
273 case 0:
274 goto yyFillLabel0;
275 case 1:
276 goto yyFillLabel1;
277 case 2:
278 goto yyFillLabel2;
279 }
280 yy0:
281 YYSETSTATE(0);
282 if ((YYLIMIT - YYCURSOR) < 7) YYFILL(7);
283 yyFillLabel0:
284 yych = *YYCURSOR;
285 {
286 static void *yytarget[256] = {
287 &&yy3, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
288 &&yy5, &&yy7, &&yy9, &&yy7, &&yy7, &&yy7, &&yy5, &&yy5,
289 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
290 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
291 &&yy7, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
292 &&yy11, &&yy13, &&yy15, &&yy17, &&yy5, &&yy19, &&yy5, &&yy21,
293 &&yy23, &&yy23, &&yy23, &&yy23, &&yy23, &&yy23, &&yy23, &&yy23,
294 &&yy23, &&yy23, &&yy5, &&yy5, &&yy5, &&yy26, &&yy5, &&yy5,
295 &&yy5, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28,
296 &&yy28, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28,
297 &&yy28, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28,
298 &&yy28, &&yy28, &&yy28, &&yy5, &&yy5, &&yy5, &&yy5, &&yy28,
299 &&yy5, &&yy28, &&yy31, &&yy28, &&yy28, &&yy32, &&yy33, &&yy34,
300 &&yy28, &&yy35, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28, &&yy28,
301 &&yy28, &&yy28, &&yy36, &&yy28, &&yy28, &&yy28, &&yy28, &&yy37,
302 &&yy28, &&yy28, &&yy28, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
303 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
304 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
305 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
306 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
307 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
308 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
309 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
310 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
311 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
312 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
313 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
314 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
315 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
316 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
317 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5,
318 &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5, &&yy5
319 };
320 goto *yytarget[yych];
321 }
322 yy3:
323 ++YYCURSOR;
324 #line 260 "push_fg.re"
325 { send(kEOF); return 1; }
326 #line 327 "push_fg.c"
327 yy5:
328 ++YYCURSOR;
329 #line 261 "push_fg.re"
330 { SEND(kUnknown); }
331 #line 332 "push_fg.c"
332 yy7:
333 ++YYCURSOR;
334 #line 259 "push_fg.re"
335 { SKIP(); }
336 #line 337 "push_fg.c"
337 yy9:
338 ++YYCURSOR;
339 #line 258 "push_fg.re"
340 { SKIP(); }
341 #line 342 "push_fg.c"
342 yy11:
343 ++YYCURSOR;
344 #line 251 "push_fg.re"
345 { SEND(kLeftParen); }
346 #line 347 "push_fg.c"
347 yy13:
348 ++YYCURSOR;
349 #line 252 "push_fg.re"
350 { SEND(kRightParen); }
351 #line 352 "push_fg.c"
352 yy15:
353 ++YYCURSOR;
354 #line 255 "push_fg.re"
355 { SEND(kStar); }
356 #line 357 "push_fg.c"
357 yy17:
358 ++YYCURSOR;
359 #line 254 "push_fg.re"
360 { SEND(kPlus); }
361 #line 362 "push_fg.c"
362 yy19:
363 ++YYCURSOR;
364 #line 253 "push_fg.re"
365 { SEND(kMinus); }
366 #line 367 "push_fg.c"
367 yy21:
368 ++YYCURSOR;
369 #line 256 "push_fg.re"
370 { SEND(kSlash); }
371 #line 372 "push_fg.c"
372 yy23:
373 ++YYCURSOR;
374 YYSETSTATE(1);
375 if (YYLIMIT <= YYCURSOR) YYFILL(1);
376 yyFillLabel1:
377 yych = *YYCURSOR;
378 if (yybm[0+yych] & 64) {
379 goto yy23;
380 }
381 #line 248 "push_fg.re"
382 { SEND(kDecimalConstant);}
383 #line 384 "push_fg.c"
384 yy26:
385 ++YYCURSOR;
386 #line 250 "push_fg.re"
387 { SEND(kEqual); }
388 #line 389 "push_fg.c"
389 yy28:
390 ++YYCURSOR;
391 YYSETSTATE(2);
392 if (YYLIMIT <= YYCURSOR) YYFILL(1);
393 yyFillLabel2:
394 yych = *YYCURSOR;
395 yy29:
396 if (yybm[0+yych] & 128) {
397 goto yy28;
398 }
399 #line 247 "push_fg.re"
400 { SEND(kIdentifier); }
401 #line 402 "push_fg.c"
402 yy31:
403 yych = *++YYCURSOR;
404 if (yych == 'r') goto yy38;
405 goto yy29;
406 yy32:
407 yych = *++YYCURSOR;
408 if (yych == 'l') goto yy39;
409 goto yy29;
410 yy33:
411 yych = *++YYCURSOR;
412 if (yych == 'o') goto yy40;
413 goto yy29;
414 yy34:
415 yych = *++YYCURSOR;
416 if (yych == 'o') goto yy41;
417 goto yy29;
418 yy35:
419 yych = *++YYCURSOR;
420 if (yych == 'f') goto yy42;
421 goto yy29;
422 yy36:
423 yych = *++YYCURSOR;
424 if (yych == 'e') goto yy44;
425 goto yy29;
426 yy37:
427 yych = *++YYCURSOR;
428 if (yych == 'h') goto yy45;
429 goto yy29;
430 yy38:
431 yych = *++YYCURSOR;
432 if (yych == 'e') goto yy46;
433 goto yy29;
434 yy39:
435 yych = *++YYCURSOR;
436 if (yych == 's') goto yy47;
437 goto yy29;
438 yy40:
439 yych = *++YYCURSOR;
440 if (yych == 'r') goto yy48;
441 goto yy29;
442 yy41:
443 yych = *++YYCURSOR;
444 if (yych == 't') goto yy50;
445 goto yy29;
446 yy42:
447 yych = *++YYCURSOR;
448 if (yybm[0+yych] & 128) {
449 goto yy28;
450 }
451 #line 240 "push_fg.re"
452 { SEND(kIf); }
453 #line 454 "push_fg.c"
454 yy44:
455 yych = *++YYCURSOR;
456 if (yych == 't') goto yy51;
457 goto yy29;
458 yy45:
459 yych = *++YYCURSOR;
460 if (yych == 'i') goto yy52;
461 goto yy29;
462 yy46:
463 yych = *++YYCURSOR;
464 if (yych == 'a') goto yy53;
465 goto yy29;
466 yy47:
467 yych = *++YYCURSOR;
468 if (yych == 'e') goto yy54;
469 goto yy29;
470 yy48:
471 yych = *++YYCURSOR;
472 if (yybm[0+yych] & 128) {
473 goto yy28;
474 }
475 #line 241 "push_fg.re"
476 { SEND(kFor); }
477 #line 478 "push_fg.c"
478 yy50:
479 yych = *++YYCURSOR;
480 if (yych == 'o') goto yy56;
481 goto yy29;
482 yy51:
483 yych = *++YYCURSOR;
484 if (yych == 'u') goto yy58;
485 goto yy29;
486 yy52:
487 yych = *++YYCURSOR;
488 if (yych == 'l') goto yy59;
489 goto yy29;
490 yy53:
491 yych = *++YYCURSOR;
492 if (yych == 'k') goto yy60;
493 goto yy29;
494 yy54:
495 yych = *++YYCURSOR;
496 if (yybm[0+yych] & 128) {
497 goto yy28;
498 }
499 #line 242 "push_fg.re"
500 { SEND(kElse); }
501 #line 502 "push_fg.c"
502 yy56:
503 yych = *++YYCURSOR;
504 if (yybm[0+yych] & 128) {
505 goto yy28;
506 }
507 #line 243 "push_fg.re"
508 { SEND(kGoto); }
509 #line 510 "push_fg.c"
510 yy58:
511 yych = *++YYCURSOR;
512 if (yych == 'r') goto yy62;
513 goto yy29;
514 yy59:
515 yych = *++YYCURSOR;
516 if (yych == 'e') goto yy63;
517 goto yy29;
518 yy60:
519 yych = *++YYCURSOR;
520 if (yybm[0+yych] & 128) {
521 goto yy28;
522 }
523 #line 244 "push_fg.re"
524 { SEND(kBreak); }
525 #line 526 "push_fg.c"
526 yy62:
527 yych = *++YYCURSOR;
528 if (yych == 'n') goto yy65;
529 goto yy29;
530 yy63:
531 yych = *++YYCURSOR;
532 if (yybm[0+yych] & 128) {
533 goto yy28;
534 }
535 #line 245 "push_fg.re"
536 { SEND(kWhile); }
537 #line 538 "push_fg.c"
538 yy65:
539 yych = *++YYCURSOR;
540 if (yybm[0+yych] & 128) {
541 goto yy28;
542 }
543 #line 246 "push_fg.re"
544 { SEND(kReturn); }
545 #line 546 "push_fg.c"
546 }
547 #line 262 "push_fg.re"
548
549
550 fill:
551 ssize_t unfinishedSize = cursor-start;
552 printf(
553 "scanner needs a refill. Exiting for now with:\n"
554 " saved fill state = %d\n"
555 " unfinished token size = %d\n",
556 state,
557 unfinishedSize
558 );
559
560 if(0<unfinishedSize && start<limit)
561 {
562 printf(" unfinished token is :");
563 fwrite(start, 1, cursor-start, stdout);
564 putchar('\n');
565 }
566 putchar('\n');
567
568 /*
569 * Once we get here, we can get rid of
570 * everything before start and after limit.
571 */
572 if(eof==true) goto start;
573 if(buffer<start)
574 {
575 size_t startOffset = start-buffer;
576 memmove(buffer, start, limit-start);
577 marker -= startOffset;
578 cursor -= startOffset;
579 limit -= startOffset;
580 start -= startOffset;
581 }
582 return 0;
583 }
584 };
585
586 // ----------------------------------------------------------------------
main(int argc,char ** argv)587 int main(
588 int argc,
589 char **argv
590 )
591 {
592 // Parse cmd line
593 int input = 0;
594 if(1<argc)
595 {
596 input = open(argv[1], O_RDONLY | O_BINARY);
597 if(input<0)
598 {
599 fprintf(
600 stderr,
601 "could not open file %s\n",
602 argv[1]
603 );
604 exit(1);
605 }
606 }
607
608 /*
609 * Tokenize input file by pushing batches
610 * of data one by one into the scanner.
611 */
612 const size_t batchSize = 256;
613 uint8_t buffer[batchSize];
614 PushScanner scanner;
615 while(1)
616 {
617 ssize_t n = read(input, buffer, batchSize);
618 scanner.push(buffer, n);
619 if(n<batchSize) break;
620 }
621 scanner.push(0, -1);
622 close(input);
623
624 // Done
625 return 0;
626 }
627
628 push_fg.re:238:22: warning: escape has no effect: '\h' [-Wuseless-escape]
629