1 //
2 // aegis - project change supervisor
3 // Copyright (C) 1991-1996, 1998, 1999, 2001-2008, 2010, 2012 Peter Miller
4 // Copyright (C) 2020 Aryeh M. Friedman
5 //
6 // This program is free software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published by
8 // the Free Software Foundation; either version 3 of the License, or (at
9 // your option) any later version.
10 //
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // along with this program. If not, see <http://www.gnu.org/licenses/>.
18 //
19
20 #include <common/ac/assert.h>
21 #include <common/ac/ctype.h>
22 #include <common/ac/errno.h>
23 #include <common/ac/stdarg.h>
24 #include <common/ac/stdio.h>
25 #include <common/ac/stdlib.h>
26 #include <common/ac/string.h>
27
28 #include <common/mem.h>
29 #include <common/nstring/accumulator.h>
30 #include <common/quit.h>
31 #include <common/trace.h>
32 #include <libaegis/gram.yacc.h> // must be after <common/nstring.h>
33 #include <libaegis/input/bunzip2.h>
34 #include <libaegis/input/crlf.h>
35 #include <libaegis/input/env.h>
36 #include <libaegis/input/file.h>
37 #include <libaegis/input/gunzip.h>
38 #include <libaegis/meta_lex.h>
39 #include <libaegis/sub.h>
40 #include <libaegis/zero.h>
41
42
43 /**
44 * The source global variable is used to remember the managed input
45 * stream if the currently being parsed meta-data file.
46 */
47 static input source;
48
49 static int error_count;
50 extern GRAM_STYPE gram_lval;
51 static nstring_accumulator buffer;
52
53
54 input
lex_iopen_file(const nstring & filename)55 lex_iopen_file(const nstring &filename)
56 {
57 //
58 // Open the underlying binary file.
59 //
60 input fp = input_file_open(filename);
61
62 //
63 // Decompress the input stream. If it *isn't* compressed, this
64 // incurs NO overhead, because the gunzip code gets itself out
65 // of the way, and returns the original fp.
66 //
67 fp = input_gunzip_open(fp);
68 fp = input_bunzip2_open(fp);
69
70 //
71 // Get rid of CRLF sequences in the input.
72 // This happens, for instance, when the file is created on
73 // windows nt, but used on Unix.
74 //
75 fp = input_crlf::create(fp);
76
77 return fp;
78 }
79
80
81 input
lex_iopen_env(const nstring & name)82 lex_iopen_env(const nstring &name)
83 {
84 return input_env_open(name);
85 }
86
87
88 void
lex_open_input(input & ifp)89 lex_open_input(input &ifp)
90 {
91 assert(!source.is_open());
92 source = ifp;
93 }
94
95
96 void
lex_close(void)97 lex_close(void)
98 {
99 assert(source.is_open());
100 if (error_count)
101 {
102 sub_context_ty *scp;
103
104 scp = sub_context_new();
105 sub_var_set_string(scp, "File_Name", source->name());
106 sub_var_set_long(scp, "Number", error_count);
107 sub_var_optional(scp, "Number");
108 fatal_intl(scp, i18n("$filename: has errors"));
109 // NOTREACHED
110 }
111 source.close();
112 }
113
114
115 static inline void
lex_getc_undo(int c)116 lex_getc_undo(int c)
117 {
118 if (c >= 0)
119 source->ungetc(c);
120 }
121
122
123 int
gram_lex(void)124 gram_lex(void)
125 {
126 sub_context_ty *scp;
127 int c;
128 int ndigits;
129
130 for (;;)
131 {
132 c = source->getch();
133 switch (c)
134 {
135 case ' ':
136 case '\t':
137 case '\f':
138 case '\n':
139 break;
140
141 case '0':
142 buffer.clear();
143 buffer.push_back('0');
144 c = source->getch();
145 if (c == 'x' || c == 'X')
146 {
147 buffer.push_back(c);
148 ndigits = 0;
149 for (;;)
150 {
151 c = source->getch();
152 switch (c)
153 {
154 case '0':
155 case '1':
156 case '2':
157 case '3':
158 case '4':
159 case '5':
160 case '6':
161 case '7':
162 case '8':
163 case '9':
164 case 'A':
165 case 'B':
166 case 'C':
167 case 'D':
168 case 'E':
169 case 'F':
170 case 'a':
171 case 'b':
172 case 'c':
173 case 'd':
174 case 'e':
175 case 'f':
176 ++ndigits;
177 buffer.push_back(c);
178 continue;
179
180 default:
181 break;
182 }
183 break;
184 }
185 if (!ndigits)
186 {
187 gram_error(i18n("malformed hex constant"));
188 gram_lval.lv_integer = 0;
189 goto integer_return;
190 }
191 lex_getc_undo(c);
192 buffer.push_back(' ');
193 gram_lval.lv_integer =
194 strtoul(buffer.get_data(), (char **)0, 16);
195 goto integer_return;
196 }
197 if (c == '.')
198 {
199 buffer.push_back(c);
200 goto fraction;
201 }
202 if (c == 'e' || c == 'E')
203 goto exponent;
204 for (;;)
205 {
206 switch (c)
207 {
208 case '0':
209 case '1':
210 case '2':
211 case '3':
212 case '4':
213 case '5':
214 case '6':
215 case '7':
216 buffer.push_back(c);
217 c = source->getch();
218 continue;
219
220 default:
221 break;
222 }
223 break;
224 }
225 lex_getc_undo(c);
226 buffer.push_back(' ');
227 gram_lval.lv_integer = strtoul(buffer.get_data(), (char **)0, 8);
228 goto integer_return;
229
230 case '1':
231 case '2':
232 case '3':
233 case '4':
234 case '5':
235 case '6':
236 case '7':
237 case '8':
238 case '9':
239 buffer.clear();
240 for (;;)
241 {
242 buffer.push_back(c);
243 c = source->getch();
244 if (c < 0)
245 break;
246 if (!isdigit((unsigned char)c))
247 break;
248 }
249 if (c == '.')
250 {
251 buffer.push_back(c);
252 goto fraction;
253 }
254 if (c == 'e' || c == 'E')
255 goto exponent;
256 lex_getc_undo(c);
257 buffer.push_back(' ');
258 gram_lval.lv_integer = strtoul(buffer.get_data(), (char **)0, 10);
259 assert(gram_lval.lv_integer >= 0);
260 integer_return:
261 trace(("%s: INTEGER %ld\n", source->name().c_str(),
262 gram_lval.lv_integer));
263 return INTEGER;
264
265 case '.':
266 c = source->getch();
267 if (c < 0 || !isdigit((unsigned char)c))
268 {
269 lex_getc_undo(c);
270 return '.';
271 }
272 buffer.clear();
273 buffer.push_back('0');
274 buffer.push_back('.');
275 buffer.push_back(c);
276 fraction:
277 for (;;)
278 {
279 c = source->getch();
280 if (c < 0 || !isdigit((unsigned char)c))
281 break;
282 buffer.push_back(c);
283 }
284 if (c == 'e' || c == 'E')
285 {
286 exponent:
287 buffer.push_back(c);
288 c = source->getch();
289 if (c == '+' || c == '-')
290 {
291 buffer.push_back(c);
292 c = source->getch();
293 }
294 ndigits = 0;
295 for (;;)
296 {
297 c = source->getch();
298 if (c < 0 || !isdigit((unsigned char)c))
299 break;
300 ++ndigits;
301 buffer.push_back(c);
302 }
303 if (!ndigits)
304 {
305 gram_error(i18n("malformed exponent"));
306 gram_lval.lv_real = 0;
307 trace(("%s: REAL 0\n", source->name().c_str()));
308 return REAL;
309 }
310 }
311 lex_getc_undo(c);
312 buffer.push_back('\0');
313 gram_lval.lv_real = atof(buffer.get_data());
314 trace(("%s: REAL %g\n", source->name().c_str(),
315 gram_lval.lv_real));
316 return REAL;
317
318 case '"':
319 buffer.clear();
320 for (;;)
321 {
322 c = source->getch();
323 if (c == EOF)
324 {
325 str_eof:
326 gram_error("end-of-file within string");
327 break;
328 }
329 if (c == '\n')
330 {
331 gram_error("end-of-line within string");
332 break;
333 }
334 if (c == '"')
335 break;
336 if (c == '\\')
337 {
338 c = source->getch();
339 switch (c)
340 {
341 default:
342 scp = sub_context_new();
343 sub_var_set_format(scp, "Name", "\\%c", c);
344 lex_error(scp, i18n("unknown '$name' escape"));
345 sub_context_delete(scp);
346 break;
347
348 case '\n':
349 break;
350
351 case EOF:
352 goto str_eof;
353
354 case 'b':
355 buffer.push_back('\b');
356 break;
357
358 case 'n':
359 buffer.push_back('\n');
360 break;
361
362 case 'r':
363 buffer.push_back('\r');
364 break;
365
366 case 't':
367 buffer.push_back('\t');
368 break;
369
370 case 'f':
371 buffer.push_back('\f');
372 break;
373
374 case '"':
375 case '\\':
376 buffer.push_back(c);
377 break;
378
379 case '0':
380 case '1':
381 case '2':
382 case '3':
383 case '4':
384 case '5':
385 case '6':
386 case '7':
387 {
388 int n;
389 int v;
390
391 v = 0;
392 for (n = 0; n < 3; ++n)
393 {
394 v = v * 8 + c - '0';
395 c = source->getch();
396 switch (c)
397 {
398 case '0':
399 case '1':
400 case '2':
401 case '3':
402 case '4':
403 case '5':
404 case '6':
405 case '7':
406 continue;
407
408 default:
409 lex_getc_undo(c);
410 break;
411 }
412 break;
413 }
414 buffer.push_back(v);
415 }
416 break;
417 }
418 }
419 else
420 buffer.push_back(c);
421 }
422 gram_lval.lv_string = new nstring(buffer.mkstr());
423 trace(("%s: STRING %s\n", source->name().c_str(),
424 gram_lval.lv_string->quote_c().c_str()));
425 return STRING;
426
427 case '@':
428 buffer.clear();
429 for (;;)
430 {
431 c = source->getch();
432 switch (c)
433 {
434 case EOF:
435 goto str_eof;
436
437 case '@':
438 c = source->getch();
439 if (c == EOF)
440 break;
441 if (c != '@')
442 {
443 source->ungetc(c);
444 break;
445 }
446 // fall through...
447
448 default:
449 buffer.push_back(c);
450 continue;
451 }
452 break;
453 }
454 gram_lval.lv_string = new nstring(buffer.mkstr());
455 trace(("%s: STRING %s\n", source->name().c_str(),
456 gram_lval.lv_string->quote_c().c_str()));
457 return STRING;
458
459 case 'A':
460 case 'B':
461 case 'C':
462 case 'D':
463 case 'E':
464 case 'F':
465 case 'G':
466 case 'H':
467 case 'I':
468 case 'J':
469 case 'K':
470 case 'L':
471 case 'M':
472 case 'N':
473 case 'O':
474 case 'P':
475 case 'Q':
476 case 'R':
477 case 'S':
478 case 'T':
479 case 'U':
480 case 'V':
481 case 'W':
482 case 'X':
483 case 'Y':
484 case 'Z':
485 case '_':
486 case 'a':
487 case 'b':
488 case 'c':
489 case 'd':
490 case 'e':
491 case 'f':
492 case 'g':
493 case 'h':
494 case 'i':
495 case 'j':
496 case 'k':
497 case 'l':
498 case 'm':
499 case 'n':
500 case 'o':
501 case 'p':
502 case 'q':
503 case 'r':
504 case 's':
505 case 't':
506 case 'u':
507 case 'v':
508 case 'w':
509 case 'x':
510 case 'y':
511 case 'z':
512 buffer.clear();
513 for (;;)
514 {
515 buffer.push_back(c);
516 c = source->getch();
517 switch (c)
518 {
519 case '0':
520 case '1':
521 case '2':
522 case '3':
523 case '4':
524 case '5':
525 case '6':
526 case '7':
527 case '8':
528 case '9':
529 case 'A':
530 case 'B':
531 case 'C':
532 case 'D':
533 case 'E':
534 case 'F':
535 case 'G':
536 case 'H':
537 case 'I':
538 case 'J':
539 case 'K':
540 case 'L':
541 case 'M':
542 case 'N':
543 case 'O':
544 case 'P':
545 case 'Q':
546 case 'R':
547 case 'S':
548 case 'T':
549 case 'U':
550 case 'V':
551 case 'W':
552 case 'X':
553 case 'Y':
554 case 'Z':
555 case '_':
556 case 'a':
557 case 'b':
558 case 'c':
559 case 'd':
560 case 'e':
561 case 'f':
562 case 'g':
563 case 'h':
564 case 'i':
565 case 'j':
566 case 'k':
567 case 'l':
568 case 'm':
569 case 'n':
570 case 'o':
571 case 'p':
572 case 'q':
573 case 'r':
574 case 's':
575 case 't':
576 case 'u':
577 case 'v':
578 case 'w':
579 case 'x':
580 case 'y':
581 case 'z':
582 continue;
583
584 default:
585 lex_getc_undo(c);
586 break;
587 }
588 break;
589 }
590 if (buffer.size() == 4 && !memcmp(buffer.get_data(), "ZERO", 4))
591 {
592 gram_lval.lv_integer = MAGIC_ZERO;
593 goto integer_return;
594 }
595 gram_lval.lv_string = new nstring(buffer.mkstr());
596 trace(("%s: NAME %s\n", source->name().c_str(),
597 gram_lval.lv_string->quote_c().c_str()));
598 return NAME;
599
600 case '#':
601 //
602 // Shell style single line comment
603 //
604 single_line_comment:
605 for (;;)
606 {
607 c = source->getch();
608 if (c == EOF || c == '\n')
609 break;
610 }
611 break;
612
613 case '/':
614 //
615 // C and C++ style comments
616 //
617 c = source->getch();
618 if (c == '/')
619 {
620 //
621 // C++ style single line comment.
622 //
623 goto single_line_comment;
624 }
625 if (c != '*')
626 {
627 //
628 // Not a C style block comment,
629 // just return the slash.
630 //
631 lex_getc_undo(c);
632 trace(("%s: '/'\n", source->name().c_str()));
633 return '/';
634 }
635 for (;;)
636 {
637 for (;;)
638 {
639 c = source->getch();
640 if (c == EOF)
641 {
642 bad_comment:
643 gram_error("end-of-file within comment");
644 quit(1);
645 }
646 if (c == '*')
647 break;
648 }
649 for (;;)
650 {
651 c = source->getch();
652 if (c == EOF)
653 goto bad_comment;
654 if (c != '*')
655 break;
656 }
657 if (c == '/')
658 break;
659 }
660 break;
661
662 case EOF:
663 trace(("%s: end of file\n", source->name().c_str()));
664 return 0;
665
666 default:
667 trace(("%s: '%c'\n", source->name().c_str(), c));
668 return c;
669 }
670 }
671 }
672
673
674 void
gram_error(const char * s)675 gram_error(const char *s)
676 {
677 sub_context_ty *scp;
678
679 scp = sub_context_new();
680 lex_error(scp, s);
681 sub_context_delete(scp);
682 }
683
684
685 void
lex_error(sub_context_ty * scp,const char * s)686 lex_error(sub_context_ty *scp, const char *s)
687 {
688 string_ty *msg;
689
690 msg = subst_intl(scp, s);
691
692 // re-use substitution context
693 sub_var_set_string(scp, "MeSsaGe", msg);
694 sub_var_set_string(scp, "File_Name", source->name());
695 error_intl(scp, i18n("$filename: $message"));
696 str_free(msg);
697 if (++error_count >= 20)
698 {
699 // re-use substitution context
700 sub_var_set_string(scp, "File_Name", source->name());
701 fatal_intl(scp, i18n("$filename: too many errors"));
702 }
703
704 #if 0
705 //
706 // This stuff is here to insulate against error messages that various
707 // versions if GNU Bison may or may not issue. If there are similar
708 // issues with byacc, put them here, too.
709 //
710 i18n("syntax error: cannot back up")
711 i18n("syntax error; also virtual memory exhausted");
712 #endif
713 }
714
715
716 string_ty *
lex_position(void)717 lex_position(void)
718 {
719 static string_ty *s;
720
721 if (source.is_open())
722 return source->name().get_ref();
723 if (!s)
724 s = str_from_c("end-of-input");
725 return s;
726 }
727
728
729 // vim: set ts=8 sw=4 et :
730