1 /* Small compiler - File input, preprocessing and lexical analysis functions
2 *
3 * Copyright (c) ITB CompuPhase, 1997-2003
4 *
5 * This software is provided "as-is", without any express or implied warranty.
6 * In no event will the authors be held liable for any damages arising from
7 * the use of this software.
8 *
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
12 *
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software in
15 * a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
17 * 2. Altered source versions must be plainly marked as such, and must not be
18 * misrepresented as being the original software.
19 * 3. This notice may not be removed or altered from any source distribution.
20 *
21 * Version: $Id$
22 */
23
24
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28
29 #include <assert.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <ctype.h>
34 #include <math.h>
35 #include "embryo_cc_sc.h"
36 #include "Embryo.h"
37
38 static int match(char *st, int end);
39 static cell litchar(char **lptr, int rawmode);
40 static int alpha(char c);
41
42 static int icomment; /* currently in multiline comment? */
43 static int iflevel; /* nesting level if #if/#else/#endif */
44 static int skiplevel; /* level at which we started skipping */
45 static int elsedone; /* level at which we have seen an #else */
46 static char term_expr[] = "";
47 static int listline = -1; /* "current line" for the list file */
48
49 /* pushstk & popstk
50 *
51 * Uses a LIFO stack to store information. The stack is used by doinclude(),
52 * doswitch() (to hold the state of "swactive") and some other routines.
53 *
54 * Porting note: I made the bold assumption that an integer will not be
55 * larger than a pointer (it may be smaller). That is, the stack element
56 * is typedef'ed as a pointer type, but I also store integers on it. See
57 * SC.H for "stkitem"
58 *
59 * Global references: stack,stkidx (private to pushstk() and popstk())
60 */
61 static stkitem stack[sSTKMAX];
62 static int stkidx;
63 void
pushstk(stkitem val)64 pushstk(stkitem val)
65 {
66 if (stkidx >= sSTKMAX)
67 error(102, "parser stack"); /* stack overflow (recursive include?) */
68 stack[stkidx] = val;
69 stkidx += 1;
70 }
71
72 stkitem
popstk(void)73 popstk(void)
74 {
75 if (stkidx == 0)
76 return (stkitem) - 1; /* stack is empty */
77 stkidx -= 1;
78 return stack[stkidx];
79 }
80
81 int
plungequalifiedfile(char * name)82 plungequalifiedfile(char *name)
83 {
84 static char *extensions[] = { ".inc", ".sma", ".small" };
85 FILE *fp;
86 char *ext;
87 int ext_idx;
88
89 ext_idx = 0;
90 do
91 {
92 fp = (FILE *) sc_opensrc(name);
93 ext = strchr(name, '\0'); /* save position */
94 if (!fp)
95 {
96 /* try to append an extension */
97 strcpy(ext, extensions[ext_idx]);
98 fp = (FILE *) sc_opensrc(name);
99 if (!fp)
100 *ext = '\0'; /* on failure, restore filename */
101 } /* if */
102 ext_idx++;
103 }
104 while ((!fp) &&
105 (ext_idx < (int)(sizeof extensions / sizeof extensions[0])));
106 if (!fp)
107 {
108 *ext = '\0'; /* restore filename */
109 return FALSE;
110 } /* if */
111 pushstk((stkitem) inpf);
112 pushstk((stkitem) inpfname); /* pointer to current file name */
113 pushstk((stkitem) curlibrary);
114 pushstk((stkitem) iflevel);
115 assert(skiplevel == 0);
116 pushstk((stkitem) icomment);
117 pushstk((stkitem) fcurrent);
118 pushstk((stkitem) fline);
119 inpfname = strdup(name); /* set name of include file */
120 if (!inpfname)
121 error(103); /* insufficient memory */
122 inpf = fp; /* set input file pointer to include file */
123 fnumber++;
124 fline = 0; /* set current line number to 0 */
125 fcurrent = fnumber;
126 icomment = FALSE;
127 setfile(inpfname, fcurrent);
128 listline = -1; /* force a #line directive when changing the file */
129 setactivefile(fcurrent);
130 return TRUE;
131 }
132
133 int
plungefile(char * name,int try_currentpath,int try_includepaths)134 plungefile(char *name, int try_currentpath, int try_includepaths)
135 {
136 int result = FALSE;
137 int i;
138 char *ptr;
139
140 if (try_currentpath)
141 result = plungequalifiedfile(name);
142
143 if (try_includepaths && name[0] != DIRSEP_CHAR)
144 {
145 for (i = 0; !result && (ptr = get_path(i)); i++)
146 {
147 char path[PATH_MAX + PATH_MAX + 128];
148
149 snprintf(path, sizeof (path), "%s/%s", ptr, name);
150 path[sizeof path - 1] = '\0'; /* force '\0' termination */
151 result = plungequalifiedfile(path);
152 } /* while */
153 } /* if */
154 return result;
155 }
156
157 static void
check_empty(const char * sptr)158 check_empty(const char *sptr)
159 {
160 /* verifies that the string contains only whitespace */
161 while (*sptr <= ' ' && *sptr != '\0')
162 sptr++;
163 if (*sptr != '\0')
164 error(38); /* extra characters on line */
165 }
166
167 /* doinclude
168 *
169 * Gets the name of an include file, pushes the old file on the stack and
170 * sets some options. This routine doesn't use lex(), since lex() doesn't
171 * recognize file names (and directories).
172 *
173 * Global references: inpf (altered)
174 * inpfname (altered)
175 * fline (altered)
176 * lptr (altered)
177 */
178 static void
doinclude(void)179 doinclude(void)
180 {
181 char name[PATH_MAX], c;
182 int i, result;
183
184 while (*lptr <= ' ' && *lptr != 0) /* skip leading whitespace */
185 lptr++;
186 if (*lptr == '<' || *lptr == '\"')
187 {
188 c = (char)((*lptr == '\"') ? '\"' : '>'); /* termination character */
189 lptr++;
190 while (*lptr <= ' ' && *lptr != 0) /* skip whitespace after quote */
191 lptr++;
192 }
193 else
194 {
195 c = '\0';
196 } /* if */
197
198 i = 0;
199 while ((*lptr != c) && (*lptr != '\0') && (i < (int)(sizeof(name) - 1))) /* find the end of the string */
200 name[i++] = *lptr++;
201 while (i > 0 && name[i - 1] <= ' ')
202 i--; /* strip trailing whitespace */
203 assert((i >= 0) && (i < (int)(sizeof(name))));
204 name[i] = '\0'; /* zero-terminate the string */
205
206 if (*lptr != c)
207 { /* verify correct string termination */
208 error(37); /* invalid string */
209 return;
210 } /* if */
211 if (c != '\0')
212 check_empty(lptr + 1); /* verify that the rest of the line is whitespace */
213
214 /* Include files between "..." or without quotes are read from the current
215 * directory, or from a list of "include directories". Include files
216 * between <...> are only read from the list of include directories.
217 */
218 result = plungefile(name, (c != '>'), TRUE);
219 if (!result)
220 error(100, name); /* cannot read from ... (fatal error) */
221 }
222
223 /* readline
224 *
225 * Reads in a new line from the input file pointed to by "inpf". readline()
226 * concatenates lines that end with a \ with the next line. If no more data
227 * can be read from the file, readline() attempts to pop off the previous file
228 * from the stack. If that fails too, it sets "freading" to 0.
229 *
230 * Global references: inpf,fline,inpfname,freading,icomment (altered)
231 */
232 static void
readline(char * line)233 readline(char *line)
234 {
235 int i, num, cont;
236
237 if (lptr == term_expr)
238 return;
239 num = sLINEMAX;
240 cont = FALSE;
241 do
242 {
243 if (!inpf || sc_eofsrc(inpf))
244 {
245 if (cont)
246 error(49); /* invalid line continuation */
247 if (inpf && inpf != inpf_org)
248 sc_closesrc(inpf);
249 i = (int)(long)popstk();
250 if (i == -1)
251 { /* All's done; popstk() returns "stack is empty" */
252 freading = FALSE;
253 *line = '\0';
254 /* when there is nothing more to read, the #if/#else stack should
255 * be empty and we should not be in a comment
256 */
257 assert(iflevel >= 0);
258 if (iflevel > 0)
259 error(1, "#endif", "-end of file-");
260 else if (icomment)
261 error(1, "*/", "-end of file-");
262 return;
263 } /* if */
264 fline = i;
265 fcurrent = (int)(long)popstk();
266 icomment = (int)(long)popstk();
267 assert(skiplevel == 0); /* skiplevel was not stored on stack, because it should always be zero at this point */
268 iflevel = (int)(long)popstk();
269 curlibrary = (constvalue *) popstk();
270 free(inpfname); /* return memory allocated for the include file name */
271 inpfname = (char *)popstk();
272 inpf = (FILE *) popstk();
273 setactivefile(fcurrent);
274 listline = -1; /* force a #line directive when changing the file */
275 elsedone = 0;
276 } /* if */
277
278 if (!sc_readsrc(inpf, line, num))
279 {
280 *line = '\0'; /* delete line */
281 cont = FALSE;
282 }
283 else
284 {
285 char *ptr;
286 /* check whether to erase leading spaces */
287 if (cont)
288 {
289 char *sptr = line;
290
291 while (*sptr == ' ' || *sptr == '\t')
292 sptr++;
293 if (sptr != line)
294 memmove(line, sptr, strlen(sptr) + 1);
295 } /* if */
296 cont = FALSE;
297 /* check whether a full line was read */
298 if (!strchr(line, '\n') && !sc_eofsrc(inpf))
299 error(75); /* line too long */
300 /* check if the next line must be concatenated to this line */
301 if ((ptr = strchr(line, '\n')) && ptr > line)
302 {
303 assert(*(ptr + 1) == '\0'); /* '\n' should be last in the string */
304 while (ptr > line
305 && (*ptr == '\n' || *ptr == ' ' || *ptr == '\t'))
306 ptr--; /* skip trailing whitespace */
307 if (*ptr == '\\')
308 {
309 cont = TRUE;
310 /* set '\a' at the position of '\\' to make it possible to check
311 * for a line continuation in a single line comment (error 49)
312 */
313 *ptr++ = '\a';
314 *ptr = '\0'; /* erase '\n' (and any trailing whitespace) */
315 } /* if */
316 } /* if */
317 num -= strlen(line);
318 line += strlen(line);
319 } /* if */
320 fline += 1;
321 }
322 while (num >= 0 && cont);
323 }
324
325 /* stripcom
326 *
327 * Replaces all comments from the line by space characters. It updates
328 * a global variable ("icomment") for multiline comments.
329 *
330 * This routine also supports the C++ extension for single line comments.
331 * These comments are started with "//" and end at the end of the line.
332 *
333 * Global references: icomment (private to "stripcom")
334 */
335 static void
stripcom(char * line)336 stripcom(char *line)
337 {
338 char c;
339
340 while (*line)
341 {
342 if (icomment)
343 {
344 if (*line == '*' && *(line + 1) == '/')
345 {
346 icomment = FALSE; /* comment has ended */
347 *line = ' '; /* replace '*' and '/' characters by spaces */
348 *(line + 1) = ' ';
349 line += 2;
350 }
351 else
352 {
353 if (*line == '/' && *(line + 1) == '*')
354 error(216); /* nested comment */
355 *line = ' '; /* replace comments by spaces */
356 line += 1;
357 } /* if */
358 }
359 else
360 {
361 if (*line == '/' && *(line + 1) == '*')
362 {
363 icomment = TRUE; /* start comment */
364 *line = ' '; /* replace '/' and '*' characters by spaces */
365 *(line + 1) = ' ';
366 line += 2;
367 }
368 else if (*line == '/' && *(line + 1) == '/')
369 { /* comment to end of line */
370 if (strchr(line, '\a'))
371 error(49); /* invalid line continuation */
372 *line++ = '\n'; /* put "newline" at first slash */
373 *line = '\0'; /* put "zero-terminator" at second slash */
374 }
375 else
376 {
377 if (*line == '\"' || *line == '\'')
378 { /* leave literals unaltered */
379 c = *line; /* ending quote, single or double */
380 line += 1;
381 while ((*line != c || *(line - 1) == '\\')
382 && *line != '\0')
383 line += 1;
384 line += 1; /* skip final quote */
385 }
386 else
387 {
388 line += 1;
389 } /* if */
390 } /* if */
391 } /* if */
392 } /* while */
393 }
394
395 /* btoi
396 *
397 * Attempts to interpret a numeric symbol as a boolean value. On success
398 * it returns the number of characters processed (so the line pointer can be
399 * adjusted) and the value is stored in "val". Otherwise it returns 0 and
400 * "val" is garbage.
401 *
402 * A boolean value must start with "0b"
403 */
404 static int
btoi(cell * val,char * curptr)405 btoi(cell * val, char *curptr)
406 {
407 char *ptr;
408
409 *val = 0;
410 ptr = curptr;
411 if (*ptr == '0' && *(ptr + 1) == 'b')
412 {
413 ptr += 2;
414 while (*ptr == '0' || *ptr == '1' || *ptr == '_')
415 {
416 if (*ptr != '_')
417 *val = (*val << 1) | (*ptr - '0');
418 ptr++;
419 } /* while */
420 }
421 else
422 {
423 return 0;
424 } /* if */
425 if (alphanum(*ptr)) /* number must be delimited by non-alphanumeric char */
426 return 0;
427 else
428 return (int)(ptr - curptr);
429 }
430
431 /* dtoi
432 *
433 * Attempts to interpret a numeric symbol as a decimal value. On success
434 * it returns the number of characters processed and the value is stored in
435 * "val". Otherwise it returns 0 and "val" is garbage.
436 */
437 static int
dtoi(cell * val,char * curptr)438 dtoi(cell * val, char *curptr)
439 {
440 char *ptr;
441
442 *val = 0;
443 ptr = curptr;
444 if (!sc_isdigit(*ptr)) /* should start with digit */
445 return 0;
446 while (sc_isdigit(*ptr) || *ptr == '_')
447 {
448 if (*ptr != '_')
449 *val = (*val * 10) + (*ptr - '0');
450 ptr++;
451 } /* while */
452 if (alphanum(*ptr)) /* number must be delimited by non-alphanumerical */
453 return 0;
454 if (*ptr == '.' && sc_isdigit(*(ptr + 1)))
455 return 0; /* but a fractional part must not be present */
456 return (int)(ptr - curptr);
457 }
458
459 /* htoi
460 *
461 * Attempts to interpret a numeric symbol as a hexadecimal value. On
462 * success it returns the number of characters processed and the value is
463 * stored in "val". Otherwise it return 0 and "val" is garbage.
464 */
465 static int
htoi(cell * val,char * curptr)466 htoi(cell * val, char *curptr)
467 {
468 char *ptr;
469
470 *val = 0;
471 ptr = curptr;
472 if (!sc_isdigit(*ptr)) /* should start with digit */
473 return 0;
474 if (*ptr == '0' && *(ptr + 1) == 'x')
475 { /* C style hexadecimal notation */
476 ptr += 2;
477 while (sc_isxdigit(*ptr) || *ptr == '_')
478 {
479 if (*ptr != '_')
480 {
481 assert(sc_isxdigit(*ptr));
482 *val = *val << 4;
483 if (sc_isdigit(*ptr))
484 *val += (*ptr - '0');
485 else
486 *val += (tolower(*ptr) - 'a' + 10);
487 } /* if */
488 ptr++;
489 } /* while */
490 }
491 else
492 {
493 return 0;
494 } /* if */
495 if (alphanum(*ptr))
496 return 0;
497 else
498 return (int)(ptr - curptr);
499 }
500
501 #if defined LINUX
502 static double
pow10(int value)503 pow10(int value)
504 {
505 double res = 1.0;
506
507 while (value >= 4)
508 {
509 res *= 10000.0;
510 value -= 5;
511 } /* while */
512 while (value >= 2)
513 {
514 res *= 100.0;
515 value -= 2;
516 } /* while */
517 while (value >= 1)
518 {
519 res *= 10.0;
520 value -= 1;
521 } /* while */
522 return res;
523 }
524 #endif
525
526 /* ftoi
527 *
528 * Attempts to interpret a numeric symbol as a rational number, either as
529 * IEEE 754 single precision floating point or as a fixed point integer.
530 * On success it returns the number of characters processed and the value is
531 * stored in "val". Otherwise it returns 0 and "val" is unchanged.
532 *
533 * Small has stricter definition for floating point numbers than most:
534 * o the value must start with a digit; ".5" is not a valid number, you
535 * should write "0.5"
536 * o a period must appear in the value, even if an exponent is given; "2e3"
537 * is not a valid number, you should write "2.0e3"
538 * o at least one digit must follow the period; "6." is not a valid number,
539 * you should write "6.0"
540 */
541 static int
ftoi(cell * val,char * curptr)542 ftoi(cell * val, char *curptr)
543 {
544 char *ptr;
545 double fnum, ffrac, fmult;
546 unsigned long dnum, dbase;
547 int i, ignore;
548
549 assert(rational_digits >= 0 && rational_digits < 9);
550 for (i = 0, dbase = 1; i < rational_digits; i++)
551 dbase *= 10;
552 fnum = 0.0;
553 dnum = 0L;
554 ptr = curptr;
555 if (!sc_isdigit(*ptr)) /* should start with digit */
556 return 0;
557 while (sc_isdigit(*ptr) || *ptr == '_')
558 {
559 if (*ptr != '_')
560 {
561 fnum = (fnum * 10.0) + (*ptr - '0');
562 dnum = (dnum * 10L) + (*ptr - '0') * dbase;
563 } /* if */
564 ptr++;
565 } /* while */
566 if (*ptr != '.')
567 return 0; /* there must be a period */
568 ptr++;
569 if (!sc_isdigit(*ptr)) /* there must be at least one digit after the dot */
570 return 0;
571 ffrac = 0.0;
572 fmult = 1.0;
573 ignore = FALSE;
574 while (sc_isdigit(*ptr) || *ptr == '_')
575 {
576 if (*ptr != '_')
577 {
578 ffrac = (ffrac * 10.0) + (*ptr - '0');
579 fmult = fmult / 10.0;
580 dbase /= 10L;
581 dnum += (*ptr - '0') * dbase;
582 if (dbase == 0L && sc_rationaltag && rational_digits > 0
583 && !ignore)
584 {
585 error(222); /* number of digits exceeds rational number precision */
586 ignore = TRUE;
587 } /* if */
588 } /* if */
589 ptr++;
590 } /* while */
591 fnum += ffrac * fmult; /* form the number so far */
592 if (*ptr == 'e')
593 { /* optional fractional part */
594 int exp, sign;
595
596 ptr++;
597 if (*ptr == '-')
598 {
599 sign = -1;
600 ptr++;
601 }
602 else
603 {
604 sign = 1;
605 } /* if */
606 if (!sc_isdigit(*ptr)) /* 'e' should be followed by a digit */
607 return 0;
608 exp = 0;
609 while (sc_isdigit(*ptr))
610 {
611 exp = (exp * 10) + (*ptr - '0');
612 ptr++;
613 } /* while */
614 #if defined LINUX
615 fmult = pow10(exp * sign);
616 #else
617 fmult = pow(10, exp * sign);
618 #endif
619 fnum *= fmult;
620 dnum *= (unsigned long)(fmult + 0.5);
621 } /* if */
622
623 /* decide how to store the number */
624 if (sc_rationaltag == 0)
625 {
626 error(70); /* rational number support was not enabled */
627 *val = 0;
628 }
629 else if (rational_digits == 0)
630 {
631 float f = (float) fnum;
632 /* floating point */
633 *val = EMBRYO_FLOAT_TO_CELL(f);
634 #if !defined NDEBUG
635 /* I assume that the C/C++ compiler stores "float" values in IEEE 754
636 * format (as mandated in the ANSI standard). Test this assumption anyway.
637 */
638 {
639 float test1 = 0.0, test2 = 50.0;
640 Embryo_Cell c1 = EMBRYO_FLOAT_TO_CELL(test1);
641 Embryo_Cell c2 = EMBRYO_FLOAT_TO_CELL(test2);
642
643 if (c1 != 0x00000000L)
644 {
645 fprintf(stderr,
646 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
647 "point math as embryo expects. this could be bad.\n"
648 "\n"
649 "(float 0.0 != 0x00000000 bitpattern, 0x%08x instead)\n"
650 "\n"
651 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
652 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
653 , c1);
654 }
655 else if (c2 != 0x42480000L)
656 {
657 fprintf(stderr,
658 "embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
659 "point math as embryo expects. This could be bad.\n"
660 "\n"
661 "(float 50.0 != 0x42480000 bitpattern, 0x%08x instead)\n"
662 "\n"
663 "this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
664 "to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
665 , c2);
666 }
667 }
668 #endif
669 }
670 else
671 {
672 /* fixed point */
673 *val = (cell) dnum;
674 } /* if */
675
676 return (int)(ptr - curptr);
677 }
678
679 /* number
680 *
681 * Reads in a number (binary, decimal or hexadecimal). It returns the number
682 * of characters processed or 0 if the symbol couldn't be interpreted as a
683 * number (in this case the argument "val" remains unchanged). This routine
684 * relies on the 'early dropout' implementation of the logical or (||)
685 * operator.
686 *
687 * Note: the routine doesn't check for a sign (+ or -). The - is checked
688 * for at "hier2()" (in fact, it is viewed as an operator, not as a
689 * sign) and the + is invalid (as in K&R C, and unlike ANSI C).
690 */
691 static int
number(cell * val,char * curptr)692 number(cell * val, char *curptr)
693 {
694 int i;
695 cell curval;
696
697 if ((i = btoi(&curval, curptr)) != 0 /* binary? */
698 || (i = htoi(&curval, curptr)) != 0 /* hexadecimal? */
699 || (i = dtoi(&curval, curptr)) != 0) /* decimal? */
700 {
701 *val = curval;
702 return i;
703 }
704 else
705 {
706 return 0; /* else not a number */
707 } /* if */
708 }
709
710 static void
chrcat(char * str,char chr)711 chrcat(char *str, char chr)
712 {
713 str = strchr(str, '\0');
714 *str++ = chr;
715 *str = '\0';
716 }
717
718 static int
preproc_expr(cell * val,int * tag)719 preproc_expr(cell * val, int *tag)
720 {
721 int result;
722 int idx;
723 cell code_index;
724 char *term;
725
726 /* Disable staging; it should be disabled already because
727 * expressions may not be cut off half-way between conditional
728 * compilations. Reset the staging index, but keep the code
729 * index.
730 */
731 if (stgget(&idx, &code_index))
732 {
733 error(57); /* unfinished expression */
734 stgdel(0, code_index);
735 stgset(FALSE);
736 } /* if */
737 /* append a special symbol to the string, so the expression
738 * analyzer won't try to read a next line when it encounters
739 * an end-of-line
740 */
741 assert(strlen(pline) < sLINEMAX);
742 term = strchr(pline, '\0');
743 assert(term != NULL);
744 chrcat(pline, PREPROC_TERM); /* the "DEL" code (see SC.H) */
745 result = constexpr(val, tag); /* get value (or 0 on error) */
746 *term = '\0'; /* erase the token (if still present) */
747 lexclr(FALSE); /* clear any "pushed" tokens */
748 return result;
749 }
750
751 /* getstring
752 * Returns returns a pointer behind the closing quote or to the other
753 * character that caused the input to be ended.
754 */
755 static char *
getstring(char * dest,int max)756 getstring(char *dest, int max)
757 {
758 assert(dest != NULL);
759 *dest = '\0';
760 while (*lptr <= ' ' && *lptr != '\0')
761 lptr++; /* skip whitespace */
762 if (*lptr != '"')
763 {
764 error(37); /* invalid string */
765 }
766 else
767 {
768 int len = 0;
769
770 lptr++; /* skip " */
771 while (*lptr != '"' && *lptr != '\0')
772 {
773 if (len < max - 1)
774 dest[len++] = *lptr;
775 lptr++;
776 } /* if */
777 dest[len] = '\0';
778 if (*lptr == '"')
779 lptr++; /* skip closing " */
780 else
781 error(37); /* invalid string */
782 } /* if */
783 return lptr;
784 }
785
786 enum
787 {
788 CMD_NONE,
789 CMD_TERM,
790 CMD_EMPTYLINE,
791 CMD_CONDFALSE,
792 CMD_INCLUDE,
793 CMD_DEFINE,
794 CMD_IF,
795 CMD_DIRECTIVE,
796 };
797
798 /* command
799 *
800 * Recognizes the compiler directives. The function returns:
801 * CMD_NONE the line must be processed
802 * CMD_TERM a pending expression must be completed before processing further lines
803 * Other value: the line must be skipped, because:
804 * CMD_CONDFALSE false "#if.." code
805 * CMD_EMPTYLINE line is empty
806 * CMD_INCLUDE the line contains a #include directive
807 * CMD_DEFINE the line contains a #subst directive
808 * CMD_IF the line contains a #if/#else/#endif directive
809 * CMD_DIRECTIVE the line contains some other compiler directive
810 *
811 * Global variables: iflevel, skiplevel, elsedone (altered)
812 * lptr (altered)
813 */
814 static int
command(void)815 command(void)
816 {
817 int tok, ret;
818 cell val;
819 char *str;
820 int idx;
821 cell code_index;
822
823 while (*lptr <= ' ' && *lptr != '\0')
824 lptr += 1;
825 if (*lptr == '\0')
826 return CMD_EMPTYLINE; /* empty line */
827 if (*lptr != '#')
828 return skiplevel > 0 ? CMD_CONDFALSE : CMD_NONE; /* it is not a compiler directive */
829 /* compiler directive found */
830 indent_nowarn = TRUE; /* allow loose indentation" */
831 lexclr(FALSE); /* clear any "pushed" tokens */
832 /* on a pending expression, force to return a silent ';' token and force to
833 * re-read the line
834 */
835 if (!sc_needsemicolon && stgget(&idx, &code_index))
836 {
837 lptr = term_expr;
838 return CMD_TERM;
839 } /* if */
840 tok = lex(&val, &str);
841 ret = skiplevel > 0 ? CMD_CONDFALSE : CMD_DIRECTIVE; /* preset 'ret' to CMD_DIRECTIVE (most common case) */
842 switch (tok)
843 {
844 case tpIF: /* conditional compilation */
845 ret = CMD_IF;
846 iflevel += 1;
847 if (skiplevel)
848 break; /* break out of switch */
849 preproc_expr(&val, NULL); /* get value (or 0 on error) */
850 if (!val)
851 skiplevel = iflevel;
852 check_empty(lptr);
853 break;
854 case tpELSE:
855 ret = CMD_IF;
856 if (iflevel == 0 && skiplevel == 0)
857 {
858 error(26); /* no matching #if */
859 errorset(sRESET);
860 }
861 else
862 {
863 if (elsedone == iflevel)
864 error(60); /* multiple #else directives between #if ... #endif */
865 elsedone = iflevel;
866 if (skiplevel == iflevel)
867 skiplevel = 0;
868 else if (skiplevel == 0)
869 skiplevel = iflevel;
870 } /* if */
871 check_empty(lptr);
872 break;
873 #if 0 /* ??? *really* need to use a stack here */
874 case tpELSEIF:
875 ret = CMD_IF;
876 if (iflevel == 0 && skiplevel == 0)
877 {
878 error(26); /* no matching #if */
879 errorset(sRESET);
880 }
881 else if (elsedone == iflevel)
882 {
883 error(61); /* #elseif directive may not follow an #else */
884 errorset(sRESET);
885 }
886 else
887 {
888 preproc_expr(&val, NULL); /* get value (or 0 on error) */
889 if (skiplevel == 0)
890 skiplevel = iflevel; /* we weren't skipping, start skipping now */
891 else if (val)
892 skiplevel = 0; /* we were skipping, condition is valid -> stop skipping */
893 /* else: we were skipping and condition is invalid -> keep skipping */
894 check_empty(lptr);
895 } /* if */
896 break;
897 #endif
898 case tpENDIF:
899 ret = CMD_IF;
900 if (iflevel == 0 && skiplevel == 0)
901 {
902 error(26);
903 errorset(sRESET);
904 }
905 else
906 {
907 if (skiplevel == iflevel)
908 skiplevel = 0;
909 if (elsedone == iflevel)
910 elsedone = 0; /* ??? actually, should build a stack of #if/#endif and keep
911 * the state whether an #else was seen per nesting level */
912 iflevel -= 1;
913 } /* if */
914 check_empty(lptr);
915 break;
916 case tINCLUDE: /* #include directive */
917 ret = CMD_INCLUDE;
918 if (skiplevel == 0)
919 doinclude();
920 break;
921 case tpFILE:
922 if (skiplevel == 0)
923 {
924 char pathname[PATH_MAX];
925
926 lptr = getstring(pathname, sizeof pathname);
927 if (pathname[0] != '\0')
928 {
929 free(inpfname);
930 inpfname = strdup(pathname);
931 if (!inpfname)
932 error(103); /* insufficient memory */
933 } /* if */
934 } /* if */
935 check_empty(lptr);
936 break;
937 case tpLINE:
938 if (skiplevel == 0)
939 {
940 if (lex(&val, &str) != tNUMBER)
941 error(8); /* invalid/non-constant expression */
942 fline = (int)val;
943
944 while (*lptr == ' ')
945 lptr++; /* skip whitespace */
946 if (*lptr == '"')
947 {
948 char pathname[PATH_MAX];
949
950 lptr = getstring(pathname, sizeof pathname);
951 if (pathname[0] != '\0')
952 {
953 free(inpfname);
954 inpfname = strdup(pathname);
955 if (!inpfname)
956 error(103); /* insufficient memory */
957 } /* if */
958 }
959 } /* if */
960 check_empty(lptr);
961 break;
962 case tpASSERT:
963 if (skiplevel == 0 && (sc_debug & sCHKBOUNDS) != 0)
964 {
965 preproc_expr(&val, NULL); /* get constant expression (or 0 on error) */
966 if (!val)
967 error(7); /* assertion failed */
968 check_empty(lptr);
969 } /* if */
970 break;
971 case tpPRAGMA:
972 if (skiplevel == 0)
973 {
974 if (lex(&val, &str) == tSYMBOL)
975 {
976 if (strcmp(str, "ctrlchar") == 0)
977 {
978 if (lex(&val, &str) != tNUMBER)
979 error(27); /* invalid character constant */
980 sc_ctrlchar = (char)val;
981 }
982 else if (strcmp(str, "compress") == 0)
983 {
984 cell compval;
985
986 preproc_expr(&compval, NULL);
987 sc_compress = (int)compval; /* switch code packing on/off */
988 }
989 else if (strcmp(str, "dynamic") == 0)
990 {
991 preproc_expr(&sc_stksize, NULL);
992 }
993 else if (strcmp(str, "library") == 0)
994 {
995 char name[sNAMEMAX + 1];
996
997 while (*lptr <= ' ' && *lptr != '\0')
998 lptr++;
999 if (*lptr == '"')
1000 {
1001 lptr = getstring(name, sizeof name);
1002 }
1003 else
1004 {
1005 int i;
1006
1007 for (i = 0;
1008 (i < (int)(sizeof(name)) - 1) &&
1009 (alphanum(*lptr));
1010 i++, lptr++)
1011 name[i] = *lptr;
1012 name[i] = '\0';
1013 } /* if */
1014 if (name[0] == '\0')
1015 {
1016 curlibrary = NULL;
1017 }
1018 else
1019 {
1020 if (strlen(name) > sEXPMAX)
1021 error(220, name, sEXPMAX); /* exported symbol is truncated */
1022 /* add the name if it does not yet exist in the table */
1023 if (!find_constval(&libname_tab, name, 0))
1024 curlibrary =
1025 append_constval(&libname_tab, name, 0, 0);
1026 } /* if */
1027 }
1028 else if (strcmp(str, "pack") == 0)
1029 {
1030 cell packval;
1031
1032 preproc_expr(&packval, NULL); /* default = packed/unpacked */
1033 sc_packstr = (int)packval;
1034 }
1035 else if (strcmp(str, "rational") == 0)
1036 {
1037 char name[sNAMEMAX + 1];
1038 cell digits = 0;
1039 int i;
1040
1041 /* first gather all information, start with the tag name */
1042 while ((*lptr <= ' ') && (*lptr != '\0'))
1043 lptr++;
1044 for (i = 0;
1045 (i < (int)(sizeof(name)) - 1) &&
1046 (alphanum(*lptr));
1047 i++, lptr++)
1048 name[i] = *lptr;
1049 name[i] = '\0';
1050 /* then the precision (for fixed point arithmetic) */
1051 while (*lptr <= ' ' && *lptr != '\0')
1052 lptr++;
1053 if (*lptr == '(')
1054 {
1055 preproc_expr(&digits, NULL);
1056 if (digits <= 0 || digits > 9)
1057 {
1058 error(68); /* invalid rational number precision */
1059 digits = 0;
1060 } /* if */
1061 if (*lptr == ')')
1062 lptr++;
1063 } /* if */
1064 /* add the tag (make it public) and check the values */
1065 i = sc_addtag(name);
1066 exporttag(i);
1067 if (sc_rationaltag == 0
1068 || (sc_rationaltag == i
1069 && rational_digits == (int)digits))
1070 {
1071 sc_rationaltag = i;
1072 rational_digits = (int)digits;
1073 }
1074 else
1075 {
1076 error(69); /* rational number format already set, can only be set once */
1077 } /* if */
1078 }
1079 else if (strcmp(str, "semicolon") == 0)
1080 {
1081 cell semicolval;
1082
1083 preproc_expr(&semicolval, NULL);
1084 sc_needsemicolon = (int)semicolval;
1085 }
1086 else if (strcmp(str, "tabsize") == 0)
1087 {
1088 cell tabsizeval;
1089
1090 preproc_expr(&tabsizeval, NULL);
1091 sc_tabsize = (int)tabsizeval;
1092 }
1093 else if (strcmp(str, "align") == 0)
1094 {
1095 sc_alignnext = TRUE;
1096 }
1097 else if (strcmp(str, "unused") == 0)
1098 {
1099 char name[sNAMEMAX + 1];
1100 int i, comma;
1101 symbol *sym;
1102
1103 do
1104 {
1105 /* get the name */
1106 while ((*lptr <= ' ') && (*lptr != '\0'))
1107 lptr++;
1108 for (i = 0;
1109 (i < (int)(sizeof(name)) - 1) &&
1110 (sc_isalpha(*lptr));
1111 i++, lptr++)
1112 name[i] = *lptr;
1113 name[i] = '\0';
1114 /* get the symbol */
1115 sym = findloc(name);
1116 if (!sym)
1117 sym = findglb(name);
1118 if (sym)
1119 {
1120 sym->usage |= uREAD;
1121 if (sym->ident == iVARIABLE
1122 || sym->ident == iREFERENCE
1123 || sym->ident == iARRAY
1124 || sym->ident == iREFARRAY)
1125 sym->usage |= uWRITTEN;
1126 }
1127 else
1128 {
1129 error(17, name); /* undefined symbol */
1130 } /* if */
1131 /* see if a comma follows the name */
1132 while (*lptr <= ' ' && *lptr != '\0')
1133 lptr++;
1134 comma = (*lptr == ',');
1135 if (comma)
1136 lptr++;
1137 }
1138 while (comma);
1139 }
1140 else
1141 {
1142 error(207); /* unknown #pragma */
1143 } /* if */
1144 }
1145 else
1146 {
1147 error(207); /* unknown #pragma */
1148 } /* if */
1149 check_empty(lptr);
1150 } /* if */
1151 break;
1152 case tpENDINPUT:
1153 case tpENDSCRPT:
1154 if (skiplevel == 0)
1155 {
1156 check_empty(lptr);
1157 assert(inpf != NULL);
1158 if (inpf != inpf_org)
1159 sc_closesrc(inpf);
1160 inpf = NULL;
1161 } /* if */
1162 break;
1163 #if !defined NOEMIT
1164 case tpEMIT:
1165 {
1166 /* write opcode to output file */
1167 char name[41];
1168 int i;
1169
1170 while (*lptr <= ' ' && *lptr != '\0')
1171 lptr++;
1172 for (i = 0; i < 40 && (sc_isalpha(*lptr) || *lptr == '.'); i++, lptr++)
1173 name[i] = (char)tolower(*lptr);
1174 name[i] = '\0';
1175 stgwrite("\t");
1176 stgwrite(name);
1177 stgwrite(" ");
1178 code_idx += opcodes(1);
1179 /* write parameter (if any) */
1180 while (*lptr <= ' ' && *lptr != '\0')
1181 lptr++;
1182 if (*lptr != '\0')
1183 {
1184 symbol *sym;
1185
1186 tok = lex(&val, &str);
1187 switch (tok)
1188 {
1189 case tNUMBER:
1190 case tRATIONAL:
1191 outval(val, FALSE);
1192 code_idx += opargs(1);
1193 break;
1194 case tSYMBOL:
1195 sym = findloc(str);
1196 if (!sym)
1197 sym = findglb(str);
1198 if (!sym || (sym->ident != iFUNCTN
1199 && sym->ident != iREFFUNC
1200 && (sym->usage & uDEFINE) == 0))
1201 {
1202 error(17, str); /* undefined symbol */
1203 }
1204 else
1205 {
1206 outval(sym->addr, FALSE);
1207 /* mark symbol as "used", unknown whether for read or write */
1208 markusage(sym, uREAD | uWRITTEN);
1209 code_idx += opargs(1);
1210 } /* if */
1211 break;
1212 default:
1213 {
1214 char s2[20];
1215 extern char *sc_tokens[]; /* forward declaration */
1216
1217 if (tok < 256)
1218 {
1219 s2[0] = (char)tok;
1220 s2[1] = 0;
1221 }
1222 else
1223 {
1224 strncpy(s2, sc_tokens[tok - tFIRST], 19);
1225 s2[19] = 0;
1226 }
1227 error(1, sc_tokens[tSYMBOL - tFIRST], s2);
1228 break;
1229 } /* case */
1230 } /* switch */
1231 } /* if */
1232 stgwrite("\n");
1233 check_empty(lptr);
1234 break;
1235 } /* case */
1236 #endif
1237 #if !defined NO_DEFINE
1238 case tpDEFINE:
1239 {
1240 ret = CMD_DEFINE;
1241 if (skiplevel == 0)
1242 {
1243 char *pattern, *substitution;
1244 char *start, *end;
1245 int count, prefixlen;
1246 stringpair *def;
1247
1248 /* find the pattern to match */
1249 while (*lptr <= ' ' && *lptr != '\0')
1250 lptr++;
1251 start = lptr; /* save starting point of the match pattern */
1252 count = 0;
1253 while (*lptr > ' ' && *lptr != '\0')
1254 {
1255 litchar(&lptr, FALSE); /* litchar() advances "lptr" and handles escape characters */
1256 count++;
1257 } /* while */
1258 end = lptr;
1259 /* check pattern to match */
1260 if (!sc_isalpha(*start) && *start != '_')
1261 {
1262 error(74); /* pattern must start with an alphabetic character */
1263 break;
1264 } /* if */
1265 /* store matched pattern */
1266 pattern = malloc(count + 1);
1267 if (!pattern)
1268 error(103); /* insufficient memory */
1269 lptr = start;
1270 count = 0;
1271 while (lptr != end)
1272 {
1273 assert(lptr < end);
1274 assert(*lptr != '\0');
1275 pattern[count++] = (char)litchar(&lptr, FALSE);
1276 } /* while */
1277 pattern[count] = '\0';
1278 /* special case, erase trailing variable, because it could match anything */
1279 if (count >= 2 && sc_isdigit(pattern[count - 1])
1280 && pattern[count - 2] == '%')
1281 pattern[count - 2] = '\0';
1282 /* find substitution string */
1283 while (*lptr <= ' ' && *lptr != '\0')
1284 lptr++;
1285 start = lptr; /* save starting point of the match pattern */
1286 count = 0;
1287 end = NULL;
1288 while (*lptr != '\0')
1289 {
1290 /* keep position of the start of trailing whitespace */
1291 if (*lptr <= ' ')
1292 {
1293 if (!end)
1294 end = lptr;
1295 }
1296 else
1297 {
1298 end = NULL;
1299 } /* if */
1300 count++;
1301 lptr++;
1302 } /* while */
1303 if (!end)
1304 end = lptr;
1305 /* store matched substitution */
1306 substitution = malloc(count + 1); /* +1 for '\0' */
1307 if (!substitution)
1308 error(103); /* insufficient memory */
1309 lptr = start;
1310 count = 0;
1311 while (lptr != end)
1312 {
1313 assert(lptr < end);
1314 assert(*lptr != '\0');
1315 substitution[count++] = *lptr++;
1316 } /* while */
1317 substitution[count] = '\0';
1318 /* check whether the definition already exists */
1319 for (prefixlen = 0, start = pattern;
1320 sc_isalpha(*start) || sc_isdigit(*start) || *start == '_';
1321 prefixlen++, start++)
1322 /* nothing */ ;
1323 assert(prefixlen > 0);
1324 if ((def = find_subst(pattern, prefixlen)))
1325 {
1326 if (strcmp(def->first, pattern) != 0
1327 || strcmp(def->second, substitution) != 0)
1328 error(201, pattern); /* redefinition of macro (non-identical) */
1329 delete_subst(pattern, prefixlen);
1330 } /* if */
1331 /* add the pattern/substitution pair to the list */
1332 assert(pattern[0] != '\0');
1333 insert_subst(pattern, substitution, prefixlen);
1334 free(pattern);
1335 free(substitution);
1336 } /* if */
1337 break;
1338 } /* case */
1339 case tpUNDEF:
1340 if (skiplevel == 0)
1341 {
1342 if (lex(&val, &str) == tSYMBOL)
1343 {
1344 if (!delete_subst(str, strlen(str)))
1345 error(17, str); /* undefined symbol */
1346 }
1347 else
1348 {
1349 error(20, str); /* invalid symbol name */
1350 } /* if */
1351 check_empty(lptr);
1352 } /* if */
1353 break;
1354 #endif
1355 default:
1356 error(31); /* unknown compiler directive */
1357 ret = skiplevel > 0 ? CMD_DIRECTIVE : CMD_NONE; /* line must be processed (if skiplevel==0) */
1358 } /* switch */
1359 return ret;
1360 }
1361
1362 #if !defined NO_DEFINE
1363 static int
is_startstring(char * string)1364 is_startstring(char *string)
1365 {
1366 if (*string == '\"' || *string == '\'')
1367 return TRUE; /* "..." */
1368
1369 if (*string == '!')
1370 {
1371 string++;
1372 if (*string == '\"' || *string == '\'')
1373 return TRUE; /* !"..." */
1374 if (*string == sc_ctrlchar)
1375 {
1376 string++;
1377 if (*string == '\"' || *string == '\'')
1378 return TRUE; /* !\"..." */
1379 } /* if */
1380 }
1381 else if (*string == sc_ctrlchar)
1382 {
1383 string++;
1384 if (*string == '\"' || *string == '\'')
1385 return TRUE; /* \"..." */
1386 if (*string == '!')
1387 {
1388 string++;
1389 if (*string == '\"' || *string == '\'')
1390 return TRUE; /* \!"..." */
1391 } /* if */
1392 } /* if */
1393
1394 return FALSE;
1395 }
1396
1397 static char *
skipstring(char * string)1398 skipstring(char *string)
1399 {
1400 char endquote;
1401 int rawstring = FALSE;
1402
1403 while (*string == '!' || *string == sc_ctrlchar)
1404 {
1405 rawstring = (*string == sc_ctrlchar);
1406 string++;
1407 } /* while */
1408
1409 endquote = *string;
1410 assert(endquote == '\"' || endquote == '\'');
1411 string++; /* skip open quote */
1412 while (*string != endquote && *string != '\0')
1413 litchar(&string, rawstring);
1414 return string;
1415 }
1416
1417 static char *
skippgroup(char * string)1418 skippgroup(char *string)
1419 {
1420 int nest = 0;
1421 char open = *string;
1422 char close;
1423
1424 switch (open)
1425 {
1426 case '(':
1427 close = ')';
1428 break;
1429 case '{':
1430 close = '}';
1431 break;
1432 case '[':
1433 close = ']';
1434 break;
1435 case '<':
1436 close = '>';
1437 break;
1438 default:
1439 assert(0);
1440 close = '\0'; /* only to avoid a compiler warning */
1441 } /* switch */
1442
1443 string++;
1444 while (*string != close || nest > 0)
1445 {
1446 if (*string == open)
1447 nest++;
1448 else if (*string == close)
1449 nest--;
1450 else if (is_startstring(string))
1451 string = skipstring(string);
1452 if (*string == '\0')
1453 break;
1454 string++;
1455 } /* while */
1456 return string;
1457 }
1458
1459 static char *
strdel(char * str,size_t len)1460 strdel(char *str, size_t len)
1461 {
1462 size_t length = strlen(str);
1463
1464 if (len > length)
1465 len = length;
1466 memmove(str, str + len, length - len + 1); /* include EOS byte */
1467 return str;
1468 }
1469
1470 static char *
strins(char * dest,char * src,size_t srclen)1471 strins(char *dest, char *src, size_t srclen)
1472 {
1473 size_t destlen = strlen(dest);
1474
1475 assert(srclen <= strlen(src));
1476 memmove(dest + srclen, dest, destlen + 1); /* include EOS byte */
1477 memcpy(dest, src, srclen);
1478 return dest;
1479 }
1480
1481 static int
substpattern(char * line,size_t buffersize,char * pattern,char * substitution)1482 substpattern(char *line, size_t buffersize, char *pattern, char *substitution)
1483 {
1484 int prefixlen;
1485 char *p, *s, *e, *args[10];
1486 int is_match, arg, len;
1487
1488 memset(args, 0, sizeof args);
1489
1490 /* check the length of the prefix */
1491 for (prefixlen = 0, s = pattern; sc_isalpha(*s) || sc_isdigit(*s) || *s == '_';
1492 prefixlen++, s++)
1493 /* nothing */ ;
1494 assert(prefixlen > 0);
1495 assert(strncmp(line, pattern, prefixlen) == 0);
1496
1497 /* pattern prefix matches; match the rest of the pattern, gather
1498 * the parameters
1499 */
1500 s = line + prefixlen;
1501 p = pattern + prefixlen;
1502 is_match = TRUE; /* so far, pattern matches */
1503 while (is_match && *s != '\0' && *p != '\0')
1504 {
1505 if (*p == '%')
1506 {
1507 p++; /* skip '%' */
1508 if (sc_isdigit(*p))
1509 {
1510 arg = *p - '0';
1511 assert(arg >= 0 && arg <= 9);
1512 p++; /* skip parameter id */
1513 assert(*p != '\0');
1514 /* match the source string up to the character after the digit
1515 * (skipping strings in the process
1516 */
1517 e = s;
1518 while (*e != *p && *e != '\0' && *e != '\n')
1519 {
1520 if (is_startstring(e)) /* skip strings */
1521 e = skipstring(e);
1522 else if (strchr("({[", *e)) /* skip parenthized groups */
1523 e = skippgroup(e);
1524 if (*e != '\0')
1525 e++; /* skip non-alphapetic character (or closing quote of
1526 * a string, or the closing paranthese of a group) */
1527 } /* while */
1528 /* store the parameter (overrule any earlier) */
1529 if (args[arg])
1530 free(args[arg]);
1531 len = (int)(e - s);
1532 args[arg] = malloc(len + 1);
1533 if (!args[arg])
1534 error(103); /* insufficient memory */
1535 strncpy(args[arg], s, len);
1536 args[arg][len] = '\0';
1537 /* character behind the pattern was matched too */
1538 if (*e == *p)
1539 {
1540 s = e + 1;
1541 }
1542 else if (*e == '\n' && *p == ';' && *(p + 1) == '\0'
1543 && !sc_needsemicolon)
1544 {
1545 s = e; /* allow a trailing ; in the pattern match to end of line */
1546 }
1547 else
1548 {
1549 assert(*e == '\0' || *e == '\n');
1550 is_match = FALSE;
1551 s = e;
1552 } /* if */
1553 p++;
1554 }
1555 else
1556 {
1557 is_match = FALSE;
1558 } /* if */
1559 }
1560 else if (*p == ';' && *(p + 1) == '\0' && !sc_needsemicolon)
1561 {
1562 /* source may be ';' or end of the line */
1563 while (*s <= ' ' && *s != '\0')
1564 s++; /* skip white space */
1565 if (*s != ';' && *s != '\0')
1566 is_match = FALSE;
1567 p++; /* skip the semicolon in the pattern */
1568 }
1569 else
1570 {
1571 cell ch;
1572
1573 /* skip whitespace between two non-alphanumeric characters, except
1574 * for two identical symbols
1575 */
1576 assert(p > pattern);
1577 if (!alphanum(*p) && *(p - 1) != *p)
1578 while (*s <= ' ' && *s != '\0')
1579 s++; /* skip white space */
1580 ch = litchar(&p, FALSE); /* this increments "p" */
1581 if (*s != ch)
1582 is_match = FALSE;
1583 else
1584 s++; /* this character matches */
1585 } /* if */
1586 } /* while */
1587
1588 if (is_match && *p == '\0')
1589 {
1590 /* if the last character to match is an alphanumeric character, the
1591 * current character in the source may not be alphanumeric
1592 */
1593 assert(p > pattern);
1594 if (alphanum(*(p - 1)) && alphanum(*s))
1595 is_match = FALSE;
1596 } /* if */
1597
1598 if (is_match)
1599 {
1600 /* calculate the length of the substituted string */
1601 for (e = substitution, len = 0; *e != '\0'; e++)
1602 {
1603 if (*e == '%' && sc_isdigit(*(e + 1)))
1604 {
1605 arg = *(e + 1) - '0';
1606 assert(arg >= 0 && arg <= 9);
1607 if (args[arg])
1608 len += strlen(args[arg]);
1609 e++; /* skip %, digit is skipped later */
1610 }
1611 else
1612 {
1613 len++;
1614 } /* if */
1615 } /* for */
1616 /* check length of the string after substitution */
1617 if (strlen(line) + len - (int)(s - line) > buffersize)
1618 {
1619 error(75); /* line too long */
1620 }
1621 else
1622 {
1623 /* substitute pattern */
1624 strdel(line, (int)(s - line));
1625 for (e = substitution, s = line; *e != '\0'; e++)
1626 {
1627 if (*e == '%' && sc_isdigit(*(e + 1)))
1628 {
1629 arg = *(e + 1) - '0';
1630 assert(arg >= 0 && arg <= 9);
1631 if (args[arg])
1632 {
1633 strins(s, args[arg], strlen(args[arg]));
1634 s += strlen(args[arg]);
1635 } /* if */
1636 e++; /* skip %, digit is skipped later */
1637 }
1638 else
1639 {
1640 strins(s, e, 1);
1641 s++;
1642 } /* if */
1643 } /* for */
1644 } /* if */
1645 } /* if */
1646
1647 for (arg = 0; arg < 10; arg++)
1648 if (args[arg])
1649 free(args[arg]);
1650
1651 return is_match;
1652 }
1653
1654 static void
substallpatterns(char * line,int buffersize)1655 substallpatterns(char *line, int buffersize)
1656 {
1657 char *start, *end;
1658 int prefixlen;
1659 stringpair *subst;
1660
1661 start = line;
1662 while (*start != '\0')
1663 {
1664 /* find the start of a prefix (skip all non-alphabetic characters),
1665 * also skip strings
1666 */
1667 while (!sc_isalpha(*start) && *start != '_' && *start != '\0')
1668 {
1669 /* skip strings */
1670 if (is_startstring(start))
1671 {
1672 start = skipstring(start);
1673 if (*start == '\0')
1674 break; /* abort loop on error */
1675 } /* if */
1676 start++; /* skip non-alphapetic character (or closing quote of a string) */
1677 } /* while */
1678 if (*start == '\0')
1679 break; /* abort loop on error */
1680 /* get the prefix (length), look for a matching definition */
1681 prefixlen = 0;
1682 end = start;
1683 while (sc_isalpha(*end) || sc_isdigit(*end) || *end == '_')
1684 {
1685 prefixlen++;
1686 end++;
1687 } /* while */
1688 assert(prefixlen > 0);
1689 subst = find_subst(start, prefixlen);
1690 if (subst)
1691 {
1692 /* properly match the pattern and substitute */
1693 if (!substpattern
1694 (start, buffersize - (start - line), subst->first,
1695 subst->second))
1696 start = end; /* match failed, skip this prefix */
1697 /* match succeeded: do not update "start", because the substitution text
1698 * may be matched by other macros
1699 */
1700 }
1701 else
1702 {
1703 start = end; /* no macro with this prefix, skip this prefix */
1704 } /* if */
1705 } /* while */
1706 }
1707 #endif
1708
1709 /* preprocess
1710 *
1711 * Reads a line by readline() into "pline" and performs basic preprocessing:
1712 * deleting comments, skipping lines with false "#if.." code and recognizing
1713 * other compiler directives. There is an indirect recursion: lex() calls
1714 * preprocess() if a new line must be read, preprocess() calls command(),
1715 * which at his turn calls lex() to identify the token.
1716 *
1717 * Global references: lptr (altered)
1718 * pline (altered)
1719 * freading (referred to only)
1720 */
1721 void
preprocess(void)1722 preprocess(void)
1723 {
1724 int iscommand;
1725
1726 if (!freading)
1727 return;
1728 do
1729 {
1730 readline(pline);
1731 stripcom(pline); /* ??? no need for this when reading back from list file (in the second pass) */
1732 lptr = pline; /* set "line pointer" to start of the parsing buffer */
1733 iscommand = command();
1734 if (iscommand != CMD_NONE)
1735 errorset(sRESET); /* reset error flag ("panic mode") on empty line or directive */
1736 #if !defined NO_DEFINE
1737 if (iscommand == CMD_NONE)
1738 {
1739 assert(lptr != term_expr);
1740 substallpatterns(pline, sLINEMAX);
1741 lptr = pline; /* reset "line pointer" to start of the parsing buffer */
1742 } /* if */
1743 #endif
1744 }
1745 while (iscommand != CMD_NONE && iscommand != CMD_TERM && freading); /* enddo */
1746 }
1747
1748 static char *
unpackedstring(char * sptr,int rawstring)1749 unpackedstring(char *sptr, int rawstring)
1750 {
1751 while (*sptr != '\0')
1752 {
1753 /* check for doublequotes indicating the end of the string */
1754 if (*sptr == '\"')
1755 {
1756 /* check whether there's another pair of quotes following.
1757 * If so, paste the two strings together, thus
1758 * "pants""off" becomes "pantsoff"
1759 */
1760 if (*(sptr + 1) == '\"')
1761 sptr += 2;
1762 else
1763 break;
1764 }
1765
1766 if (*sptr == '\a')
1767 { /* ignore '\a' (which was inserted at a line concatenation) */
1768 sptr++;
1769 continue;
1770 } /* if */
1771 stowlit(litchar(&sptr, rawstring)); /* litchar() alters "lptr" */
1772 } /* while */
1773 stowlit(0); /* terminate string */
1774 return sptr;
1775 }
1776
1777 static char *
packedstring(char * sptr,int rawstring)1778 packedstring(char *sptr, int rawstring)
1779 {
1780 int i;
1781 ucell val, c;
1782
1783 i = sizeof(ucell) - (charbits / 8); /* start at most significant byte */
1784 val = 0;
1785 while (*sptr != '\0')
1786 {
1787 /* check for doublequotes indicating the end of the string */
1788 if (*sptr == '\"')
1789 {
1790 /* check whether there's another pair of quotes following.
1791 * If so, paste the two strings together, thus
1792 * "pants""off" becomes "pantsoff"
1793 */
1794 if (*(sptr + 1) == '\"')
1795 sptr += 2;
1796 else
1797 break;
1798 }
1799
1800 if (*sptr == '\a')
1801 { /* ignore '\a' (which was inserted at a line concatenation) */
1802 sptr++;
1803 continue;
1804 } /* if */
1805 c = litchar(&sptr, rawstring); /* litchar() alters "sptr" */
1806 if (c >= (ucell) (1 << charbits))
1807 error(43); /* character constant exceeds range */
1808 val |= (c << 8 * i);
1809 if (i == 0)
1810 {
1811 stowlit(val);
1812 val = 0;
1813 } /* if */
1814 i = (i + sizeof(ucell) - (charbits / 8)) % sizeof(ucell);
1815 } /* if */
1816 /* save last code; make sure there is at least one terminating zero character */
1817 if (i != (int)(sizeof(ucell) - (charbits / 8)))
1818 stowlit(val); /* at least one zero character in "val" */
1819 else
1820 stowlit(0); /* add full cell of zeros */
1821 return sptr;
1822 }
1823
1824 /* lex(lexvalue,lexsym) Lexical Analysis
1825 *
1826 * lex() first deletes leading white space, then checks for multi-character
1827 * operators, keywords (including most compiler directives), numbers,
1828 * labels, symbols and literals (literal characters are converted to a number
1829 * and are returned as such). If every check fails, the line must contain
1830 * a single-character operator. So, lex() returns this character. In the other
1831 * case (something did match), lex() returns the number of the token. All
1832 * these tokens have been assigned numbers above 255.
1833 *
1834 * Some tokens have "attributes":
1835 * tNUMBER the value of the number is return in "lexvalue".
1836 * tRATIONAL the value is in IEEE 754 encoding or in fixed point
1837 * encoding in "lexvalue".
1838 * tSYMBOL the first sNAMEMAX characters of the symbol are
1839 * stored in a buffer, a pointer to this buffer is
1840 * returned in "lexsym".
1841 * tLABEL the first sNAMEMAX characters of the label are
1842 * stored in a buffer, a pointer to this buffer is
1843 * returned in "lexsym".
1844 * tSTRING the string is stored in the literal pool, the index
1845 * in the literal pool to this string is stored in
1846 * "lexvalue".
1847 *
1848 * lex() stores all information (the token found and possibly its attribute)
1849 * in global variables. This allows a token to be examined twice. If "_pushed"
1850 * is true, this information is returned.
1851 *
1852 * Global references: lptr (altered)
1853 * fline (referred to only)
1854 * litidx (referred to only)
1855 * _lextok, _lexval, _lexstr
1856 * _pushed
1857 */
1858
1859 static int _pushed;
1860 static int _lextok;
1861 static cell _lexval;
1862 static char _lexstr[sLINEMAX + 1];
1863 static int _lexnewline;
1864
1865 void
lexinit(void)1866 lexinit(void)
1867 {
1868 stkidx = 0; /* index for pushstk() and popstk() */
1869 iflevel = 0; /* preprocessor: nesting of "#if" */
1870 skiplevel = 0; /* preprocessor: skipping lines or compiling lines */
1871 icomment = FALSE; /* currently not in a multiline comment */
1872 _pushed = FALSE; /* no token pushed back into lex */
1873 _lexnewline = FALSE;
1874 }
1875
1876 char *sc_tokens[] = {
1877 "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=",
1878 "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--",
1879 "...", "..",
1880 "assert", "break", "case", "char", "const", "continue", "default",
1881 "defined", "do", "else", "enum", "exit", "for", "forward", "goto",
1882 "if", "native", "new", "operator", "public", "return", "sizeof",
1883 "sleep", "static", "stock", "switch", "tagof", "while",
1884 "#assert", "#define", "#else", "#emit", "#endif", "#endinput",
1885 "#endscript", "#file", "#if", "#include", "#line", "#pragma", "#undef",
1886 ";", ";", "-integer value-", "-rational value-", "-identifier-",
1887 "-label-", "-string-"
1888 };
1889
1890 int
lex(cell * lexvalue,char ** lexsym)1891 lex(cell * lexvalue, char **lexsym)
1892 {
1893 int i, toolong, newline, rawstring;
1894 char **tokptr;
1895
1896 if (_pushed)
1897 {
1898 _pushed = FALSE; /* reset "_pushed" flag */
1899 *lexvalue = _lexval;
1900 *lexsym = _lexstr;
1901 return _lextok;
1902 } /* if */
1903
1904 _lextok = 0; /* preset all values */
1905 _lexval = 0;
1906 _lexstr[0] = '\0';
1907 *lexvalue = _lexval;
1908 *lexsym = _lexstr;
1909 _lexnewline = FALSE;
1910 if (!freading)
1911 return 0;
1912
1913 newline = (lptr == pline); /* does lptr point to start of line buffer */
1914 while (*lptr <= ' ')
1915 { /* delete leading white space */
1916 if (*lptr == '\0')
1917 {
1918 preprocess(); /* preprocess resets "lptr" */
1919 if (!freading)
1920 return 0;
1921 if (lptr == term_expr) /* special sequence to terminate a pending expression */
1922 return (_lextok = tENDEXPR);
1923 _lexnewline = TRUE; /* set this after preprocess(), because
1924 * preprocess() calls lex() recursively */
1925 newline = TRUE;
1926 }
1927 else
1928 {
1929 lptr += 1;
1930 } /* if */
1931 } /* while */
1932 if (newline)
1933 {
1934 stmtindent = 0;
1935 for (i = 0; i < (int)(lptr - pline); i++)
1936 if (pline[i] == '\t' && sc_tabsize > 0)
1937 stmtindent +=
1938 (int)(sc_tabsize - (stmtindent + sc_tabsize) % sc_tabsize);
1939 else
1940 stmtindent++;
1941 } /* if */
1942
1943 i = tFIRST;
1944 tokptr = sc_tokens;
1945 while (i <= tMIDDLE)
1946 { /* match multi-character operators */
1947 if (match(*tokptr, FALSE))
1948 {
1949 _lextok = i;
1950 return _lextok;
1951 } /* if */
1952 i += 1;
1953 tokptr += 1;
1954 } /* while */
1955 while (i <= tLAST)
1956 { /* match reserved words and compiler directives */
1957 if (match(*tokptr, TRUE))
1958 {
1959 _lextok = i;
1960 errorset(sRESET); /* reset error flag (clear the "panic mode") */
1961 return _lextok;
1962 } /* if */
1963 i += 1;
1964 tokptr += 1;
1965 } /* while */
1966
1967 if ((i = number(&_lexval, lptr)) != 0)
1968 { /* number */
1969 _lextok = tNUMBER;
1970 *lexvalue = _lexval;
1971 lptr += i;
1972 }
1973 else if ((i = ftoi(&_lexval, lptr)) != 0)
1974 {
1975 _lextok = tRATIONAL;
1976 *lexvalue = _lexval;
1977 lptr += i;
1978 }
1979 else if (alpha(*lptr))
1980 { /* symbol or label */
1981 /* Note: only sNAMEMAX characters are significant. The compiler
1982 * generates a warning if a symbol exceeds this length.
1983 */
1984 _lextok = tSYMBOL;
1985 i = 0;
1986 toolong = 0;
1987 while (alphanum(*lptr))
1988 {
1989 _lexstr[i] = *lptr;
1990 lptr += 1;
1991 if (i < sNAMEMAX)
1992 i += 1;
1993 else
1994 toolong = 1;
1995 } /* while */
1996 _lexstr[i] = '\0';
1997 if (toolong)
1998 error(200, _lexstr, sNAMEMAX); /* symbol too long, truncated to sNAMEMAX chars */
1999 if (_lexstr[0] == PUBLIC_CHAR && _lexstr[1] == '\0')
2000 {
2001 _lextok = PUBLIC_CHAR; /* '@' all alone is not a symbol, it is an operator */
2002 }
2003 else if (_lexstr[0] == '_' && _lexstr[1] == '\0')
2004 {
2005 _lextok = '_'; /* '_' by itself is not a symbol, it is a placeholder */
2006 } /* if */
2007 if (*lptr == ':' && sc_allowtags && _lextok != PUBLIC_CHAR)
2008 {
2009 _lextok = tLABEL; /* it wasn't a normal symbol, it was a label/tagname */
2010 lptr += 1; /* skip colon */
2011 } /* if */
2012 }
2013 else if (*lptr == '\"' || (*lptr == sc_ctrlchar && *(lptr + 1) == '\"'))
2014 { /* unpacked string literal */
2015 _lextok = tSTRING;
2016 rawstring = (*lptr == sc_ctrlchar);
2017 *lexvalue = _lexval = litidx;
2018 lptr += 1; /* skip double quote */
2019 if (rawstring)
2020 lptr += 1; /* skip "escape" character too */
2021 lptr =
2022 sc_packstr ? packedstring(lptr, rawstring) : unpackedstring(lptr,
2023 rawstring);
2024 if (*lptr == '\"')
2025 lptr += 1; /* skip final quote */
2026 else
2027 error(37); /* invalid (non-terminated) string */
2028 }
2029 else if ((*lptr == '!' && *(lptr + 1) == '\"')
2030 || (*lptr == '!' && *(lptr + 1) == sc_ctrlchar && *(lptr + 2) == '\"')
2031 || (*lptr == sc_ctrlchar && *(lptr + 1) == '!'
2032 && *(lptr + 2) == '\"'))
2033 { /* packed string literal */
2034 _lextok = tSTRING;
2035 rawstring = (*lptr == sc_ctrlchar || *(lptr + 1) == sc_ctrlchar);
2036 *lexvalue = _lexval = litidx;
2037 lptr += 2; /* skip exclamation point and double quote */
2038 if (rawstring)
2039 lptr += 1; /* skip "escape" character too */
2040 lptr =
2041 sc_packstr ? unpackedstring(lptr, rawstring) : packedstring(lptr,
2042 rawstring);
2043 if (*lptr == '\"')
2044 lptr += 1; /* skip final quote */
2045 else
2046 error(37); /* invalid (non-terminated) string */
2047 }
2048 else if (*lptr == '\'')
2049 { /* character literal */
2050 lptr += 1; /* skip quote */
2051 _lextok = tNUMBER;
2052 *lexvalue = _lexval = litchar(&lptr, FALSE);
2053 if (*lptr == '\'')
2054 lptr += 1; /* skip final quote */
2055 else
2056 error(27); /* invalid character constant (must be one character) */
2057 }
2058 else if (*lptr == ';')
2059 { /* semicolumn resets "error" flag */
2060 _lextok = ';';
2061 lptr += 1;
2062 errorset(sRESET); /* reset error flag (clear the "panic mode") */
2063 }
2064 else
2065 {
2066 _lextok = *lptr; /* if every match fails, return the character */
2067 lptr += 1; /* increase the "lptr" pointer */
2068 } /* if */
2069 return _lextok;
2070 }
2071
2072 /* lexpush
2073 *
2074 * Pushes a token back, so the next call to lex() will return the token
2075 * last examined, instead of a new token.
2076 *
2077 * Only one token can be pushed back.
2078 *
2079 * In fact, lex() already stores the information it finds into global
2080 * variables, so all that is to be done is set a flag that informs lex()
2081 * to read and return the information from these variables, rather than
2082 * to read in a new token from the input file.
2083 */
2084 void
lexpush(void)2085 lexpush(void)
2086 {
2087 assert(_pushed == FALSE);
2088 _pushed = TRUE;
2089 }
2090
2091 /* lexclr
2092 *
2093 * Sets the variable "_pushed" to 0 to make sure lex() will read in a new
2094 * symbol (a not continue with some old one). This is required upon return
2095 * from Assembler mode.
2096 */
2097 void
lexclr(int clreol)2098 lexclr(int clreol)
2099 {
2100 _pushed = FALSE;
2101 if (clreol)
2102 {
2103 lptr = strchr(pline, '\0');
2104 assert(lptr != NULL);
2105 } /* if */
2106 }
2107
2108 /* matchtoken
2109 *
2110 * This routine is useful if only a simple check is needed. If the token
2111 * differs from the one expected, it is pushed back.
2112 */
2113 int
matchtoken(int token)2114 matchtoken(int token)
2115 {
2116 cell val;
2117 char *str;
2118 int tok;
2119
2120 tok = lex(&val, &str);
2121 if (tok == token || (token == tTERM && (tok == ';' || tok == tENDEXPR)))
2122 {
2123 return 1;
2124 }
2125 else if (!sc_needsemicolon && token == tTERM && (_lexnewline || !freading))
2126 {
2127 lexpush(); /* push "tok" back, we use the "hidden" newline token */
2128 return 1;
2129 }
2130 else
2131 {
2132 lexpush();
2133 return 0;
2134 } /* if */
2135 }
2136
2137 /* tokeninfo
2138 *
2139 * Returns additional information of a token after using "matchtoken()"
2140 * or needtoken(). It does no harm using this routine after a call to
2141 * "lex()", but lex() already returns the same information.
2142 *
2143 * The token itself is the return value. Normally, this one is already known.
2144 */
2145 int
tokeninfo(cell * val,char ** str)2146 tokeninfo(cell * val, char **str)
2147 {
2148 /* if the token was pushed back, tokeninfo() returns the token and
2149 * parameters of the *next* token, not of the *current* token.
2150 */
2151 assert(!_pushed);
2152 *val = _lexval;
2153 *str = _lexstr;
2154 return _lextok;
2155 }
2156
2157 /* needtoken
2158 *
2159 * This routine checks for a required token and gives an error message if
2160 * it isn't there (and returns FALSE in that case).
2161 *
2162 * Global references: _lextok;
2163 */
2164 int
needtoken(int token)2165 needtoken(int token)
2166 {
2167 char s1[20], s2[20];
2168
2169 if (matchtoken(token))
2170 {
2171 return TRUE;
2172 }
2173 else
2174 {
2175 /* token already pushed back */
2176 assert(_pushed);
2177 if (token < 256)
2178 {
2179 s1[0] = (char)token; /* single character token */
2180 s1[1] = 0;
2181 }
2182 else
2183 {
2184 strncpy(s1, sc_tokens[token - tFIRST], 19); /* multi-character symbol */
2185 s1[19] = 0;
2186 }
2187
2188 if (!freading)
2189 {
2190 strncpy(s2, "-end of file-", 19);
2191 }
2192 else if (_lextok < 256)
2193 {
2194 s2[0] = (char)_lextok;
2195 s2[1] = 0;
2196 }
2197 else
2198 {
2199 strncpy(s2, sc_tokens[_lextok - tFIRST], 19);
2200 }
2201 s2[19] = 0;
2202 error(1, s1, s2); /* expected ..., but found ... */
2203 return FALSE;
2204 } /* if */
2205 }
2206
2207 /* match
2208 *
2209 * Compares a series of characters from the input file with the characters
2210 * in "st" (that contains a token). If the token on the input file matches
2211 * "st", the input file pointer "lptr" is adjusted to point to the next
2212 * token, otherwise "lptr" remains unaltered.
2213 *
2214 * If the parameter "end: is true, match() requires that the first character
2215 * behind the recognized token is non-alphanumeric.
2216 *
2217 * Global references: lptr (altered)
2218 */
2219 static int
match(char * st,int end)2220 match(char *st, int end)
2221 {
2222 int k;
2223 char *ptr;
2224
2225 k = 0;
2226 ptr = lptr;
2227 while (st[k])
2228 {
2229 if (st[k] != *ptr)
2230 return 0;
2231 k += 1;
2232 ptr += 1;
2233 } /* while */
2234 if (end)
2235 { /* symbol must terminate with non-alphanumeric char */
2236 if (alphanum(*ptr))
2237 return 0;
2238 } /* if */
2239 lptr = ptr; /* match found, skip symbol */
2240 return 1;
2241 }
2242
2243 /* stowlit
2244 *
2245 * Stores a value into the literal queue. The literal queue is used for
2246 * literal strings used in functions and for initializing array variables.
2247 *
2248 * Global references: litidx (altered)
2249 * litq (altered)
2250 */
2251 void
stowlit(cell val)2252 stowlit(cell val)
2253 {
2254 if (litidx >= litmax)
2255 {
2256 cell *p;
2257
2258 litmax += sDEF_LITMAX;
2259 p = (cell *) realloc(litq, litmax * sizeof(cell));
2260 if (!p)
2261 error(102, "literal table"); /* literal table overflow (fatal error) */
2262 litq = p;
2263 } /* if */
2264 assert(litidx < litmax);
2265 litq[litidx++] = val;
2266 }
2267
2268 /* litchar
2269 *
2270 * Return current literal character and increase the pointer to point
2271 * just behind this literal character.
2272 *
2273 * Note: standard "escape sequences" are suported, but the backslash may be
2274 * replaced by another character; the syntax '\ddd' is supported,
2275 * but ddd must be decimal!
2276 */
2277 static cell
litchar(char ** p_str,int rawmode)2278 litchar(char **p_str, int rawmode)
2279 {
2280 cell c = 0;
2281 unsigned char *cptr;
2282
2283 cptr = (unsigned char *)*p_str;
2284 if (rawmode || *cptr != sc_ctrlchar)
2285 { /* no escape character */
2286 c = *cptr;
2287 cptr += 1;
2288 }
2289 else
2290 {
2291 cptr += 1;
2292 if (*cptr == sc_ctrlchar)
2293 {
2294 c = *cptr; /* \\ == \ (the escape character itself) */
2295 cptr += 1;
2296 }
2297 else
2298 {
2299 switch (*cptr)
2300 {
2301 case 'a': /* \a == audible alarm */
2302 c = 7;
2303 cptr += 1;
2304 break;
2305 case 'b': /* \b == backspace */
2306 c = 8;
2307 cptr += 1;
2308 break;
2309 case 'e': /* \e == escape */
2310 c = 27;
2311 cptr += 1;
2312 break;
2313 case 'f': /* \f == form feed */
2314 c = 12;
2315 cptr += 1;
2316 break;
2317 case 'n': /* \n == NewLine character */
2318 c = 10;
2319 cptr += 1;
2320 break;
2321 case 'r': /* \r == carriage return */
2322 c = 13;
2323 cptr += 1;
2324 break;
2325 case 't': /* \t == horizontal TAB */
2326 c = 9;
2327 cptr += 1;
2328 break;
2329 case 'v': /* \v == vertical TAB */
2330 c = 11;
2331 cptr += 1;
2332 break;
2333 case '\'': /* \' == ' (single quote) */
2334 case '"': /* \" == " (single quote) */
2335 case '%': /* \% == % (percent) */
2336 c = *cptr;
2337 cptr += 1;
2338 break;
2339 default:
2340 if (sc_isdigit(*cptr))
2341 { /* \ddd */
2342 c = 0;
2343 while (*cptr >= '0' && *cptr <= '9') /* decimal! */
2344 c = c * 10 + *cptr++ - '0';
2345 if (*cptr == ';')
2346 cptr++; /* swallow a trailing ';' */
2347 }
2348 else
2349 {
2350 error(27); /* invalid character constant */
2351 } /* if */
2352 } /* switch */
2353 } /* if */
2354 } /* if */
2355 *p_str = (char *)cptr;
2356 assert(c >= 0 && c < 256);
2357 return c;
2358 }
2359
2360 /* alpha
2361 *
2362 * Test if character "c" is alphabetic ("a".."z"), an underscore ("_")
2363 * or an "at" sign ("@"). The "@" is an extension to standard C.
2364 */
2365 static int
alpha(char c)2366 alpha(char c)
2367 {
2368 return (sc_isalpha(c) || c == '_' || c == PUBLIC_CHAR);
2369 }
2370
2371 /* alphanum
2372 *
2373 * Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@")
2374 */
2375 int
alphanum(char c)2376 alphanum(char c)
2377 {
2378 return (alpha(c) || sc_isdigit(c));
2379 }
2380
2381 /* The local variable table must be searched backwards, so that the deepest
2382 * nesting of local variables is searched first. The simplest way to do
2383 * this is to insert all new items at the head of the list.
2384 * In the global list, the symbols are kept in sorted order, so that the
2385 * public functions are written in sorted order.
2386 */
2387 static symbol *
add_symbol(symbol * root,symbol * entry,int sort)2388 add_symbol(symbol * root, symbol * entry, int sort)
2389 {
2390 symbol *newsym;
2391
2392 if (sort)
2393 while (root->next && strcmp(entry->name, root->next->name) > 0)
2394 root = root->next;
2395
2396 if (!(newsym = (symbol *)malloc(sizeof(symbol))))
2397 {
2398 error(103);
2399 return NULL;
2400 } /* if */
2401 memcpy(newsym, entry, sizeof(symbol));
2402 newsym->next = root->next;
2403 root->next = newsym;
2404 return newsym;
2405 }
2406
2407 static void
free_symbol(symbol * sym)2408 free_symbol(symbol * sym)
2409 {
2410 arginfo *arg;
2411
2412 /* free all sub-symbol allocated memory blocks, depending on the
2413 * kind of the symbol
2414 */
2415 assert(sym != NULL);
2416 if (sym->ident == iFUNCTN)
2417 {
2418 /* run through the argument list; "default array" arguments
2419 * must be freed explicitly; the tag list must also be freed */
2420 assert(sym->dim.arglist != NULL);
2421 for (arg = sym->dim.arglist; arg->ident != 0; arg++)
2422 {
2423 if (arg->ident == iREFARRAY && arg->hasdefault)
2424 free(arg->defvalue.array.data);
2425 else if (arg->ident == iVARIABLE
2426 && ((arg->hasdefault & uSIZEOF) != 0
2427 || (arg->hasdefault & uTAGOF) != 0))
2428 free(arg->defvalue.size.symname);
2429 assert(arg->tags != NULL);
2430 free(arg->tags);
2431 } /* for */
2432 free(sym->dim.arglist);
2433 } /* if */
2434 assert(sym->refer != NULL);
2435 free(sym->refer);
2436 free(sym);
2437 }
2438
2439 void
delete_symbol(symbol * root,symbol * sym)2440 delete_symbol(symbol * root, symbol * sym)
2441 {
2442 /* find the symbol and its predecessor
2443 * (this function assumes that you will never delete a symbol that is not
2444 * in the table pointed at by "root")
2445 */
2446 assert(root != sym);
2447 while (root->next != sym)
2448 {
2449 root = root->next;
2450 assert(root != NULL);
2451 } /* while */
2452
2453 /* unlink it, then free it */
2454 root->next = sym->next;
2455 free_symbol(sym);
2456 }
2457
2458 void
delete_symbols(symbol * root,int level,int delete_labels,int delete_functions)2459 delete_symbols(symbol * root, int level, int delete_labels,
2460 int delete_functions)
2461 {
2462 symbol *sym;
2463
2464 /* erase only the symbols with a deeper nesting level than the
2465 * specified nesting level */
2466 while (root->next)
2467 {
2468 sym = root->next;
2469 if (sym->compound < level)
2470 break;
2471 if ((delete_labels || sym->ident != iLABEL)
2472 && (delete_functions || sym->ident != iFUNCTN
2473 || (sym->usage & uNATIVE) != 0) && (delete_functions
2474 || sym->ident != iCONSTEXPR
2475 || (sym->usage & uPREDEF) ==
2476 0) && (delete_functions
2477 || (sym->ident !=
2478 iVARIABLE
2479 && sym->ident !=
2480 iARRAY)))
2481 {
2482 root->next = sym->next;
2483 free_symbol(sym);
2484 }
2485 else
2486 {
2487 /* if the function was prototyped, but not implemented in this source,
2488 * mark it as such, so that its use can be flagged
2489 */
2490 if (sym->ident == iFUNCTN && (sym->usage & uDEFINE) == 0)
2491 sym->usage |= uMISSING;
2492 if (sym->ident == iFUNCTN || sym->ident == iVARIABLE
2493 || sym->ident == iARRAY)
2494 sym->usage &= ~uDEFINE; /* clear "defined" flag */
2495 /* for user defined operators, also remove the "prototyped" flag, as
2496 * user-defined operators *must* be declared before use
2497 */
2498 if (sym->ident == iFUNCTN && !sc_isalpha(*sym->name)
2499 && *sym->name != '_' && *sym->name != PUBLIC_CHAR)
2500 sym->usage &= ~uPROTOTYPED;
2501 root = sym; /* skip the symbol */
2502 } /* if */
2503 } /* if */
2504 }
2505
2506 /* The purpose of the hash is to reduce the frequency of a "name"
2507 * comparison (which is costly). There is little interest in avoiding
2508 * clusters in similar names, which is why this function is plain simple.
2509 */
2510 unsigned int
namehash(char * name)2511 namehash(char *name)
2512 {
2513 unsigned char *ptr = (unsigned char *)name;
2514 int len = strlen(name);
2515
2516 if (len == 0)
2517 return 0L;
2518 assert(len < 256);
2519 return (len << 24Lu) + (ptr[0] << 16Lu) + (ptr[len - 1] << 8Lu) +
2520 (ptr[len >> 1Lu]);
2521 }
2522
2523 static symbol *
find_symbol(symbol * root,char * name,int fnum)2524 find_symbol(symbol * root, char *name, int fnum)
2525 {
2526 symbol *ptr = root->next;
2527 unsigned long hash = namehash(name);
2528
2529 while (ptr)
2530 {
2531 if (hash == ptr->hash && strcmp(name, ptr->name) == 0
2532 && !ptr->parent && (ptr->fnumber < 0
2533 || ptr->fnumber == fnum))
2534 return ptr;
2535 ptr = ptr->next;
2536 } /* while */
2537 return NULL;
2538 }
2539
2540 static symbol *
find_symbol_child(symbol * root,symbol * sym)2541 find_symbol_child(symbol * root, symbol * sym)
2542 {
2543 symbol *ptr = root->next;
2544
2545 while (ptr)
2546 {
2547 if (ptr->parent == sym)
2548 return ptr;
2549 ptr = ptr->next;
2550 } /* while */
2551 return NULL;
2552 }
2553
2554 /* Adds "bywhom" to the list of referrers of "entry". Typically,
2555 * bywhom will be the function that uses a variable or that calls
2556 * the function.
2557 */
2558 int
refer_symbol(symbol * entry,symbol * bywhom)2559 refer_symbol(symbol * entry, symbol * bywhom)
2560 {
2561 int count;
2562
2563 assert(bywhom != NULL); /* it makes no sense to add a "void" referrer */
2564 assert(entry != NULL);
2565 assert(entry->refer != NULL);
2566
2567 /* see if it is already there */
2568 for (count = 0; count < entry->numrefers && entry->refer[count] != bywhom;
2569 count++)
2570 /* nothing */ ;
2571 if (count < entry->numrefers)
2572 {
2573 assert(entry->refer[count] == bywhom);
2574 return TRUE;
2575 } /* if */
2576
2577 /* see if there is an empty spot in the referrer list */
2578 for (count = 0; count < entry->numrefers && entry->refer[count];
2579 count++)
2580 /* nothing */ ;
2581 assert(count <= entry->numrefers);
2582 if (count == entry->numrefers)
2583 {
2584 symbol **refer;
2585 int newsize = 2 * entry->numrefers;
2586
2587 assert(newsize > 0);
2588 /* grow the referrer list */
2589 refer = (symbol **) realloc(entry->refer, newsize * sizeof(symbol *));
2590 if (!refer)
2591 return FALSE; /* insufficient memory */
2592 /* initialize the new entries */
2593 entry->refer = refer;
2594 for (count = entry->numrefers; count < newsize; count++)
2595 entry->refer[count] = NULL;
2596 count = entry->numrefers; /* first empty spot */
2597 entry->numrefers = newsize;
2598 } /* if */
2599
2600 /* add the referrer */
2601 assert(entry->refer[count] == NULL);
2602 entry->refer[count] = bywhom;
2603 return TRUE;
2604 }
2605
2606 void
markusage(symbol * sym,int usage)2607 markusage(symbol * sym, int usage)
2608 {
2609 sym->usage |= (char)usage;
2610 /* check if (global) reference must be added to the symbol */
2611 if ((usage & (uREAD | uWRITTEN)) != 0)
2612 {
2613 /* only do this for global symbols */
2614 if (sym->vclass == sGLOBAL)
2615 {
2616 /* "curfunc" should always be valid, since statements may not occurs
2617 * outside functions; in the case of syntax errors, however, the
2618 * compiler may arrive through this function
2619 */
2620 if (curfunc)
2621 refer_symbol(sym, curfunc);
2622 } /* if */
2623 } /* if */
2624 }
2625
2626 /* findglb
2627 *
2628 * Returns a pointer to the global symbol (if found) or NULL (if not found)
2629 */
2630 symbol *
findglb(char * name)2631 findglb(char *name)
2632 {
2633 return find_symbol(&glbtab, name, fcurrent);
2634 }
2635
2636 /* findloc
2637 *
2638 * Returns a pointer to the local symbol (if found) or NULL (if not found).
2639 * See add_symbol() how the deepest nesting level is searched first.
2640 */
2641 symbol *
findloc(char * name)2642 findloc(char *name)
2643 {
2644 return find_symbol(&loctab, name, -1);
2645 }
2646
2647 symbol *
findconst(char * name)2648 findconst(char *name)
2649 {
2650 symbol *sym;
2651
2652 sym = find_symbol(&loctab, name, -1); /* try local symbols first */
2653 if (!sym || sym->ident != iCONSTEXPR) /* not found, or not a constant */
2654 sym = find_symbol(&glbtab, name, fcurrent);
2655 if (!sym || sym->ident != iCONSTEXPR)
2656 return NULL;
2657 assert(sym->parent == NULL); /* constants have no hierarchy */
2658 return sym;
2659 }
2660
2661 symbol *
finddepend(symbol * parent)2662 finddepend(symbol * parent)
2663 {
2664 symbol *sym;
2665
2666 sym = find_symbol_child(&loctab, parent); /* try local symbols first */
2667 if (!sym) /* not found */
2668 sym = find_symbol_child(&glbtab, parent);
2669 return sym;
2670 }
2671
2672 /* addsym
2673 *
2674 * Adds a symbol to the symbol table (either global or local variables,
2675 * or global and local constants).
2676 */
2677 symbol *
addsym(char * name,cell addr,int ident,int vclass,int tag,int usage)2678 addsym(char *name, cell addr, int ident, int vclass, int tag, int usage)
2679 {
2680 symbol entry, **refer;
2681
2682 /* global variables/constants/functions may only be defined once */
2683 assert(!(ident == iFUNCTN || ident == iCONSTEXPR) || vclass != sGLOBAL
2684 || findglb(name) == NULL);
2685 /* labels may only be defined once */
2686 assert(ident != iLABEL || findloc(name) == NULL);
2687
2688 /* create an empty referrer list */
2689 if (!(refer = (symbol **)malloc(sizeof(symbol *))))
2690 {
2691 error(103); /* insufficient memory */
2692 return NULL;
2693 } /* if */
2694 *refer = NULL;
2695
2696 /* first fill in the entry */
2697 strncpy(entry.name, name, sizeof(entry.name) - 1);
2698 entry.name[sizeof(entry.name) - 1] = 0;
2699 entry.hash = namehash(name);
2700 entry.addr = addr;
2701 entry.vclass = (char)vclass;
2702 entry.ident = (char)ident;
2703 entry.tag = tag;
2704 entry.usage = (char)usage;
2705 entry.compound = 0; /* may be overridden later */
2706 entry.fnumber = -1; /* assume global visibility (ignored for local symbols) */
2707 entry.numrefers = 1;
2708 entry.refer = refer;
2709 entry.parent = NULL;
2710
2711 /* then insert it in the list */
2712 if (vclass == sGLOBAL)
2713 return add_symbol(&glbtab, &entry, TRUE);
2714 else
2715 return add_symbol(&loctab, &entry, FALSE);
2716 }
2717
2718 symbol *
addvariable(char * name,cell addr,int ident,int vclass,int tag,int dim[],int numdim,int idxtag[])2719 addvariable(char *name, cell addr, int ident, int vclass, int tag,
2720 int dim[], int numdim, int idxtag[])
2721 {
2722 symbol *sym, *parent, *top;
2723 int level;
2724
2725 sym = findglb(name);
2726 /* global variables may only be defined once */
2727 assert(vclass != sGLOBAL || sym == NULL
2728 || (sym->usage & uDEFINE) == 0);
2729
2730 if (ident == iARRAY || ident == iREFARRAY)
2731 {
2732 parent = NULL;
2733 sym = NULL; /* to avoid a compiler warning */
2734 for (level = 0; level < numdim; level++)
2735 {
2736 top = addsym(name, addr, ident, vclass, tag, uDEFINE);
2737 top->dim.array.length = dim[level];
2738 top->dim.array.level = (short)(numdim - level - 1);
2739 top->x.idxtag = idxtag[level];
2740 top->parent = parent;
2741 parent = top;
2742 if (level == 0)
2743 sym = top;
2744 } /* for */
2745 }
2746 else
2747 {
2748 sym = addsym(name, addr, ident, vclass, tag, uDEFINE);
2749 } /* if */
2750 return sym;
2751 }
2752
2753 /* getlabel
2754 *
2755 * Return next available internal label number.
2756 */
2757 int
getlabel(void)2758 getlabel(void)
2759 {
2760 return labnum++;
2761 }
2762
2763 /* itoh
2764 *
2765 * Converts a number to a hexadecimal string and returns a pointer to that
2766 * string.
2767 */
2768 char *
itoh(ucell val)2769 itoh(ucell val)
2770 {
2771 static char itohstr[15]; /* hex number is 10 characters long at most */
2772 char *ptr;
2773 int i, nibble[8]; /* a 32-bit hexadecimal cell has 8 nibbles */
2774 int max;
2775
2776 #if defined(BIT16)
2777 max = 4;
2778 #else
2779 max = 8;
2780 #endif
2781 ptr = itohstr;
2782 for (i = 0; i < max; i += 1)
2783 {
2784 nibble[i] = (int)(val & 0x0f); /* nibble 0 is lowest nibble */
2785 val >>= 4;
2786 } /* endfor */
2787 i = max - 1;
2788 while (nibble[i] == 0 && i > 0) /* search for highest non-zero nibble */
2789 i -= 1;
2790 while (i >= 0)
2791 {
2792 if (nibble[i] >= 10)
2793 *ptr++ = (char)('a' + (nibble[i] - 10));
2794 else
2795 *ptr++ = (char)('0' + nibble[i]);
2796 i -= 1;
2797 } /* while */
2798 *ptr = '\0'; /* and a zero-terminator */
2799 return itohstr;
2800 }
2801