1 /*  Small compiler - File input, preprocessing and lexical analysis functions
2  *
3  *  Copyright (c) ITB CompuPhase, 1997-2003
4  *
5  *  This software is provided "as-is", without any express or implied warranty.
6  *  In no event will the authors be held liable for any damages arising from
7  *  the use of this software.
8  *
9  *  Permission is granted to anyone to use this software for any purpose,
10  *  including commercial applications, and to alter it and redistribute it
11  *  freely, subject to the following restrictions:
12  *
13  *  1.  The origin of this software must not be misrepresented; you must not
14  *      claim that you wrote the original software. If you use this software in
15  *      a product, an acknowledgment in the product documentation would be
16  *      appreciated but is not required.
17  *  2.  Altered source versions must be plainly marked as such, and must not be
18  *      misrepresented as being the original software.
19  *  3.  This notice may not be removed or altered from any source distribution.
20  *
21  *  Version: $Id$
22  */
23 
24 
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28 
29 #include <assert.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <ctype.h>
34 #include <math.h>
35 #include "embryo_cc_sc.h"
36 #include "Embryo.h"
37 
38 static int          match(char *st, int end);
39 static cell         litchar(char **lptr, int rawmode);
40 static int          alpha(char c);
41 
42 static int          icomment;	/* currently in multiline comment? */
43 static int          iflevel;	/* nesting level if #if/#else/#endif */
44 static int          skiplevel;	/* level at which we started skipping */
45 static int          elsedone;	/* level at which we have seen an #else */
46 static char         term_expr[] = "";
47 static int          listline = -1;	/* "current line" for the list file */
48 
49 /*  pushstk & popstk
50  *
51  *  Uses a LIFO stack to store information. The stack is used by doinclude(),
52  *  doswitch() (to hold the state of "swactive") and some other routines.
53  *
54  *  Porting note: I made the bold assumption that an integer will not be
55  *  larger than a pointer (it may be smaller). That is, the stack element
56  *  is typedef'ed as a pointer type, but I also store integers on it. See
57  *  SC.H for "stkitem"
58  *
59  *  Global references: stack,stkidx (private to pushstk() and popstk())
60  */
61 static stkitem      stack[sSTKMAX];
62 static int          stkidx;
63 void
pushstk(stkitem val)64 pushstk(stkitem val)
65 {
66    if (stkidx >= sSTKMAX)
67       error(102, "parser stack");	/* stack overflow (recursive include?) */
68    stack[stkidx] = val;
69    stkidx += 1;
70 }
71 
72 stkitem
popstk(void)73 popstk(void)
74 {
75    if (stkidx == 0)
76       return (stkitem) - 1;	/* stack is empty */
77    stkidx -= 1;
78    return stack[stkidx];
79 }
80 
81 int
plungequalifiedfile(char * name)82 plungequalifiedfile(char *name)
83 {
84    static char        *extensions[] = { ".inc", ".sma", ".small" };
85    FILE               *fp;
86    char               *ext;
87    int                 ext_idx;
88 
89    ext_idx = 0;
90    do
91      {
92 	fp = (FILE *) sc_opensrc(name);
93 	ext = strchr(name, '\0');	/* save position */
94 	if (!fp)
95 	  {
96 	     /* try to append an extension */
97 	     strcpy(ext, extensions[ext_idx]);
98 	     fp = (FILE *) sc_opensrc(name);
99 	     if (!fp)
100 		*ext = '\0';	/* on failure, restore filename */
101 	  }			/* if */
102 	ext_idx++;
103      }
104    while ((!fp) &&
105           (ext_idx < (int)(sizeof extensions / sizeof extensions[0])));
106    if (!fp)
107      {
108 	*ext = '\0';		/* restore filename */
109 	return FALSE;
110      }				/* if */
111    pushstk((stkitem) inpf);
112    pushstk((stkitem) inpfname);	/* pointer to current file name */
113    pushstk((stkitem) curlibrary);
114    pushstk((stkitem) iflevel);
115    assert(skiplevel == 0);
116    pushstk((stkitem) icomment);
117    pushstk((stkitem) fcurrent);
118    pushstk((stkitem) fline);
119    inpfname = strdup(name);	/* set name of include file */
120    if (!inpfname)
121       error(103);		/* insufficient memory */
122    inpf = fp;			/* set input file pointer to include file */
123    fnumber++;
124    fline = 0;			/* set current line number to 0 */
125    fcurrent = fnumber;
126    icomment = FALSE;
127    setfile(inpfname, fcurrent);
128    listline = -1;		/* force a #line directive when changing the file */
129    setactivefile(fcurrent);
130    return TRUE;
131 }
132 
133 int
plungefile(char * name,int try_currentpath,int try_includepaths)134 plungefile(char *name, int try_currentpath, int try_includepaths)
135 {
136    int                 result = FALSE;
137    int                 i;
138    char               *ptr;
139 
140    if (try_currentpath)
141       result = plungequalifiedfile(name);
142 
143    if (try_includepaths && name[0] != DIRSEP_CHAR)
144      {
145 	for (i = 0; !result && (ptr = get_path(i)); i++)
146 	  {
147 	     char                path[PATH_MAX + PATH_MAX + 128];
148 
149              snprintf(path, sizeof (path), "%s/%s", ptr, name);
150 	     path[sizeof path - 1] = '\0';	/* force '\0' termination */
151 	     result = plungequalifiedfile(path);
152 	  }			/* while */
153      }				/* if */
154    return result;
155 }
156 
157 static void
check_empty(const char * sptr)158 check_empty(const char *sptr)
159 {
160    /* verifies that the string contains only whitespace */
161    while (*sptr <= ' ' && *sptr != '\0')
162       sptr++;
163    if (*sptr != '\0')
164       error(38);		/* extra characters on line */
165 }
166 
167 /*  doinclude
168  *
169  *  Gets the name of an include file, pushes the old file on the stack and
170  *  sets some options. This routine doesn't use lex(), since lex() doesn't
171  *  recognize file names (and directories).
172  *
173  *  Global references: inpf     (altered)
174  *                     inpfname (altered)
175  *                     fline    (altered)
176  *                     lptr     (altered)
177  */
178 static void
doinclude(void)179 doinclude(void)
180 {
181    char                name[PATH_MAX], c;
182    int                 i, result;
183 
184    while (*lptr <= ' ' && *lptr != 0)	/* skip leading whitespace */
185       lptr++;
186    if (*lptr == '<' || *lptr == '\"')
187      {
188 	c = (char)((*lptr == '\"') ? '\"' : '>');	/* termination character */
189 	lptr++;
190 	while (*lptr <= ' ' && *lptr != 0)	/* skip whitespace after quote */
191 	   lptr++;
192      }
193    else
194      {
195 	c = '\0';
196      }				/* if */
197 
198    i = 0;
199    while ((*lptr != c) && (*lptr != '\0') && (i < (int)(sizeof(name) - 1))) /* find the end of the string */
200       name[i++] = *lptr++;
201    while (i > 0 && name[i - 1] <= ' ')
202       i--;			/* strip trailing whitespace */
203    assert((i >= 0) && (i < (int)(sizeof(name))));
204    name[i] = '\0';		/* zero-terminate the string */
205 
206    if (*lptr != c)
207      {				/* verify correct string termination */
208 	error(37);		/* invalid string */
209 	return;
210      }				/* if */
211    if (c != '\0')
212       check_empty(lptr + 1);	/* verify that the rest of the line is whitespace */
213 
214    /* Include files between "..." or without quotes are read from the current
215     * directory, or from a list of "include directories". Include files
216     * between <...> are only read from the list of include directories.
217     */
218    result = plungefile(name, (c != '>'), TRUE);
219    if (!result)
220       error(100, name);		/* cannot read from ... (fatal error) */
221 }
222 
223 /*  readline
224  *
225  *  Reads in a new line from the input file pointed to by "inpf". readline()
226  *  concatenates lines that end with a \ with the next line. If no more data
227  *  can be read from the file, readline() attempts to pop off the previous file
228  *  from the stack. If that fails too, it sets "freading" to 0.
229  *
230  *  Global references: inpf,fline,inpfname,freading,icomment (altered)
231  */
232 static void
readline(char * line)233 readline(char *line)
234 {
235    int                 i, num, cont;
236 
237    if (lptr == term_expr)
238       return;
239    num = sLINEMAX;
240    cont = FALSE;
241    do
242      {
243 	if (!inpf || sc_eofsrc(inpf))
244 	  {
245 	     if (cont)
246 		error(49);	/* invalid line continuation */
247 	     if (inpf && inpf != inpf_org)
248 		sc_closesrc(inpf);
249 	     i = (int)(long)popstk();
250 	     if (i == -1)
251 	       {		/* All's done; popstk() returns "stack is empty" */
252 		  freading = FALSE;
253 		  *line = '\0';
254 		  /* when there is nothing more to read, the #if/#else stack should
255 		   * be empty and we should not be in a comment
256 		   */
257 		  assert(iflevel >= 0);
258 		  if (iflevel > 0)
259 		     error(1, "#endif", "-end of file-");
260 		  else if (icomment)
261 		     error(1, "*/", "-end of file-");
262 		  return;
263 	       }		/* if */
264 	     fline = i;
265 	     fcurrent = (int)(long)popstk();
266 	     icomment = (int)(long)popstk();
267 	     assert(skiplevel == 0);	/* skiplevel was not stored on stack, because it should always be zero at this point */
268 	     iflevel = (int)(long)popstk();
269 	     curlibrary = (constvalue *) popstk();
270 	     free(inpfname);	/* return memory allocated for the include file name */
271 	     inpfname = (char *)popstk();
272 	     inpf = (FILE *) popstk();
273 	     setactivefile(fcurrent);
274 	     listline = -1;	/* force a #line directive when changing the file */
275 	     elsedone = 0;
276 	  }			/* if */
277 
278 	if (!sc_readsrc(inpf, line, num))
279 	  {
280 	     *line = '\0';	/* delete line */
281 	     cont = FALSE;
282 	  }
283 	else
284 	  {
285              char *ptr;
286 	     /* check whether to erase leading spaces */
287 	     if (cont)
288 	       {
289 		  char *sptr = line;
290 
291 		  while (*sptr == ' ' || *sptr == '\t')
292 		     sptr++;
293 		  if (sptr != line)
294 		     memmove(line, sptr, strlen(sptr) + 1);
295 	       }		/* if */
296 	     cont = FALSE;
297 	     /* check whether a full line was read */
298 	     if (!strchr(line, '\n') && !sc_eofsrc(inpf))
299 		error(75);	/* line too long */
300 	     /* check if the next line must be concatenated to this line */
301 	     if ((ptr = strchr(line, '\n')) && ptr > line)
302 	       {
303 		  assert(*(ptr + 1) == '\0');	/* '\n' should be last in the string */
304 		  while (ptr > line
305 			 && (*ptr == '\n' || *ptr == ' ' || *ptr == '\t'))
306 		     ptr--;	/* skip trailing whitespace */
307 		  if (*ptr == '\\')
308 		    {
309 		       cont = TRUE;
310 		       /* set '\a' at the position of '\\' to make it possible to check
311 		        * for a line continuation in a single line comment (error 49)
312 		        */
313 		       *ptr++ = '\a';
314 		       *ptr = '\0';	/* erase '\n' (and any trailing whitespace) */
315 		    }		/* if */
316 	       }		/* if */
317 	     num -= strlen(line);
318 	     line += strlen(line);
319 	  }			/* if */
320 	fline += 1;
321      }
322    while (num >= 0 && cont);
323 }
324 
325 /*  stripcom
326  *
327  *  Replaces all comments from the line by space characters. It updates
328  *  a global variable ("icomment") for multiline comments.
329  *
330  *  This routine also supports the C++ extension for single line comments.
331  *  These comments are started with "//" and end at the end of the line.
332  *
333  *  Global references: icomment  (private to "stripcom")
334  */
335 static void
stripcom(char * line)336 stripcom(char *line)
337 {
338    char                c;
339 
340    while (*line)
341      {
342 	if (icomment)
343 	  {
344 	     if (*line == '*' && *(line + 1) == '/')
345 	       {
346 		  icomment = FALSE;	/* comment has ended */
347 		  *line = ' ';	/* replace '*' and '/' characters by spaces */
348 		  *(line + 1) = ' ';
349 		  line += 2;
350 	       }
351 	     else
352 	       {
353 		  if (*line == '/' && *(line + 1) == '*')
354 		     error(216);	/* nested comment */
355 		  *line = ' ';	/* replace comments by spaces */
356 		  line += 1;
357 	       }		/* if */
358 	  }
359 	else
360 	  {
361 	     if (*line == '/' && *(line + 1) == '*')
362 	       {
363 		  icomment = TRUE;	/* start comment */
364 		  *line = ' ';	/* replace '/' and '*' characters by spaces */
365 		  *(line + 1) = ' ';
366 		  line += 2;
367 	       }
368 	     else if (*line == '/' && *(line + 1) == '/')
369 	       {		/* comment to end of line */
370 		  if (strchr(line, '\a'))
371 		     error(49);	/* invalid line continuation */
372 		  *line++ = '\n';	/* put "newline" at first slash */
373 		  *line = '\0';	/* put "zero-terminator" at second slash */
374 	       }
375 	     else
376 	       {
377 		  if (*line == '\"' || *line == '\'')
378 		    {		/* leave literals unaltered */
379 		       c = *line;	/* ending quote, single or double */
380 		       line += 1;
381 		       while ((*line != c || *(line - 1) == '\\')
382 			      && *line != '\0')
383 			  line += 1;
384 		       line += 1;	/* skip final quote */
385 		    }
386 		  else
387 		    {
388 		       line += 1;
389 		    }		/* if */
390 	       }		/* if */
391 	  }			/* if */
392      }				/* while */
393 }
394 
395 /*  btoi
396  *
397  *  Attempts to interpret a numeric symbol as a boolean value. On success
398  *  it returns the number of characters processed (so the line pointer can be
399  *  adjusted) and the value is stored in "val". Otherwise it returns 0 and
400  *  "val" is garbage.
401  *
402  *  A boolean value must start with "0b"
403  */
404 static int
btoi(cell * val,char * curptr)405 btoi(cell * val, char *curptr)
406 {
407    char               *ptr;
408 
409    *val = 0;
410    ptr = curptr;
411    if (*ptr == '0' && *(ptr + 1) == 'b')
412      {
413 	ptr += 2;
414 	while (*ptr == '0' || *ptr == '1' || *ptr == '_')
415 	  {
416 	     if (*ptr != '_')
417 		*val = (*val << 1) | (*ptr - '0');
418 	     ptr++;
419 	  }			/* while */
420      }
421    else
422      {
423 	return 0;
424      }				/* if */
425    if (alphanum(*ptr))		/* number must be delimited by non-alphanumeric char */
426       return 0;
427    else
428       return (int)(ptr - curptr);
429 }
430 
431 /*  dtoi
432  *
433  *  Attempts to interpret a numeric symbol as a decimal value. On success
434  *  it returns the number of characters processed and the value is stored in
435  *  "val". Otherwise it returns 0 and "val" is garbage.
436  */
437 static int
dtoi(cell * val,char * curptr)438 dtoi(cell * val, char *curptr)
439 {
440    char               *ptr;
441 
442    *val = 0;
443    ptr = curptr;
444    if (!sc_isdigit(*ptr))		/* should start with digit */
445       return 0;
446    while (sc_isdigit(*ptr) || *ptr == '_')
447      {
448 	if (*ptr != '_')
449 	   *val = (*val * 10) + (*ptr - '0');
450 	ptr++;
451      }				/* while */
452    if (alphanum(*ptr))		/* number must be delimited by non-alphanumerical */
453       return 0;
454    if (*ptr == '.' && sc_isdigit(*(ptr + 1)))
455       return 0;			/* but a fractional part must not be present */
456    return (int)(ptr - curptr);
457 }
458 
459 /*  htoi
460  *
461  *  Attempts to interpret a numeric symbol as a hexadecimal value. On
462  *  success it returns the number of characters processed and the value is
463  *  stored in "val". Otherwise it return 0 and "val" is garbage.
464  */
465 static int
htoi(cell * val,char * curptr)466 htoi(cell * val, char *curptr)
467 {
468    char               *ptr;
469 
470    *val = 0;
471    ptr = curptr;
472    if (!sc_isdigit(*ptr))		/* should start with digit */
473       return 0;
474    if (*ptr == '0' && *(ptr + 1) == 'x')
475      {				/* C style hexadecimal notation */
476 	ptr += 2;
477 	while (sc_isxdigit(*ptr) || *ptr == '_')
478 	  {
479 	     if (*ptr != '_')
480 	       {
481 		  assert(sc_isxdigit(*ptr));
482 		  *val = *val << 4;
483 		  if (sc_isdigit(*ptr))
484 		     *val += (*ptr - '0');
485 		  else
486 		     *val += (tolower(*ptr) - 'a' + 10);
487 	       }		/* if */
488 	     ptr++;
489 	  }			/* while */
490      }
491    else
492      {
493 	return 0;
494      }				/* if */
495    if (alphanum(*ptr))
496       return 0;
497    else
498       return (int)(ptr - curptr);
499 }
500 
501 #if defined LINUX
502 static double
pow10(int value)503 pow10(int value)
504 {
505    double              res = 1.0;
506 
507    while (value >= 4)
508      {
509 	res *= 10000.0;
510 	value -= 5;
511      }				/* while */
512    while (value >= 2)
513      {
514 	res *= 100.0;
515 	value -= 2;
516      }				/* while */
517    while (value >= 1)
518      {
519 	res *= 10.0;
520 	value -= 1;
521      }				/* while */
522    return res;
523 }
524 #endif
525 
526 /*  ftoi
527  *
528  *  Attempts to interpret a numeric symbol as a rational number, either as
529  *  IEEE 754 single precision floating point or as a fixed point integer.
530  *  On success it returns the number of characters processed and the value is
531  *  stored in "val". Otherwise it returns 0 and "val" is unchanged.
532  *
533  *  Small has stricter definition for floating point numbers than most:
534  *  o  the value must start with a digit; ".5" is not a valid number, you
535  *     should write "0.5"
536  *  o  a period must appear in the value, even if an exponent is given; "2e3"
537  *     is not a valid number, you should write "2.0e3"
538  *  o  at least one digit must follow the period; "6." is not a valid number,
539  *     you should write "6.0"
540  */
541 static int
ftoi(cell * val,char * curptr)542 ftoi(cell * val, char *curptr)
543 {
544    char               *ptr;
545    double              fnum, ffrac, fmult;
546    unsigned long       dnum, dbase;
547    int                 i, ignore;
548 
549    assert(rational_digits >= 0 && rational_digits < 9);
550    for (i = 0, dbase = 1; i < rational_digits; i++)
551       dbase *= 10;
552    fnum = 0.0;
553    dnum = 0L;
554    ptr = curptr;
555    if (!sc_isdigit(*ptr))		/* should start with digit */
556       return 0;
557    while (sc_isdigit(*ptr) || *ptr == '_')
558      {
559 	if (*ptr != '_')
560 	  {
561 	     fnum = (fnum * 10.0) + (*ptr - '0');
562 	     dnum = (dnum * 10L) + (*ptr - '0') * dbase;
563 	  }			/* if */
564 	ptr++;
565      }				/* while */
566    if (*ptr != '.')
567       return 0;			/* there must be a period */
568    ptr++;
569    if (!sc_isdigit(*ptr))		/* there must be at least one digit after the dot */
570       return 0;
571    ffrac = 0.0;
572    fmult = 1.0;
573    ignore = FALSE;
574    while (sc_isdigit(*ptr) || *ptr == '_')
575      {
576 	if (*ptr != '_')
577 	  {
578 	     ffrac = (ffrac * 10.0) + (*ptr - '0');
579 	     fmult = fmult / 10.0;
580 	     dbase /= 10L;
581 	     dnum += (*ptr - '0') * dbase;
582 	     if (dbase == 0L && sc_rationaltag && rational_digits > 0
583 		 && !ignore)
584 	       {
585 		  error(222);	/* number of digits exceeds rational number precision */
586 		  ignore = TRUE;
587 	       }		/* if */
588 	  }			/* if */
589 	ptr++;
590      }				/* while */
591    fnum += ffrac * fmult;	/* form the number so far */
592    if (*ptr == 'e')
593      {				/* optional fractional part */
594 	int                 exp, sign;
595 
596 	ptr++;
597 	if (*ptr == '-')
598 	  {
599 	     sign = -1;
600 	     ptr++;
601 	  }
602 	else
603 	  {
604 	     sign = 1;
605 	  }			/* if */
606 	if (!sc_isdigit(*ptr))	/* 'e' should be followed by a digit */
607 	   return 0;
608 	exp = 0;
609 	while (sc_isdigit(*ptr))
610 	  {
611 	     exp = (exp * 10) + (*ptr - '0');
612 	     ptr++;
613 	  }			/* while */
614 #if defined LINUX
615 	fmult = pow10(exp * sign);
616 #else
617 	fmult = pow(10, exp * sign);
618 #endif
619 	fnum *= fmult;
620 	dnum *= (unsigned long)(fmult + 0.5);
621      }				/* if */
622 
623    /* decide how to store the number */
624    if (sc_rationaltag == 0)
625      {
626 	error(70);		/* rational number support was not enabled */
627 	*val = 0;
628      }
629    else if (rational_digits == 0)
630      {
631 	float f = (float) fnum;
632 	/* floating point */
633       *val = EMBRYO_FLOAT_TO_CELL(f);
634 #if !defined NDEBUG
635 	/* I assume that the C/C++ compiler stores "float" values in IEEE 754
636 	 * format (as mandated in the ANSI standard). Test this assumption anyway.
637 	 */
638 	{
639 	   float test1 = 0.0, test2 = 50.0;
640 	   Embryo_Cell c1 = EMBRYO_FLOAT_TO_CELL(test1);
641 	   Embryo_Cell c2 = EMBRYO_FLOAT_TO_CELL(test2);
642 
643 	   if (c1 != 0x00000000L)
644 	     {
645 		fprintf(stderr,
646 			"embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
647 			"point math as embryo expects. this could be bad.\n"
648 			"\n"
649 			"(float 0.0 != 0x00000000 bitpattern, 0x%08x instead)\n"
650 			"\n"
651 			"this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
652 			"to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
653 			, c1);
654 	     }
655 	  else if (c2 != 0x42480000L)
656 	     {
657 		fprintf(stderr,
658 			"embryo_cc: WARNING! you compiler does not SEEM to interpret IEEE floating\n"
659 			"point math as embryo expects. This could be bad.\n"
660 			"\n"
661 			"(float 50.0 != 0x42480000 bitpattern, 0x%08x instead)\n"
662 			"\n"
663 			"this could be an issue with you compiling embryo with gcc 3.2.x that seems\n"
664 			"to trigger this sanity check. we are not sure why yet, but gcc 3.3.x works\n"
665 			, c2);
666 	     }
667 	}
668 #endif
669      }
670    else
671      {
672 	/* fixed point */
673 	*val = (cell) dnum;
674      }				/* if */
675 
676    return (int)(ptr - curptr);
677 }
678 
679 /*  number
680  *
681  *  Reads in a number (binary, decimal or hexadecimal). It returns the number
682  *  of characters processed or 0 if the symbol couldn't be interpreted as a
683  *  number (in this case the argument "val" remains unchanged). This routine
684  *  relies on the 'early dropout' implementation of the logical or (||)
685  *  operator.
686  *
687  *  Note: the routine doesn't check for a sign (+ or -). The - is checked
688  *        for at "hier2()" (in fact, it is viewed as an operator, not as a
689  *        sign) and the + is invalid (as in K&R C, and unlike ANSI C).
690  */
691 static int
number(cell * val,char * curptr)692 number(cell * val, char *curptr)
693 {
694    int                 i;
695    cell                curval;
696 
697    if ((i = btoi(&curval, curptr)) != 0	/* binary? */
698        || (i = htoi(&curval, curptr)) != 0	/* hexadecimal? */
699        || (i = dtoi(&curval, curptr)) != 0)	/* decimal? */
700      {
701 	*val = curval;
702 	return i;
703      }
704    else
705      {
706 	return 0;		/* else not a number */
707      }				/* if */
708 }
709 
710 static void
chrcat(char * str,char chr)711 chrcat(char *str, char chr)
712 {
713    str = strchr(str, '\0');
714    *str++ = chr;
715    *str = '\0';
716 }
717 
718 static int
preproc_expr(cell * val,int * tag)719 preproc_expr(cell * val, int *tag)
720 {
721    int                 result;
722    int                 idx;
723    cell                code_index;
724    char               *term;
725 
726    /* Disable staging; it should be disabled already because
727     * expressions may not be cut off half-way between conditional
728     * compilations. Reset the staging index, but keep the code
729     * index.
730     */
731    if (stgget(&idx, &code_index))
732      {
733 	error(57);		/* unfinished expression */
734 	stgdel(0, code_index);
735 	stgset(FALSE);
736      }				/* if */
737    /* append a special symbol to the string, so the expression
738     * analyzer won't try to read a next line when it encounters
739     * an end-of-line
740     */
741    assert(strlen(pline) < sLINEMAX);
742    term = strchr(pline, '\0');
743    assert(term != NULL);
744    chrcat(pline, PREPROC_TERM);	/* the "DEL" code (see SC.H) */
745    result = constexpr(val, tag);	/* get value (or 0 on error) */
746    *term = '\0';		/* erase the token (if still present) */
747    lexclr(FALSE);		/* clear any "pushed" tokens */
748    return result;
749 }
750 
751 /* getstring
752  * Returns returns a pointer behind the closing quote or to the other
753  * character that caused the input to be ended.
754  */
755 static char        *
getstring(char * dest,int max)756 getstring(char *dest, int max)
757 {
758    assert(dest != NULL);
759    *dest = '\0';
760    while (*lptr <= ' ' && *lptr != '\0')
761       lptr++;			/* skip whitespace */
762    if (*lptr != '"')
763      {
764 	error(37);		/* invalid string */
765      }
766    else
767      {
768 	int                 len = 0;
769 
770 	lptr++;			/* skip " */
771 	while (*lptr != '"' && *lptr != '\0')
772 	  {
773 	     if (len < max - 1)
774 		dest[len++] = *lptr;
775 	     lptr++;
776 	  }			/* if */
777 	dest[len] = '\0';
778 	if (*lptr == '"')
779 	   lptr++;		/* skip closing " */
780 	else
781 	   error(37);		/* invalid string */
782      }				/* if */
783    return lptr;
784 }
785 
786 enum
787 {
788    CMD_NONE,
789    CMD_TERM,
790    CMD_EMPTYLINE,
791    CMD_CONDFALSE,
792    CMD_INCLUDE,
793    CMD_DEFINE,
794    CMD_IF,
795    CMD_DIRECTIVE,
796 };
797 
798 /*  command
799  *
800  *  Recognizes the compiler directives. The function returns:
801  *     CMD_NONE         the line must be processed
802  *     CMD_TERM         a pending expression must be completed before processing further lines
803  *     Other value: the line must be skipped, because:
804  *     CMD_CONDFALSE    false "#if.." code
805  *     CMD_EMPTYLINE    line is empty
806  *     CMD_INCLUDE      the line contains a #include directive
807  *     CMD_DEFINE       the line contains a #subst directive
808  *     CMD_IF           the line contains a #if/#else/#endif directive
809  *     CMD_DIRECTIVE    the line contains some other compiler directive
810  *
811  *  Global variables: iflevel, skiplevel, elsedone (altered)
812  *                    lptr      (altered)
813  */
814 static int
command(void)815 command(void)
816 {
817    int                 tok, ret;
818    cell                val;
819    char               *str;
820    int                 idx;
821    cell                code_index;
822 
823    while (*lptr <= ' ' && *lptr != '\0')
824       lptr += 1;
825    if (*lptr == '\0')
826       return CMD_EMPTYLINE;	/* empty line */
827    if (*lptr != '#')
828       return skiplevel > 0 ? CMD_CONDFALSE : CMD_NONE;	/* it is not a compiler directive */
829    /* compiler directive found */
830    indent_nowarn = TRUE;	/* allow loose indentation" */
831    lexclr(FALSE);		/* clear any "pushed" tokens */
832    /* on a pending expression, force to return a silent ';' token and force to
833     * re-read the line
834     */
835    if (!sc_needsemicolon && stgget(&idx, &code_index))
836      {
837 	lptr = term_expr;
838 	return CMD_TERM;
839      }				/* if */
840    tok = lex(&val, &str);
841    ret = skiplevel > 0 ? CMD_CONDFALSE : CMD_DIRECTIVE;	/* preset 'ret' to CMD_DIRECTIVE (most common case) */
842    switch (tok)
843      {
844      case tpIF:		/* conditional compilation */
845 	ret = CMD_IF;
846 	iflevel += 1;
847 	if (skiplevel)
848 	   break;		/* break out of switch */
849 	preproc_expr(&val, NULL);	/* get value (or 0 on error) */
850 	if (!val)
851 	   skiplevel = iflevel;
852 	check_empty(lptr);
853 	break;
854      case tpELSE:
855 	ret = CMD_IF;
856 	if (iflevel == 0 && skiplevel == 0)
857 	  {
858 	     error(26);		/* no matching #if */
859 	     errorset(sRESET);
860 	  }
861 	else
862 	  {
863 	     if (elsedone == iflevel)
864 		error(60);	/* multiple #else directives between #if ... #endif */
865 	     elsedone = iflevel;
866 	     if (skiplevel == iflevel)
867 		skiplevel = 0;
868 	     else if (skiplevel == 0)
869 		skiplevel = iflevel;
870 	  }			/* if */
871 	check_empty(lptr);
872 	break;
873 #if 0				/* ??? *really* need to use a stack here */
874      case tpELSEIF:
875 	ret = CMD_IF;
876 	if (iflevel == 0 && skiplevel == 0)
877 	  {
878 	     error(26);		/* no matching #if */
879 	     errorset(sRESET);
880 	  }
881 	else if (elsedone == iflevel)
882 	  {
883 	     error(61);		/* #elseif directive may not follow an #else */
884 	     errorset(sRESET);
885 	  }
886 	else
887 	  {
888 	     preproc_expr(&val, NULL);	/* get value (or 0 on error) */
889 	     if (skiplevel == 0)
890 		skiplevel = iflevel;	/* we weren't skipping, start skipping now */
891 	     else if (val)
892 		skiplevel = 0;	/* we were skipping, condition is valid -> stop skipping */
893 	     /* else: we were skipping and condition is invalid -> keep skipping */
894 	     check_empty(lptr);
895 	  }			/* if */
896 	break;
897 #endif
898      case tpENDIF:
899 	ret = CMD_IF;
900 	if (iflevel == 0 && skiplevel == 0)
901 	  {
902 	     error(26);
903 	     errorset(sRESET);
904 	  }
905 	else
906 	  {
907 	     if (skiplevel == iflevel)
908 		skiplevel = 0;
909 	     if (elsedone == iflevel)
910 		elsedone = 0;	/* ??? actually, should build a stack of #if/#endif and keep
911 				 * the state whether an #else was seen per nesting level */
912 	     iflevel -= 1;
913 	  }			/* if */
914 	check_empty(lptr);
915 	break;
916      case tINCLUDE:		/* #include directive */
917 	ret = CMD_INCLUDE;
918 	if (skiplevel == 0)
919 	   doinclude();
920 	break;
921      case tpFILE:
922 	if (skiplevel == 0)
923 	  {
924 	     char                pathname[PATH_MAX];
925 
926 	     lptr = getstring(pathname, sizeof pathname);
927 	     if (pathname[0] != '\0')
928 	       {
929 		  free(inpfname);
930 		  inpfname = strdup(pathname);
931 		  if (!inpfname)
932 		     error(103);	/* insufficient memory */
933 	       }		/* if */
934 	  }			/* if */
935 	check_empty(lptr);
936 	break;
937      case tpLINE:
938 	if (skiplevel == 0)
939 	  {
940 	     if (lex(&val, &str) != tNUMBER)
941 		error(8);	/* invalid/non-constant expression */
942 	     fline = (int)val;
943 
944 	     while (*lptr == ' ')
945 	        lptr++;			/* skip whitespace */
946 	     if (*lptr == '"')
947                {
948 		  char pathname[PATH_MAX];
949 
950 		  lptr = getstring(pathname, sizeof pathname);
951 		  if (pathname[0] != '\0')
952 		    {
953 		       free(inpfname);
954 		       inpfname = strdup(pathname);
955 		       if (!inpfname)
956 		          error(103);	/* insufficient memory */
957 		    }		/* if */
958 	       }
959 	  }			/* if */
960 	check_empty(lptr);
961 	break;
962      case tpASSERT:
963 	if (skiplevel == 0 && (sc_debug & sCHKBOUNDS) != 0)
964 	  {
965 	     preproc_expr(&val, NULL);	/* get constant expression (or 0 on error) */
966 	     if (!val)
967 		error(7);	/* assertion failed */
968 	     check_empty(lptr);
969 	  }			/* if */
970 	break;
971      case tpPRAGMA:
972 	if (skiplevel == 0)
973 	  {
974 	     if (lex(&val, &str) == tSYMBOL)
975 	       {
976 		  if (strcmp(str, "ctrlchar") == 0)
977 		    {
978 		       if (lex(&val, &str) != tNUMBER)
979 			  error(27);	/* invalid character constant */
980 		       sc_ctrlchar = (char)val;
981 		    }
982 		  else if (strcmp(str, "compress") == 0)
983 		    {
984 		       cell                compval;
985 
986 		       preproc_expr(&compval, NULL);
987 		       sc_compress = (int)compval;	/* switch code packing on/off */
988 		    }
989 		  else if (strcmp(str, "dynamic") == 0)
990 		    {
991 		       preproc_expr(&sc_stksize, NULL);
992 		    }
993 		  else if (strcmp(str, "library") == 0)
994 		    {
995 		       char                name[sNAMEMAX + 1];
996 
997 		       while (*lptr <= ' ' && *lptr != '\0')
998 			  lptr++;
999 		       if (*lptr == '"')
1000 			 {
1001 			    lptr = getstring(name, sizeof name);
1002 			 }
1003 		       else
1004 			 {
1005 			    int                 i;
1006 
1007 			    for (i = 0;
1008                                  (i < (int)(sizeof(name)) - 1) &&
1009                                  (alphanum(*lptr));
1010 				 i++, lptr++)
1011 			       name[i] = *lptr;
1012 			    name[i] = '\0';
1013 			 }	/* if */
1014 		       if (name[0] == '\0')
1015 			 {
1016 			    curlibrary = NULL;
1017 			 }
1018 		       else
1019 			 {
1020 			    if (strlen(name) > sEXPMAX)
1021 			       error(220, name, sEXPMAX);	/* exported symbol is truncated */
1022 			    /* add the name if it does not yet exist in the table */
1023 			    if (!find_constval(&libname_tab, name, 0))
1024 			       curlibrary =
1025 				  append_constval(&libname_tab, name, 0, 0);
1026 			 }	/* if */
1027 		    }
1028 		  else if (strcmp(str, "pack") == 0)
1029 		    {
1030 		       cell                packval;
1031 
1032 		       preproc_expr(&packval, NULL);	/* default = packed/unpacked */
1033 		       sc_packstr = (int)packval;
1034 		    }
1035 		  else if (strcmp(str, "rational") == 0)
1036 		    {
1037 		       char                name[sNAMEMAX + 1];
1038 		       cell                digits = 0;
1039 		       int                 i;
1040 
1041 		       /* first gather all information, start with the tag name */
1042 		       while ((*lptr <= ' ') && (*lptr != '\0'))
1043 			  lptr++;
1044 		       for (i = 0;
1045                             (i < (int)(sizeof(name)) - 1) &&
1046                             (alphanum(*lptr));
1047 			    i++, lptr++)
1048 			  name[i] = *lptr;
1049 		       name[i] = '\0';
1050 		       /* then the precision (for fixed point arithmetic) */
1051 		       while (*lptr <= ' ' && *lptr != '\0')
1052 			  lptr++;
1053 		       if (*lptr == '(')
1054 			 {
1055 			    preproc_expr(&digits, NULL);
1056 			    if (digits <= 0 || digits > 9)
1057 			      {
1058 				 error(68);	/* invalid rational number precision */
1059 				 digits = 0;
1060 			      }	/* if */
1061 			    if (*lptr == ')')
1062 			       lptr++;
1063 			 }	/* if */
1064 		       /* add the tag (make it public) and check the values */
1065 		       i = sc_addtag(name);
1066 		       exporttag(i);
1067 		       if (sc_rationaltag == 0
1068 			   || (sc_rationaltag == i
1069 			       && rational_digits == (int)digits))
1070 			 {
1071 			    sc_rationaltag = i;
1072 			    rational_digits = (int)digits;
1073 			 }
1074 		       else
1075 			 {
1076 			    error(69);	/* rational number format already set, can only be set once */
1077 			 }	/* if */
1078 		    }
1079 		  else if (strcmp(str, "semicolon") == 0)
1080 		    {
1081 		       cell                semicolval;
1082 
1083 		       preproc_expr(&semicolval, NULL);
1084 		       sc_needsemicolon = (int)semicolval;
1085 		    }
1086 		  else if (strcmp(str, "tabsize") == 0)
1087 		    {
1088 		       cell                tabsizeval;
1089 
1090 		       preproc_expr(&tabsizeval, NULL);
1091 		       sc_tabsize = (int)tabsizeval;
1092 		    }
1093 		  else if (strcmp(str, "align") == 0)
1094 		    {
1095 		       sc_alignnext = TRUE;
1096 		    }
1097 		  else if (strcmp(str, "unused") == 0)
1098 		    {
1099 		       char                name[sNAMEMAX + 1];
1100 		       int                 i, comma;
1101 		       symbol             *sym;
1102 
1103 		       do
1104 			 {
1105 			    /* get the name */
1106 			    while ((*lptr <= ' ') && (*lptr != '\0'))
1107 			       lptr++;
1108 			    for (i = 0;
1109                                  (i < (int)(sizeof(name)) - 1) &&
1110                                  (sc_isalpha(*lptr));
1111 				 i++, lptr++)
1112 			       name[i] = *lptr;
1113 			    name[i] = '\0';
1114 			    /* get the symbol */
1115 			    sym = findloc(name);
1116 			    if (!sym)
1117 			       sym = findglb(name);
1118 			    if (sym)
1119 			      {
1120 				 sym->usage |= uREAD;
1121 				 if (sym->ident == iVARIABLE
1122 				     || sym->ident == iREFERENCE
1123 				     || sym->ident == iARRAY
1124 				     || sym->ident == iREFARRAY)
1125 				    sym->usage |= uWRITTEN;
1126 			      }
1127 			    else
1128 			      {
1129 				 error(17, name);	/* undefined symbol */
1130 			      }	/* if */
1131 			    /* see if a comma follows the name */
1132 			    while (*lptr <= ' ' && *lptr != '\0')
1133 			       lptr++;
1134 			    comma = (*lptr == ',');
1135 			    if (comma)
1136 			       lptr++;
1137 			 }
1138 		       while (comma);
1139 		    }
1140 		  else
1141 		    {
1142 		       error(207);	/* unknown #pragma */
1143 		    }		/* if */
1144 	       }
1145 	     else
1146 	       {
1147 		  error(207);	/* unknown #pragma */
1148 	       }		/* if */
1149 	     check_empty(lptr);
1150 	  }			/* if */
1151 	break;
1152      case tpENDINPUT:
1153      case tpENDSCRPT:
1154 	if (skiplevel == 0)
1155 	  {
1156 	     check_empty(lptr);
1157 	     assert(inpf != NULL);
1158 	     if (inpf != inpf_org)
1159 		sc_closesrc(inpf);
1160 	     inpf = NULL;
1161 	  }			/* if */
1162 	break;
1163 #if !defined NOEMIT
1164      case tpEMIT:
1165 	{
1166 	   /* write opcode to output file */
1167 	   char                name[41];
1168 	   int                 i;
1169 
1170 	   while (*lptr <= ' ' && *lptr != '\0')
1171 	      lptr++;
1172 	   for (i = 0; i < 40 && (sc_isalpha(*lptr) || *lptr == '.'); i++, lptr++)
1173 	      name[i] = (char)tolower(*lptr);
1174 	   name[i] = '\0';
1175 	   stgwrite("\t");
1176 	   stgwrite(name);
1177 	   stgwrite(" ");
1178 	   code_idx += opcodes(1);
1179 	   /* write parameter (if any) */
1180 	   while (*lptr <= ' ' && *lptr != '\0')
1181 	      lptr++;
1182 	   if (*lptr != '\0')
1183 	     {
1184 		symbol             *sym;
1185 
1186 		tok = lex(&val, &str);
1187 		switch (tok)
1188 		  {
1189 		  case tNUMBER:
1190 		  case tRATIONAL:
1191 		     outval(val, FALSE);
1192 		     code_idx += opargs(1);
1193 		     break;
1194 		  case tSYMBOL:
1195 		     sym = findloc(str);
1196 		     if (!sym)
1197 			sym = findglb(str);
1198 		     if (!sym || (sym->ident != iFUNCTN
1199 			 && sym->ident != iREFFUNC
1200 			 && (sym->usage & uDEFINE) == 0))
1201 		       {
1202 			  error(17, str);	/* undefined symbol */
1203 		       }
1204 		     else
1205 		       {
1206 			  outval(sym->addr, FALSE);
1207 			  /* mark symbol as "used", unknown whether for read or write */
1208 			  markusage(sym, uREAD | uWRITTEN);
1209 			  code_idx += opargs(1);
1210 		       }	/* if */
1211 		     break;
1212 		  default:
1213 		     {
1214 			char                s2[20];
1215 			extern char        *sc_tokens[];	/* forward declaration */
1216 
1217                         if (tok < 256)
1218                           {
1219                              s2[0] = (char)tok;
1220                              s2[1] = 0;
1221                           }
1222                         else
1223                           {
1224                              strncpy(s2, sc_tokens[tok - tFIRST], 19);
1225                              s2[19] = 0;
1226                           }
1227 			error(1, sc_tokens[tSYMBOL - tFIRST], s2);
1228 			break;
1229 		     }		/* case */
1230 		  }		/* switch */
1231 	     }			/* if */
1232 	   stgwrite("\n");
1233 	   check_empty(lptr);
1234 	   break;
1235 	}			/* case */
1236 #endif
1237 #if !defined NO_DEFINE
1238      case tpDEFINE:
1239 	{
1240 	   ret = CMD_DEFINE;
1241 	   if (skiplevel == 0)
1242 	     {
1243 		char               *pattern, *substitution;
1244 		char               *start, *end;
1245 		int                 count, prefixlen;
1246 		stringpair         *def;
1247 
1248 		/* find the pattern to match */
1249 		while (*lptr <= ' ' && *lptr != '\0')
1250 		   lptr++;
1251 		start = lptr;	/* save starting point of the match pattern */
1252 		count = 0;
1253 		while (*lptr > ' ' && *lptr != '\0')
1254 		  {
1255 		     litchar(&lptr, FALSE);	/* litchar() advances "lptr" and handles escape characters */
1256 		     count++;
1257 		  }		/* while */
1258 		end = lptr;
1259 		/* check pattern to match */
1260 		if (!sc_isalpha(*start) && *start != '_')
1261 		  {
1262 		     error(74);	/* pattern must start with an alphabetic character */
1263 		     break;
1264 		  }		/* if */
1265 		/* store matched pattern */
1266 		pattern = malloc(count + 1);
1267 		if (!pattern)
1268 		   error(103);	/* insufficient memory */
1269 		lptr = start;
1270 		count = 0;
1271 		while (lptr != end)
1272 		  {
1273 		     assert(lptr < end);
1274 		     assert(*lptr != '\0');
1275 		     pattern[count++] = (char)litchar(&lptr, FALSE);
1276 		  }		/* while */
1277 		pattern[count] = '\0';
1278 		/* special case, erase trailing variable, because it could match anything */
1279 		if (count >= 2 && sc_isdigit(pattern[count - 1])
1280 		    && pattern[count - 2] == '%')
1281 		   pattern[count - 2] = '\0';
1282 		/* find substitution string */
1283 		while (*lptr <= ' ' && *lptr != '\0')
1284 		   lptr++;
1285 		start = lptr;	/* save starting point of the match pattern */
1286 		count = 0;
1287 		end = NULL;
1288 		while (*lptr != '\0')
1289 		  {
1290 		     /* keep position of the start of trailing whitespace */
1291 		     if (*lptr <= ' ')
1292 		       {
1293 			  if (!end)
1294 			     end = lptr;
1295 		       }
1296 		     else
1297 		       {
1298 			  end = NULL;
1299 		       }	/* if */
1300 		     count++;
1301 		     lptr++;
1302 		  }		/* while */
1303 		if (!end)
1304 		   end = lptr;
1305 		/* store matched substitution */
1306 		substitution = malloc(count + 1);	/* +1 for '\0' */
1307 		if (!substitution)
1308 		   error(103);	/* insufficient memory */
1309 		lptr = start;
1310 		count = 0;
1311 		while (lptr != end)
1312 		  {
1313 		     assert(lptr < end);
1314 		     assert(*lptr != '\0');
1315 		     substitution[count++] = *lptr++;
1316 		  }		/* while */
1317 		substitution[count] = '\0';
1318 		/* check whether the definition already exists */
1319 		for (prefixlen = 0, start = pattern;
1320 		     sc_isalpha(*start) || sc_isdigit(*start) || *start == '_';
1321 		     prefixlen++, start++)
1322 		   /* nothing */ ;
1323 		assert(prefixlen > 0);
1324 		if ((def = find_subst(pattern, prefixlen)))
1325 		  {
1326 		     if (strcmp(def->first, pattern) != 0
1327 			 || strcmp(def->second, substitution) != 0)
1328 			error(201, pattern);	/* redefinition of macro (non-identical) */
1329 		     delete_subst(pattern, prefixlen);
1330 		  }		/* if */
1331 		/* add the pattern/substitution pair to the list */
1332 		assert(pattern[0] != '\0');
1333 		insert_subst(pattern, substitution, prefixlen);
1334 		free(pattern);
1335 		free(substitution);
1336 	     }			/* if */
1337 	   break;
1338 	}			/* case */
1339      case tpUNDEF:
1340 	if (skiplevel == 0)
1341 	  {
1342 	     if (lex(&val, &str) == tSYMBOL)
1343 	       {
1344 		  if (!delete_subst(str, strlen(str)))
1345 		     error(17, str);	/* undefined symbol */
1346 	       }
1347 	     else
1348 	       {
1349 		  error(20, str);	/* invalid symbol name */
1350 	       }		/* if */
1351 	     check_empty(lptr);
1352 	  }			/* if */
1353 	break;
1354 #endif
1355      default:
1356 	error(31);		/* unknown compiler directive */
1357 	ret = skiplevel > 0 ? CMD_DIRECTIVE : CMD_NONE;	/* line must be processed (if skiplevel==0) */
1358      }				/* switch */
1359    return ret;
1360 }
1361 
1362 #if !defined NO_DEFINE
1363 static int
is_startstring(char * string)1364 is_startstring(char *string)
1365 {
1366    if (*string == '\"' || *string == '\'')
1367       return TRUE;		/* "..." */
1368 
1369    if (*string == '!')
1370      {
1371 	string++;
1372 	if (*string == '\"' || *string == '\'')
1373 	   return TRUE;		/* !"..." */
1374 	if (*string == sc_ctrlchar)
1375 	  {
1376 	     string++;
1377 	     if (*string == '\"' || *string == '\'')
1378 		return TRUE;	/* !\"..." */
1379 	  }			/* if */
1380      }
1381    else if (*string == sc_ctrlchar)
1382      {
1383 	string++;
1384 	if (*string == '\"' || *string == '\'')
1385 	   return TRUE;		/* \"..." */
1386 	if (*string == '!')
1387 	  {
1388 	     string++;
1389 	     if (*string == '\"' || *string == '\'')
1390 		return TRUE;	/* \!"..." */
1391 	  }			/* if */
1392      }				/* if */
1393 
1394    return FALSE;
1395 }
1396 
1397 static char        *
skipstring(char * string)1398 skipstring(char *string)
1399 {
1400    char                endquote;
1401    int                 rawstring = FALSE;
1402 
1403    while (*string == '!' || *string == sc_ctrlchar)
1404      {
1405 	rawstring = (*string == sc_ctrlchar);
1406 	string++;
1407      }				/* while */
1408 
1409    endquote = *string;
1410    assert(endquote == '\"' || endquote == '\'');
1411    string++;			/* skip open quote */
1412    while (*string != endquote && *string != '\0')
1413       litchar(&string, rawstring);
1414    return string;
1415 }
1416 
1417 static char        *
skippgroup(char * string)1418 skippgroup(char *string)
1419 {
1420    int                 nest = 0;
1421    char                open = *string;
1422    char                close;
1423 
1424    switch (open)
1425      {
1426      case '(':
1427 	close = ')';
1428 	break;
1429      case '{':
1430 	close = '}';
1431 	break;
1432      case '[':
1433 	close = ']';
1434 	break;
1435      case '<':
1436 	close = '>';
1437 	break;
1438      default:
1439 	assert(0);
1440 	close = '\0';		/* only to avoid a compiler warning */
1441      }				/* switch */
1442 
1443    string++;
1444    while (*string != close || nest > 0)
1445      {
1446 	if (*string == open)
1447 	   nest++;
1448 	else if (*string == close)
1449 	   nest--;
1450 	else if (is_startstring(string))
1451 	   string = skipstring(string);
1452 	if (*string == '\0')
1453 	   break;
1454 	string++;
1455      }				/* while */
1456    return string;
1457 }
1458 
1459 static char        *
strdel(char * str,size_t len)1460 strdel(char *str, size_t len)
1461 {
1462    size_t              length = strlen(str);
1463 
1464    if (len > length)
1465       len = length;
1466    memmove(str, str + len, length - len + 1);	/* include EOS byte */
1467    return str;
1468 }
1469 
1470 static char        *
strins(char * dest,char * src,size_t srclen)1471 strins(char *dest, char *src, size_t srclen)
1472 {
1473    size_t              destlen = strlen(dest);
1474 
1475    assert(srclen <= strlen(src));
1476    memmove(dest + srclen, dest, destlen + 1);	/* include EOS byte */
1477    memcpy(dest, src, srclen);
1478    return dest;
1479 }
1480 
1481 static int
substpattern(char * line,size_t buffersize,char * pattern,char * substitution)1482 substpattern(char *line, size_t buffersize, char *pattern, char *substitution)
1483 {
1484    int                 prefixlen;
1485    char               *p, *s, *e, *args[10];
1486    int                 is_match, arg, len;
1487 
1488    memset(args, 0, sizeof args);
1489 
1490    /* check the length of the prefix */
1491    for (prefixlen = 0, s = pattern; sc_isalpha(*s) || sc_isdigit(*s) || *s == '_';
1492 	prefixlen++, s++)
1493       /* nothing */ ;
1494    assert(prefixlen > 0);
1495    assert(strncmp(line, pattern, prefixlen) == 0);
1496 
1497    /* pattern prefix matches; match the rest of the pattern, gather
1498     * the parameters
1499     */
1500    s = line + prefixlen;
1501    p = pattern + prefixlen;
1502    is_match = TRUE;		/* so far, pattern matches */
1503    while (is_match && *s != '\0' && *p != '\0')
1504      {
1505 	if (*p == '%')
1506 	  {
1507 	     p++;		/* skip '%' */
1508 	     if (sc_isdigit(*p))
1509 	       {
1510 		  arg = *p - '0';
1511 		  assert(arg >= 0 && arg <= 9);
1512 		  p++;		/* skip parameter id */
1513 		  assert(*p != '\0');
1514 		  /* match the source string up to the character after the digit
1515 		   * (skipping strings in the process
1516 		   */
1517 		  e = s;
1518 		  while (*e != *p && *e != '\0' && *e != '\n')
1519 		    {
1520 		       if (is_startstring(e))	/* skip strings */
1521 			  e = skipstring(e);
1522 		       else if (strchr("({[", *e))	/* skip parenthized groups */
1523 			  e = skippgroup(e);
1524 		       if (*e != '\0')
1525 			  e++;	/* skip non-alphapetic character (or closing quote of
1526 				 * a string, or the closing paranthese of a group) */
1527 		    }		/* while */
1528 		  /* store the parameter (overrule any earlier) */
1529 		  if (args[arg])
1530 		     free(args[arg]);
1531 		  len = (int)(e - s);
1532 		  args[arg] = malloc(len + 1);
1533 		  if (!args[arg])
1534 		     error(103);	/* insufficient memory */
1535 		  strncpy(args[arg], s, len);
1536 		  args[arg][len] = '\0';
1537 		  /* character behind the pattern was matched too */
1538 		  if (*e == *p)
1539 		    {
1540 		       s = e + 1;
1541 		    }
1542 		  else if (*e == '\n' && *p == ';' && *(p + 1) == '\0'
1543 			   && !sc_needsemicolon)
1544 		    {
1545 		       s = e;	/* allow a trailing ; in the pattern match to end of line */
1546 		    }
1547 		  else
1548 		    {
1549 		       assert(*e == '\0' || *e == '\n');
1550 		       is_match = FALSE;
1551 		       s = e;
1552 		    }		/* if */
1553 		  p++;
1554 	       }
1555 	     else
1556 	       {
1557 		  is_match = FALSE;
1558 	       }		/* if */
1559 	  }
1560 	else if (*p == ';' && *(p + 1) == '\0' && !sc_needsemicolon)
1561 	  {
1562 	     /* source may be ';' or end of the line */
1563 	     while (*s <= ' ' && *s != '\0')
1564 		s++;		/* skip white space */
1565 	     if (*s != ';' && *s != '\0')
1566 		is_match = FALSE;
1567 	     p++;		/* skip the semicolon in the pattern */
1568 	  }
1569 	else
1570 	  {
1571 	     cell                ch;
1572 
1573 	     /* skip whitespace between two non-alphanumeric characters, except
1574 	      * for two identical symbols
1575 	      */
1576 	     assert(p > pattern);
1577 	     if (!alphanum(*p) && *(p - 1) != *p)
1578 		while (*s <= ' ' && *s != '\0')
1579 		   s++;		/* skip white space */
1580 	     ch = litchar(&p, FALSE);	/* this increments "p" */
1581 	     if (*s != ch)
1582 		is_match = FALSE;
1583 	     else
1584 		s++;		/* this character matches */
1585 	  }			/* if */
1586      }				/* while */
1587 
1588    if (is_match && *p == '\0')
1589      {
1590 	/* if the last character to match is an alphanumeric character, the
1591 	 * current character in the source may not be alphanumeric
1592 	 */
1593 	assert(p > pattern);
1594 	if (alphanum(*(p - 1)) && alphanum(*s))
1595 	   is_match = FALSE;
1596      }				/* if */
1597 
1598    if (is_match)
1599      {
1600 	/* calculate the length of the substituted string */
1601 	for (e = substitution, len = 0; *e != '\0'; e++)
1602 	  {
1603 	     if (*e == '%' && sc_isdigit(*(e + 1)))
1604 	       {
1605 		  arg = *(e + 1) - '0';
1606 		  assert(arg >= 0 && arg <= 9);
1607 		  if (args[arg])
1608 		     len += strlen(args[arg]);
1609 		  e++;		/* skip %, digit is skipped later */
1610 	       }
1611 	     else
1612 	       {
1613 		  len++;
1614 	       }		/* if */
1615 	  }			/* for */
1616 	/* check length of the string after substitution */
1617 	if (strlen(line) + len - (int)(s - line) > buffersize)
1618 	  {
1619 	     error(75);		/* line too long */
1620 	  }
1621 	else
1622 	  {
1623 	     /* substitute pattern */
1624 	     strdel(line, (int)(s - line));
1625 	     for (e = substitution, s = line; *e != '\0'; e++)
1626 	       {
1627 		  if (*e == '%' && sc_isdigit(*(e + 1)))
1628 		    {
1629 		       arg = *(e + 1) - '0';
1630 		       assert(arg >= 0 && arg <= 9);
1631 		       if (args[arg])
1632 			 {
1633 			    strins(s, args[arg], strlen(args[arg]));
1634 			    s += strlen(args[arg]);
1635 			 }	/* if */
1636 		       e++;	/* skip %, digit is skipped later */
1637 		    }
1638 		  else
1639 		    {
1640 		       strins(s, e, 1);
1641 		       s++;
1642 		    }		/* if */
1643 	       }		/* for */
1644 	  }			/* if */
1645      }				/* if */
1646 
1647    for (arg = 0; arg < 10; arg++)
1648       if (args[arg])
1649 	 free(args[arg]);
1650 
1651    return is_match;
1652 }
1653 
1654 static void
substallpatterns(char * line,int buffersize)1655 substallpatterns(char *line, int buffersize)
1656 {
1657    char               *start, *end;
1658    int                 prefixlen;
1659    stringpair         *subst;
1660 
1661    start = line;
1662    while (*start != '\0')
1663      {
1664 	/* find the start of a prefix (skip all non-alphabetic characters),
1665 	 * also skip strings
1666 	 */
1667 	while (!sc_isalpha(*start) && *start != '_' && *start != '\0')
1668 	  {
1669 	     /* skip strings */
1670 	     if (is_startstring(start))
1671 	       {
1672 		  start = skipstring(start);
1673 		  if (*start == '\0')
1674 		     break;	/* abort loop on error */
1675 	       }		/* if */
1676 	     start++;		/* skip non-alphapetic character (or closing quote of a string) */
1677 	  }			/* while */
1678 	if (*start == '\0')
1679 	   break;		/* abort loop on error */
1680 	/* get the prefix (length), look for a matching definition */
1681 	prefixlen = 0;
1682 	end = start;
1683 	while (sc_isalpha(*end) || sc_isdigit(*end) || *end == '_')
1684 	  {
1685 	     prefixlen++;
1686 	     end++;
1687 	  }			/* while */
1688 	assert(prefixlen > 0);
1689 	subst = find_subst(start, prefixlen);
1690 	if (subst)
1691 	  {
1692 	     /* properly match the pattern and substitute */
1693 	     if (!substpattern
1694 		 (start, buffersize - (start - line), subst->first,
1695 		  subst->second))
1696 		start = end;	/* match failed, skip this prefix */
1697 	     /* match succeeded: do not update "start", because the substitution text
1698 	      * may be matched by other macros
1699 	      */
1700 	  }
1701 	else
1702 	  {
1703 	     start = end;	/* no macro with this prefix, skip this prefix */
1704 	  }			/* if */
1705      }				/* while */
1706 }
1707 #endif
1708 
1709 /*  preprocess
1710  *
1711  *  Reads a line by readline() into "pline" and performs basic preprocessing:
1712  *  deleting comments, skipping lines with false "#if.." code and recognizing
1713  *  other compiler directives. There is an indirect recursion: lex() calls
1714  *  preprocess() if a new line must be read, preprocess() calls command(),
1715  *  which at his turn calls lex() to identify the token.
1716  *
1717  *  Global references: lptr     (altered)
1718  *                     pline    (altered)
1719  *                     freading (referred to only)
1720  */
1721 void
preprocess(void)1722 preprocess(void)
1723 {
1724    int                 iscommand;
1725 
1726    if (!freading)
1727       return;
1728    do
1729      {
1730 	readline(pline);
1731 	stripcom(pline);	/* ??? no need for this when reading back from list file (in the second pass) */
1732 	lptr = pline;		/* set "line pointer" to start of the parsing buffer */
1733 	iscommand = command();
1734 	if (iscommand != CMD_NONE)
1735 	   errorset(sRESET);	/* reset error flag ("panic mode") on empty line or directive */
1736 #if !defined NO_DEFINE
1737 	if (iscommand == CMD_NONE)
1738 	  {
1739 	     assert(lptr != term_expr);
1740 	     substallpatterns(pline, sLINEMAX);
1741 	     lptr = pline;	/* reset "line pointer" to start of the parsing buffer */
1742 	  }			/* if */
1743 #endif
1744      }
1745    while (iscommand != CMD_NONE && iscommand != CMD_TERM && freading);	/* enddo */
1746 }
1747 
1748 static char        *
unpackedstring(char * sptr,int rawstring)1749 unpackedstring(char *sptr, int rawstring)
1750 {
1751    while (*sptr != '\0')
1752      {
1753 	/* check for doublequotes indicating the end of the string */
1754 	if (*sptr == '\"')
1755 	{
1756 	   /* check whether there's another pair of quotes following.
1757 	    * If so, paste the two strings together, thus
1758 	    * "pants""off" becomes "pantsoff"
1759 	    */
1760 	   if (*(sptr + 1) == '\"')
1761 	      sptr += 2;
1762 	   else
1763 	      break;
1764 	}
1765 
1766 	if (*sptr == '\a')
1767 	  {			/* ignore '\a' (which was inserted at a line concatenation) */
1768 	     sptr++;
1769 	     continue;
1770 	  }			/* if */
1771 	stowlit(litchar(&sptr, rawstring));	/* litchar() alters "lptr" */
1772      }				/* while */
1773    stowlit(0);			/* terminate string */
1774    return sptr;
1775 }
1776 
1777 static char        *
packedstring(char * sptr,int rawstring)1778 packedstring(char *sptr, int rawstring)
1779 {
1780    int                 i;
1781    ucell               val, c;
1782 
1783    i = sizeof(ucell) - (charbits / 8);	/* start at most significant byte */
1784    val = 0;
1785    while (*sptr != '\0')
1786      {
1787 	/* check for doublequotes indicating the end of the string */
1788 	if (*sptr == '\"')
1789 	{
1790 	   /* check whether there's another pair of quotes following.
1791 	    * If so, paste the two strings together, thus
1792 	    * "pants""off" becomes "pantsoff"
1793 	    */
1794 	   if (*(sptr + 1) == '\"')
1795 	      sptr += 2;
1796 	   else
1797 	      break;
1798 	}
1799 
1800 	if (*sptr == '\a')
1801 	  {			/* ignore '\a' (which was inserted at a line concatenation) */
1802 	     sptr++;
1803 	     continue;
1804 	  }			/* if */
1805 	c = litchar(&sptr, rawstring);	/* litchar() alters "sptr" */
1806 	if (c >= (ucell) (1 << charbits))
1807 	   error(43);		/* character constant exceeds range */
1808 	val |= (c << 8 * i);
1809 	if (i == 0)
1810 	  {
1811 	     stowlit(val);
1812 	     val = 0;
1813 	  }			/* if */
1814 	i = (i + sizeof(ucell) - (charbits / 8)) % sizeof(ucell);
1815      }				/* if */
1816    /* save last code; make sure there is at least one terminating zero character */
1817    if (i != (int)(sizeof(ucell) - (charbits / 8)))
1818       stowlit(val);		/* at least one zero character in "val" */
1819    else
1820       stowlit(0);		/* add full cell of zeros */
1821    return sptr;
1822 }
1823 
1824 /*  lex(lexvalue,lexsym)        Lexical Analysis
1825  *
1826  *  lex() first deletes leading white space, then checks for multi-character
1827  *  operators, keywords (including most compiler directives), numbers,
1828  *  labels, symbols and literals (literal characters are converted to a number
1829  *  and are returned as such). If every check fails, the line must contain
1830  *  a single-character operator. So, lex() returns this character. In the other
1831  *  case (something did match), lex() returns the number of the token. All
1832  *  these tokens have been assigned numbers above 255.
1833  *
1834  *  Some tokens have "attributes":
1835  *     tNUMBER        the value of the number is return in "lexvalue".
1836  *     tRATIONAL      the value is in IEEE 754 encoding or in fixed point
1837  *                    encoding in "lexvalue".
1838  *     tSYMBOL        the first sNAMEMAX characters of the symbol are
1839  *                    stored in a buffer, a pointer to this buffer is
1840  *                    returned in "lexsym".
1841  *     tLABEL         the first sNAMEMAX characters of the label are
1842  *                    stored in a buffer, a pointer to this buffer is
1843  *                    returned in "lexsym".
1844  *     tSTRING        the string is stored in the literal pool, the index
1845  *                    in the literal pool to this string is stored in
1846  *                    "lexvalue".
1847  *
1848  *  lex() stores all information (the token found and possibly its attribute)
1849  *  in global variables. This allows a token to be examined twice. If "_pushed"
1850  *  is true, this information is returned.
1851  *
1852  *  Global references: lptr          (altered)
1853  *                     fline         (referred to only)
1854  *                     litidx        (referred to only)
1855  *                     _lextok, _lexval, _lexstr
1856  *                     _pushed
1857  */
1858 
1859 static int          _pushed;
1860 static int          _lextok;
1861 static cell         _lexval;
1862 static char         _lexstr[sLINEMAX + 1];
1863 static int          _lexnewline;
1864 
1865 void
lexinit(void)1866 lexinit(void)
1867 {
1868    stkidx = 0;			/* index for pushstk() and popstk() */
1869    iflevel = 0;			/* preprocessor: nesting of "#if" */
1870    skiplevel = 0;		/* preprocessor: skipping lines or compiling lines */
1871    icomment = FALSE;		/* currently not in a multiline comment */
1872    _pushed = FALSE;		/* no token pushed back into lex */
1873    _lexnewline = FALSE;
1874 }
1875 
1876 char               *sc_tokens[] = {
1877    "*=", "/=", "%=", "+=", "-=", "<<=", ">>>=", ">>=", "&=", "^=", "|=",
1878    "||", "&&", "==", "!=", "<=", ">=", "<<", ">>>", ">>", "++", "--",
1879    "...", "..",
1880    "assert", "break", "case", "char", "const", "continue", "default",
1881    "defined", "do", "else", "enum", "exit", "for", "forward", "goto",
1882    "if", "native", "new", "operator", "public", "return", "sizeof",
1883    "sleep", "static", "stock", "switch", "tagof", "while",
1884    "#assert", "#define", "#else", "#emit", "#endif", "#endinput",
1885    "#endscript", "#file", "#if", "#include", "#line", "#pragma", "#undef",
1886    ";", ";", "-integer value-", "-rational value-", "-identifier-",
1887    "-label-", "-string-"
1888 };
1889 
1890 int
lex(cell * lexvalue,char ** lexsym)1891 lex(cell * lexvalue, char **lexsym)
1892 {
1893    int                 i, toolong, newline, rawstring;
1894    char              **tokptr;
1895 
1896    if (_pushed)
1897      {
1898 	_pushed = FALSE;	/* reset "_pushed" flag */
1899 	*lexvalue = _lexval;
1900 	*lexsym = _lexstr;
1901 	return _lextok;
1902      }				/* if */
1903 
1904    _lextok = 0;			/* preset all values */
1905    _lexval = 0;
1906    _lexstr[0] = '\0';
1907    *lexvalue = _lexval;
1908    *lexsym = _lexstr;
1909    _lexnewline = FALSE;
1910    if (!freading)
1911       return 0;
1912 
1913    newline = (lptr == pline);	/* does lptr point to start of line buffer */
1914    while (*lptr <= ' ')
1915      {				/* delete leading white space */
1916 	if (*lptr == '\0')
1917 	  {
1918 	     preprocess();	/* preprocess resets "lptr" */
1919 	     if (!freading)
1920 		return 0;
1921 	     if (lptr == term_expr)	/* special sequence to terminate a pending expression */
1922 		return (_lextok = tENDEXPR);
1923 	     _lexnewline = TRUE;	/* set this after preprocess(), because
1924 					 * preprocess() calls lex() recursively */
1925 	     newline = TRUE;
1926 	  }
1927 	else
1928 	  {
1929 	     lptr += 1;
1930 	  }			/* if */
1931      }				/* while */
1932    if (newline)
1933      {
1934 	stmtindent = 0;
1935 	for (i = 0; i < (int)(lptr - pline); i++)
1936 	   if (pline[i] == '\t' && sc_tabsize > 0)
1937 	      stmtindent +=
1938 		 (int)(sc_tabsize - (stmtindent + sc_tabsize) % sc_tabsize);
1939 	   else
1940 	      stmtindent++;
1941      }				/* if */
1942 
1943    i = tFIRST;
1944    tokptr = sc_tokens;
1945    while (i <= tMIDDLE)
1946      {				/* match multi-character operators */
1947 	if (match(*tokptr, FALSE))
1948 	  {
1949 	     _lextok = i;
1950 	     return _lextok;
1951 	  }			/* if */
1952 	i += 1;
1953 	tokptr += 1;
1954      }				/* while */
1955    while (i <= tLAST)
1956      {				/* match reserved words and compiler directives */
1957 	if (match(*tokptr, TRUE))
1958 	  {
1959 	     _lextok = i;
1960 	     errorset(sRESET);	/* reset error flag (clear the "panic mode") */
1961 	     return _lextok;
1962 	  }			/* if */
1963 	i += 1;
1964 	tokptr += 1;
1965      }				/* while */
1966 
1967    if ((i = number(&_lexval, lptr)) != 0)
1968      {				/* number */
1969 	_lextok = tNUMBER;
1970 	*lexvalue = _lexval;
1971 	lptr += i;
1972      }
1973    else if ((i = ftoi(&_lexval, lptr)) != 0)
1974      {
1975 	_lextok = tRATIONAL;
1976 	*lexvalue = _lexval;
1977 	lptr += i;
1978      }
1979    else if (alpha(*lptr))
1980      {				/* symbol or label */
1981 	/*  Note: only sNAMEMAX characters are significant. The compiler
1982 	 *        generates a warning if a symbol exceeds this length.
1983 	 */
1984 	_lextok = tSYMBOL;
1985 	i = 0;
1986 	toolong = 0;
1987 	while (alphanum(*lptr))
1988 	  {
1989 	     _lexstr[i] = *lptr;
1990 	     lptr += 1;
1991 	     if (i < sNAMEMAX)
1992 		i += 1;
1993 	     else
1994 		toolong = 1;
1995 	  }			/* while */
1996 	_lexstr[i] = '\0';
1997 	if (toolong)
1998 	   error(200, _lexstr, sNAMEMAX);	/* symbol too long, truncated to sNAMEMAX chars */
1999 	if (_lexstr[0] == PUBLIC_CHAR && _lexstr[1] == '\0')
2000 	  {
2001 	     _lextok = PUBLIC_CHAR;	/* '@' all alone is not a symbol, it is an operator */
2002 	  }
2003 	else if (_lexstr[0] == '_' && _lexstr[1] == '\0')
2004 	  {
2005 	     _lextok = '_';	/* '_' by itself is not a symbol, it is a placeholder */
2006 	  }			/* if */
2007 	if (*lptr == ':' && sc_allowtags && _lextok != PUBLIC_CHAR)
2008 	  {
2009 	     _lextok = tLABEL;	/* it wasn't a normal symbol, it was a label/tagname */
2010 	     lptr += 1;		/* skip colon */
2011 	  }			/* if */
2012      }
2013    else if (*lptr == '\"' || (*lptr == sc_ctrlchar && *(lptr + 1) == '\"'))
2014      {				/* unpacked string literal */
2015 	_lextok = tSTRING;
2016 	rawstring = (*lptr == sc_ctrlchar);
2017 	*lexvalue = _lexval = litidx;
2018 	lptr += 1;		/* skip double quote */
2019 	if (rawstring)
2020 	   lptr += 1;		/* skip "escape" character too */
2021 	lptr =
2022 	   sc_packstr ? packedstring(lptr, rawstring) : unpackedstring(lptr,
2023 								       rawstring);
2024 	if (*lptr == '\"')
2025 	   lptr += 1;		/* skip final quote */
2026 	else
2027 	   error(37);		/* invalid (non-terminated) string */
2028      }
2029    else if ((*lptr == '!' && *(lptr + 1) == '\"')
2030 	    || (*lptr == '!' && *(lptr + 1) == sc_ctrlchar && *(lptr + 2) == '\"')
2031 	    || (*lptr == sc_ctrlchar && *(lptr + 1) == '!'
2032 	    && *(lptr + 2) == '\"'))
2033      {				/* packed string literal */
2034 	_lextok = tSTRING;
2035 	rawstring = (*lptr == sc_ctrlchar || *(lptr + 1) == sc_ctrlchar);
2036 	*lexvalue = _lexval = litidx;
2037 	lptr += 2;		/* skip exclamation point and double quote */
2038 	if (rawstring)
2039 	   lptr += 1;		/* skip "escape" character too */
2040 	lptr =
2041 	   sc_packstr ? unpackedstring(lptr, rawstring) : packedstring(lptr,
2042 								       rawstring);
2043 	if (*lptr == '\"')
2044 	   lptr += 1;		/* skip final quote */
2045 	else
2046 	   error(37);		/* invalid (non-terminated) string */
2047      }
2048    else if (*lptr == '\'')
2049      {				/* character literal */
2050 	lptr += 1;		/* skip quote */
2051 	_lextok = tNUMBER;
2052 	*lexvalue = _lexval = litchar(&lptr, FALSE);
2053 	if (*lptr == '\'')
2054 	   lptr += 1;		/* skip final quote */
2055 	else
2056 	   error(27);		/* invalid character constant (must be one character) */
2057      }
2058    else if (*lptr == ';')
2059      {				/* semicolumn resets "error" flag */
2060 	_lextok = ';';
2061 	lptr += 1;
2062 	errorset(sRESET);	/* reset error flag (clear the "panic mode") */
2063      }
2064    else
2065      {
2066 	_lextok = *lptr;	/* if every match fails, return the character */
2067 	lptr += 1;		/* increase the "lptr" pointer */
2068      }				/* if */
2069    return _lextok;
2070 }
2071 
2072 /*  lexpush
2073  *
2074  *  Pushes a token back, so the next call to lex() will return the token
2075  *  last examined, instead of a new token.
2076  *
2077  *  Only one token can be pushed back.
2078  *
2079  *  In fact, lex() already stores the information it finds into global
2080  *  variables, so all that is to be done is set a flag that informs lex()
2081  *  to read and return the information from these variables, rather than
2082  *  to read in a new token from the input file.
2083  */
2084 void
lexpush(void)2085 lexpush(void)
2086 {
2087    assert(_pushed == FALSE);
2088    _pushed = TRUE;
2089 }
2090 
2091 /*  lexclr
2092  *
2093  *  Sets the variable "_pushed" to 0 to make sure lex() will read in a new
2094  *  symbol (a not continue with some old one). This is required upon return
2095  *  from Assembler mode.
2096  */
2097 void
lexclr(int clreol)2098 lexclr(int clreol)
2099 {
2100    _pushed = FALSE;
2101    if (clreol)
2102      {
2103 	lptr = strchr(pline, '\0');
2104 	assert(lptr != NULL);
2105      }				/* if */
2106 }
2107 
2108 /*  matchtoken
2109  *
2110  *  This routine is useful if only a simple check is needed. If the token
2111  *  differs from the one expected, it is pushed back.
2112  */
2113 int
matchtoken(int token)2114 matchtoken(int token)
2115 {
2116    cell                val;
2117    char               *str;
2118    int                 tok;
2119 
2120    tok = lex(&val, &str);
2121    if (tok == token || (token == tTERM && (tok == ';' || tok == tENDEXPR)))
2122      {
2123 	return 1;
2124      }
2125    else if (!sc_needsemicolon && token == tTERM && (_lexnewline || !freading))
2126      {
2127 	lexpush();		/* push "tok" back, we use the "hidden" newline token */
2128 	return 1;
2129      }
2130    else
2131      {
2132 	lexpush();
2133 	return 0;
2134      }				/* if */
2135 }
2136 
2137 /*  tokeninfo
2138  *
2139  *  Returns additional information of a token after using "matchtoken()"
2140  *  or needtoken(). It does no harm using this routine after a call to
2141  *  "lex()", but lex() already returns the same information.
2142  *
2143  *  The token itself is the return value. Normally, this one is already known.
2144  */
2145 int
tokeninfo(cell * val,char ** str)2146 tokeninfo(cell * val, char **str)
2147 {
2148    /* if the token was pushed back, tokeninfo() returns the token and
2149     * parameters of the *next* token, not of the *current* token.
2150     */
2151    assert(!_pushed);
2152    *val = _lexval;
2153    *str = _lexstr;
2154    return _lextok;
2155 }
2156 
2157 /*  needtoken
2158  *
2159  *  This routine checks for a required token and gives an error message if
2160  *  it isn't there (and returns FALSE in that case).
2161  *
2162  *  Global references: _lextok;
2163  */
2164 int
needtoken(int token)2165 needtoken(int token)
2166 {
2167    char                s1[20], s2[20];
2168 
2169    if (matchtoken(token))
2170      {
2171 	return TRUE;
2172      }
2173    else
2174      {
2175 	/* token already pushed back */
2176 	assert(_pushed);
2177         if (token < 256)
2178           {
2179              s1[0] = (char)token; /* single character token */
2180              s1[1] = 0;
2181           }
2182         else
2183           {
2184              strncpy(s1, sc_tokens[token - tFIRST], 19); /* multi-character symbol */
2185              s1[19] = 0;
2186           }
2187 
2188         if (!freading)
2189           {
2190              strncpy(s2, "-end of file-", 19);
2191           }
2192         else if (_lextok < 256)
2193           {
2194              s2[0] = (char)_lextok;
2195              s2[1] = 0;
2196           }
2197         else
2198           {
2199              strncpy(s2, sc_tokens[_lextok - tFIRST], 19);
2200           }
2201         s2[19] = 0;
2202 	error(1, s1, s2);	/* expected ..., but found ... */
2203 	return FALSE;
2204      }				/* if */
2205 }
2206 
2207 /*  match
2208  *
2209  *  Compares a series of characters from the input file with the characters
2210  *  in "st" (that contains a token). If the token on the input file matches
2211  *  "st", the input file pointer "lptr" is adjusted to point to the next
2212  *  token, otherwise "lptr" remains unaltered.
2213  *
2214  *  If the parameter "end: is true, match() requires that the first character
2215  *  behind the recognized token is non-alphanumeric.
2216  *
2217  *  Global references: lptr   (altered)
2218  */
2219 static int
match(char * st,int end)2220 match(char *st, int end)
2221 {
2222    int                 k;
2223    char               *ptr;
2224 
2225    k = 0;
2226    ptr = lptr;
2227    while (st[k])
2228      {
2229 	if (st[k] != *ptr)
2230 	   return 0;
2231 	k += 1;
2232 	ptr += 1;
2233      }				/* while */
2234    if (end)
2235      {				/* symbol must terminate with non-alphanumeric char */
2236 	if (alphanum(*ptr))
2237 	   return 0;
2238      }				/* if */
2239    lptr = ptr;			/* match found, skip symbol */
2240    return 1;
2241 }
2242 
2243 /*  stowlit
2244  *
2245  *  Stores a value into the literal queue. The literal queue is used for
2246  *  literal strings used in functions and for initializing array variables.
2247  *
2248  *  Global references: litidx  (altered)
2249  *                     litq    (altered)
2250  */
2251 void
stowlit(cell val)2252 stowlit(cell val)
2253 {
2254    if (litidx >= litmax)
2255      {
2256 	cell               *p;
2257 
2258 	litmax += sDEF_LITMAX;
2259 	p = (cell *) realloc(litq, litmax * sizeof(cell));
2260 	if (!p)
2261 	   error(102, "literal table");	/* literal table overflow (fatal error) */
2262 	litq = p;
2263      }				/* if */
2264    assert(litidx < litmax);
2265    litq[litidx++] = val;
2266 }
2267 
2268 /*  litchar
2269  *
2270  *  Return current literal character and increase the pointer to point
2271  *  just behind this literal character.
2272  *
2273  *  Note: standard "escape sequences" are suported, but the backslash may be
2274  *        replaced by another character; the syntax '\ddd' is supported,
2275  *        but ddd must be decimal!
2276  */
2277 static cell
litchar(char ** p_str,int rawmode)2278 litchar(char **p_str, int rawmode)
2279 {
2280    cell                c = 0;
2281    unsigned char      *cptr;
2282 
2283    cptr = (unsigned char *)*p_str;
2284    if (rawmode || *cptr != sc_ctrlchar)
2285      {				/* no escape character */
2286 	c = *cptr;
2287 	cptr += 1;
2288      }
2289    else
2290      {
2291 	cptr += 1;
2292 	if (*cptr == sc_ctrlchar)
2293 	  {
2294 	     c = *cptr;		/* \\ == \ (the escape character itself) */
2295 	     cptr += 1;
2296 	  }
2297 	else
2298 	  {
2299 	     switch (*cptr)
2300 	       {
2301 	       case 'a':	/* \a == audible alarm */
2302 		  c = 7;
2303 		  cptr += 1;
2304 		  break;
2305 	       case 'b':	/* \b == backspace */
2306 		  c = 8;
2307 		  cptr += 1;
2308 		  break;
2309 	       case 'e':	/* \e == escape */
2310 		  c = 27;
2311 		  cptr += 1;
2312 		  break;
2313 	       case 'f':	/* \f == form feed */
2314 		  c = 12;
2315 		  cptr += 1;
2316 		  break;
2317 	       case 'n':	/* \n == NewLine character */
2318 		  c = 10;
2319 		  cptr += 1;
2320 		  break;
2321 	       case 'r':	/* \r == carriage return */
2322 		  c = 13;
2323 		  cptr += 1;
2324 		  break;
2325 	       case 't':	/* \t == horizontal TAB */
2326 		  c = 9;
2327 		  cptr += 1;
2328 		  break;
2329 	       case 'v':	/* \v == vertical TAB */
2330 		  c = 11;
2331 		  cptr += 1;
2332 		  break;
2333 	       case '\'':	/* \' == ' (single quote) */
2334 	       case '"':	/* \" == " (single quote) */
2335 	       case '%':	/* \% == % (percent) */
2336 		  c = *cptr;
2337 		  cptr += 1;
2338 		  break;
2339 	       default:
2340 		  if (sc_isdigit(*cptr))
2341 		    {		/* \ddd */
2342 		       c = 0;
2343 		       while (*cptr >= '0' && *cptr <= '9')	/* decimal! */
2344 			  c = c * 10 + *cptr++ - '0';
2345 		       if (*cptr == ';')
2346 			  cptr++;	/* swallow a trailing ';' */
2347 		    }
2348 		  else
2349 		    {
2350 		       error(27);	/* invalid character constant */
2351 		    }		/* if */
2352 	       }		/* switch */
2353 	  }			/* if */
2354      }				/* if */
2355    *p_str = (char *)cptr;
2356    assert(c >= 0 && c < 256);
2357    return c;
2358 }
2359 
2360 /*  alpha
2361  *
2362  *  Test if character "c" is alphabetic ("a".."z"), an underscore ("_")
2363  *  or an "at" sign ("@"). The "@" is an extension to standard C.
2364  */
2365 static int
alpha(char c)2366 alpha(char c)
2367 {
2368    return (sc_isalpha(c) || c == '_' || c == PUBLIC_CHAR);
2369 }
2370 
2371 /*  alphanum
2372  *
2373  *  Test if character "c" is alphanumeric ("a".."z", "0".."9", "_" or "@")
2374  */
2375 int
alphanum(char c)2376 alphanum(char c)
2377 {
2378    return (alpha(c) || sc_isdigit(c));
2379 }
2380 
2381 /* The local variable table must be searched backwards, so that the deepest
2382  * nesting of local variables is searched first. The simplest way to do
2383  * this is to insert all new items at the head of the list.
2384  * In the global list, the symbols are kept in sorted order, so that the
2385  * public functions are written in sorted order.
2386  */
2387 static symbol      *
add_symbol(symbol * root,symbol * entry,int sort)2388 add_symbol(symbol * root, symbol * entry, int sort)
2389 {
2390    symbol             *newsym;
2391 
2392    if (sort)
2393       while (root->next && strcmp(entry->name, root->next->name) > 0)
2394 	 root = root->next;
2395 
2396    if (!(newsym = (symbol *)malloc(sizeof(symbol))))
2397      {
2398 	error(103);
2399 	return NULL;
2400      }				/* if */
2401    memcpy(newsym, entry, sizeof(symbol));
2402    newsym->next = root->next;
2403    root->next = newsym;
2404    return newsym;
2405 }
2406 
2407 static void
free_symbol(symbol * sym)2408 free_symbol(symbol * sym)
2409 {
2410    arginfo            *arg;
2411 
2412    /* free all sub-symbol allocated memory blocks, depending on the
2413     * kind of the symbol
2414     */
2415    assert(sym != NULL);
2416    if (sym->ident == iFUNCTN)
2417      {
2418 	/* run through the argument list; "default array" arguments
2419 	 * must be freed explicitly; the tag list must also be freed */
2420 	assert(sym->dim.arglist != NULL);
2421 	for (arg = sym->dim.arglist; arg->ident != 0; arg++)
2422 	  {
2423 	     if (arg->ident == iREFARRAY && arg->hasdefault)
2424 		free(arg->defvalue.array.data);
2425 	     else if (arg->ident == iVARIABLE
2426 		      && ((arg->hasdefault & uSIZEOF) != 0
2427 			  || (arg->hasdefault & uTAGOF) != 0))
2428 		free(arg->defvalue.size.symname);
2429 	     assert(arg->tags != NULL);
2430 	     free(arg->tags);
2431 	  }			/* for */
2432 	free(sym->dim.arglist);
2433      }				/* if */
2434    assert(sym->refer != NULL);
2435    free(sym->refer);
2436    free(sym);
2437 }
2438 
2439 void
delete_symbol(symbol * root,symbol * sym)2440 delete_symbol(symbol * root, symbol * sym)
2441 {
2442    /* find the symbol and its predecessor
2443     * (this function assumes that you will never delete a symbol that is not
2444     * in the table pointed at by "root")
2445     */
2446    assert(root != sym);
2447    while (root->next != sym)
2448      {
2449 	root = root->next;
2450 	assert(root != NULL);
2451      }				/* while */
2452 
2453    /* unlink it, then free it */
2454    root->next = sym->next;
2455    free_symbol(sym);
2456 }
2457 
2458 void
delete_symbols(symbol * root,int level,int delete_labels,int delete_functions)2459 delete_symbols(symbol * root, int level, int delete_labels,
2460 	       int delete_functions)
2461 {
2462    symbol             *sym;
2463 
2464    /* erase only the symbols with a deeper nesting level than the
2465     * specified nesting level */
2466    while (root->next)
2467      {
2468 	sym = root->next;
2469 	if (sym->compound < level)
2470 	   break;
2471 	if ((delete_labels || sym->ident != iLABEL)
2472 	    && (delete_functions || sym->ident != iFUNCTN
2473 		|| (sym->usage & uNATIVE) != 0) && (delete_functions
2474 						    || sym->ident != iCONSTEXPR
2475 						    || (sym->usage & uPREDEF) ==
2476 						    0) && (delete_functions
2477 							   || (sym->ident !=
2478 							       iVARIABLE
2479 							       && sym->ident !=
2480 							       iARRAY)))
2481 	  {
2482 	     root->next = sym->next;
2483 	     free_symbol(sym);
2484 	  }
2485 	else
2486 	  {
2487 	     /* if the function was prototyped, but not implemented in this source,
2488 	      * mark it as such, so that its use can be flagged
2489 	      */
2490 	     if (sym->ident == iFUNCTN && (sym->usage & uDEFINE) == 0)
2491 		sym->usage |= uMISSING;
2492 	     if (sym->ident == iFUNCTN || sym->ident == iVARIABLE
2493 		 || sym->ident == iARRAY)
2494 		sym->usage &= ~uDEFINE;	/* clear "defined" flag */
2495 	     /* for user defined operators, also remove the "prototyped" flag, as
2496 	      * user-defined operators *must* be declared before use
2497 	      */
2498 	     if (sym->ident == iFUNCTN && !sc_isalpha(*sym->name)
2499 		 && *sym->name != '_' && *sym->name != PUBLIC_CHAR)
2500 		sym->usage &= ~uPROTOTYPED;
2501 	     root = sym;	/* skip the symbol */
2502 	  }			/* if */
2503      }				/* if */
2504 }
2505 
2506 /* The purpose of the hash is to reduce the frequency of a "name"
2507  * comparison (which is costly). There is little interest in avoiding
2508  * clusters in similar names, which is why this function is plain simple.
2509  */
2510 unsigned int
namehash(char * name)2511 namehash(char *name)
2512 {
2513    unsigned char      *ptr = (unsigned char *)name;
2514    int                 len = strlen(name);
2515 
2516    if (len == 0)
2517       return 0L;
2518    assert(len < 256);
2519    return (len << 24Lu) + (ptr[0] << 16Lu) + (ptr[len - 1] << 8Lu) +
2520       (ptr[len >> 1Lu]);
2521 }
2522 
2523 static symbol      *
find_symbol(symbol * root,char * name,int fnum)2524 find_symbol(symbol * root, char *name, int fnum)
2525 {
2526    symbol             *ptr = root->next;
2527    unsigned long       hash = namehash(name);
2528 
2529    while (ptr)
2530      {
2531 	if (hash == ptr->hash && strcmp(name, ptr->name) == 0
2532 	    && !ptr->parent && (ptr->fnumber < 0
2533 				       || ptr->fnumber == fnum))
2534 	   return ptr;
2535 	ptr = ptr->next;
2536      }				/* while */
2537    return NULL;
2538 }
2539 
2540 static symbol      *
find_symbol_child(symbol * root,symbol * sym)2541 find_symbol_child(symbol * root, symbol * sym)
2542 {
2543    symbol             *ptr = root->next;
2544 
2545    while (ptr)
2546      {
2547 	if (ptr->parent == sym)
2548 	   return ptr;
2549 	ptr = ptr->next;
2550      }				/* while */
2551    return NULL;
2552 }
2553 
2554 /* Adds "bywhom" to the list of referrers of "entry". Typically,
2555  * bywhom will be the function that uses a variable or that calls
2556  * the function.
2557  */
2558 int
refer_symbol(symbol * entry,symbol * bywhom)2559 refer_symbol(symbol * entry, symbol * bywhom)
2560 {
2561    int                 count;
2562 
2563    assert(bywhom != NULL);	/* it makes no sense to add a "void" referrer */
2564    assert(entry != NULL);
2565    assert(entry->refer != NULL);
2566 
2567    /* see if it is already there */
2568    for (count = 0; count < entry->numrefers && entry->refer[count] != bywhom;
2569 	count++)
2570       /* nothing */ ;
2571    if (count < entry->numrefers)
2572      {
2573 	assert(entry->refer[count] == bywhom);
2574 	return TRUE;
2575      }				/* if */
2576 
2577    /* see if there is an empty spot in the referrer list */
2578    for (count = 0; count < entry->numrefers && entry->refer[count];
2579 	count++)
2580       /* nothing */ ;
2581    assert(count <= entry->numrefers);
2582    if (count == entry->numrefers)
2583      {
2584 	symbol            **refer;
2585 	int                 newsize = 2 * entry->numrefers;
2586 
2587 	assert(newsize > 0);
2588 	/* grow the referrer list */
2589 	refer = (symbol **) realloc(entry->refer, newsize * sizeof(symbol *));
2590 	if (!refer)
2591 	   return FALSE;	/* insufficient memory */
2592 	/* initialize the new entries */
2593 	entry->refer = refer;
2594 	for (count = entry->numrefers; count < newsize; count++)
2595 	   entry->refer[count] = NULL;
2596 	count = entry->numrefers;	/* first empty spot */
2597 	entry->numrefers = newsize;
2598      }				/* if */
2599 
2600    /* add the referrer */
2601    assert(entry->refer[count] == NULL);
2602    entry->refer[count] = bywhom;
2603    return TRUE;
2604 }
2605 
2606 void
markusage(symbol * sym,int usage)2607 markusage(symbol * sym, int usage)
2608 {
2609    sym->usage |= (char)usage;
2610    /* check if (global) reference must be added to the symbol */
2611    if ((usage & (uREAD | uWRITTEN)) != 0)
2612      {
2613 	/* only do this for global symbols */
2614 	if (sym->vclass == sGLOBAL)
2615 	  {
2616 	     /* "curfunc" should always be valid, since statements may not occurs
2617 	      * outside functions; in the case of syntax errors, however, the
2618 	      * compiler may arrive through this function
2619 	      */
2620 	     if (curfunc)
2621 		refer_symbol(sym, curfunc);
2622 	  }			/* if */
2623      }				/* if */
2624 }
2625 
2626 /*  findglb
2627  *
2628  *  Returns a pointer to the global symbol (if found) or NULL (if not found)
2629  */
2630 symbol     *
findglb(char * name)2631 findglb(char *name)
2632 {
2633    return find_symbol(&glbtab, name, fcurrent);
2634 }
2635 
2636 /*  findloc
2637  *
2638  *  Returns a pointer to the local symbol (if found) or NULL (if not found).
2639  *  See add_symbol() how the deepest nesting level is searched first.
2640  */
2641 symbol     *
findloc(char * name)2642 findloc(char *name)
2643 {
2644    return find_symbol(&loctab, name, -1);
2645 }
2646 
2647 symbol     *
findconst(char * name)2648 findconst(char *name)
2649 {
2650    symbol             *sym;
2651 
2652    sym = find_symbol(&loctab, name, -1);	/* try local symbols first */
2653    if (!sym || sym->ident != iCONSTEXPR)	/* not found, or not a constant */
2654       sym = find_symbol(&glbtab, name, fcurrent);
2655    if (!sym || sym->ident != iCONSTEXPR)
2656       return NULL;
2657    assert(sym->parent == NULL);	/* constants have no hierarchy */
2658    return sym;
2659 }
2660 
2661 symbol     *
finddepend(symbol * parent)2662 finddepend(symbol * parent)
2663 {
2664    symbol             *sym;
2665 
2666    sym = find_symbol_child(&loctab, parent);	/* try local symbols first */
2667    if (!sym)		/* not found */
2668       sym = find_symbol_child(&glbtab, parent);
2669    return sym;
2670 }
2671 
2672 /*  addsym
2673  *
2674  *  Adds a symbol to the symbol table (either global or local variables,
2675  *  or global and local constants).
2676  */
2677 symbol     *
addsym(char * name,cell addr,int ident,int vclass,int tag,int usage)2678 addsym(char *name, cell addr, int ident, int vclass, int tag, int usage)
2679 {
2680    symbol              entry, **refer;
2681 
2682    /* global variables/constants/functions may only be defined once */
2683    assert(!(ident == iFUNCTN || ident == iCONSTEXPR) || vclass != sGLOBAL
2684 	  || findglb(name) == NULL);
2685    /* labels may only be defined once */
2686    assert(ident != iLABEL || findloc(name) == NULL);
2687 
2688    /* create an empty referrer list */
2689    if (!(refer = (symbol **)malloc(sizeof(symbol *))))
2690      {
2691 	error(103);		/* insufficient memory */
2692 	return NULL;
2693      }				/* if */
2694    *refer = NULL;
2695 
2696    /* first fill in the entry */
2697    strncpy(entry.name, name, sizeof(entry.name) - 1);
2698    entry.name[sizeof(entry.name) - 1] = 0;
2699    entry.hash = namehash(name);
2700    entry.addr = addr;
2701    entry.vclass = (char)vclass;
2702    entry.ident = (char)ident;
2703    entry.tag = tag;
2704    entry.usage = (char)usage;
2705    entry.compound = 0;		/* may be overridden later */
2706    entry.fnumber = -1;		/* assume global visibility (ignored for local symbols) */
2707    entry.numrefers = 1;
2708    entry.refer = refer;
2709    entry.parent = NULL;
2710 
2711    /* then insert it in the list */
2712    if (vclass == sGLOBAL)
2713       return add_symbol(&glbtab, &entry, TRUE);
2714    else
2715       return add_symbol(&loctab, &entry, FALSE);
2716 }
2717 
2718 symbol     *
addvariable(char * name,cell addr,int ident,int vclass,int tag,int dim[],int numdim,int idxtag[])2719 addvariable(char *name, cell addr, int ident, int vclass, int tag,
2720 	    int dim[], int numdim, int idxtag[])
2721 {
2722    symbol             *sym, *parent, *top;
2723    int                 level;
2724 
2725    sym = findglb(name);
2726    /* global variables may only be defined once */
2727    assert(vclass != sGLOBAL || sym  == NULL
2728 	  || (sym->usage & uDEFINE) == 0);
2729 
2730    if (ident == iARRAY || ident == iREFARRAY)
2731      {
2732 	parent = NULL;
2733 	sym = NULL;		/* to avoid a compiler warning */
2734 	for (level = 0; level < numdim; level++)
2735 	  {
2736 	     top = addsym(name, addr, ident, vclass, tag, uDEFINE);
2737 	     top->dim.array.length = dim[level];
2738 	     top->dim.array.level = (short)(numdim - level - 1);
2739 	     top->x.idxtag = idxtag[level];
2740 	     top->parent = parent;
2741 	     parent = top;
2742 	     if (level == 0)
2743 		sym = top;
2744 	  }			/* for */
2745      }
2746    else
2747      {
2748 	sym = addsym(name, addr, ident, vclass, tag, uDEFINE);
2749      }				/* if */
2750    return sym;
2751 }
2752 
2753 /*  getlabel
2754  *
2755  *  Return next available internal label number.
2756  */
2757 int
getlabel(void)2758 getlabel(void)
2759 {
2760    return labnum++;
2761 }
2762 
2763 /*  itoh
2764  *
2765  *  Converts a number to a hexadecimal string and returns a pointer to that
2766  *  string.
2767  */
2768 char       *
itoh(ucell val)2769 itoh(ucell val)
2770 {
2771    static char         itohstr[15];	/* hex number is 10 characters long at most */
2772    char               *ptr;
2773    int                 i, nibble[8];	/* a 32-bit hexadecimal cell has 8 nibbles */
2774    int                 max;
2775 
2776 #if defined(BIT16)
2777    max = 4;
2778 #else
2779    max = 8;
2780 #endif
2781    ptr = itohstr;
2782    for (i = 0; i < max; i += 1)
2783      {
2784 	nibble[i] = (int)(val & 0x0f);	/* nibble 0 is lowest nibble */
2785 	val >>= 4;
2786      }				/* endfor */
2787    i = max - 1;
2788    while (nibble[i] == 0 && i > 0)	/* search for highest non-zero nibble */
2789       i -= 1;
2790    while (i >= 0)
2791      {
2792 	if (nibble[i] >= 10)
2793 	   *ptr++ = (char)('a' + (nibble[i] - 10));
2794 	else
2795 	   *ptr++ = (char)('0' + nibble[i]);
2796 	i -= 1;
2797      }				/* while */
2798    *ptr = '\0';			/* and a zero-terminator */
2799    return itohstr;
2800 }
2801