1 /*
2 ** Splint - annotation-assisted static program checker
3 ** Copyright (C) 1994-2003 University of Virginia,
4 **         Massachusetts Institute of Technology
5 **
6 ** This program is free software; you can redistribute it and/or modify it
7 ** under the terms of the GNU General Public License as published by the
8 ** Free Software Foundation; either version 2 of the License, or (at your
9 ** option) any later version.
10 **
11 ** This program is distributed in the hope that it will be useful, but
12 ** WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ** General Public License for more details.
15 **
16 ** The GNU General Public License is available from http://www.gnu.org/ or
17 ** the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18 ** MA 02111-1307, USA.
19 **
20 ** For information on splint: info@splint.org
21 ** To report a bug: splint-bug@splint.org
22 ** For more information: http://www.splint.org
23 */
/*
** scanline.c
**
** Scan one line of Larch SL input at a time.
**
**	The input is source text, one line at a time.  The output is a
**	sequence of tokens, reported by call-out LSLScanFreshToken.
**
**	This organization allows implementation of line-at-a-time
**	incremental scanning.  The incremental mechanism is in the
**	driving module scan.c, which can save and replay tokens for
**	unchanged lines.  This module works either way.
**
**	The main loop of the scanner keys on the leading character.
**	Actions within the loop collect the token starting with that
**	character and determine its kind.
*/
41 
42 # include "splintMacros.nf"
43 # include "basic.h"
44 # include "signature.h"
45 # include "signature2.h"
46 # include "scan.h"
47 # include "scanline.h"
48 # include "tokentable.h"
49 # include "syntable.h"
50 
/*
** MOVECHAR appends the current character c to the token buffer and steps
** to the next input character: c is reloaded from currentLine and
** colNumber is incremented.  It expands in place and expects c, bufptr
** and currentLine to be in scope at the point of use.
**
** tokenBuffer holds SCANMAXCHAR bytes; the last one is kept free for the
** '\0' that the scanner stores after collecting a token.  A character
** that would not fit is still consumed from the input but is not stored,
** so an over-long token or comment is truncated instead of being written
** past the end of the buffer.
*/
/*@notfunction@*/
# define MOVECHAR() \
  do { \
    if (bufptr < &tokenBuffer[SCANMAXCHAR - 1]) { *bufptr++ = c; } \
    c = *currentLine++; \
    colNumber++; \
  } while (FALSE)
53 
54 /* evans 2003-04-21: changed name to avoid conflict with MS VC++ */
55 /*@constant static int SCANMAXCHAR;@*/
56 # define SCANMAXCHAR 512
57 
58 /*@constant static int TABSIZE;@*/
59 # define TABSIZE 8
60 
61 static void LocalUserError (/*@temp@*/ char *p_msg);
62 
63 static charClassData charClass[LASTCHAR + 1];
64 
65 static int colNumber;
66 static int startCol;
67 static bool reportEOL;
68 static bool reportComments;
69 
70 static char tokenBuffer[SCANMAXCHAR];
71 
72 static const charClassData charClassDef[] =
73 {
74   /* Control characters					 */
75 
76   { CHC_NULL, TRUE },      /*  0 NULL		 */
77   { SINGLECHAR, FALSE },        /*  1 CTRL-A		 */
78   { SINGLECHAR, FALSE },	/*  2 CTRL-B		 */
79   { SINGLECHAR, FALSE },	/*  3 CTRL-C		 */
80   { SINGLECHAR, FALSE },	/*  4 CTRL-D		 */
81   { SINGLECHAR, FALSE },	/*  5 CTRL-E		 */
82   { SINGLECHAR, FALSE },	/*  6 CTRL-F		 */
83   { SINGLECHAR, FALSE },	/*  7 CTRL-G		 */
84   { SINGLECHAR, FALSE },	/*  8 CTRL-H		 */
85 
86   /* defined formatting characters			 */
87 
88   { WHITECHAR, FALSE },		/*  9 CTRL-I   TAB	 */
89   { WHITECHAR, TRUE },		/* 10 CTRL-J   EOL	 */
90 
91   /* more control characters				 */
92 
93   { SINGLECHAR, FALSE },	/* 11 CTRL-K		 */
94   { SINGLECHAR, FALSE },	/* 12 CTRL-L		 */
95   { SINGLECHAR, FALSE },	/* 13 CTRL-M		 */
96   { SINGLECHAR, FALSE },	/* 14 CTRL-N		 */
97   { SINGLECHAR, FALSE },	/* 15 CTRL-O		 */
98   { SINGLECHAR, FALSE },	/* 16 CTRL-P		 */
99   { SINGLECHAR, FALSE },	/* 17 CTRL-Q		 */
100   { SINGLECHAR, FALSE },	/* 18 CTRL-R		 */
101   { SINGLECHAR, FALSE },	/* 19 CTRL-S		 */
102   { SINGLECHAR, FALSE },	/* 20 CTRL-T		 */
103   { SINGLECHAR, FALSE },	/* 21 CTRL-U		 */
104   { SINGLECHAR, FALSE },	/* 22 CTRL-V		 */
105   { SINGLECHAR, FALSE },	/* 23 CTRL-W		 */
106   { SINGLECHAR, FALSE },	/* 24 CTRL-X		 */
107   { SINGLECHAR, FALSE },	/* 25 CTRL-Y		 */
108   { SINGLECHAR, FALSE },	/* 26 CTRL-Z		 */
109   { SINGLECHAR, FALSE },	/* 27 CTRL-[   ESC	 */
110   { SINGLECHAR, FALSE },	/* 28 CTRL-slash         */
111   { SINGLECHAR, FALSE },	/* 29 CTRL-]   GS	 */
112   { SINGLECHAR, FALSE },	/* 30 CTRL-^   RS	 */
113   { SINGLECHAR, FALSE },	/* 31 CTRL-_   US	 */
114 
115   /* Special printing characters			 */
116 
117   { WHITECHAR, FALSE },		/* 32 space		 */
118   { SINGLECHAR, FALSE },	/* 33 !			 */
119   { SINGLECHAR, FALSE },	/* 34 "			 */
120   { SINGLECHAR, FALSE },	/* 35 #			 */
121   { SINGLECHAR, FALSE },	/* 36 $			 */
122   { SINGLECHAR, FALSE },	/* 37 %			 */
123   { SINGLECHAR, FALSE },	/* 38 &			 */
124   { SINGLECHAR, FALSE },	/* 39 '			 */
125 
126   /* Reserved characters				 */
127 
128   { PERMCHAR, FALSE },		/* 40 (			 */
129   { PERMCHAR, FALSE },		/* 41 )			 */
130   { OPCHAR, FALSE },		/* 42 *			 */
131   { OPCHAR, FALSE },		/* 43 +			 */
132   { PERMCHAR, FALSE },		/* 44 ,			 */
133   { OPCHAR, FALSE },		/* 45 -			 */
134   { OPCHAR, FALSE },		/* 46 .			 */
135   { SLASHCHAR, FALSE },		/* 47 /			 */
136 
137   /* Numbers						 */
138 
139   { IDCHAR, FALSE },		/* 48 0			 */
140   { IDCHAR, FALSE },		/* 49 1			 */
141   { IDCHAR, FALSE },		/* 50 2			 */
142   { IDCHAR, FALSE },		/* 51 3			 */
143   { IDCHAR, FALSE },		/* 52 4			 */
144   { IDCHAR, FALSE },		/* 53 5			 */
145   { IDCHAR, FALSE },		/* 54 6			 */
146   { IDCHAR, FALSE },		/* 55 7			 */
147   { IDCHAR, FALSE },		/* 56 8			 */
148   { IDCHAR, FALSE },		/* 57 9			 */
149 
150   /* More reserved and special printing characters	 */
151 
152   { PERMCHAR, FALSE },		/* 58 :			 */
153   { SINGLECHAR, FALSE },	/* 59;			 */
154   { OPCHAR, FALSE },		/* 60 <			 */
155   { OPCHAR, FALSE },		/* 61 =			 */
156   { OPCHAR, FALSE },		/* 62 >			 */
157   { SINGLECHAR, FALSE },	/* 63 ?			 */
158   { SINGLECHAR, FALSE },	/* 64 @			 */
159 
160   /* Uppercase Alphabetics				 */
161 
162   { IDCHAR, FALSE },		/* 65 A 		 */
163   { IDCHAR, FALSE },		/* 66 B 		 */
164   { IDCHAR, FALSE },		/* 67 C 		 */
165   { IDCHAR, FALSE },		/* 68 D 	 	 */
166   { IDCHAR, FALSE },		/* 69 E 		 */
167   { IDCHAR, FALSE },		/* 70 F			 */
168   { IDCHAR, FALSE },		/* 71 G			 */
169   { IDCHAR, FALSE },		/* 72 H			 */
170   { IDCHAR, FALSE },		/* 73 I			 */
171   { IDCHAR, FALSE },		/* 74 J			 */
172   { IDCHAR, FALSE },		/* 75 K			 */
173   { IDCHAR, FALSE },		/* 76 L			 */
174   { IDCHAR, FALSE },		/* 77 M			 */
175   { IDCHAR, FALSE },		/* 78 N			 */
176   { IDCHAR, FALSE },		/* 79 O			 */
177   { IDCHAR, FALSE },		/* 80 P			 */
178   { IDCHAR, FALSE },		/* 81 Q			 */
179   { IDCHAR, FALSE },		/* 82 R			 */
180   { IDCHAR, FALSE },		/* 83 S			 */
181   { IDCHAR, FALSE },		/* 84 T			 */
182   { IDCHAR, FALSE },		/* 85 U			 */
183   { IDCHAR, FALSE },		/* 86 V			 */
184   { IDCHAR, FALSE },		/* 87 W			 */
185   { IDCHAR, FALSE },		/* 88 X			 */
186   { IDCHAR, FALSE },		/* 89 Y			 */
187   { IDCHAR, FALSE },		/* 90 Z			 */
188 
189   /* Still more reserved and special printing characters */
190 
191   { SINGLECHAR, FALSE },	/* 91 [			 */
192   { CHC_EXTENSION, FALSE },	/* 92 slash		 */
193   { SINGLECHAR, FALSE },	/* 93 ]			 */
194   { SINGLECHAR, FALSE },	/* 94 ^			 */
195   { IDCHAR, FALSE },		/* 95 _			 */
196   { SINGLECHAR, FALSE },	/* 96 `			 */
197 
198   /* Lowercase alphabetics				 */
199 
200   { IDCHAR, FALSE },		/* 97 a			 */
201   { IDCHAR, FALSE },		/* 98 b			 */
202   { IDCHAR, FALSE },		/* 99 c			 */
203   { IDCHAR, FALSE },		/* 100 d		 */
204   { IDCHAR, FALSE },		/* 101 e		 */
205   { IDCHAR, FALSE },		/* 102 f		 */
206   { IDCHAR, FALSE },		/* 103 g		 */
207   { IDCHAR, FALSE },		/* 104 h		 */
208   { IDCHAR, FALSE },		/* 105 i		 */
209   { IDCHAR, FALSE },		/* 106 j		 */
210   { IDCHAR, FALSE },		/* 107 k		 */
211   { IDCHAR, FALSE },		/* 108 l		 */
212   { IDCHAR, FALSE },		/* 109 m		 */
213   { IDCHAR, FALSE },		/* 110 n		 */
214   { IDCHAR, FALSE },		/* 111 o		 */
215   { IDCHAR, FALSE },		/* 112 p		 */
216   { IDCHAR, FALSE },		/* 113 q		 */
217   { IDCHAR, FALSE },		/* 114 r		 */
218   { IDCHAR, FALSE },		/* 115 s		 */
219   { IDCHAR, FALSE },		/* 116 t		 */
220   { IDCHAR, FALSE },		/* 117 u		 */
221   { IDCHAR, FALSE },		/* 118 v		 */
222   { IDCHAR, FALSE },		/* 119 w		 */
223   { IDCHAR, FALSE },		/* 120 x		 */
224   { IDCHAR, FALSE },		/* 121 y		 */
225   { IDCHAR, FALSE },		/* 122 z		 */
226 
227   { SINGLECHAR, FALSE },	/* 123 {		 */
228   { SINGLECHAR, FALSE },	/* 124 |		 */
229   { SINGLECHAR, FALSE },	/* 125 }		 */
230   { SINGLECHAR, FALSE },	/* 126 ~		 */
231   { SINGLECHAR, FALSE },	/* 127 DEL		 */
232 
233   /* MCS - unused in English				 */
234 
235   { SINGLECHAR, FALSE },	/* 128			 */
236   { SINGLECHAR, FALSE },	/* 129			 */
237   { SINGLECHAR, FALSE },	/* 130			 */
238   { SINGLECHAR, FALSE },	/* 131			 */
239   { SINGLECHAR, FALSE },	/* 132			 */
240   { SINGLECHAR, FALSE },	/* 133			 */
241   { SINGLECHAR, FALSE },	/* 134			 */
242   { SINGLECHAR, FALSE },	/* 135			 */
243   { SINGLECHAR, FALSE },	/* 136			 */
244   { SINGLECHAR, FALSE },	/* 137			 */
245   { SINGLECHAR, FALSE },	/* 138			 */
246   { SINGLECHAR, FALSE },	/* 139			 */
247   { SINGLECHAR, FALSE },	/* 140			 */
248   { SINGLECHAR, FALSE },	/* 141			 */
249   { SINGLECHAR, FALSE },	/* 142			 */
250   { SINGLECHAR, FALSE },	/* 143			 */
251   { SINGLECHAR, FALSE },	/* 144			 */
252   { SINGLECHAR, FALSE },	/* 145			 */
253   { SINGLECHAR, FALSE },	/* 146			 */
254   { SINGLECHAR, FALSE },	/* 147			 */
255   { SINGLECHAR, FALSE },	/* 148			 */
256   { SINGLECHAR, FALSE },	/* 149			 */
257   { SINGLECHAR, FALSE },	/* 150			 */
258   { SINGLECHAR, FALSE },	/* 151			 */
259   { SINGLECHAR, FALSE },	/* 152			 */
260   { SINGLECHAR, FALSE },	/* 153			 */
261   { SINGLECHAR, FALSE },	/* 154			 */
262   { SINGLECHAR, FALSE },	/* 155			 */
263   { SINGLECHAR, FALSE },	/* 156			 */
264   { SINGLECHAR, FALSE },	/* 157			 */
265   { SINGLECHAR, FALSE },	/* 158			 */
266   { SINGLECHAR, FALSE },	/* 159			 */
267   { SINGLECHAR, FALSE },	/* 160			 */
268   { SINGLECHAR, FALSE },	/* 161			 */
269   { SINGLECHAR, FALSE },	/* 162			 */
270   { SINGLECHAR, FALSE },	/* 163			 */
271   { SINGLECHAR, FALSE },	/* 164			 */
272   { SINGLECHAR, FALSE },	/* 165			 */
273   { SINGLECHAR, FALSE },	/* 166			 */
274   { SINGLECHAR, FALSE },	/* 167			 */
275   { SINGLECHAR, FALSE },	/* 168			 */
276   { SINGLECHAR, FALSE },	/* 169			 */
277   { SINGLECHAR, FALSE },	/* 170			 */
278   { SINGLECHAR, FALSE },	/* 171			 */
279   { SINGLECHAR, FALSE },	/* 172			 */
280   { SINGLECHAR, FALSE },	/* 173			 */
281   { SINGLECHAR, FALSE },	/* 174			 */
282   { SINGLECHAR, FALSE },	/* 175			 */
283   { SINGLECHAR, FALSE },	/* 176			 */
284   { SINGLECHAR, FALSE },	/* 177			 */
285   { SINGLECHAR, FALSE },	/* 178			 */
286   { SINGLECHAR, FALSE },	/* 179			 */
287   { SINGLECHAR, FALSE },	/* 180			 */
288   { SINGLECHAR, FALSE },	/* 181			 */
289   { SINGLECHAR, FALSE },	/* 182			 */
290   { SINGLECHAR, FALSE },	/* 183			 */
291   { SINGLECHAR, FALSE },	/* 184			 */
292   { SINGLECHAR, FALSE },	/* 185			 */
293   { SINGLECHAR, FALSE },	/* 186			 */
294   { SINGLECHAR, FALSE },	/* 187			 */
295   { SINGLECHAR, FALSE },	/* 188			 */
296   { SINGLECHAR, FALSE },	/* 189			 */
297   { SINGLECHAR, FALSE },	/* 190			 */
298   { SINGLECHAR, FALSE },	/* 191			 */
299   { SINGLECHAR, FALSE },	/* 192			 */
300   { SINGLECHAR, FALSE },	/* 193			 */
301   { SINGLECHAR, FALSE },	/* 194			 */
302   { SINGLECHAR, FALSE },	/* 195			 */
303   { SINGLECHAR, FALSE },	/* 196			 */
304   { SINGLECHAR, FALSE },	/* 197			 */
305   { SINGLECHAR, FALSE },	/* 198			 */
306   { SINGLECHAR, FALSE },	/* 199			 */
307   { SINGLECHAR, FALSE },	/* 200			 */
308   { SINGLECHAR, FALSE },	/* 201			 */
309   { SINGLECHAR, FALSE },	/* 202			 */
310   { SINGLECHAR, FALSE },	/* 203			 */
311   { SINGLECHAR, FALSE },	/* 204			 */
312   { SINGLECHAR, FALSE },	/* 205			 */
313   { SINGLECHAR, FALSE },	/* 206			 */
314   { SINGLECHAR, FALSE },	/* 207			 */
315   { SINGLECHAR, FALSE },	/* 208			 */
316   { SINGLECHAR, FALSE },	/* 209			 */
317   { SINGLECHAR, FALSE },	/* 210			 */
318   { SINGLECHAR, FALSE },	/* 211			 */
319   { SINGLECHAR, FALSE },	/* 212			 */
320   { SINGLECHAR, FALSE },	/* 213			 */
321   { SINGLECHAR, FALSE },	/* 214			 */
322   { SINGLECHAR, FALSE },	/* 215			 */
323   { SINGLECHAR, FALSE },	/* 216			 */
324   { SINGLECHAR, FALSE },	/* 217			 */
325   { SINGLECHAR, FALSE },	/* 218			 */
326   { SINGLECHAR, FALSE },	/* 219			 */
327   { SINGLECHAR, FALSE },	/* 220			 */
328   { SINGLECHAR, FALSE },	/* 221			 */
329   { SINGLECHAR, FALSE },	/* 222			 */
330   { SINGLECHAR, FALSE },	/* 223			 */
331   { SINGLECHAR, FALSE },	/* 224			 */
332   { SINGLECHAR, FALSE },	/* 225			 */
333   { SINGLECHAR, FALSE },	/* 226			 */
334   { SINGLECHAR, FALSE },	/* 227			 */
335   { SINGLECHAR, FALSE },	/* 228			 */
336   { SINGLECHAR, FALSE },	/* 229			 */
337   { SINGLECHAR, FALSE },	/* 230			 */
338   { SINGLECHAR, FALSE },	/* 231			 */
339   { SINGLECHAR, FALSE },	/* 232			 */
340   { SINGLECHAR, FALSE },	/* 233			 */
341   { SINGLECHAR, FALSE },	/* 234			 */
342   { SINGLECHAR, FALSE },	/* 235			 */
343   { SINGLECHAR, FALSE },	/* 236			 */
344   { SINGLECHAR, FALSE },	/* 237			 */
345   { SINGLECHAR, FALSE },	/* 238			 */
346   { SINGLECHAR, FALSE },	/* 239			 */
347   { SINGLECHAR, FALSE },	/* 240			 */
348   { SINGLECHAR, FALSE },	/* 241			 */
349   { SINGLECHAR, FALSE },	/* 242			 */
350   { SINGLECHAR, FALSE },	/* 243			 */
351   { SINGLECHAR, FALSE },	/* 244			 */
352   { SINGLECHAR, FALSE },	/* 245			 */
353   { SINGLECHAR, FALSE },	/* 246			 */
354   { SINGLECHAR, FALSE },	/* 247			 */
355   { SINGLECHAR, FALSE },	/* 248			 */
356   { SINGLECHAR, FALSE },	/* 249			 */
357   { SINGLECHAR, FALSE },	/* 250			 */
358   { SINGLECHAR, FALSE },	/* 251			 */
359   { SINGLECHAR, FALSE },	/* 252			 */
360   { SINGLECHAR, FALSE },	/* 253			 */
361   { SINGLECHAR, FALSE },	/* 254			 */
362   { SINGLECHAR, FALSE }		/* 255			 */
363 };
364 
365 /*
366 **++
367 **  FUNCTION NAME:
368 **
369 **      lscanline ()
370 **
371 **  FORMAL PARAMETERS:
372 **
373 **      None
374 **
375 **  RETURN VALUE:
376 **
377 **      None
378 **
379 **  INVARIANTS:
380 **
381 **      [@description or none@]
382 **
383 **  DESCRIPTION:
384 **
385 **	One line of text is processed.
386 **	Tokens are delivered via the call LSLScanFreshToken ().
387 **
388 **  EXCEPTIONS:
389 **
390 **--
391 */
392 
393 void
lscanLine(char * currentLine)394 lscanLine (char *currentLine)
395 {
396   ltokenCode cod;
397   lsymbol sym;
398   register char c;
399   register char *bufptr;
400   ltoken newToken;
401 
402   c = *currentLine++;
403   colNumber = 0;
404 
405   for (;;)
406     {
407       bufptr = &tokenBuffer[0];
408       startCol = colNumber;
409 
410       /*@-loopswitchbreak@*/
411       switch (lscanCharClass (c))
412 	{
413 
414 	case CHC_NULL:
415 	  sym = lsymbol_fromChars ("E O L");
416 	  cod = LST_EOL;
417 	  break;
418 
419 	 /* Identifiers */
420 
421 	case IDCHAR:
422 
423 	  while (lscanCharClass (c) == IDCHAR)
424 	    {
425 	      MOVECHAR ();
426 	    }
427 
428 	  *bufptr = '\0';
429 	  sym = lsymbol_fromChars (&tokenBuffer[0]);
430 	  cod = LST_SIMPLEID;
431 	  break;
432 
433 	 /* One-character tokens */
434 
435 	case SINGLECHAR:
436 	case PERMCHAR:
437 	  MOVECHAR ();
438 	  *bufptr = '\0';
439 	  sym = lsymbol_fromChars (&tokenBuffer[0]);
440 	  cod = LST_SIMPLEOP;
441 	  break;
442 
443 	case SLASHCHAR:
444 	  if (*currentLine == '\\')
445 	    {
446 	      MOVECHAR ();
447 	      MOVECHAR ();
448 	      *bufptr = '\0';
449 	      sym = lsymbol_fromChars (&tokenBuffer[0]);
450 	      cod = LST_SIMPLEOP;
451 	      break;
452 	    }
453 	  MOVECHAR ();
454 	  /* We fall through to next case if we have / followed  */
455 	  /* by anything else.					 */
456 	  /*@fallthrough@*/
457 	case OPCHAR:
458 
459 	 /* Operator symbols */
460 
461 	 /* possible multi character */
462 	  while (lscanCharClass (c) == OPCHAR)
463 	    {
464 	      MOVECHAR ();
465 	    }
466 
467 	  *bufptr = '\0';	/* null terminate in buffer */
468 	  sym = lsymbol_fromChars (&tokenBuffer[0]);
469 	  cod = LST_SIMPLEOP;
470 	  break;
471 
472 	 /* White space */
473 	case WHITECHAR:
474 	  /*@-switchswitchbreak@*/
475 	  switch (c)
476 	    {
477 	    case '\t':
478 	      MOVECHAR ();
479 	      colNumber--;
480 	      colNumber += TABSIZE;
481 	      colNumber -= (colNumber % TABSIZE);
482 	      break;
483 
484 	    case '\v':
485 	    case '\f':
486 	      MOVECHAR ();
487 	      colNumber--;
488 	      break;
489 
490 	    default:
491 	      MOVECHAR ();
492 	      break;
493 	    }
494 	  *bufptr = '\0';
495 	  sym = lsymbol_fromChars (&tokenBuffer[0]);
496 	  cod = LST_WHITESPACE;
497 	  break;
498 
499 	case CHC_EXTENSION:
500 	  MOVECHAR ();
501 
502 	  switch (c)
503 	    {
504 
505 	     /* open and close */
506 	    case '(':
507 	      MOVECHAR ();
508 	      while (lscanCharClass (c) == IDCHAR)
509 		{
510 		  MOVECHAR ();
511 		}
512 	      *bufptr = '\0';
513 	      sym = lsymbol_fromChars (&tokenBuffer[0]);
514 	      cod = LST_OPENSYM;
515 	      break;
516 
517 	    case ')':
518 	      MOVECHAR ();
519 	      while (lscanCharClass (c) == IDCHAR)
520 		{
521 		  MOVECHAR ();
522 		}
523 	      *bufptr = '\0';
524 	      sym = lsymbol_fromChars (&tokenBuffer[0]);
525 	      cod = LST_CLOSESYM;
526 	      break;
527 
528 	     /* separator */
529 	    case ',':
530 	      MOVECHAR ();
531 	      while (lscanCharClass (c) == IDCHAR)
532 		{
533 		  MOVECHAR ();
534 		}
535 	      *bufptr = '\0';
536 	      sym = lsymbol_fromChars (&tokenBuffer[0]);
537 	      cod = LST_SEPSYM;
538 	      break;
539 
540 	      /* simpleid */
541 	    case ':':
542 	      MOVECHAR ();
543 	      while (lscanCharClass (c) == IDCHAR)
544 		{
545 		  MOVECHAR ();
546 		}
547 	      *bufptr = '\0';
548 	      sym = lsymbol_fromChars (&tokenBuffer[0]);
549 	      cod = LST_SIMPLEID;
550 	      break;
551 
552 	    default:
553 	      if (lscanCharClass (c) == IDCHAR)
554 		{
555 		  do
556 		    {
557 		      MOVECHAR ();
558 		    }
559 		  while (lscanCharClass (c) == IDCHAR);
560 		  *bufptr = '\0';
561 		  sym = lsymbol_fromChars (&tokenBuffer[0]);
562 		  cod = LST_SIMPLEOP;
563 		}
564 	      else
565 		{
566 		  /*
567                   ** Meets none of the above.  Take the extension
568 		  ** character and the character following and treat
569 		  ** together as a SINGLECHAR.  SINGLECHARs tranlate into
570 		  ** SIMPLEOPs.
571 		  */
572 
573 		  MOVECHAR ();
574 		  *bufptr = '\0';
575 		  sym = lsymbol_fromChars (&tokenBuffer[0]);
576 		  cod = LST_SIMPLEOP;
577 		}
578 	      break;
579 	    }
580 	  /*@switchbreak@*/ break;
581 	  /*@=switchswitchbreak@*/
582 	default:
583 
584 	  LocalUserError ("unexpected character in input");
585 	  return;
586 	}
587       /*@=loopswitchbreak@*/
588 
589       /*
590       ** Above code only "guessed" at token type.  Insert it into the
591       ** TokenTable.  If the token already exists, it is returned as
592       ** previously defined.  If it does not exist, it is inserted as the
593       ** token code computed above.
594       */
595 
596       newToken = LSLInsertToken (cod, sym, 0, FALSE);
597 
598       if (LSLIsSyn (ltoken_getText (newToken)))
599 	{
600 	  /*
601 	  ** Token is a synonym.  Get the actual token and set the raw
602 	  ** text to the synonym name.
603 	  */
604 
605 	  newToken = LSLGetTokenForSyn (ltoken_getText (newToken));
606 	  ltoken_setRawText (newToken, sym);
607 	}
608 
609       ltoken_setCol (newToken, startCol);
610       ltoken_setLine (newToken, inputStream_thisLineNumber (LSLScanSource ()));
611       ltoken_setFileName (newToken, inputStream_fileName (LSLScanSource ()));
612 
613       if (ltoken_getCode (newToken) == LST_COMMENTSYM)
614 	{
615 	  bufptr = &tokenBuffer[0];
616 
617 	  while (!LSLIsEndComment (c))
618 	    {
619 	      MOVECHAR ();
620 	    }
621 	  if (lscanCharClass (c) != CHC_NULL)
622 	    {
623 	      MOVECHAR ();
624 	    }
625 	  if (reportComments)
626 	    {
627 	      *bufptr = '\0';
628 	      ltoken_setRawText (newToken, lsymbol_fromChars (&tokenBuffer[0]));
629 	      LSLScanFreshToken (newToken);
630 	    }
631 	}
632       else if (ltoken_getCode (newToken) == LST_EOL)
633 	{
634 	  if (reportEOL)
635 	    {
636 	      LSLScanFreshToken (newToken);
637 	    }
638 	  return;
639 	}
640       else
641 	{
642 	  if (cod != LST_WHITESPACE)
643 	    {
644 	      LSLScanFreshToken (newToken);
645 	    }
646 	}
647     }
648 }
649 
650 ltoken
LSLScanEofToken(void)651 LSLScanEofToken (void)
652 {
653   ltoken t = ltoken_copy (LSLInsertToken (LEOFTOKEN,
654 					  lsymbol_fromChars ("E O F"),
655 					  0, TRUE));
656   ltoken_setCol (t, colNumber);
657   ltoken_setLine (t, inputStream_thisLineNumber (LSLScanSource ()));
658   ltoken_setFileName (t, inputStream_fileName (LSLScanSource ()));
659   return t;
660 }
661 
662 void
LSLReportEolTokens(bool setting)663 LSLReportEolTokens (bool setting)
664 {
665   reportEOL = setting;
666 }
667 
668 static void
LocalUserError(char * msg)669   LocalUserError (char *msg)
670 {
671   inputStream s = LSLScanSource ();
672   llfatalerror (message ("%s:%d,%d: %s",
673 			 inputStream_fileName (s),
674 			 inputStream_thisLineNumber (s), colNumber,
675 			 cstring_fromChars (msg)));
676 }
677 
678 /*
679 **++
680 **  FUNCTION NAME:
681 **
682 **      lscanLineInit ()
683 **
684 **  FORMAL PARAMETERS:
685 **
686 **      None
687 **
688 **  RETURN VALUE:
689 **
690 **      None
691 **
692 **  INVARIANTS:
693 **
694 **      [@description or none@]
695 **
696 **  DESCRIPTION:
697 **
698 **      Initialize this module (should only be called once).
699 **
700 **  IMPLICIT INPUTS/OUTPUT:
701 **
702 **      GetNextLine - (output) initialized
703 **	NullToken   - (output) initialized
704 **      PrintName   - (output) array contents initialized
705 **
706 **  EXCEPTIONS:
707 **
708 **      None
709 **--
710 */
711 
712 void
lscanLineInit(void)713 lscanLineInit (void)
714 {
715   int i;
716 
717   reportEOL = FALSE;
718   reportComments = FALSE;
719 
720   for (i = 0; i <= LASTCHAR; i++)
721     {
722       charClass[i] = charClassDef[i];
723     }
724 
725  /*
726  ** NOTE: The following line ensures that all tokens have nonzero
727  ** handles, so that a handle of zero can be used to indicate that a
728  ** token does not have a synonym.
729  */
730 
731   (void) LSLReserveToken (LST_SIMPLEID, "dummy token");
732 
733   ltoken_forall = LSLReserveToken (LST_QUANTIFIERSYM, "\\forall");
734   ltoken_true = LSLReserveToken (LST_SIMPLEID, "true");
735   ltoken_false = LSLReserveToken (LST_SIMPLEID, "false");
736   ltoken_not = LSLReserveToken (LST_SIMPLEOP, "\\not");
737   ltoken_and = LSLReserveToken (LST_LOGICALOP, "\\and");
738   ltoken_or = LSLReserveToken (LST_LOGICALOP, "\\or");
739   ltoken_implies = LSLReserveToken (LST_LOGICALOP, "\\implies");
740 
741   ltoken_eq = LSLReserveToken (LST_EQOP, "\\eq");
742   ltoken_neq = LSLReserveToken (LST_EQOP, "\\neq");
743 
744   ltoken_equals = LSLReserveToken (LST_EQUATIONSYM, "\\equals");
745   ltoken_eqsep = LSLReserveToken (LST_EQSEPSYM, "\\eqsep");
746   ltoken_select = LSLReserveToken (LST_SELECTSYM, "\\select");
747   ltoken_open = LSLReserveToken (LST_OPENSYM, "\\open");
748   ltoken_sep = LSLReserveToken (LST_SEPSYM, "\\,");
749   ltoken_close = LSLReserveToken (LST_CLOSESYM, "\\close");
750   ltoken_id = LSLReserveToken (LST_SIMPLEID, "\\:");
751   ltoken_arrow = LSLReserveToken (LST_MAPSYM, "\\arrow");
752   ltoken_farrow = LSLReserveToken (LST_FIELDMAPSYM, "\\field_arrow");
753 
754   ltoken_marker = LSLReserveToken (LST_MARKERSYM, "\\marker");
755   ltoken_comment = LSLReserveToken (LST_COMMENTSYM, "\\comment");
756   ltoken_compose = LSLReserveToken (LST_COMPOSESYM, "\\composeSort");
757   ltoken_if = LSLReserveToken (LST_ifTOKEN, "if");
758 
759   (void) LSLReserveToken (LST_LPAR, " (");
760   (void) LSLReserveToken (LST_RPAR, ")");
761   (void) LSLReserveToken (LST_COMMA, ",");
762   (void) LSLReserveToken (LST_COLON, ":");
763 
764   (void) LSLReserveToken (LST_LBRACKET, "[");
765   (void) LSLReserveToken (LST_RBRACKET, "]");
766 
767   (void) LSLReserveToken (LST_WHITESPACE, " ");
768   (void) LSLReserveToken (LST_WHITESPACE, "\t");
769   (void) LSLReserveToken (LST_WHITESPACE, "\n");
770 
771   (void) LSLReserveToken (LEOFTOKEN, "E O F");
772   (void) LSLReserveToken (LST_EOL, "E O L");
773 
774   (void) LSLReserveToken (LST_assertsTOKEN, "asserts");
775   (void) LSLReserveToken (LST_assumesTOKEN, "assumes");
776   (void) LSLReserveToken (LST_byTOKEN, "by");
777   (void) LSLReserveToken (LST_convertsTOKEN, "converts");
778   (void) LSLReserveToken (LST_elseTOKEN, "else");
779   (void) LSLReserveToken (LST_enumerationTOKEN, "enumeration");
780   (void) LSLReserveToken (LST_equationsTOKEN, "equations");
781   (void) LSLReserveToken (LST_exemptingTOKEN, "exempting");
782   (void) LSLReserveToken (LST_forTOKEN, "for");
783   (void) LSLReserveToken (LST_generatedTOKEN, "generated");
784   (void) LSLReserveToken (LST_impliesTOKEN, "implies");
785   (void) LSLReserveToken (LST_includesTOKEN, "includes");
786   (void) LSLReserveToken (LST_introducesTOKEN, "introduces");
787   (void) LSLReserveToken (LST_ofTOKEN, "of");
788   (void) LSLReserveToken (LST_partitionedTOKEN, "partitioned");
789   (void) LSLReserveToken (LST_thenTOKEN, "then");
790   (void) LSLReserveToken (LST_traitTOKEN, "trait");
791   (void) LSLReserveToken (LST_tupleTOKEN, "tuple");
792   (void) LSLReserveToken (LST_unionTOKEN, "union");
793 }
794 
/* Reset hook for this module; it currently does nothing. */
void
lscanLineReset (void)
{
  /* intentionally empty */
}
799 
/* Cleanup hook for this module; it currently does nothing. */
void
lscanLineCleanup (void)
{
  /* intentionally empty */
}
804 
lscanCharClass(char c)805 charCode lscanCharClass (char c)
806 {
807   return charClass[ (int) (c)].code;
808 }
809 
LSLIsEndComment(char c)810 bool LSLIsEndComment (char c)
811 {
812   return charClass[ (int) (c)].endCommentChar;
813 }
814 
/*
** Assign scanner class cod to character c.
**
** c is converted through unsigned char before it is used as a subscript:
** plain char may be signed, and a byte above 127 would then write to
** charClass at a negative index.
** NOTE(review): assumes LASTCHAR is 255, so that every unsigned char
** value is a valid index (the default table has 256 entries) -- confirm.
*/
void lsetCharClass (char c, charCode cod)
{
  charClass[(int) (unsigned char) c].code = cod;
}
819 
lsetEndCommentChar(char c,bool flag)820 void lsetEndCommentChar (char c, bool flag)
821 {
822   charClass[ (int) (c)].endCommentChar = flag;
823 }
824