1 /*@z02.c:Lexical Analyser:Declarations@***************************************/
2 /*                                                                           */
3 /*  THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.39)                       */
4 /*  COPYRIGHT (C) 1991, 2008 Jeffrey H. Kingston                             */
5 /*                                                                           */
6 /*  Jeffrey H. Kingston (jeff@it.usyd.edu.au)                                */
7 /*  School of Information Technologies                                       */
8 /*  The University of Sydney 2006                                            */
9 /*  AUSTRALIA                                                                */
10 /*                                                                           */
11 /*  This program is free software; you can redistribute it and/or modify     */
12 /*  it under the terms of the GNU General Public License as published by     */
13 /*  the Free Software Foundation; either Version 3, or (at your option)      */
14 /*  any later version.                                                       */
15 /*                                                                           */
16 /*  This program is distributed in the hope that it will be useful,          */
17 /*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
18 /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
19 /*  GNU General Public License for more details.                             */
20 /*                                                                           */
21 /*  You should have received a copy of the GNU General Public License        */
22 /*  along with this program; if not, write to the Free Software              */
23 /*  Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA   */
24 /*                                                                           */
25 /*  FILE:         z02.c                                                      */
26 /*  MODULE:       Lexical Analyser                                           */
27 /*  EXTERNS:      LexLegalName(), LexInit(), LexPush(), LexPop(),            */
28 /*                LexNextTokenPos(), LexGetToken()                           */
29 /*                                                                           */
30 /*  Implementation note:  this fast and cryptic lexical analyser is adapted  */
31 /*  from Waite, W. M.: The Cost of Lexical Analysis, in Software - Practice  */
32 /*  and Experience, v16, pp473-488 (May 1986).                               */
33 /*                                                                           */
34 /*  Converted 8 November 2002 to handle DOS etc. line endings.  The end of   */
35 /*  a line is now taken to be the second end-of-line character when there    */
36 /*  are two of them.  With this rule it's quite easy to do the conversion.   */
37 /*                                                                           */
38 /*****************************************************************************/
39 #include "externs.h"
40 #define	BUFFER_SIZE    8192		/* size of buffer for block read     */
41 #define	OTHER		0		/* punctuation or other character    */
42 #define	LETTER		1		/* letter type                       */
43 #define	QUOTE		2		/* quoted string delimiter type      */
44 #define	ESCAPE		3		/* escape character inside strings   */
45 #define	COMMENT		4		/* comment delimiter type            */
46 #define	CSPACE		5		/* space character type              */
47 #define	FORMFEED	6		/* formfeed character type           */
48 #define	TAB		7		/* tab character type                */
49 #define	NEWLINE		8		/* newline character type            */
50 #define	ENDFILE		9		/* end of file character type        */
51 
52 static	unsigned char	chtbl[256];	/* type table indexed by a FULL_CHAR */
53 static	FULL_CHAR	*chpt;		/* pointer to current text character */
54 static	FULL_CHAR	*frst;		/* address of first buffer character */
55 static	FULL_CHAR	*limit;		/* just past last char in buffer     */
56 static	FULL_CHAR	*buf;		/* the character buffer start pos    */
57 static	int		blksize;	/* size of block read; others too    */
58 static	FULL_CHAR	last_char;	/* last char read in from file       */
59 static	FULL_CHAR	*startline;	/* position in buff of last newline  */
60 static	FILE_NUM	this_file;	/* number of currently open file     */
61 static	FILE		*fp;		/* current input file                */
62 static	FILE_POS	file_pos;	/* current file position             */
63 static	short		ftype;		/* the type of the current file      */
64 static	OBJECT		next_token;	/* next token if already read	     */
65 static	int		offset;		/* where to start reading in file    */
66 static	int		first_line_num;	/* number of first line (if offset)  */
67 static	BOOLEAN		same_file;	/* TRUE if same file as preceding    */
68 static	FULL_CHAR	*mem_block;	/* file buffer                       */
69 
70 static int stack_free;		/* first free slot in lexical stack  */
71 static struct {
72   FULL_CHAR	*chpt;		/* pointer to current text character */
73   FULL_CHAR	*frst;		/* address of first buffer character */
74   FULL_CHAR	*limit;		/* just past last char in buffer     */
75   FULL_CHAR	*buf;		/* the character buffer start pos    */
76   int		blksize;	/* size of block read; others too    */
77   FULL_CHAR	last_char;	/* last char read in from file	     */
78   FULL_CHAR	*startline;	/* position in buff of last newline  */
79   FILE_NUM	this_file;	/* number of currently open file     */
80   FILE		*fp;		/* current input file                */
81   FILE_POS	file_pos;	/* current file position             */
82   short		ftype;		/* the type of the current file      */
83   OBJECT	next_token;	/* next token if already read	     */
84   int		offset;		/* where to start reading in file    */
85   int		first_line_num;	/* number of first line (if offset)  */
86   BOOLEAN	same_file;	/* TRUE if same file as preceding    */
87   long		save_ftell;	/* ftell() position if same_file     */
88   FULL_CHAR	*mem_block;	/* file buffer                       */
89 } lex_stack[MAX_LEX_STACK];
90 
91 /*@::LexLegalName(), LexInit()@***********************************************/
92 /*                                                                           */
93 /*  BOOLEAN LexLegalName(str)                                                */
94 /*                                                                           */
95 /*  Check whether str is a valid name for a symbol table entry.              */
96 /*  Valid names have the BNF form                                            */
97 /*                                                                           */
98 /*       <name> ::= <letter>  { <letter> }                                   */
99 /*       <name> ::= <special> { <special> }                                  */
100 /*       <name> ::= <escape>  { <letter> }                                   */
101 /*                                                                           */
102 /*  The third form is inaccessible to users and is for internal use only.    */
103 /*                                                                           */
104 /*****************************************************************************/
105 
LexLegalName(FULL_CHAR * str)106 BOOLEAN LexLegalName(FULL_CHAR *str)
107 { int i;  BOOLEAN res;
108   debug1(DLA, DDD, "LexLegalName( %s )", str);
109   switch( chtbl[str[0]] )
110   {
111     case ESCAPE:
112     case LETTER:
113 
114       for( i = 1;  chtbl[str[i]] == LETTER;  i++ );
115       res = str[i] == '\0';
116       break;
117 
118 
119     case OTHER:
120 
121       for( i = 1;  chtbl[str[i]] == OTHER;  i++ );
122       res = str[i] == '\0';
123       break;
124 
125 
126     default:
127 
128       res = FALSE;
129       break;
130 
131   }
132   debug1(DLA, DDD, "LexLegalName returning %s", bool(res));
133   return res;
134 } /* end LexLegalName */
135 
136 
137 /*****************************************************************************/
138 /*                                                                           */
139 /*  LexInit()                                                                */
140 /*                                                                           */
141 /*  Initialise character types.  Those not touched are 0 (OTHER).            */
142 /*  The function initchtbl() assists in initializing the chtbl.              */
143 /*                                                                           */
144 /*****************************************************************************/
145 
initchtbl(val,str)146 static void initchtbl(val, str)
147 int val;  FULL_CHAR *str;
148 { int i;
149   for( i = 0;  str[i] != '\0';  i++ )
150     chtbl[ str[i] ] = val;
151 } /* end initchtbl */
152 
LexInit(void)153 void LexInit(void)
154 { initchtbl(LETTER,  STR_LETTERS_LOWER);
155   initchtbl(LETTER,  STR_LETTERS_UPPER);
156   initchtbl(LETTER,  STR_LETTERS_SYMSTART);
157   initchtbl(LETTER,  STR_LETTERS_UNDERSCORE);
158   initchtbl(LETTER,  STR_LETTERS_EXTRA0);
159   initchtbl(LETTER,  STR_LETTERS_EXTRA1);
160   initchtbl(LETTER,  STR_LETTERS_EXTRA2);
161   initchtbl(LETTER,  STR_LETTERS_EXTRA3);
162   initchtbl(LETTER,  STR_LETTERS_EXTRA4);
163   initchtbl(LETTER,  STR_LETTERS_EXTRA5);
164   initchtbl(LETTER,  STR_LETTERS_EXTRA6);
165   initchtbl(LETTER,  STR_LETTERS_EXTRA7);
166   initchtbl(QUOTE,   STR_QUOTE);
167   initchtbl(ESCAPE,  STR_ESCAPE);
168   initchtbl(COMMENT, STR_COMMENT);
169   initchtbl(CSPACE,  STR_SPACE);
170   initchtbl(FORMFEED,STR_FORMFEED);
171   initchtbl(TAB,     STR_TAB);
172   chtbl[CH_LF] = NEWLINE;
173   chtbl[CH_CR] = NEWLINE;
174   chtbl['\0'] = ENDFILE;
175   stack_free = -1;
176 } /* end LexInit */
177 
178 /*@::LexPush(), LexPop()@*****************************************************/
179 /*                                                                           */
180 /*  LexPush(x, offs, ftype, lnum, same)                                      */
181 /*                                                                           */
182 /*  Start reading from the file sequence whose first file is x (subsequent   */
183 /*  files are obtained from NextFile).  The first file (x) is to be fseeked  */
184 /*  to offs.  When the sequence is done, ftype determines how to continue:   */
185 /*                                                                           */
186 /*      ftype          action                                                */
187 /*                                                                           */
188 /*      SOURCE_FILE    last input file ends, return @End \Input              */
189 /*      DATABASE_FILE  database file, return @End \Input                     */
190 /*      INCLUDE_FILE   include file, must pop lexical analyser and continue  */
191 /*      FILTER_FILE    filter file, return @End @FilterOut                   */
192 /*                                                                           */
193 /*  lnum is the line number at offs, to be used when creating file pos's     */
194 /*  in the tokens returned.  same is TRUE when this file is the same as      */
195 /*  the file currently being read, in which case there is no need to         */
196 /*  close that file and open this one; just an fseek is required.            */
197 /*                                                                           */
198 /*****************************************************************************/
199 
LexPush(FILE_NUM x,int offs,int ftyp,int lnum,BOOLEAN same)200 void LexPush(FILE_NUM x, int offs, int ftyp, int lnum, BOOLEAN same)
201 { int i;
202   debug5(DLA, DD, "LexPush(%s, %d, %s, %d, %s)", FileName(x), offs,
203     ftyp==SOURCE_FILE ? "source" : ftyp==INCLUDE_FILE ? "include":"database",
204     lnum, bool(same));
205   if( stack_free >= MAX_LEX_STACK - 1 )
206   { if( ftyp == INCLUDE_FILE )
207       Error(2, 1, "too many open files when opening include file %s; open files are:",
208         WARN, PosOfFile(x), FullFileName(x));
209     else
210       Error(2, 2, "too many open files when opening database file %s; open files are:",
211         WARN, PosOfFile(x), FileName(x));
212     for( i = stack_free - 1;  i >= 0;  i-- )
213     {
214       Error(2, 23, "  %s", WARN, no_fpos,
215 	EchoFileSource(lex_stack[i].this_file));
216     }
217     Error(2, 24, "exiting now", FATAL, no_fpos);
218   }
219   if( stack_free >= 0 )  /* save current state */
220   { lex_stack[stack_free].chpt		 = chpt;
221     lex_stack[stack_free].frst		 = frst;
222     lex_stack[stack_free].limit		 = limit;
223     lex_stack[stack_free].buf		 = buf;
224     lex_stack[stack_free].blksize	 = blksize;
225     lex_stack[stack_free].last_char	 = last_char;
226     lex_stack[stack_free].startline	 = startline;
227     lex_stack[stack_free].this_file	 = this_file;
228     lex_stack[stack_free].fp		 = fp;
229     lex_stack[stack_free].ftype		 = ftype;
230     lex_stack[stack_free].next_token	 = next_token;
231     lex_stack[stack_free].offset	 = offset;
232     lex_stack[stack_free].first_line_num = first_line_num;
233     lex_stack[stack_free].same_file	 = same_file;
234     lex_stack[stack_free].mem_block	 = mem_block;
235     FposCopy( lex_stack[stack_free].file_pos, file_pos );
236   }
237   stack_free += 1;
238   ifdebug(DMA, D,
239     DebugRegisterUsage(MEM_LEX,1, (MAX_LINE+BUFFER_SIZE+3)*sizeof(FULL_CHAR)));
240   mem_block = (FULL_CHAR *) malloc((MAX_LINE+BUFFER_SIZE+3)*sizeof(FULL_CHAR));
241   if( mem_block == NULL )
242     Error(2, 3, "run out of memory when opening file %s",
243       FATAL, PosOfFile(x), FullFileName(x));
244   buf = chpt = &mem_block[MAX_LINE];
245   last_char = CH_CR;
246   this_file = x;  offset = offs;
247   first_line_num = lnum;  same_file = same;
248   ftype = ftyp;  next_token = nilobj;
249   *chpt = '\0';
250   if( same_file )
251   { lex_stack[stack_free-1].save_ftell = ftell(fp);
252   }
253   else
254   { fp = null;
255   }
256 } /* end LexPush */
257 
258 
259 /*****************************************************************************/
260 /*                                                                           */
261 /*  LexPop() - pop lexical analyser.                                         */
262 /*                                                                           */
263 /*****************************************************************************/
264 
LexPop(void)265 void LexPop(void)
266 { debug0(DLA, DD, "LexPop()");
267   assert( stack_free > 0, "LexPop: stack_free <= 0!" );
268   stack_free--;
269   if( same_file )
270   { fseek(fp, lex_stack[stack_free].save_ftell, SEEK_SET);
271   }
272   else
273   { if( fp != null )  fclose(fp);
274   }
275   ifdebug(DMA, D,
276    DebugRegisterUsage(MEM_LEX,-1,-(MAX_LINE+BUFFER_SIZE+2)* (int) sizeof(FULL_CHAR))
277   );
278   free( (char *) mem_block);
279   mem_block	 = lex_stack[stack_free].mem_block;
280   chpt		 = lex_stack[stack_free].chpt;
281   frst		 = lex_stack[stack_free].frst;
282   limit		 = lex_stack[stack_free].limit;
283   buf		 = lex_stack[stack_free].buf;
284   blksize	 = lex_stack[stack_free].blksize;
285   last_char	 = lex_stack[stack_free].last_char;
286   startline	 = lex_stack[stack_free].startline;
287   this_file	 = lex_stack[stack_free].this_file;
288   fp		 = lex_stack[stack_free].fp;
289   ftype		 = lex_stack[stack_free].ftype;
290   next_token	 = lex_stack[stack_free].next_token;
291   offset	 = lex_stack[stack_free].offset;
292   first_line_num = lex_stack[stack_free].first_line_num;
293   same_file	 = lex_stack[stack_free].same_file;
294   FposCopy( file_pos, lex_stack[stack_free].file_pos );
295 } /* end LexPop */
296 
297 
298 /*@::setword(), LexNextTokenPos(), srcnext()@*********************************/
299 /*                                                                           */
300 /*  setword(typ, res, file_pos, str, len)                                    */
301 /*                                                                           */
302 /*  Set variable res to a WORD or QWORD token containing string str, etc.    */
303 /*                                                                           */
304 /*****************************************************************************/
305 
306 #define setword(typ, res, file_pos, str, len)				\
307 { NewWord(res, typ, len, &file_pos);					\
308   FposCopy(fpos(res), file_pos);					\
309   for( ch = 0;  ch < len;  ch++ ) string(res)[ch] = str[ch];		\
310   string(res)[ch] = '\0';						\
311 }
312 
313 
314 /*****************************************************************************/
315 /*                                                                           */
316 /*  long LexNextTokenPos()                                                   */
317 /*                                                                           */
318 /*  Equivalent to ftell() on the (buffered) current lex file.                */
319 /*                                                                           */
320 /*****************************************************************************/
321 
LexNextTokenPos(void)322 long LexNextTokenPos(void)
323 { long res;
324   if( next_token != nilobj )
325     Error(2, 4, "illegal macro invocation in database",
326       FATAL, &fpos(next_token));
327   res = ftell(fp) - (limit - chpt) - (buf - frst);
328 #if DB_FIX
329   /* uwe: 1997-11-04
330    *
331    * On NT under Visual C++ ftell() and fseek() always use binary
332    * positions, even if the file was opened in text mode.  This means
333    * that every LF in between the CHPT and LIMIT was counted by
334    * ftell() as *TWO* bytes.  The pointer arithmetic above adjusts the
335    * ftold value as lout has not yet read chars past CHPT, but it
336    * counts each LF as *ONE* byte, naturally.
337    *
338    * The code below compensates for this binary/text brain death.
339    *
340    * PS: gcc from Cygnus' gnuwin32 has sane ftell() and does *NOT*
341    * need this workaround (I haven't tried compiling lout with gcc
342    * though, as the result will need cygwin.dll to run).
343    */
344 
345 /* *** retired by JeffK 2002-11-08 since now reading in binary mode
346   {
347     register FULL_CHAR *p;
348     for (p = chpt; p < limit; ++p) {
349       if (*p == (FULL_CHAR) CH_LF)
350         --res;
351     }
352   }
353 *** */
354 #endif /* DB_FIX */
355 
356   debug1(DLA, DD, "LexNextTokenPos() returning %ld", res);
357   return res;
358 }
359 
360 
361 /*****************************************************************************/
362 /*                                                                           */
363 /*  static srcnext()                                                         */
364 /*                                                                           */
365 /*  Move to new line of input file.  May need to recharge buffer.            */
366 /*                                                                           */
367 /*  Patched JeffK 16/10/06 to fix bug when CRLF falls on block boundary.     */
368 /*                                                                           */
369 /*****************************************************************************/
370 
srcnext(void)371 static void srcnext(void)
372 { register FULL_CHAR *col;
373   debugcond4(DLA, DD, stack_free <= 1,
374     "srcnext();  buf: %d, chpt: %d, frst: %d, limit: %d",
375     buf - mem_block, chpt - mem_block, frst - mem_block, limit - mem_block);
376 
377   /* if time to transfer last line to area preceding buffer, do so */
378   if( blksize != 0 && chpt < limit )
379   { debugcond0(DLA, DD, stack_free <= 1, "srcnext: transferring.");
380     col = buf;
381     while( chtbl[(*--col = *--limit)] != NEWLINE );
382     frst = col + 1;  limit++;  blksize = 0;
383   }
384 
385   /* if buffer is empty, read next block */
386   /*** changed by JK 9/92 from "if( chpt == limit )" to fix long lines bug */
387   if( chpt >= limit )
388   { if( chpt > limit+1 || (chpt == limit + 1 && chtbl[*limit] != NEWLINE) )
389     { col_num(file_pos) = 1;
390       Error(2, 5, "line is too long (or final newline missing)",
391 	FATAL, &file_pos);
392     }
393     chpt = frst;
394     blksize = fread( (char *) buf, sizeof(char), BUFFER_SIZE, fp);
395     if( blksize > 0 )
396       last_char = *(buf + blksize - 1);
397     if( blksize < BUFFER_SIZE && chtbl[last_char] != NEWLINE )
398     {
399       /* at end of file since blksize < BUFFER_SIZE; add missing newline char */
400       blksize++;
401       last_char = *(buf+blksize-1) = CH_LF;
402 
403       /* this adjustment breaks LexNextTokenPos, so fatal error if database */
404       if( ftype == DATABASE_FILE )
405       {
406 	line_num(file_pos) = col_num(file_pos) = 0;
407 	Error(2, 25, "a database file must end with a newline; this one doesn't",
408 	  FATAL, &file_pos);
409       }
410     }
411     debugcond4(DLA, DD, stack_free <= 1,
412       "srcnext: %d = fread(0x%x, %d, %d, fp)",
413       blksize, buf, sizeof(char), BUFFER_SIZE);
414     frst = buf;  limit = buf + blksize;  *limit = CH_LF; *(limit + 1) = CH_CR;
415   }
416 
417   /* if nothing more to read, make this clear */
418   if( chpt >= limit )
419   { debugcond0(DLA, DD, stack_free <= 1, "srcnext: nothing more to read");
420     chpt = limit = buf;  *limit = '\0';
421   }
422   debugcond4(DLA, DD, stack_free <= 1,
423     "srcnext returning;  buf: %d, chpt: %d, frst: %d, limit: %d",
424     buf - mem_block, chpt - mem_block, frst - mem_block, limit - mem_block);
425 } /* end srcnext */
426 
427 
428 /*@::LexGetToken()@***********************************************************/
429 /*                                                                           */
430 /*  OBJECT LexGetToken()                                                     */
431 /*                                                                           */
432 /*  Get next token from input.  Look it up in symbol table.                  */
433 /*                                                                           */
434 /*****************************************************************************/
435 
LexGetToken(void)436 OBJECT LexGetToken(void)
437 {
438 	   FULL_CHAR *startpos;		/* where the latest token started    */
439   register FULL_CHAR *p, *q;		/* pointer to current input char     */
440   register int      ch;			/* temporary character (really char) */
441   OBJECT   res;				/* result token                      */
442   int vcount, hcount;			/* no. of newlines and spaces seen   */
443 
444   if( next_token != nilobj )
445   { next_token = Delete(res = next_token, PARENT);
446     debugcond4(DLA, DD, stack_free <= 1,
447       "LexGetToken%s (in macro) returning %d.%d %s",
448       EchoFilePos(&file_pos), vspace(res), hspace(res), EchoToken(res));
449     return res;
450   }
451 
452   res = nilobj;  p = chpt;
453   vcount = hcount = 0;
454   do switch( chtbl[*p++] )
455   {
456       case ESCAPE:
457 
458 	if( ftype==DATABASE_FILE && *p>='a' && *p<='z' && *(p+1) == '{' )
459 	{ res = NewToken(LBR, &file_pos, 0, 0, (unsigned) *p, StartSym);
460 	  p += 2;
461 	}
462 	else
463 	{
464 	  col_num(file_pos) = (startpos = p-1) - startline;
465 	  Error(2, 6, "character %c outside quoted string",
466 	    WARN, &file_pos, *startpos);
467 	}
468 	break;
469 
470 
471       case COMMENT:
472 
473 	debug1(DLA, DDD, "LexGetToken%s: comment", EchoFilePos(&file_pos));
474 	while( chtbl[(ch = *p++)] != NEWLINE && ch != '\0' );
475 	if( chtbl[ch] == NEWLINE )
476 	{
477 	  /* skip over second newline character if any */
478 	  if( ch == CH_LF )
479 	  {
480 	    if( *p == CH_CR )
481 	      p++;
482 	  }
483 	  else /* ch == CH_CR */
484 	  {
485 	    if( *p == CH_LF )
486 	      p++;
487 	  }
488 
489 	  /* do NEWLINE action, only preserve existing horizontal space */
490 	  /* and don't count the newline in the vcount. */
491 	  chpt = p;  srcnext();
492 	  line_num(file_pos)++;
493 	  col_num(file_pos) = 0;
494 	  startline = (p = chpt) - 1;
495 	}
496 	else
497 	{
498 	  --p;
499 	}
500 	break;
501 
502 
503       case CSPACE:
504       case FORMFEED:
505 
506 	hcount++;
507 	break;
508 
509 
510       case TAB:
511 
512 	hcount += 8;
513 	break;
514 
515 
516       case NEWLINE:
517 
518 	/* skip over second newline character if any */
519 	if( *(p-1) == CH_LF )
520 	{
521 	  if( *p == CH_CR )
522 	    p++;
523 	}
524 	else /* *(p-1) == CH_CR */
525 	{
526 	  if( *p == CH_LF )
527 	    p++;
528 	}
529 
530 	/* do newline action */
531 	chpt = p;  srcnext();
532 	line_num(file_pos)++;
533 	col_num(file_pos) = 0;
534 	vcount++;  hcount = 0;
535 	startline = (p = chpt) - 1;
536 	break;
537 
538 
539       case ENDFILE:
540 
541 	debug0(DLA, DDD, "LexGetToken: endfile");
542 	if( !same_file )
543 	{
544 	  /* close current file, if any */
545 	  if( fp != null )
546 	  { fclose(fp);  fp = null;
547 	    this_file = ftype == SOURCE_FILE ? NextFile(this_file) : NO_FILE;
548 	  }
549 
550 	  /* open next file */
551 	  while( this_file != NO_FILE )
552 	  { file_num(file_pos) = this_file;
553 	    line_num(file_pos) = 1;
554 	    col_num(file_pos) = 0;
555 	    fp = OpenFile(this_file, FALSE, TRUE);
556 	    if( fp != null )  break;
557 	    Error(2, 7, "cannot open file %s",
558 	      WARN, &file_pos, FullFileName(this_file));
559 	    this_file = ftype == SOURCE_FILE ? NextFile(this_file) : NO_FILE;
560 	  }
561 	}
562 
563 	if( fp != null )
564 	{ if( offset != 0 )
565 	  { debugcond1(DLA, DD, stack_free <= 1, "fseek(fp, %d, SEEK_SET)", offset);
566 	    fseek(fp, (long) offset, SEEK_SET);
567 	    offset = 0L;
568 	    line_num(file_pos) = first_line_num;
569 	  }
570 	  frst = limit = chpt = buf;
571 	  blksize = 0;  last_char = CH_LF;
572 	  srcnext();
573 	  startline = (p = chpt) - 1;
574 	  hcount = 0;
575 	}
576 
577 	/* no next file, so take continuation */
578 	else switch( ftype )
579 	{
580 
581 	  case SOURCE_FILE:
582 	  case DATABASE_FILE:
583 
584 	    /* input ends with "@End \Input" then UNEXPECTED_EOF */
585 	    res = NewToken(END, &file_pos, 0, 0, END_PREC, StartSym);
586 	    next_token = NewToken(UNEXPECTED_EOF, &file_pos,0,0,NO_PREC,nilobj);
587 	    --p;  startline = p;
588 	    break;
589 
590 
591 	  case FILTER_FILE:
592 
593 	    /* input ends with "@End @FilterOut" */
594 	    res = NewToken(END, &file_pos, 0, 0, END_PREC, FilterOutSym);
595 	    --p;  startline = p;
596 	    break;
597 
598 
599 	  case INCLUDE_FILE:
600 
601 	    LexPop();
602 	    p = chpt;
603 	    hcount = 0;
604 	    break;
605 
606 
607 	  default:
608 
609 	    assert(FALSE, "unknown file type");
610 	    break;
611 
612 	} /* end switch */
613 	break;
614 
615 
616       case OTHER:
617 
618 	col_num(file_pos) = (startpos = p-1) - startline;
619 	while( chtbl[*p++] == OTHER );
620 	/* using ch as a real int here, not a char */
621 	ch = p - startpos - 1;
622 	do
623 	{ res = SearchSym(startpos, ch);
624 	  --ch; --p;
625 	} while( ch > 0 && res == nilobj );
626 	goto MORE;  /* 7 lines down */
627 
628 
629       case LETTER:
630 
631 	col_num(file_pos) = (startpos = p-1) - startline;
632 	while( chtbl[*p++] == LETTER );  --p;
633 	res = SearchSym(startpos, p - startpos);
634 
635 	MORE: if( res == nilobj )
636 	{ setword(WORD, res, file_pos, startpos, p-startpos);
637 	}
638 	else if( type(res) == MACRO )
639 	{ if( recursive(res) )
640 	  { Error(2, 8, "recursion in macro", WARN, &file_pos);
641 	    setword(WORD, res, file_pos, startpos, p-startpos);
642 	  }
643 	  else
644 	  { res = CopyTokenList( sym_body(res), &file_pos );
645 	    if( res != nilobj ) next_token = Delete(res, PARENT);
646 	    else hcount = 0;
647 	  }
648 	}
649 	else if( predefined(res) == 0 )
650 	{
651 	  /* nice try but does not work for @Database @FontDef { ... } !! ***
652 	  if( res == FontDefSym && ftype != DATABASE_FILE )
653 	    Error(2, 26, "%s may only appear in a database file", FATAL,
654 	      &file_pos, KW_FONTDEF);
655 	  *** */
656 	  res = NewToken(CLOSURE, &file_pos, 0, 0, precedence(res), res);
657 	}
658 	else if( predefined(res) == INCLUDE || predefined(res) == SYS_INCLUDE )
659 	{ OBJECT t, fname;  FILE_NUM fnum;  int len;  BOOLEAN scope_suppressed;
660 	  chpt = p;
661 	  t = LexGetToken();
662 	  scope_suppressed = (type(t)==WORD && StringEqual(string(t), KW_LBR));
663 
664 	  if( type(t)!=LBR && !scope_suppressed )
665 	  { Error(2, 9, "%s expected (after %s)",
666 	      WARN, &fpos(t), KW_LBR, SymName(res));
667 	    Dispose(t);
668 	    res = nilobj;
669 	    break;
670 	  }
671 	  if( scope_suppressed )
672 	  { UnSuppressScope();
673 	    Dispose(t);
674 	    New(t, LBR);
675 	  }
676 	  fname = Parse(&t, nilobj, FALSE, FALSE);
677 	  fname = ReplaceWithTidy(fname, ACAT_TIDY);
678 	  if( scope_suppressed ) SuppressScope();
679 	  if( !is_word(type(fname)) )
680 	  { Error(2, 10, "name of include file expected here",
681 	      WARN, &fpos(fname));
682 	    Dispose(fname);
683 	    res = nilobj;
684 	    break;
685 	  }
686 	  len = StringLength(string(fname)) - StringLength(SOURCE_SUFFIX);
687 	  if( len >= 0 && StringEqual(&string(fname)[len], SOURCE_SUFFIX) )
688 	    StringCopy(&string(fname)[len], STR_EMPTY);
689 	  if( !InDefinitions ||
690 	      (FileNum(string(fname), STR_EMPTY) == NO_FILE &&
691 	      FileNum(string(fname), SOURCE_SUFFIX) == NO_FILE) )
692 	  {
693 	    /* need to define and read this include file */
694 	    debug4(DFS, D, "  calling DefineFile %s from LexGetToken (%s, %d, %d)",
695 	      string(fname), bool(InDefinitions),
696 	        FileNum(string(fname), STR_EMPTY),
697 		FileNum(string(fname), SOURCE_SUFFIX));
698 	    fnum = DefineFile(string(fname), STR_EMPTY, &fpos(fname),
699 	      INCLUDE_FILE,
700 	      predefined(res)==INCLUDE ? INCLUDE_PATH : SYSINCLUDE_PATH);
701 	    Dispose(fname);
702 	    LexPush(fnum, 0, INCLUDE_FILE, 1, FALSE);
703 	    res = LexGetToken();
704 	    vcount++; /** TEST ADDITION! **/
705 	    p = chpt;
706 	  }
707 	  else
708 	  {
709 	    debug1(DFS, D, "  skipping DefineFile %s from LexGetToken",
710 	      string(fname));
711 	    res = nilobj;
712 	    p = chpt;
713 	    Dispose(fname);
714 	    break;
715 	  }
716 	}
717 	else if( predefined(res) == END )
718 	  res = NewToken(predefined(res), &file_pos,0,0,precedence(res),nilobj);
719 	else
720 	  res = NewToken(predefined(res), &file_pos,0,0,precedence(res),res);
721 	break;
722 
723 
724       case QUOTE:
725 
726 	col_num(file_pos) = (startpos = q = p) - 1 - startline;
727 	do switch( chtbl[*q++ = *p++] )
728 	{
729 	  case OTHER:
730 	  case LETTER:
731 	  case COMMENT:
732 	  case CSPACE:
733 	  case FORMFEED:
734 	  case TAB:	break;
735 
736 	  case NEWLINE:
737 	  case ENDFILE:	--p;
738 			Error(2, 11, "unterminated string", WARN, &file_pos);
739 			setword(QWORD, res, file_pos, startpos, q-1-startpos);
740 			break;
741 
742 	  case QUOTE:	setword(QWORD, res, file_pos, startpos, q-1-startpos);
743 			break;
744 
745 	  case ESCAPE:	q--;
746 			if( chtbl[*p] == NEWLINE || chtbl[*p] == ENDFILE )
747 			{ Error(2, 12, "unterminated string", WARN, &file_pos);
748 			  setword(QWORD, res, file_pos, startpos, q-startpos);
749 			}
750 			else if( octaldigit(*p) )
751 			{ int count, ch;
752 			  count = ch = 0;
753 			  do
754 			  { ch = ch * 8 + digitchartonum(*p++);
755 			    count++;
756 			  } while( octaldigit(*p) && count < 3 );
757 			  if( ch == '\0' )
758 			    Error(2, 13, "skipping null character in string",
759 			      WARN, &file_pos);
760 			  else *q++ = ch;
761 			}
762 			else *q++ = *p++;
763 			break;
764 
765 	  default:	Error(2, 14, "LexGetToken: error in quoted string",
766 			  INTERN, &file_pos);
767 			break;
768 
769 	} while( res == nilobj );
770 	break;
771 
772 
773       default:
774 
775 	assert(FALSE, "LexGetToken: bad chtbl[]");
776 	break;
777 
778   } while( res == nilobj );
779 
780   if( p - startline >= MAX_LINE )
781   { col_num(file_pos) = 1;
782     Error(2, 15, "line is too long (or final newline missing)",FATAL,&file_pos);
783   }
784 
785   chpt = p;
786   vspace(res) = vcount;
787   hspace(res) = hcount;
788   debug5(DLA, DD, "LexGetToken%s returning %s %s %d.%d",
789     EchoFilePos(&file_pos), Image(type(res)), EchoToken(res),
790     vspace(res), hspace(res));
791   /* ***
792   debugcond5(DLA, DD, stack_free <= 1, "LexGetToken%s returning %s %s %d.%d",
793     EchoFilePos(&file_pos), Image(type(res)), EchoToken(res),
794     vspace(res), hspace(res));
795   *** */
796   return res;
797 } /* end LexGetToken */
798 
799 
800 /*@::LexScanVerbatim@*********************************************************/
801 /*                                                                           */
802 /*  OBJECT LexScanVerbatim(fp, end_stop, err_pos, lessskip)                  */
803 /*                                                                           */
804 /*  Scan input file and transfer to filter file fp, or if that is NULL, make */
805 /*  a VCAT of objects, one per line (or just a WORD if one line only), and   */
806 /*  return that object as the result.  If end_stop, terminate at @End, else  */
807 /*  terminate at matching right brace.                                       */
808 /*                                                                           */
809 /*  If lessskip is true it means that we should skip only up to and          */
810 /*  including the first newline character sequence, as opposed to the usual  */
811 /*  skipping of all initial white space characters.                          */
812 /*                                                                           */
813 /*****************************************************************************/
814 
815 /*****************************************************************************/
816 /*                                                                           */
817 /*  print(ch)                                                                */
818 /*                                                                           */
819 /*  Add ch to the result of the verbatim scan.  If chtbl[ch] == NEWLINE,     */
820 /*  this means to add whatever sequence of characters counts as the end      */
821 /*  of line sequence in today's operating system.  Otherwise, just add ch.   */
822 /*                                                                           */
823 /*  The code that calls print() ensures that only one character is passed    */
824 /*  to print() per newline sequence; this could be either CH_LF or CH_CR.    */
825 /*                                                                           */
826 /*****************************************************************************/
827 
828 #define print(ch)							\
829 { debug2(DLA, D, "print(%c), bufftop = %d", ch, bufftop);		\
830   if( fp == NULL )							\
831   { if( bufftop < MAX_BUFF )						\
832     { if( chtbl[ch] == NEWLINE )					\
833       { res = BuildLines(res, buff, &bufftop, hs_lnum_overshoot);	\
834       }									\
835       else buff[bufftop++] = ch;					\
836     }									\
837   }									\
838   else if( chtbl[ch] != NEWLINE )					\
839     putc(ch, fp);							\
840   else									\
841     StringFPuts(STR_NEWLINE, fp);					\
842 }
843 
844 /*****************************************************************************/
845 /*                                                                           */
846 /*  hold(ch)                                                                 */
847 /*                                                                           */
848 /*  At this point we are thinking about print(ch) but we are not certain     */
849 /*  yet, so ch is held pending a decision.                                   */
850 /*                                                                           */
851 /*  The code that calls hold() ensures that only one character is passed     */
852 /*  to hold() per newline sequence; this could be either CH_LF or CH_CR.     */
853 /*                                                                           */
854 /*****************************************************************************/
855 
856 #define hold(ch)							\
857 { if( hs_top == MAX_BUFF )  clear();					\
858   hs_buff[hs_top++] = ch;						\
859   if( chtbl[ch] == NEWLINE )						\
860     hs_lnum_overshoot++;						\
861 }
862 
863 /*****************************************************************************/
864 /*                                                                           */
865 /*  clear()                                                                  */
866 /*                                                                           */
867 /*  A decision has been made that all currently held characters are now      */
868 /*  to be printed.                                                           */
869 /*                                                                           */
870 /*****************************************************************************/
871 
872 #define clear()								\
873 { int i;								\
874   for( i = 0;  i < hs_top;  i++ )					\
875   { print(hs_buff[i]);							\
876     if( chtbl[hs_buff[i]] == NEWLINE )					\
877       hs_lnum_overshoot--;						\
878   }									\
879   hs_top = 0;								\
880   assert(hs_lnum_overshoot == 0, "clear(): hs_lnum_overshoot!");	\
881 }
882 
883 /*****************************************************************************/
884 /*                                                                           */
885 /*  OBJECT BuildLines(current, buff, bufftop, ladj)                          */
886 /*                                                                           */
887 /*  Add one line containing word buff to the growing set of verbatim lines.  */
888 /*  Adjust the line number of file_pos by ladj to account for extra lines    */
889 /*  that we may be holding, which would otherwise cause us to overestimate   */
890 /*  which line we are on.                                                    */
891 /*                                                                           */
892 /*****************************************************************************/
893 
BuildLines(OBJECT current,FULL_CHAR * buff,int * bufftop,int ladj)894 static OBJECT BuildLines(OBJECT current, FULL_CHAR *buff, int *bufftop, int ladj)
895 { OBJECT wd, res, gp, gpword;  int ch;  FILE_POS xfp;
896 
897   /* adjust file position since we may have been holding stuff */
898   file_num(xfp) = file_num(file_pos);
899   line_num(xfp) = line_num(file_pos) - ladj;
900   col_num(xfp) = 1;
901 
902   /* build a new word and reset the buffer */
903   setword(WORD, wd, xfp, buff, *bufftop);
904   debug1(DLA, D, "BuildLines(current, %s)", EchoObject(wd));
905   *bufftop = 0;
906 
907   if( current == nilobj )
908   {
909     /* if this is the first word, make it the result */
910     res = wd;
911   }
912   else
913   {
914     /* if this is the second word, make the result a VCAT */
915     if( type(current) == WORD )
916     { New(res, VCAT);
917       FposCopy(fpos(res), fpos(current));
918       Link(res, current);
919     }
920     else res = current;
921 
922     /* now attach the new word to res, preceded by a one-line gap */
923     New(gp, GAP_OBJ);
924     mark(gap(gp)) = FALSE;
925     join(gap(gp)) = FALSE;
926     FposCopy(fpos(gp), xfp);
927     gpword = MakeWord(WORD, AsciiToFull("1vx"), &xfp);
928     Link(gp, gpword);
929     Link(res, gp);
930     Link(res, wd);
931   }
932   debug1(DLA, D, "BuildLines returning %s", EchoObject(res));
933   return res;
934 }
935 
LexScanVerbatim(fp,end_stop,err_pos,lessskip)936 OBJECT LexScanVerbatim(fp, end_stop, err_pos, lessskip)
937 FILE *fp;  BOOLEAN end_stop;  FILE_POS *err_pos;  BOOLEAN lessskip;
938 {
939   register FULL_CHAR *p;		/* pointer to current input char     */
940   int depth;				/* depth of nesting of { ... }       */
941   BOOLEAN finished;			/* TRUE when finished                */
942   BOOLEAN skipping;			/* TRUE when skipping initial spaces */
943   FULL_CHAR hs_buff[MAX_BUFF];		/* hold spaces here in case last     */
944   int hs_top;				/* next free spot in hs_buff         */
945   int hs_lnum_overshoot = 0;		/* subtract this from lnum           */
946   FULL_CHAR buff[MAX_BUFF];		/* hold line here if not to file     */
947   int bufftop;				/* top of buff                       */
948   OBJECT res = nilobj;			/* result object if not to file      */
949 
950   debug3(DLA, D, "LexScanVerbatim(fp, %s, %s, %s)",
951     bool(end_stop), EchoFilePos(err_pos), bool(lessskip));
952   if( next_token != nilobj )
953   { Error(2, 16, "filter parameter in macro", FATAL, err_pos);
954   }
955 
956   p = chpt;  depth = 0;
957   finished = FALSE;
958   skipping = TRUE;
959   hs_top = 0;
960   bufftop = 0;
961   while( !finished ) switch( chtbl[*p++] )
962   {
963       case ESCAPE:
964       case COMMENT:
965       case QUOTE:
966 
967 	skipping = FALSE;
968 	clear();
969 	print(*(p-1));
970 	break;
971 
972 
973       case CSPACE:
974       case TAB:
975       case FORMFEED:
976 
977 	if( !skipping )  hold(*(p-1));
978 	break;
979 
980 
981       case NEWLINE:
982 
983 	/* skip over second newline character if any */
984 	if( *(p-1) == CH_LF )
985 	{
986 	  if( *p == CH_CR )
987 	    p++;
988 	}
989 	else /* c == CH_CR */
990 	{
991 	  if( *p == CH_LF )
992 	    p++;
993 	}
994 
995 	/* sort out skipping and holding */
996 	if( !skipping )  hold(*(p-1));
997 	if( lessskip ) skipping = FALSE;
998 
999 	/* perform newline action */
1000 	chpt = p;  srcnext();
1001 	line_num(file_pos)++;
1002 	col_num(file_pos) = 0;
1003 	startline = (p = chpt) - 1;
1004 	break;
1005 
1006 
1007       case ENDFILE:
1008 
1009 	if( fp == NULL )
1010 	  Error(2, 22, "end of file reached while reading %s",
1011 	    FATAL, err_pos, lessskip ? KW_RAWVERBATIM : KW_VERBATIM);
1012 	else
1013 	  Error(2, 17, "end of file reached while reading filter parameter",
1014 	    FATAL, err_pos);
1015 	break;
1016 
1017 
1018       case OTHER:
1019 
1020 	skipping = FALSE;
1021 	if( *(p-1) == '{' /*}*/ )
1022 	{ clear();
1023 	  print(*(p-1));
1024 	  depth++;
1025 	}
1026 	else if( *(p-1) == /*{*/ '}' )
1027 	{ if( !end_stop && depth == 0 )
1028 	  { p--;
1029 	    finished = TRUE;
1030 	  }
1031 	  else
1032 	  { clear();
1033 	    print(*(p-1));
1034 	    depth--;
1035 	  }
1036 	}
1037 	else
1038 	{ clear();
1039 	  print(*(p-1));
1040 	}
1041 	break;
1042 
1043 
1044       case LETTER:
1045 
1046 	skipping = FALSE;
1047 	if( *(p-1) == '@' )
1048 	{
1049 	  p--;
1050 	  if( end_stop && StringBeginsWith(p, KW_END) )
1051 	  { finished = TRUE;
1052 	  }
1053 	  else if( StringBeginsWithWord(p, KW_INCLUDE) ||
1054 		   StringBeginsWithWord(p, KW_SYSINCLUDE) )
1055 	  {
1056 	    OBJECT incl_fname, t;  FILE *incl_fp;  int ch;  FILE_NUM fnum;
1057 	    BOOLEAN sysinc = StringBeginsWith(p, KW_SYSINCLUDE);
1058 	    clear();
1059 	    p += sysinc ? StringLength(KW_SYSINCLUDE):StringLength(KW_INCLUDE);
1060 	    chpt = p;
1061 	    t = LexGetToken();
1062 	    if( type(t) != LBR )  Error(2, 18, "expected %s here (after %s)",
1063 		FATAL, &fpos(t), KW_LBR, sysinc ? KW_SYSINCLUDE : KW_INCLUDE);
1064 	    incl_fname = Parse(&t, nilobj, FALSE, FALSE);
1065 	    p = chpt;
1066 	    incl_fname = ReplaceWithTidy(incl_fname, ACAT_TIDY);
1067 	    if( !is_word(type(incl_fname)) )
1068 	      Error(2, 19, "expected file name here", FATAL,&fpos(incl_fname));
1069 	    debug0(DFS, D, "  calling DefineFile from LexScanVerbatim");
1070 	    fnum = DefineFile(string(incl_fname), STR_EMPTY, &fpos(incl_fname),
1071 	      INCLUDE_FILE, sysinc ? SYSINCLUDE_PATH : INCLUDE_PATH);
1072 	    Dispose(incl_fname);
1073 	    incl_fp = OpenFile(fnum, FALSE, TRUE);
1074 	    if( incl_fp == NULL )
1075 	      Error(2, 20, "cannot open include file %s",
1076 		FATAL, PosOfFile(fnum), FullFileName(fnum));
1077 	    while( (ch = getc(incl_fp)) != EOF )
1078 	      print(ch);
1079 	    fclose(incl_fp);
1080 	  }
1081 	  else
1082 	  { clear();
1083 	    print(*p);
1084 	    p++;
1085 	  }
1086 	}
1087 	else
1088 	{ clear();
1089 	  print(*(p-1));
1090 	}
1091 	break;
1092 
1093 
1094       default:
1095 
1096 	Error(2, 22, "unreadable character (octal %o)",INTERN,&file_pos,*(p-1));
1097 	assert(FALSE, "LexScanVerbatim: bad chtbl[]");
1098 	break;
1099 
1100   };
1101   print(CH_LF);
1102 
1103   if( p - startline >= MAX_LINE )
1104   { col_num(file_pos) = 1;
1105     Error(2, 21, "line is too long (or final newline missing)",FATAL,&file_pos);
1106   }
1107 
1108   chpt = p;
1109   if( fp == NULL && res == nilobj )
1110     res = MakeWord(WORD, STR_EMPTY, &file_pos);
1111 
1112   debug2(DLA, D, "LexScanVerbatim returning %s at %s",
1113     EchoObject(res), EchoFilePos(&file_pos));
1114   return res;
1115 } /* end LexScanVerbatim */
1116