1 /*@z02.c:Lexical Analyser:Declarations@***************************************/
2 /* */
3 /* THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.39) */
4 /* COPYRIGHT (C) 1991, 2008 Jeffrey H. Kingston */
5 /* */
6 /* Jeffrey H. Kingston (jeff@it.usyd.edu.au) */
7 /* School of Information Technologies */
8 /* The University of Sydney 2006 */
9 /* AUSTRALIA */
10 /* */
11 /* This program is free software; you can redistribute it and/or modify */
12 /* it under the terms of the GNU General Public License as published by */
13 /* the Free Software Foundation; either Version 3, or (at your option) */
14 /* any later version. */
15 /* */
16 /* This program is distributed in the hope that it will be useful, */
17 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
18 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
19 /* GNU General Public License for more details. */
20 /* */
21 /* You should have received a copy of the GNU General Public License */
22 /* along with this program; if not, write to the Free Software */
23 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA */
24 /* */
25 /* FILE: z02.c */
26 /* MODULE: Lexical Analyser */
27 /* EXTERNS: LexLegalName(), LexInit(), LexPush(), LexPop(), */
28 /* LexNextTokenPos(), LexGetToken() */
29 /* */
30 /* Implementation note: this fast and cryptic lexical analyser is adapted */
31 /* from Waite, W. M.: The Cost of Lexical Analysis, in Software - Practice */
32 /* and Experience, v16, pp473-488 (May 1986). */
33 /* */
34 /* Converted 8 November 2002 to handle DOS etc. line endings. The end of */
35 /* a line is now taken to be the second end-of-line character when there */
36 /* are two of them. With this rule it's quite easy to do the conversion. */
37 /* */
38 /*****************************************************************************/
39 #include "externs.h"
40 #define BUFFER_SIZE 8192 /* size of buffer for block read */
41 #define OTHER 0 /* punctuation or other character */
42 #define LETTER 1 /* letter type */
43 #define QUOTE 2 /* quoted string delimiter type */
44 #define ESCAPE 3 /* escape character inside strings */
45 #define COMMENT 4 /* comment delimiter type */
46 #define CSPACE 5 /* space character type */
47 #define FORMFEED 6 /* formfeed character type */
48 #define TAB 7 /* tab character type */
49 #define NEWLINE 8 /* newline character type */
50 #define ENDFILE 9 /* end of file character type */
51
52 static unsigned char chtbl[256]; /* type table indexed by a FULL_CHAR */
53 static FULL_CHAR *chpt; /* pointer to current text character */
54 static FULL_CHAR *frst; /* address of first buffer character */
55 static FULL_CHAR *limit; /* just past last char in buffer */
56 static FULL_CHAR *buf; /* the character buffer start pos */
57 static int blksize; /* size of block read; others too */
58 static FULL_CHAR last_char; /* last char read in from file */
59 static FULL_CHAR *startline; /* position in buff of last newline */
60 static FILE_NUM this_file; /* number of currently open file */
61 static FILE *fp; /* current input file */
62 static FILE_POS file_pos; /* current file position */
63 static short ftype; /* the type of the current file */
64 static OBJECT next_token; /* next token if already read */
65 static int offset; /* where to start reading in file */
66 static int first_line_num; /* number of first line (if offset) */
67 static BOOLEAN same_file; /* TRUE if same file as preceding */
68 static FULL_CHAR *mem_block; /* file buffer */
69
70 static int stack_free; /* first free slot in lexical stack */
71 static struct {
72 FULL_CHAR *chpt; /* pointer to current text character */
73 FULL_CHAR *frst; /* address of first buffer character */
74 FULL_CHAR *limit; /* just past last char in buffer */
75 FULL_CHAR *buf; /* the character buffer start pos */
76 int blksize; /* size of block read; others too */
77 FULL_CHAR last_char; /* last char read in from file */
78 FULL_CHAR *startline; /* position in buff of last newline */
79 FILE_NUM this_file; /* number of currently open file */
80 FILE *fp; /* current input file */
81 FILE_POS file_pos; /* current file position */
82 short ftype; /* the type of the current file */
83 OBJECT next_token; /* next token if already read */
84 int offset; /* where to start reading in file */
85 int first_line_num; /* number of first line (if offset) */
86 BOOLEAN same_file; /* TRUE if same file as preceding */
87 long save_ftell; /* ftell() position if same_file */
88 FULL_CHAR *mem_block; /* file buffer */
89 } lex_stack[MAX_LEX_STACK];
90
91 /*@::LexLegalName(), LexInit()@***********************************************/
92 /* */
93 /* BOOLEAN LexLegalName(str) */
94 /* */
95 /* Check whether str is a valid name for a symbol table entry. */
96 /* Valid names have the BNF form */
97 /* */
98 /* <name> ::= <letter> { <letter> } */
99 /* <name> ::= <special> { <special> } */
100 /* <name> ::= <escape> { <letter> } */
101 /* */
102 /* The third form is inaccessible to users and is for internal use only. */
103 /* */
104 /*****************************************************************************/
105
LexLegalName(FULL_CHAR * str)106 BOOLEAN LexLegalName(FULL_CHAR *str)
107 { int i; BOOLEAN res;
108 debug1(DLA, DDD, "LexLegalName( %s )", str);
109 switch( chtbl[str[0]] )
110 {
111 case ESCAPE:
112 case LETTER:
113
114 for( i = 1; chtbl[str[i]] == LETTER; i++ );
115 res = str[i] == '\0';
116 break;
117
118
119 case OTHER:
120
121 for( i = 1; chtbl[str[i]] == OTHER; i++ );
122 res = str[i] == '\0';
123 break;
124
125
126 default:
127
128 res = FALSE;
129 break;
130
131 }
132 debug1(DLA, DDD, "LexLegalName returning %s", bool(res));
133 return res;
134 } /* end LexLegalName */
135
136
137 /*****************************************************************************/
138 /* */
139 /* LexInit() */
140 /* */
141 /* Initialise character types. Those not touched are 0 (OTHER). */
142 /* The function initchtbl() assists in initializing the chtbl. */
143 /* */
144 /*****************************************************************************/
145
initchtbl(val,str)146 static void initchtbl(val, str)
147 int val; FULL_CHAR *str;
148 { int i;
149 for( i = 0; str[i] != '\0'; i++ )
150 chtbl[ str[i] ] = val;
151 } /* end initchtbl */
152
LexInit(void)153 void LexInit(void)
154 { initchtbl(LETTER, STR_LETTERS_LOWER);
155 initchtbl(LETTER, STR_LETTERS_UPPER);
156 initchtbl(LETTER, STR_LETTERS_SYMSTART);
157 initchtbl(LETTER, STR_LETTERS_UNDERSCORE);
158 initchtbl(LETTER, STR_LETTERS_EXTRA0);
159 initchtbl(LETTER, STR_LETTERS_EXTRA1);
160 initchtbl(LETTER, STR_LETTERS_EXTRA2);
161 initchtbl(LETTER, STR_LETTERS_EXTRA3);
162 initchtbl(LETTER, STR_LETTERS_EXTRA4);
163 initchtbl(LETTER, STR_LETTERS_EXTRA5);
164 initchtbl(LETTER, STR_LETTERS_EXTRA6);
165 initchtbl(LETTER, STR_LETTERS_EXTRA7);
166 initchtbl(QUOTE, STR_QUOTE);
167 initchtbl(ESCAPE, STR_ESCAPE);
168 initchtbl(COMMENT, STR_COMMENT);
169 initchtbl(CSPACE, STR_SPACE);
170 initchtbl(FORMFEED,STR_FORMFEED);
171 initchtbl(TAB, STR_TAB);
172 chtbl[CH_LF] = NEWLINE;
173 chtbl[CH_CR] = NEWLINE;
174 chtbl['\0'] = ENDFILE;
175 stack_free = -1;
176 } /* end LexInit */
177
178 /*@::LexPush(), LexPop()@*****************************************************/
179 /* */
180 /* LexPush(x, offs, ftype, lnum, same) */
181 /* */
182 /* Start reading from the file sequence whose first file is x (subsequent */
183 /* files are obtained from NextFile). The first file (x) is to be fseeked */
184 /* to offs. When the sequence is done, ftype determines how to continue: */
185 /* */
186 /* ftype action */
187 /* */
188 /* SOURCE_FILE last input file ends, return @End \Input */
189 /* DATABASE_FILE database file, return @End \Input */
190 /* INCLUDE_FILE include file, must pop lexical analyser and continue */
191 /* FILTER_FILE filter file, return @End @FilterOut */
192 /* */
193 /* lnum is the line number at offs, to be used when creating file pos's */
194 /* in the tokens returned. same is TRUE when this file is the same as */
195 /* the file currently being read, in which case there is no need to */
196 /* close that file and open this one; just an fseek is required. */
197 /* */
198 /*****************************************************************************/
199
LexPush(FILE_NUM x,int offs,int ftyp,int lnum,BOOLEAN same)200 void LexPush(FILE_NUM x, int offs, int ftyp, int lnum, BOOLEAN same)
201 { int i;
202 debug5(DLA, DD, "LexPush(%s, %d, %s, %d, %s)", FileName(x), offs,
203 ftyp==SOURCE_FILE ? "source" : ftyp==INCLUDE_FILE ? "include":"database",
204 lnum, bool(same));
205 if( stack_free >= MAX_LEX_STACK - 1 )
206 { if( ftyp == INCLUDE_FILE )
207 Error(2, 1, "too many open files when opening include file %s; open files are:",
208 WARN, PosOfFile(x), FullFileName(x));
209 else
210 Error(2, 2, "too many open files when opening database file %s; open files are:",
211 WARN, PosOfFile(x), FileName(x));
212 for( i = stack_free - 1; i >= 0; i-- )
213 {
214 Error(2, 23, " %s", WARN, no_fpos,
215 EchoFileSource(lex_stack[i].this_file));
216 }
217 Error(2, 24, "exiting now", FATAL, no_fpos);
218 }
219 if( stack_free >= 0 ) /* save current state */
220 { lex_stack[stack_free].chpt = chpt;
221 lex_stack[stack_free].frst = frst;
222 lex_stack[stack_free].limit = limit;
223 lex_stack[stack_free].buf = buf;
224 lex_stack[stack_free].blksize = blksize;
225 lex_stack[stack_free].last_char = last_char;
226 lex_stack[stack_free].startline = startline;
227 lex_stack[stack_free].this_file = this_file;
228 lex_stack[stack_free].fp = fp;
229 lex_stack[stack_free].ftype = ftype;
230 lex_stack[stack_free].next_token = next_token;
231 lex_stack[stack_free].offset = offset;
232 lex_stack[stack_free].first_line_num = first_line_num;
233 lex_stack[stack_free].same_file = same_file;
234 lex_stack[stack_free].mem_block = mem_block;
235 FposCopy( lex_stack[stack_free].file_pos, file_pos );
236 }
237 stack_free += 1;
238 ifdebug(DMA, D,
239 DebugRegisterUsage(MEM_LEX,1, (MAX_LINE+BUFFER_SIZE+3)*sizeof(FULL_CHAR)));
240 mem_block = (FULL_CHAR *) malloc((MAX_LINE+BUFFER_SIZE+3)*sizeof(FULL_CHAR));
241 if( mem_block == NULL )
242 Error(2, 3, "run out of memory when opening file %s",
243 FATAL, PosOfFile(x), FullFileName(x));
244 buf = chpt = &mem_block[MAX_LINE];
245 last_char = CH_CR;
246 this_file = x; offset = offs;
247 first_line_num = lnum; same_file = same;
248 ftype = ftyp; next_token = nilobj;
249 *chpt = '\0';
250 if( same_file )
251 { lex_stack[stack_free-1].save_ftell = ftell(fp);
252 }
253 else
254 { fp = null;
255 }
256 } /* end LexPush */
257
258
259 /*****************************************************************************/
260 /* */
261 /* LexPop() - pop lexical analyser. */
262 /* */
263 /*****************************************************************************/
264
LexPop(void)265 void LexPop(void)
266 { debug0(DLA, DD, "LexPop()");
267 assert( stack_free > 0, "LexPop: stack_free <= 0!" );
268 stack_free--;
269 if( same_file )
270 { fseek(fp, lex_stack[stack_free].save_ftell, SEEK_SET);
271 }
272 else
273 { if( fp != null ) fclose(fp);
274 }
275 ifdebug(DMA, D,
276 DebugRegisterUsage(MEM_LEX,-1,-(MAX_LINE+BUFFER_SIZE+2)* (int) sizeof(FULL_CHAR))
277 );
278 free( (char *) mem_block);
279 mem_block = lex_stack[stack_free].mem_block;
280 chpt = lex_stack[stack_free].chpt;
281 frst = lex_stack[stack_free].frst;
282 limit = lex_stack[stack_free].limit;
283 buf = lex_stack[stack_free].buf;
284 blksize = lex_stack[stack_free].blksize;
285 last_char = lex_stack[stack_free].last_char;
286 startline = lex_stack[stack_free].startline;
287 this_file = lex_stack[stack_free].this_file;
288 fp = lex_stack[stack_free].fp;
289 ftype = lex_stack[stack_free].ftype;
290 next_token = lex_stack[stack_free].next_token;
291 offset = lex_stack[stack_free].offset;
292 first_line_num = lex_stack[stack_free].first_line_num;
293 same_file = lex_stack[stack_free].same_file;
294 FposCopy( file_pos, lex_stack[stack_free].file_pos );
295 } /* end LexPop */
296
297
298 /*@::setword(), LexNextTokenPos(), srcnext()@*********************************/
299 /* */
300 /* setword(typ, res, file_pos, str, len) */
301 /* */
302 /* Set variable res to a WORD or QWORD token containing string str, etc. */
303 /* */
304 /*****************************************************************************/
305
306 #define setword(typ, res, file_pos, str, len) \
307 { NewWord(res, typ, len, &file_pos); \
308 FposCopy(fpos(res), file_pos); \
309 for( ch = 0; ch < len; ch++ ) string(res)[ch] = str[ch]; \
310 string(res)[ch] = '\0'; \
311 }
312
313
314 /*****************************************************************************/
315 /* */
316 /* long LexNextTokenPos() */
317 /* */
318 /* Equivalent to ftell() on the (buffered) current lex file. */
319 /* */
320 /*****************************************************************************/
321
LexNextTokenPos(void)322 long LexNextTokenPos(void)
323 { long res;
324 if( next_token != nilobj )
325 Error(2, 4, "illegal macro invocation in database",
326 FATAL, &fpos(next_token));
327 res = ftell(fp) - (limit - chpt) - (buf - frst);
328 #if DB_FIX
329 /* uwe: 1997-11-04
330 *
331 * On NT under Visual C++ ftell() and fseek() always use binary
332 * positions, even if the file was opened in text mode. This means
333 * that every LF in between the CHPT and LIMIT was counted by
334 * ftell() as *TWO* bytes. The pointer arithmetic above adjusts the
335 * ftold value as lout has not yet read chars past CHPT, but it
336 * counts each LF as *ONE* byte, naturally.
337 *
338 * The code below compensates for this binary/text brain death.
339 *
340 * PS: gcc from Cygnus' gnuwin32 has sane ftell() and does *NOT*
341 * need this workaround (I haven't tried compiling lout with gcc
342 * though, as the result will need cygwin.dll to run).
343 */
344
345 /* *** retired by JeffK 2002-11-08 since now reading in binary mode
346 {
347 register FULL_CHAR *p;
348 for (p = chpt; p < limit; ++p) {
349 if (*p == (FULL_CHAR) CH_LF)
350 --res;
351 }
352 }
353 *** */
354 #endif /* DB_FIX */
355
356 debug1(DLA, DD, "LexNextTokenPos() returning %ld", res);
357 return res;
358 }
359
360
361 /*****************************************************************************/
362 /* */
363 /* static srcnext() */
364 /* */
365 /* Move to new line of input file. May need to recharge buffer. */
366 /* */
367 /* Patched JeffK 16/10/06 to fix bug when CRLF falls on block boundary. */
368 /* */
369 /*****************************************************************************/
370
srcnext(void)371 static void srcnext(void)
372 { register FULL_CHAR *col;
373 debugcond4(DLA, DD, stack_free <= 1,
374 "srcnext(); buf: %d, chpt: %d, frst: %d, limit: %d",
375 buf - mem_block, chpt - mem_block, frst - mem_block, limit - mem_block);
376
377 /* if time to transfer last line to area preceding buffer, do so */
378 if( blksize != 0 && chpt < limit )
379 { debugcond0(DLA, DD, stack_free <= 1, "srcnext: transferring.");
380 col = buf;
381 while( chtbl[(*--col = *--limit)] != NEWLINE );
382 frst = col + 1; limit++; blksize = 0;
383 }
384
385 /* if buffer is empty, read next block */
386 /*** changed by JK 9/92 from "if( chpt == limit )" to fix long lines bug */
387 if( chpt >= limit )
388 { if( chpt > limit+1 || (chpt == limit + 1 && chtbl[*limit] != NEWLINE) )
389 { col_num(file_pos) = 1;
390 Error(2, 5, "line is too long (or final newline missing)",
391 FATAL, &file_pos);
392 }
393 chpt = frst;
394 blksize = fread( (char *) buf, sizeof(char), BUFFER_SIZE, fp);
395 if( blksize > 0 )
396 last_char = *(buf + blksize - 1);
397 if( blksize < BUFFER_SIZE && chtbl[last_char] != NEWLINE )
398 {
399 /* at end of file since blksize < BUFFER_SIZE; add missing newline char */
400 blksize++;
401 last_char = *(buf+blksize-1) = CH_LF;
402
403 /* this adjustment breaks LexNextTokenPos, so fatal error if database */
404 if( ftype == DATABASE_FILE )
405 {
406 line_num(file_pos) = col_num(file_pos) = 0;
407 Error(2, 25, "a database file must end with a newline; this one doesn't",
408 FATAL, &file_pos);
409 }
410 }
411 debugcond4(DLA, DD, stack_free <= 1,
412 "srcnext: %d = fread(0x%x, %d, %d, fp)",
413 blksize, buf, sizeof(char), BUFFER_SIZE);
414 frst = buf; limit = buf + blksize; *limit = CH_LF; *(limit + 1) = CH_CR;
415 }
416
417 /* if nothing more to read, make this clear */
418 if( chpt >= limit )
419 { debugcond0(DLA, DD, stack_free <= 1, "srcnext: nothing more to read");
420 chpt = limit = buf; *limit = '\0';
421 }
422 debugcond4(DLA, DD, stack_free <= 1,
423 "srcnext returning; buf: %d, chpt: %d, frst: %d, limit: %d",
424 buf - mem_block, chpt - mem_block, frst - mem_block, limit - mem_block);
425 } /* end srcnext */
426
427
428 /*@::LexGetToken()@***********************************************************/
429 /* */
430 /* OBJECT LexGetToken() */
431 /* */
432 /* Get next token from input. Look it up in symbol table. */
433 /* */
434 /*****************************************************************************/
435
LexGetToken(void)436 OBJECT LexGetToken(void)
437 {
438 FULL_CHAR *startpos; /* where the latest token started */
439 register FULL_CHAR *p, *q; /* pointer to current input char */
440 register int ch; /* temporary character (really char) */
441 OBJECT res; /* result token */
442 int vcount, hcount; /* no. of newlines and spaces seen */
443
444 if( next_token != nilobj )
445 { next_token = Delete(res = next_token, PARENT);
446 debugcond4(DLA, DD, stack_free <= 1,
447 "LexGetToken%s (in macro) returning %d.%d %s",
448 EchoFilePos(&file_pos), vspace(res), hspace(res), EchoToken(res));
449 return res;
450 }
451
452 res = nilobj; p = chpt;
453 vcount = hcount = 0;
454 do switch( chtbl[*p++] )
455 {
456 case ESCAPE:
457
458 if( ftype==DATABASE_FILE && *p>='a' && *p<='z' && *(p+1) == '{' )
459 { res = NewToken(LBR, &file_pos, 0, 0, (unsigned) *p, StartSym);
460 p += 2;
461 }
462 else
463 {
464 col_num(file_pos) = (startpos = p-1) - startline;
465 Error(2, 6, "character %c outside quoted string",
466 WARN, &file_pos, *startpos);
467 }
468 break;
469
470
471 case COMMENT:
472
473 debug1(DLA, DDD, "LexGetToken%s: comment", EchoFilePos(&file_pos));
474 while( chtbl[(ch = *p++)] != NEWLINE && ch != '\0' );
475 if( chtbl[ch] == NEWLINE )
476 {
477 /* skip over second newline character if any */
478 if( ch == CH_LF )
479 {
480 if( *p == CH_CR )
481 p++;
482 }
483 else /* ch == CH_CR */
484 {
485 if( *p == CH_LF )
486 p++;
487 }
488
489 /* do NEWLINE action, only preserve existing horizontal space */
490 /* and don't count the newline in the vcount. */
491 chpt = p; srcnext();
492 line_num(file_pos)++;
493 col_num(file_pos) = 0;
494 startline = (p = chpt) - 1;
495 }
496 else
497 {
498 --p;
499 }
500 break;
501
502
503 case CSPACE:
504 case FORMFEED:
505
506 hcount++;
507 break;
508
509
510 case TAB:
511
512 hcount += 8;
513 break;
514
515
516 case NEWLINE:
517
518 /* skip over second newline character if any */
519 if( *(p-1) == CH_LF )
520 {
521 if( *p == CH_CR )
522 p++;
523 }
524 else /* *(p-1) == CH_CR */
525 {
526 if( *p == CH_LF )
527 p++;
528 }
529
530 /* do newline action */
531 chpt = p; srcnext();
532 line_num(file_pos)++;
533 col_num(file_pos) = 0;
534 vcount++; hcount = 0;
535 startline = (p = chpt) - 1;
536 break;
537
538
539 case ENDFILE:
540
541 debug0(DLA, DDD, "LexGetToken: endfile");
542 if( !same_file )
543 {
544 /* close current file, if any */
545 if( fp != null )
546 { fclose(fp); fp = null;
547 this_file = ftype == SOURCE_FILE ? NextFile(this_file) : NO_FILE;
548 }
549
550 /* open next file */
551 while( this_file != NO_FILE )
552 { file_num(file_pos) = this_file;
553 line_num(file_pos) = 1;
554 col_num(file_pos) = 0;
555 fp = OpenFile(this_file, FALSE, TRUE);
556 if( fp != null ) break;
557 Error(2, 7, "cannot open file %s",
558 WARN, &file_pos, FullFileName(this_file));
559 this_file = ftype == SOURCE_FILE ? NextFile(this_file) : NO_FILE;
560 }
561 }
562
563 if( fp != null )
564 { if( offset != 0 )
565 { debugcond1(DLA, DD, stack_free <= 1, "fseek(fp, %d, SEEK_SET)", offset);
566 fseek(fp, (long) offset, SEEK_SET);
567 offset = 0L;
568 line_num(file_pos) = first_line_num;
569 }
570 frst = limit = chpt = buf;
571 blksize = 0; last_char = CH_LF;
572 srcnext();
573 startline = (p = chpt) - 1;
574 hcount = 0;
575 }
576
577 /* no next file, so take continuation */
578 else switch( ftype )
579 {
580
581 case SOURCE_FILE:
582 case DATABASE_FILE:
583
584 /* input ends with "@End \Input" then UNEXPECTED_EOF */
585 res = NewToken(END, &file_pos, 0, 0, END_PREC, StartSym);
586 next_token = NewToken(UNEXPECTED_EOF, &file_pos,0,0,NO_PREC,nilobj);
587 --p; startline = p;
588 break;
589
590
591 case FILTER_FILE:
592
593 /* input ends with "@End @FilterOut" */
594 res = NewToken(END, &file_pos, 0, 0, END_PREC, FilterOutSym);
595 --p; startline = p;
596 break;
597
598
599 case INCLUDE_FILE:
600
601 LexPop();
602 p = chpt;
603 hcount = 0;
604 break;
605
606
607 default:
608
609 assert(FALSE, "unknown file type");
610 break;
611
612 } /* end switch */
613 break;
614
615
616 case OTHER:
617
618 col_num(file_pos) = (startpos = p-1) - startline;
619 while( chtbl[*p++] == OTHER );
620 /* using ch as a real int here, not a char */
621 ch = p - startpos - 1;
622 do
623 { res = SearchSym(startpos, ch);
624 --ch; --p;
625 } while( ch > 0 && res == nilobj );
626 goto MORE; /* 7 lines down */
627
628
629 case LETTER:
630
631 col_num(file_pos) = (startpos = p-1) - startline;
632 while( chtbl[*p++] == LETTER ); --p;
633 res = SearchSym(startpos, p - startpos);
634
635 MORE: if( res == nilobj )
636 { setword(WORD, res, file_pos, startpos, p-startpos);
637 }
638 else if( type(res) == MACRO )
639 { if( recursive(res) )
640 { Error(2, 8, "recursion in macro", WARN, &file_pos);
641 setword(WORD, res, file_pos, startpos, p-startpos);
642 }
643 else
644 { res = CopyTokenList( sym_body(res), &file_pos );
645 if( res != nilobj ) next_token = Delete(res, PARENT);
646 else hcount = 0;
647 }
648 }
649 else if( predefined(res) == 0 )
650 {
651 /* nice try but does not work for @Database @FontDef { ... } !! ***
652 if( res == FontDefSym && ftype != DATABASE_FILE )
653 Error(2, 26, "%s may only appear in a database file", FATAL,
654 &file_pos, KW_FONTDEF);
655 *** */
656 res = NewToken(CLOSURE, &file_pos, 0, 0, precedence(res), res);
657 }
658 else if( predefined(res) == INCLUDE || predefined(res) == SYS_INCLUDE )
659 { OBJECT t, fname; FILE_NUM fnum; int len; BOOLEAN scope_suppressed;
660 chpt = p;
661 t = LexGetToken();
662 scope_suppressed = (type(t)==WORD && StringEqual(string(t), KW_LBR));
663
664 if( type(t)!=LBR && !scope_suppressed )
665 { Error(2, 9, "%s expected (after %s)",
666 WARN, &fpos(t), KW_LBR, SymName(res));
667 Dispose(t);
668 res = nilobj;
669 break;
670 }
671 if( scope_suppressed )
672 { UnSuppressScope();
673 Dispose(t);
674 New(t, LBR);
675 }
676 fname = Parse(&t, nilobj, FALSE, FALSE);
677 fname = ReplaceWithTidy(fname, ACAT_TIDY);
678 if( scope_suppressed ) SuppressScope();
679 if( !is_word(type(fname)) )
680 { Error(2, 10, "name of include file expected here",
681 WARN, &fpos(fname));
682 Dispose(fname);
683 res = nilobj;
684 break;
685 }
686 len = StringLength(string(fname)) - StringLength(SOURCE_SUFFIX);
687 if( len >= 0 && StringEqual(&string(fname)[len], SOURCE_SUFFIX) )
688 StringCopy(&string(fname)[len], STR_EMPTY);
689 if( !InDefinitions ||
690 (FileNum(string(fname), STR_EMPTY) == NO_FILE &&
691 FileNum(string(fname), SOURCE_SUFFIX) == NO_FILE) )
692 {
693 /* need to define and read this include file */
694 debug4(DFS, D, " calling DefineFile %s from LexGetToken (%s, %d, %d)",
695 string(fname), bool(InDefinitions),
696 FileNum(string(fname), STR_EMPTY),
697 FileNum(string(fname), SOURCE_SUFFIX));
698 fnum = DefineFile(string(fname), STR_EMPTY, &fpos(fname),
699 INCLUDE_FILE,
700 predefined(res)==INCLUDE ? INCLUDE_PATH : SYSINCLUDE_PATH);
701 Dispose(fname);
702 LexPush(fnum, 0, INCLUDE_FILE, 1, FALSE);
703 res = LexGetToken();
704 vcount++; /** TEST ADDITION! **/
705 p = chpt;
706 }
707 else
708 {
709 debug1(DFS, D, " skipping DefineFile %s from LexGetToken",
710 string(fname));
711 res = nilobj;
712 p = chpt;
713 Dispose(fname);
714 break;
715 }
716 }
717 else if( predefined(res) == END )
718 res = NewToken(predefined(res), &file_pos,0,0,precedence(res),nilobj);
719 else
720 res = NewToken(predefined(res), &file_pos,0,0,precedence(res),res);
721 break;
722
723
724 case QUOTE:
725
726 col_num(file_pos) = (startpos = q = p) - 1 - startline;
727 do switch( chtbl[*q++ = *p++] )
728 {
729 case OTHER:
730 case LETTER:
731 case COMMENT:
732 case CSPACE:
733 case FORMFEED:
734 case TAB: break;
735
736 case NEWLINE:
737 case ENDFILE: --p;
738 Error(2, 11, "unterminated string", WARN, &file_pos);
739 setword(QWORD, res, file_pos, startpos, q-1-startpos);
740 break;
741
742 case QUOTE: setword(QWORD, res, file_pos, startpos, q-1-startpos);
743 break;
744
745 case ESCAPE: q--;
746 if( chtbl[*p] == NEWLINE || chtbl[*p] == ENDFILE )
747 { Error(2, 12, "unterminated string", WARN, &file_pos);
748 setword(QWORD, res, file_pos, startpos, q-startpos);
749 }
750 else if( octaldigit(*p) )
751 { int count, ch;
752 count = ch = 0;
753 do
754 { ch = ch * 8 + digitchartonum(*p++);
755 count++;
756 } while( octaldigit(*p) && count < 3 );
757 if( ch == '\0' )
758 Error(2, 13, "skipping null character in string",
759 WARN, &file_pos);
760 else *q++ = ch;
761 }
762 else *q++ = *p++;
763 break;
764
765 default: Error(2, 14, "LexGetToken: error in quoted string",
766 INTERN, &file_pos);
767 break;
768
769 } while( res == nilobj );
770 break;
771
772
773 default:
774
775 assert(FALSE, "LexGetToken: bad chtbl[]");
776 break;
777
778 } while( res == nilobj );
779
780 if( p - startline >= MAX_LINE )
781 { col_num(file_pos) = 1;
782 Error(2, 15, "line is too long (or final newline missing)",FATAL,&file_pos);
783 }
784
785 chpt = p;
786 vspace(res) = vcount;
787 hspace(res) = hcount;
788 debug5(DLA, DD, "LexGetToken%s returning %s %s %d.%d",
789 EchoFilePos(&file_pos), Image(type(res)), EchoToken(res),
790 vspace(res), hspace(res));
791 /* ***
792 debugcond5(DLA, DD, stack_free <= 1, "LexGetToken%s returning %s %s %d.%d",
793 EchoFilePos(&file_pos), Image(type(res)), EchoToken(res),
794 vspace(res), hspace(res));
795 *** */
796 return res;
797 } /* end LexGetToken */
798
799
800 /*@::LexScanVerbatim@*********************************************************/
801 /* */
802 /* OBJECT LexScanVerbatim(fp, end_stop, err_pos, lessskip) */
803 /* */
804 /* Scan input file and transfer to filter file fp, or if that is NULL, make */
805 /* a VCAT of objects, one per line (or just a WORD if one line only), and */
806 /* return that object as the result. If end_stop, terminate at @End, else */
807 /* terminate at matching right brace. */
808 /* */
809 /* If lessskip is true it means that we should skip only up to and */
810 /* including the first newline character sequence, as opposed to the usual */
811 /* skipping of all initial white space characters. */
812 /* */
813 /*****************************************************************************/
814
815 /*****************************************************************************/
816 /* */
817 /* print(ch) */
818 /* */
819 /* Add ch to the result of the verbatim scan. If chtbl[ch] == NEWLINE, */
820 /* this means to add whatever sequence of characters counts as the end */
821 /* of line sequence in today's operating system. Otherwise, just add ch. */
822 /* */
823 /* The code that calls print() ensures that only one character is passed */
824 /* to print() per newline sequence; this could be either CH_LF or CH_CR. */
825 /* */
826 /*****************************************************************************/
827
828 #define print(ch) \
829 { debug2(DLA, D, "print(%c), bufftop = %d", ch, bufftop); \
830 if( fp == NULL ) \
831 { if( bufftop < MAX_BUFF ) \
832 { if( chtbl[ch] == NEWLINE ) \
833 { res = BuildLines(res, buff, &bufftop, hs_lnum_overshoot); \
834 } \
835 else buff[bufftop++] = ch; \
836 } \
837 } \
838 else if( chtbl[ch] != NEWLINE ) \
839 putc(ch, fp); \
840 else \
841 StringFPuts(STR_NEWLINE, fp); \
842 }
843
844 /*****************************************************************************/
845 /* */
846 /* hold(ch) */
847 /* */
848 /* At this point we are thinking about print(ch) but we are not certain */
849 /* yet, so ch is held pending a decision. */
850 /* */
851 /* The code that calls hold() ensures that only one character is passed */
852 /* to hold() per newline sequence; this could be either CH_LF or CH_CR. */
853 /* */
854 /*****************************************************************************/
855
856 #define hold(ch) \
857 { if( hs_top == MAX_BUFF ) clear(); \
858 hs_buff[hs_top++] = ch; \
859 if( chtbl[ch] == NEWLINE ) \
860 hs_lnum_overshoot++; \
861 }
862
863 /*****************************************************************************/
864 /* */
865 /* clear() */
866 /* */
867 /* A decision has been made that all currently held characters are now */
868 /* to be printed. */
869 /* */
870 /*****************************************************************************/
871
872 #define clear() \
873 { int i; \
874 for( i = 0; i < hs_top; i++ ) \
875 { print(hs_buff[i]); \
876 if( chtbl[hs_buff[i]] == NEWLINE ) \
877 hs_lnum_overshoot--; \
878 } \
879 hs_top = 0; \
880 assert(hs_lnum_overshoot == 0, "clear(): hs_lnum_overshoot!"); \
881 }
882
883 /*****************************************************************************/
884 /* */
885 /* OBJECT BuildLines(current, buff, bufftop, ladj) */
886 /* */
887 /* Add one line containing word buff to the growing set of verbatim lines. */
888 /* Adjust the line number of file_pos by ladj to account for extra lines */
889 /* that we may be holding, which would otherwise cause us to overestimate */
890 /* which line we are on. */
891 /* */
892 /*****************************************************************************/
893
BuildLines(OBJECT current,FULL_CHAR * buff,int * bufftop,int ladj)894 static OBJECT BuildLines(OBJECT current, FULL_CHAR *buff, int *bufftop, int ladj)
895 { OBJECT wd, res, gp, gpword; int ch; FILE_POS xfp;
896
897 /* adjust file position since we may have been holding stuff */
898 file_num(xfp) = file_num(file_pos);
899 line_num(xfp) = line_num(file_pos) - ladj;
900 col_num(xfp) = 1;
901
902 /* build a new word and reset the buffer */
903 setword(WORD, wd, xfp, buff, *bufftop);
904 debug1(DLA, D, "BuildLines(current, %s)", EchoObject(wd));
905 *bufftop = 0;
906
907 if( current == nilobj )
908 {
909 /* if this is the first word, make it the result */
910 res = wd;
911 }
912 else
913 {
914 /* if this is the second word, make the result a VCAT */
915 if( type(current) == WORD )
916 { New(res, VCAT);
917 FposCopy(fpos(res), fpos(current));
918 Link(res, current);
919 }
920 else res = current;
921
922 /* now attach the new word to res, preceded by a one-line gap */
923 New(gp, GAP_OBJ);
924 mark(gap(gp)) = FALSE;
925 join(gap(gp)) = FALSE;
926 FposCopy(fpos(gp), xfp);
927 gpword = MakeWord(WORD, AsciiToFull("1vx"), &xfp);
928 Link(gp, gpword);
929 Link(res, gp);
930 Link(res, wd);
931 }
932 debug1(DLA, D, "BuildLines returning %s", EchoObject(res));
933 return res;
934 }
935
LexScanVerbatim(fp,end_stop,err_pos,lessskip)936 OBJECT LexScanVerbatim(fp, end_stop, err_pos, lessskip)
937 FILE *fp; BOOLEAN end_stop; FILE_POS *err_pos; BOOLEAN lessskip;
938 {
939 register FULL_CHAR *p; /* pointer to current input char */
940 int depth; /* depth of nesting of { ... } */
941 BOOLEAN finished; /* TRUE when finished */
942 BOOLEAN skipping; /* TRUE when skipping initial spaces */
943 FULL_CHAR hs_buff[MAX_BUFF]; /* hold spaces here in case last */
944 int hs_top; /* next free spot in hs_buff */
945 int hs_lnum_overshoot = 0; /* subtract this from lnum */
946 FULL_CHAR buff[MAX_BUFF]; /* hold line here if not to file */
947 int bufftop; /* top of buff */
948 OBJECT res = nilobj; /* result object if not to file */
949
950 debug3(DLA, D, "LexScanVerbatim(fp, %s, %s, %s)",
951 bool(end_stop), EchoFilePos(err_pos), bool(lessskip));
952 if( next_token != nilobj )
953 { Error(2, 16, "filter parameter in macro", FATAL, err_pos);
954 }
955
956 p = chpt; depth = 0;
957 finished = FALSE;
958 skipping = TRUE;
959 hs_top = 0;
960 bufftop = 0;
961 while( !finished ) switch( chtbl[*p++] )
962 {
963 case ESCAPE:
964 case COMMENT:
965 case QUOTE:
966
967 skipping = FALSE;
968 clear();
969 print(*(p-1));
970 break;
971
972
973 case CSPACE:
974 case TAB:
975 case FORMFEED:
976
977 if( !skipping ) hold(*(p-1));
978 break;
979
980
981 case NEWLINE:
982
983 /* skip over second newline character if any */
984 if( *(p-1) == CH_LF )
985 {
986 if( *p == CH_CR )
987 p++;
988 }
989 else /* c == CH_CR */
990 {
991 if( *p == CH_LF )
992 p++;
993 }
994
995 /* sort out skipping and holding */
996 if( !skipping ) hold(*(p-1));
997 if( lessskip ) skipping = FALSE;
998
999 /* perform newline action */
1000 chpt = p; srcnext();
1001 line_num(file_pos)++;
1002 col_num(file_pos) = 0;
1003 startline = (p = chpt) - 1;
1004 break;
1005
1006
1007 case ENDFILE:
1008
1009 if( fp == NULL )
1010 Error(2, 22, "end of file reached while reading %s",
1011 FATAL, err_pos, lessskip ? KW_RAWVERBATIM : KW_VERBATIM);
1012 else
1013 Error(2, 17, "end of file reached while reading filter parameter",
1014 FATAL, err_pos);
1015 break;
1016
1017
1018 case OTHER:
1019
1020 skipping = FALSE;
1021 if( *(p-1) == '{' /*}*/ )
1022 { clear();
1023 print(*(p-1));
1024 depth++;
1025 }
1026 else if( *(p-1) == /*{*/ '}' )
1027 { if( !end_stop && depth == 0 )
1028 { p--;
1029 finished = TRUE;
1030 }
1031 else
1032 { clear();
1033 print(*(p-1));
1034 depth--;
1035 }
1036 }
1037 else
1038 { clear();
1039 print(*(p-1));
1040 }
1041 break;
1042
1043
1044 case LETTER:
1045
1046 skipping = FALSE;
1047 if( *(p-1) == '@' )
1048 {
1049 p--;
1050 if( end_stop && StringBeginsWith(p, KW_END) )
1051 { finished = TRUE;
1052 }
1053 else if( StringBeginsWithWord(p, KW_INCLUDE) ||
1054 StringBeginsWithWord(p, KW_SYSINCLUDE) )
1055 {
1056 OBJECT incl_fname, t; FILE *incl_fp; int ch; FILE_NUM fnum;
1057 BOOLEAN sysinc = StringBeginsWith(p, KW_SYSINCLUDE);
1058 clear();
1059 p += sysinc ? StringLength(KW_SYSINCLUDE):StringLength(KW_INCLUDE);
1060 chpt = p;
1061 t = LexGetToken();
1062 if( type(t) != LBR ) Error(2, 18, "expected %s here (after %s)",
1063 FATAL, &fpos(t), KW_LBR, sysinc ? KW_SYSINCLUDE : KW_INCLUDE);
1064 incl_fname = Parse(&t, nilobj, FALSE, FALSE);
1065 p = chpt;
1066 incl_fname = ReplaceWithTidy(incl_fname, ACAT_TIDY);
1067 if( !is_word(type(incl_fname)) )
1068 Error(2, 19, "expected file name here", FATAL,&fpos(incl_fname));
1069 debug0(DFS, D, " calling DefineFile from LexScanVerbatim");
1070 fnum = DefineFile(string(incl_fname), STR_EMPTY, &fpos(incl_fname),
1071 INCLUDE_FILE, sysinc ? SYSINCLUDE_PATH : INCLUDE_PATH);
1072 Dispose(incl_fname);
1073 incl_fp = OpenFile(fnum, FALSE, TRUE);
1074 if( incl_fp == NULL )
1075 Error(2, 20, "cannot open include file %s",
1076 FATAL, PosOfFile(fnum), FullFileName(fnum));
1077 while( (ch = getc(incl_fp)) != EOF )
1078 print(ch);
1079 fclose(incl_fp);
1080 }
1081 else
1082 { clear();
1083 print(*p);
1084 p++;
1085 }
1086 }
1087 else
1088 { clear();
1089 print(*(p-1));
1090 }
1091 break;
1092
1093
1094 default:
1095
1096 Error(2, 22, "unreadable character (octal %o)",INTERN,&file_pos,*(p-1));
1097 assert(FALSE, "LexScanVerbatim: bad chtbl[]");
1098 break;
1099
1100 };
1101 print(CH_LF);
1102
1103 if( p - startline >= MAX_LINE )
1104 { col_num(file_pos) = 1;
1105 Error(2, 21, "line is too long (or final newline missing)",FATAL,&file_pos);
1106 }
1107
1108 chpt = p;
1109 if( fp == NULL && res == nilobj )
1110 res = MakeWord(WORD, STR_EMPTY, &file_pos);
1111
1112 debug2(DLA, D, "LexScanVerbatim returning %s at %s",
1113 EchoObject(res), EchoFilePos(&file_pos));
1114 return res;
1115 } /* end LexScanVerbatim */
1116