1 /* lexical analysis of RCS files */
2 
3 /******************************************************************************
4  *                     Lexical Analysis.
5  *                     hashtable, Lexinit, nextlex, getlex, getkey,
6  *                     getid, getnum, readstring, printstring, savestring,
7  *                     checkid, fatserror, error, faterror, warn, diagnose
8  *                     Testprogram: define LEXDB
9  ******************************************************************************
10  */
11 
12 /* Copyright 1982, 1988, 1989 Walter Tichy
13    Copyright 1990, 1991, 1992, 1993, 1994, 1995 Paul Eggert
14    Distributed under license by the Free Software Foundation, Inc.
15 
16 This file is part of RCS.
17 
18 RCS is free software; you can redistribute it and/or modify
19 it under the terms of the GNU General Public License as published by
20 the Free Software Foundation; either version 2, or (at your option)
21 any later version.
22 
23 RCS is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26 GNU General Public License for more details.
27 
28 You should have received a copy of the GNU General Public License
29 along with RCS; see the file COPYING.
30 If not, write to the Free Software Foundation,
31 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
32 
33 Report problems and direct all questions to:
34 
35     rcs-bugs@cs.purdue.edu
36 
37 */
38 
39 
40 
41 /*
42  * $Log: rcslex.c,v $
43  * Revision 5.19  1995/06/16 06:19:24  eggert
44  * Update FSF address.
45  *
46  * Revision 5.18  1995/06/01 16:23:43  eggert
47  * (map_fd_deallocate,mmap_deallocate,read_deallocate,nothing_to_deallocate):
48  * New functions.
49  * (Iclose): If large_memory and maps_memory, use them to deallocate mapping.
50  * (fd2RILE): Use map_fd if available.
51  * If one mapping method fails, try the next instead of giving up;
52  * if they all fail, fall back on ordinary read.
53  * Work around bug: root mmap over NFS succeeds, but accessing dumps core.
54  * Use MAP_FAILED macro for mmap failure, and `char *' instead of caddr_t.
55  * (advise_access): Use madvise only if this instance used mmap.
56  * (Iopen): Use fdSafer to get safer file descriptor.
57  * (aflush): Moved here from rcsedit.c.
58  *
59  * Revision 5.17  1994/03/20 04:52:58  eggert
60  * Don't worry if madvise fails.  Add Orewind.  Remove lint.
61  *
62  * Revision 5.16  1993/11/09 17:55:29  eggert
63  * Fix `label: }' typo.
64  *
65  * Revision 5.15  1993/11/03 17:42:27  eggert
66  * Improve quality of diagnostics by putting file names in them more often.
67  * Don't discard ignored phrases.
68  *
69  * Revision 5.14  1992/07/28  16:12:44  eggert
70  * Identifiers may now start with a digit and (unless they are symbolic names)
71  * may contain `.'.  Avoid `unsigned'.  Statement macro names now end in _.
72  *
73  * Revision 5.13  1992/02/17  23:02:27  eggert
74  * Work around NFS mmap SIGBUS problem.
75  *
76  * Revision 5.12  1992/01/06  02:42:34  eggert
77  * Use OPEN_O_BINARY if mode contains 'b'.
78  *
79  * Revision 5.11  1991/11/03  03:30:44  eggert
80  * Fix porting bug to ancient hosts lacking vfprintf.
81  *
82  * Revision 5.10  1991/10/07  17:32:46  eggert
83  * Support piece tables even if !has_mmap.
84  *
85  * Revision 5.9  1991/09/24  00:28:42  eggert
86  * Don't export errsay().
87  *
88  * Revision 5.8  1991/08/19  03:13:55  eggert
89  * Add eoflex(), mmap support.  Tune.
90  *
91  * Revision 5.7  1991/04/21  11:58:26  eggert
92  * Add MS-DOS support.
93  *
94  * Revision 5.6  1991/02/25  07:12:42  eggert
95  * Work around fputs bug.  strsave -> str_save (DG/UX name clash)
96  *
97  * Revision 5.5  1990/12/04  05:18:47  eggert
98  * Use -I for prompts and -q for diagnostics.
99  *
100  * Revision 5.4  1990/11/19  20:05:28  hammer
101  * no longer gives warning about unknown keywords if -q is specified
102  *
103  * Revision 5.3  1990/11/01  05:03:48  eggert
104  * When ignoring unknown phrases, copy them to the output RCS file.
105  *
106  * Revision 5.2  1990/09/04  08:02:27  eggert
107  * Count RCS lines better.
108  *
109  * Revision 5.1  1990/08/29  07:14:03  eggert
110  * Work around buggy compilers with defective argument promotion.
111  *
112  * Revision 5.0  1990/08/22  08:12:55  eggert
113  * Remove compile-time limits; use malloc instead.
114  * Report errno-related errors with perror().
115  * Ansify and Posixate.  Add support for ISO 8859.
116  * Use better hash function.
117  *
118  * Revision 4.6  89/05/01  15:13:07  narten
119  * changed copyright header to reflect current distribution rules
120  *
121  * Revision 4.5  88/08/28  15:01:12  eggert
122  * Don't loop when writing error messages to a full filesystem.
123  * Flush stderr/stdout when mixing output.
124  * Yield exit status compatible with diff(1).
125  * Shrink stdio code size; allow cc -R; remove lint.
126  *
127  * Revision 4.4  87/12/18  11:44:47  narten
128  * fixed to use "varargs" in "fprintf"; this is required if it is to
129  * work on a SPARC machine such as a Sun-4
130  *
131  * Revision 4.3  87/10/18  10:37:18  narten
132  * Updating version numbers. Changes relative to 1.1 actually relative
133  * to version 4.1
134  *
135  * Revision 1.3  87/09/24  14:00:17  narten
136  * Sources now pass through lint (if you ignore printf/sprintf/fprintf
137  * warnings)
138  *
139  * Revision 1.2  87/03/27  14:22:33  jenkins
140  * Port to suns
141  *
142  * Revision 4.1  83/03/25  18:12:51  wft
143  * Only changed $Header to $Id.
144  *
145  * Revision 3.3  82/12/10  16:22:37  wft
146  * Improved error messages, changed exit status on error to 1.
147  *
148  * Revision 3.2  82/11/28  21:27:10  wft
149  * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h.
150  * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations
151  * properly in case there is an IO-error (e.g., file system full).
152  *
153  * Revision 3.1  82/10/11  19:43:56  wft
154  * removed unused label out:;
155  * made sure all calls to getc() return into an integer, not a char.
156  */
157 
158 
159 /*
160 #define LEXDB
161 */
/* Version LEXDB is for testing the lexical analyzer.  The test program
 * reads a stream of lexemes, enters the revision numbers into the
 * hashtable, and prints the recognized tokens.  Keywords are recognized
 * as identifiers.
 */
167 
168 
169 
170 #include "rcsbase.h"
171 
172 libId(lexId, "$Id: rcslex.c,v 5.19 1995/06/16 06:19:24 eggert Exp $")
173 
174 static char *checkidentifier P((char*,int,int));
175 static void errsay P((char const*));
176 static void fatsay P((char const*));
177 static void lookup P((char const*));
178 static void startsay P((const char*,const char*));
179 static void warnsay P((char const*));
180 
181 static struct hshentry *nexthsh;  /*pointer to next hash entry, set by lookup*/
182 
183 enum tokens     nexttok;    /*next token, set by nextlex                    */
184 
185 int             hshenter;   /*if true, next suitable lexeme will be entered */
186                             /*into the symbol table. Handle with care.      */
187 int             nextc;      /*next input character, initialized by Lexinit  */
188 
189 long		rcsline;    /*current line-number of input		    */
190 int             nerror;     /*counter for errors                            */
191 int             quietflag;  /*indicates quiet mode                          */
192 RILE *		finptr;	    /*input file descriptor			    */
193 
194 FILE *          frewrite;   /*file descriptor for echoing input             */
195 
196 FILE *		foutptr;    /* copy of frewrite, but 0 to suppress echo  */
197 
198 static struct buf tokbuf;   /* token buffer				    */
199 
200 char const *    NextString; /* next token				    */
201 
202 /*
203  * Our hash algorithm is h[0] = 0, h[i+1] = 4*h[i] + c,
204  * so hshsize should be odd.
205  * See B J McKenzie, R Harries & T Bell, Selecting a hashing algorithm,
206  * Software--practice & experience 20, 2 (Feb 1990), 209-224.
207  */
208 #ifndef hshsize
209 #	define hshsize 511
210 #endif
211 
212 static struct hshentry *hshtab[hshsize]; /*hashtable			    */
213 
214 static int ignored_phrases; /* have we ignored phrases in this RCS file? */
215 
216     void
warnignore()217 warnignore()
218 {
219     if (!ignored_phrases) {
220 	ignored_phrases = true;
221 	rcswarn("Unknown phrases like `%s ...;' are present.", NextString);
222     }
223 }
224 
225 
226 
227 	static void
lookup(str)228 lookup(str)
229 	char const *str;
230 /* Function: Looks up the character string pointed to by str in the
231  * hashtable. If the string is not present, a new entry for it is created.
232  * In any case, the address of the corresponding hashtable entry is placed
233  * into nexthsh.
234  */
235 {
236 	register unsigned ihash;  /* index into hashtable */
237 	register char const *sp;
238 	register struct hshentry *n, **p;
239 
240         /* calculate hash code */
241 	sp = str;
242         ihash = 0;
243 	while (*sp)
244 		ihash  =  (ihash<<2) + *sp++;
245 	ihash %= hshsize;
246 
247 	for (p = &hshtab[ihash];  ;  p = &n->nexthsh)
248 		if (!(n = *p)) {
249 			/* empty slot found */
250 			*p = n = ftalloc(struct hshentry);
251 			n->num = fstr_save(str);
252 			n->nexthsh = 0;
253 #			ifdef LEXDB
254 				VOID printf("\nEntered: %s at %u ", str, ihash);
255 #			endif
256 			break;
257 		} else if (strcmp(str, n->num) == 0)
258 			/* match found */
259 			break;
260 	nexthsh = n;
261 	NextString = n->num;
262 }
263 
264 
265 
266 
267 
268 
269 	void
Lexinit()270 Lexinit()
271 /* Function: Initialization of lexical analyzer:
272  * initializes the hashtable,
273  * initializes nextc, nexttok if finptr != 0
274  */
275 {       register int            c;
276 
277 	for (c = hshsize;  0 <= --c;  ) {
278 		hshtab[c] = 0;
279         }
280 
281 	nerror = 0;
282 	if (finptr) {
283 		foutptr = 0;
284 		hshenter = true;
285 		ignored_phrases = false;
286 		rcsline = 1;
287 		bufrealloc(&tokbuf, 2);
288 		Iget_(finptr, nextc)
289                 nextlex();            /*initial token*/
290         }
291 }
292 
293 
294 
295 
296 
297 
298 
299 	void
nextlex()300 nextlex()
301 
302 /* Function: Reads the next token and sets nexttok to the next token code.
303  * Only if hshenter is set, a revision number is entered into the
304  * hashtable and a pointer to it is placed into nexthsh.
305  * This is useful for avoiding that dates are placed into the hashtable.
306  * For ID's and NUM's, NextString is set to the character string.
307  * Assumption: nextc contains the next character.
308  */
309 {       register c;
310 	declarecache;
311 	register FILE *frew;
312         register char * sp;
313 	char const *limit;
314         register enum tokens d;
315 	register RILE *fin;
316 
317 	fin=finptr; frew=foutptr;
318 	setupcache(fin); cache(fin);
319 	c = nextc;
320 
321 	for (;;) { switch ((d = ctab[c])) {
322 
323 	default:
324 		fatserror("unknown character `%c'", c);
325 		/*NOTREACHED*/
326 
327         case NEWLN:
328 		++rcsline;
329 #               ifdef LEXDB
330 		afputc('\n',stdout);
331 #               endif
332                 /* Note: falls into next case */
333 
334         case SPACE:
335 		GETC_(frew, c)
336 		continue;
337 
338 	case IDCHAR:
339 	case LETTER:
340 	case Letter:
341 		d = ID;
342 		/* fall into */
343 	case DIGIT:
344 	case PERIOD:
345 		sp = tokbuf.string;
346 		limit = sp + tokbuf.size;
347 		*sp++ = c;
348 		for (;;) {
349 			GETC_(frew, c)
350 			switch (ctab[c]) {
351 			    case IDCHAR:
352 			    case LETTER:
353 			    case Letter:
354 				d = ID;
355 				/* fall into */
356 			    case DIGIT:
357 			    case PERIOD:
358 				*sp++ = c;
359 				if (limit <= sp)
360 					sp = bufenlarge(&tokbuf, &limit);
361 				continue;
362 
363 			    default:
364 				break;
365 			}
366 			break;
367                 }
368 		*sp = 0;
369 		if (d == DIGIT  ||  d == PERIOD) {
370 			d = NUM;
371 			if (hshenter) {
372 				lookup(tokbuf.string);
373 				break;
374 			}
375 		}
376 		NextString = fstr_save(tokbuf.string);
377 		break;
378 
379         case SBEGIN: /* long string */
380 		d = STRING;
381                 /* note: only the initial SBEGIN has been read*/
382                 /* read the string, and reset nextc afterwards*/
383 		break;
384 
385 	case COLON:
386 	case SEMI:
387 		GETC_(frew, c)
388 		break;
389 	} break; }
390 	nextc = c;
391 	nexttok = d;
392 	uncache(fin);
393 }
394 
395 	int
eoflex()396 eoflex()
397 /*
398  * Yield true if we look ahead to the end of the input, false otherwise.
399  * nextc becomes undefined at end of file.
400  */
401 {
402 	register int c;
403 	declarecache;
404 	register FILE *fout;
405 	register RILE *fin;
406 
407 	c = nextc;
408 	fin = finptr;
409 	fout = foutptr;
410 	setupcache(fin); cache(fin);
411 
412 	for (;;) {
413 		switch (ctab[c]) {
414 			default:
415 				nextc = c;
416 				uncache(fin);
417 				return false;
418 
419 			case NEWLN:
420 				++rcsline;
421 				/* fall into */
422 			case SPACE:
423 				cachegeteof_(c, {uncache(fin);return true;})
424 				break;
425 		}
426 		if (fout)
427 			aputc_(c, fout)
428 	}
429 }
430 
431 
getlex(token)432 int getlex(token)
433 enum tokens token;
434 /* Function: Checks if nexttok is the same as token. If so,
435  * advances the input by calling nextlex and returns true.
436  * otherwise returns false.
437  * Doesn't work for strings and keywords; loses the character string for ids.
438  */
439 {
440         if (nexttok==token) {
441                 nextlex();
442                 return(true);
443         } else  return(false);
444 }
445 
446 	int
getkeyopt(key)447 getkeyopt(key)
448 	char const *key;
449 /* Function: If the current token is a keyword identical to key,
450  * advances the input by calling nextlex and returns true;
451  * otherwise returns false.
452  */
453 {
454 	if (nexttok==ID  &&  strcmp(key,NextString) == 0) {
455 		 /* match found */
456 		 ffree1(NextString);
457 		 nextlex();
458 		 return(true);
459         }
460         return(false);
461 }
462 
void
getkey(key)
	char const *key;
/*
 * Require the current input token to be the keyword KEY and advance
 * past it; report a fatal syntax error if it is anything else.
 */
{
	if (getkeyopt(key))
		return;
	fatserror("missing '%s' keyword", key);
}
473 
474 	void
getkeystring(key)475 getkeystring(key)
476 	char const *key;
477 /* Check that the current input token is a keyword identical to key,
478  * and advance the input by calling nextlex; then look ahead for a string.
479  */
480 {
481 	getkey(key);
482 	if (nexttok != STRING)
483 		fatserror("missing string after '%s' keyword", key);
484 }
485 
486 
487 	char const *
getid()488 getid()
489 /* Function: Checks if nexttok is an identifier. If so,
490  * advances the input by calling nextlex and returns a pointer
491  * to the identifier; otherwise returns 0.
492  * Treats keywords as identifiers.
493  */
494 {
495 	register char const *name;
496         if (nexttok==ID) {
497                 name = NextString;
498                 nextlex();
499                 return name;
500 	} else
501 		return 0;
502 }
503 
504 
getnum()505 struct hshentry * getnum()
506 /* Function: Checks if nexttok is a number. If so,
507  * advances the input by calling nextlex and returns a pointer
508  * to the hashtable entry.  Otherwise returns 0.
509  * Doesn't work if hshenter is false.
510  */
511 {
512         register struct hshentry * num;
513         if (nexttok==NUM) {
514                 num=nexthsh;
515                 nextlex();
516                 return num;
517 	} else
518 		return 0;
519 }
520 
521 	struct cbuf
getphrases(key)522 getphrases(key)
523 	char const *key;
524 /*
525 * Get a series of phrases that do not start with KEY.  Yield resulting buffer.
526 * Stop when the next phrase starts with a token that is not an identifier,
527 * or is KEY.  Copy input to foutptr if it is set.  Unlike ignorephrases(),
528 * this routine assumes nextlex() has already been invoked before we start.
529 */
530 {
531     declarecache;
532     register int c;
533     register char const *kn;
534     struct cbuf r;
535     register RILE *fin;
536     register FILE *frew;
537 #   if large_memory
538 #	define savech_(c) ;
539 #   else
540 	register char *p;
541 	char const *limit;
542 	struct buf b;
543 #	define savech_(c) {if (limit<=p)p=bufenlarge(&b,&limit); *p++ =(c);}
544 #   endif
545 
546     if (nexttok!=ID  ||  strcmp(NextString,key) == 0)
547 	clear_buf(&r);
548     else {
549 	warnignore();
550 	fin = finptr;
551 	frew = foutptr;
552 	setupcache(fin); cache(fin);
553 #	if large_memory
554 	    r.string = (char const*)cacheptr() - strlen(NextString) - 1;
555 #	else
556 	    bufautobegin(&b);
557 	    bufscpy(&b, NextString);
558 	    p = b.string + strlen(b.string);
559 	    limit = b.string + b.size;
560 #	endif
561 	ffree1(NextString);
562 	c = nextc;
563 	for (;;) {
564 	    for (;;) {
565 		savech_(c)
566 		switch (ctab[c]) {
567 		    default:
568 			fatserror("unknown character `%c'", c);
569 			/*NOTREACHED*/
570 		    case NEWLN:
571 			++rcsline;
572 			/* fall into */
573 		    case COLON: case DIGIT: case LETTER: case Letter:
574 		    case PERIOD: case SPACE:
575 			GETC_(frew, c)
576 			continue;
577 		    case SBEGIN: /* long string */
578 			for (;;) {
579 			    for (;;) {
580 				GETC_(frew, c)
581 				savech_(c)
582 				switch (c) {
583 				    case '\n':
584 					++rcsline;
585 					/* fall into */
586 				    default:
587 					continue;
588 
589 				    case SDELIM:
590 					break;
591 				}
592 				break;
593 			    }
594 			    GETC_(frew, c)
595 			    if (c != SDELIM)
596 				break;
597 			    savech_(c)
598 			}
599 			continue;
600 		    case SEMI:
601 			cacheget_(c)
602 			if (ctab[c] == NEWLN) {
603 			    if (frew)
604 				aputc_(c, frew)
605 			    ++rcsline;
606 			    savech_(c)
607 			    cacheget_(c)
608 			}
609 #			if large_memory
610 			    r.size = (char const*)cacheptr() - 1 - r.string;
611 #			endif
612 			for (;;) {
613 			    switch (ctab[c]) {
614 				case NEWLN:
615 					++rcsline;
616 					/* fall into */
617 				case SPACE:
618 					cacheget_(c)
619 					continue;
620 
621 				default: break;
622 			    }
623 			    break;
624 			}
625 			if (frew)
626 			    aputc_(c, frew)
627 			break;
628 		}
629 		break;
630 	    }
631 	    if (ctab[c] == Letter) {
632 		    for (kn = key;  c && *kn==c;  kn++)
633 			GETC_(frew, c)
634 		    if (!*kn)
635 			switch (ctab[c]) {
636 			    case DIGIT: case LETTER: case Letter:
637 			    case IDCHAR: case PERIOD:
638 				break;
639 			    default:
640 				nextc = c;
641 				NextString = fstr_save(key);
642 				nexttok = ID;
643 				uncache(fin);
644 				goto returnit;
645 			}
646 #		    if !large_memory
647 			{
648 			    register char const *ki;
649 			    for (ki=key; ki<kn; )
650 				savech_(*ki++)
651 			}
652 #		    endif
653 	    } else {
654 		    nextc = c;
655 		    uncache(fin);
656 		    nextlex();
657 		    break;
658 	    }
659 	}
660     returnit:;
661 #	if !large_memory
662 	    return bufremember(&b, (size_t)(p - b.string));
663 #	endif
664     }
665     return r;
666 }
667 
668 
669 	void
readstring()670 readstring()
671 /* skip over characters until terminating single SDELIM        */
672 /* If foutptr is set, copy every character read to foutptr.    */
673 /* Does not advance nextlex at the end.                        */
674 {       register c;
675 	declarecache;
676 	register FILE *frew;
677 	register RILE *fin;
678 	fin=finptr; frew=foutptr;
679 	setupcache(fin); cache(fin);
680 	for (;;) {
681 		GETC_(frew, c)
682 		switch (c) {
683 		    case '\n':
684 			++rcsline;
685 			break;
686 
687 		    case SDELIM:
688 			GETC_(frew, c)
689 			if (c != SDELIM) {
690 				/* end of string */
691 				nextc = c;
692 				uncache(fin);
693 				return;
694 			}
695 			break;
696 		}
697 	}
698 }
699 
700 
701 	void
printstring()702 printstring()
703 /* Function: copy a string to stdout, until terminated with a single SDELIM.
704  * Does not advance nextlex at the end.
705  */
706 {
707         register c;
708 	declarecache;
709 	register FILE *fout;
710 	register RILE *fin;
711 	fin=finptr;
712 	fout = stdout;
713 	setupcache(fin); cache(fin);
714 	for (;;) {
715 		cacheget_(c)
716 		switch (c) {
717 		    case '\n':
718 			++rcsline;
719 			break;
720 		    case SDELIM:
721 			cacheget_(c)
722 			if (c != SDELIM) {
723                                 nextc=c;
724 				uncache(fin);
725                                 return;
726                         }
727 			break;
728                 }
729 		aputc_(c,fout)
730         }
731 }
732 
733 
734 
735 	struct cbuf
savestring(target)736 savestring(target)
737 	struct buf *target;
738 /* Copies a string terminated with SDELIM from file finptr to buffer target.
739  * Double SDELIM is replaced with SDELIM.
740  * If foutptr is set, the string is also copied unchanged to foutptr.
741  * Does not advance nextlex at the end.
742  * Yield a copy of *TARGET, except with exact length.
743  */
744 {
745         register c;
746 	declarecache;
747 	register FILE *frew;
748 	register char *tp;
749 	register RILE *fin;
750 	char const *limit;
751 	struct cbuf r;
752 
753 	fin=finptr; frew=foutptr;
754 	setupcache(fin); cache(fin);
755 	tp = target->string;  limit = tp + target->size;
756 	for (;;) {
757 		GETC_(frew, c)
758 		switch (c) {
759 		    case '\n':
760 			++rcsline;
761 			break;
762 		    case SDELIM:
763 			GETC_(frew, c)
764 			if (c != SDELIM) {
765                                 /* end of string */
766                                 nextc=c;
767 				r.string = target->string;
768 				r.size = tp - r.string;
769 				uncache(fin);
770 				return r;
771                         }
772 			break;
773                 }
774 		if (tp == limit)
775 			tp = bufenlarge(target, &limit);
776 		*tp++ = c;
777         }
778 }
779 
780 
781 	static char *
checkidentifier(id,delimiter,dotok)782 checkidentifier(id, delimiter, dotok)
783 	register char *id;
784 	int delimiter;
785 	register int dotok;
786 /*   Function:  check whether the string starting at id is an   */
787 /*		identifier and return a pointer to the delimiter*/
788 /*		after the identifier.  White space, delim and 0 */
789 /*              are legal delimiters.  Aborts the program if not*/
790 /*              a legal identifier. Useful for checking commands*/
791 /*		If !delim, the only delimiter is 0.		*/
792 /*		Allow '.' in identifier only if DOTOK is set.   */
793 {
794         register char    *temp;
795 	register char c;
796 	register char delim = delimiter;
797 	int isid = false;
798 
799 	temp = id;
800 	for (;;  id++) {
801 		switch (ctab[(unsigned char)(c = *id)]) {
802 			case IDCHAR:
803 			case LETTER:
804 			case Letter:
805 				isid = true;
806 				continue;
807 
808 			case DIGIT:
809 				continue;
810 
811 			case PERIOD:
812 				if (dotok)
813 					continue;
814 				break;
815 
816 			default:
817 				break;
818 		}
819 		break;
820 	}
821 	if (	 ! isid
822 	    ||	 (c  &&  (!delim || (c!=delim && c!=' ' && c!='\t' && c!='\n')))
823 	) {
824                 /* append \0 to end of id before error message */
825 		while ((c = *id) && c!=' ' && c!='\t' && c!='\n' && c!=delim)
826 		    id++;
827                 *id = '\0';
828 		faterror("invalid %s `%s'",
829 			dotok ? "identifier" : "symbol", temp
830 		);
831 	}
832 	return id;
833 }
834 
835 	char *
checkid(id,delimiter)836 checkid(id, delimiter)
837 	char *id;
838 	int delimiter;
839 {
840 	return checkidentifier(id, delimiter, true);
841 }
842 
843 	char *
checksym(sym,delimiter)844 checksym(sym, delimiter)
845 	char *sym;
846 	int delimiter;
847 {
848 	return checkidentifier(sym, delimiter, false);
849 }
850 
851 	void
checksid(id)852 checksid(id)
853 	char *id;
854 /* Check whether the string ID is an identifier.  */
855 {
856 	VOID checkid(id, 0);
857 }
858 
859 	void
checkssym(sym)860 checkssym(sym)
861 	char *sym;
862 {
863 	VOID checksym(sym, 0);
864 }
865 
866 
#if !large_memory
    /* Without large_memory, closing an input file is just fclose.  */
#   define Iclose(f) fclose(f)
#else
# if !maps_memory
    static int Iclose P((RILE *));
    /*
     * Free the buffer of F, clear F->base (fd2RILE treats a RILE with a
     * zero base as unused), and close F's stream.  Yield fclose's result.
     */
	static int
    Iclose(f)
	register RILE *f;
    {
	int status;

	tfree(f->base);
	f->base = 0;
	status = fclose(f->stream);
	return status;
    }
# else
    static int Iclose P((RILE *));
    /*
     * Release the contents of F with whatever deallocator was recorded
     * when F was opened, clear F->base (fd2RILE treats a RILE with a
     * zero base as unused), and close F's descriptor.
     * Yield close's result.
     */
	static int
    Iclose(f)
	register RILE *f;
    {
	int status;

	(* f->deallocate) (f);
	f->base = 0;
	status = close(f->fd);
	return status;
    }

#   if has_map_fd
	static void map_fd_deallocate P((RILE *));
	/* Deallocate contents obtained with map_fd; failure is fatal.  */
	    static void
	map_fd_deallocate(f)
	    register RILE *f;
	{
	    vm_size_t size = (vm_size_t) (f->lim - f->base);

	    if (vm_deallocate(task_self(), (vm_address_t) f->base, size)
		!= KERN_SUCCESS
	    )
		efaterror("vm_deallocate");
	}
#   endif
#   if has_mmap
	static void mmap_deallocate P((RILE *));
	/* Unmap contents obtained with mmap; failure is fatal.  */
	    static void
	mmap_deallocate(f)
	    register RILE *f;
	{
	    size_t size = (size_t) (f->lim - f->base);

	    if (munmap((char *) f->base, size) != 0)
		efaterror("munmap");
	}
#   endif
    static void read_deallocate P((RILE *));
    /* Free contents that were read into allocated memory.  */
	static void
    read_deallocate(f)
	RILE *f;
    {
	tfree(f->base);
    }

    static void nothing_to_deallocate P((RILE *));
    /* Deallocator for a RILE that owns no storage, e.g. an empty file.  */
	static void
    nothing_to_deallocate(f)
	RILE *f;
    {
    }
# endif
#endif
931 
932 
933 #if large_memory && maps_memory
934 	static RILE *fd2_RILE P((int,char const*,struct stat*));
935 	static RILE *
fd2_RILE(fd,name,status)936 fd2_RILE(fd, name, status)
937 #else
938 	static RILE *fd2RILE P((int,char const*,char const*,struct stat*));
939 	static RILE *
940 fd2RILE(fd, name, type, status)
941 	char const *type;
942 #endif
943 	int fd;
944 	char const *name;
945 	register struct stat *status;
946 {
947 	struct stat st;
948 
949 	if (!status)
950 		status = &st;
951 	if (fstat(fd, status) != 0)
952 		efaterror(name);
953 	if (!S_ISREG(status->st_mode)) {
954 		error("`%s' is not a regular file", name);
955 		VOID close(fd);
956 		errno = EINVAL;
957 		return 0;
958 	} else {
959 
960 #	    if !(large_memory && maps_memory)
961 		FILE *stream;
962 		if (!(stream = fdopen(fd, type)))
963 			efaterror(name);
964 #	    endif
965 
966 #	    if !large_memory
967 		return stream;
968 #	    else
969 #		define RILES 3
970 	      {
971 		static RILE rilebuf[RILES];
972 
973 		register RILE *f;
974 		size_t s = status->st_size;
975 
976 		if (s != status->st_size)
977 			faterror("%s: too large", name);
978 		for (f = rilebuf;  f->base;  f++)
979 			if (f == rilebuf+RILES)
980 				faterror("too many RILEs");
981 #		if maps_memory
982 			f->deallocate = nothing_to_deallocate;
983 #		endif
984 		if (!s) {
985 		    static unsigned char nothing;
986 		    f->base = &nothing; /* Any nonzero address will do.  */
987 		} else {
988 		    f->base = 0;
989 #		    if has_map_fd
990 			map_fd(
991 				fd, (vm_offset_t)0, (vm_address_t*) &f->base,
992 				TRUE, (vm_size_t)s
993 			);
994 			f->deallocate = map_fd_deallocate;
995 #		    endif
996 #		    if has_mmap
997 			if (!f->base) {
998 			    catchmmapints();
999 			    f->base = (unsigned char *) mmap(
1000 				(char *)0, s, PROT_READ, MAP_SHARED,
1001 				fd, (off_t)0
1002 			    );
1003 #			    ifndef MAP_FAILED
1004 #			    define MAP_FAILED (-1)
1005 #			    endif
1006 			    if (f->base == (unsigned char *) MAP_FAILED)
1007 				f->base = 0;
1008 			    else {
1009 #				if has_NFS && mmap_signal
1010 				    /*
1011 				    * On many hosts, the superuser
1012 				    * can mmap an NFS file it can't read.
1013 				    * So access the first page now, and print
1014 				    * a nice message if a bus error occurs.
1015 				    */
1016 				    readAccessFilenameBuffer(name, f->base);
1017 #				endif
1018 			    }
1019 			    f->deallocate = mmap_deallocate;
1020 			}
1021 #		    endif
1022 		    if (!f->base) {
1023 			f->base = tnalloc(unsigned char, s);
1024 #			if maps_memory
1025 			{
1026 			    /*
1027 			    * We can't map the file into memory for some reason.
1028 			    * Read it into main memory all at once; this is
1029 			    * the simplest substitute for memory mapping.
1030 			    */
1031 			    char *bufptr = (char *) f->base;
1032 			    size_t bufsiz = s;
1033 			    do {
1034 				ssize_t r = read(fd, bufptr, bufsiz);
1035 				switch (r) {
1036 				    case -1:
1037 					efaterror(name);
1038 
1039 				    case 0:
1040 					/* The file must have shrunk!  */
1041 					status->st_size = s -= bufsiz;
1042 					bufsiz = 0;
1043 					break;
1044 
1045 				    default:
1046 					bufptr += r;
1047 					bufsiz -= r;
1048 					break;
1049 				}
1050 			    } while (bufsiz);
1051 			    if (lseek(fd, (off_t)0, SEEK_SET) == -1)
1052 				efaterror(name);
1053 			    f->deallocate = read_deallocate;
1054 			}
1055 #			endif
1056 		    }
1057 		}
1058 		f->ptr = f->base;
1059 		f->lim = f->base + s;
1060 		f->fd = fd;
1061 #		if !maps_memory
1062 		    f->readlim = f->base;
1063 		    f->stream = stream;
1064 #		endif
1065 		if_advise_access(s, f, MADV_SEQUENTIAL);
1066 		return f;
1067 	      }
1068 #	    endif
1069 	}
1070 }
1071 
#if !maps_memory && large_memory
int
Igetmore(f)
	register RILE *f;
/*
 * Read more of F's stream into F's buffer at F->readlim, at most BUFSIZ
 * bytes and never past F->lim.  Yield 1 if something was read.
 * If nothing was read, check the stream for an input error, pull
 * F->lim back to F->readlim, and yield 0.
 */
{
	register fread_type got;
	register size_t want = f->lim - f->readlim;

	if (BUFSIZ < want)
		want = BUFSIZ;
	got = Fread(f->readlim, sizeof(*f->readlim), want, f->stream);
	if (got) {
		f->readlim += got;
		return 1;
	}
	testIerror(f->stream);
	f->lim = f->readlim;  /* The file might have shrunk!  */
	return 0;
}
#endif
1091 
#if has_madvise && has_mmap && large_memory
void
advise_access(f, advice)
	register RILE *f;
	int advice;
/*
 * Pass ADVICE about the contents of F to madvise, but only if this
 * RILE was set up with mmap (its deallocator is mmap_deallocate).
 */
{
    if (f->deallocate != mmap_deallocate)
	return;
    /* Don't worry if madvise fails; it's only advisory.  */
    VOID madvise((char *)f->base, (size_t)(f->lim - f->base), advice);
}
#endif
1103 
1104 	RILE *
1105 #if large_memory && maps_memory
I_open(name,status)1106 I_open(name, status)
1107 #else
1108 Iopen(name, type, status)
1109 	char const *type;
1110 #endif
1111 	char const *name;
1112 	struct stat *status;
1113 /* Open NAME for reading, yield its descriptor, and set *STATUS.  */
1114 {
1115 	int fd = fdSafer(open(name, O_RDONLY
1116 #		if OPEN_O_BINARY
1117 			|  (strchr(type,'b') ? OPEN_O_BINARY : 0)
1118 #		endif
1119 	));
1120 
1121 	if (fd < 0)
1122 		return 0;
1123 #	if large_memory && maps_memory
1124 		return fd2_RILE(fd, name, status);
1125 #	else
1126 		return fd2RILE(fd, name, type, status);
1127 #	endif
1128 }
1129 
1130 
1131 static int Oerrloop;
1132 
1133 	void
Oerror()1134 Oerror()
1135 {
1136 	if (Oerrloop)
1137 		exiterr();
1138 	Oerrloop = true;
1139 	efaterror("output error");
1140 }
1141 
/* Report an unexpected end of the input as a fatal syntax error.  */
void
Ieof()
{
	fatserror("unexpected end of file");
}

/* Report a fatal input error, using errno for the details.  */
void
Ierror()
{
	efaterror("input error");
}

/* Report a fatal input error if the error indicator of F is set.  */
void
testIerror(f)
	FILE *f;
{
	if (!ferror(f))
		return;
	Ierror();
}

/* Report a fatal output error if the error indicator of O is set.  */
void
testOerror(o)
	FILE *o;
{
	if (!ferror(o))
		return;
	Oerror();
}
1146 
Ifclose(f)1147 void Ifclose(f) RILE *f; { if (f && Iclose(f)!=0) Ierror(); }
Ofclose(f)1148 void Ofclose(f) FILE *f; { if (f && fclose(f)!=0) Oerror(); }
Izclose(p)1149 void Izclose(p) RILE **p; { Ifclose(*p); *p = 0; }
Ozclose(p)1150 void Ozclose(p) FILE **p; { Ofclose(*p); *p = 0; }
1151 
#if !large_memory
/*
 * Report a fatal input error if F has its error indicator set;
 * otherwise report an unexpected end of file if F is at end of file.
 */
void
testIeof(f)
	FILE *f;
{
	testIerror(f);
	if (!feof(f))
		return;
	Ieof();
}

/* Reposition the input file F at its beginning; failure is fatal.  */
void
Irewind(f)
	FILE *f;
{
	if (fseek(f,0L,SEEK_SET) == 0)
		return;
	Ierror();
}
#endif
1163 
Orewind(f)1164 void Orewind(f) FILE *f; { if (fseek(f,0L,SEEK_SET) != 0) Oerror(); }
1165 
/* Flush F; call Oerror if fflush fails.  */
	void
aflush(f)
	FILE *f;
{
	if (fflush(f) != 0)
		Oerror();
}
eflush()1167 void eflush() { if (fflush(stderr)!=0 && !Oerrloop) Oerror(); }
oflush()1168 void oflush()
1169 {
1170 	if (fflush(workstdout ? workstdout : stdout) != 0  &&  !Oerrloop)
1171 		Oerror();
1172 }
1173 
1174 	void
fatcleanup(already_newline)1175 fatcleanup(already_newline)
1176 	int already_newline;
1177 {
1178 	VOID fprintf(stderr, already_newline+"\n%s aborted\n", cmdid);
1179 	exiterr();
1180 }
1181 
1182 	static void
startsay(s,t)1183 startsay(s, t)
1184 	const char *s, *t;
1185 {
1186 	oflush();
1187 	if (s)
1188 	    aprintf(stderr, "%s: %s: %s", cmdid, s, t);
1189 	else
1190 	    aprintf(stderr, "%s: %s", cmdid, t);
1191 }
1192 
1193 	static void
fatsay(s)1194 fatsay(s)
1195 	char const *s;
1196 {
1197 	startsay(s, "");
1198 }
1199 
1200 	static void
errsay(s)1201 errsay(s)
1202 	char const *s;
1203 {
1204 	fatsay(s);
1205 	nerror++;
1206 }
1207 
1208 	static void
warnsay(s)1209 warnsay(s)
1210 	char const *s;
1211 {
1212 	startsay(s, "warning: ");
1213 }
1214 
/* Like enerror, using the current value of errno.  */
	void
eerror(s)
	char const *s;
{
	enerror(errno, s);
}
1216 
1217 	void
enerror(e,s)1218 enerror(e,s)
1219 	int e;
1220 	char const *s;
1221 {
1222 	errsay((char const*)0);
1223 	errno = e;
1224 	perror(s);
1225 	eflush();
1226 }
1227 
/* Like enfaterror, using the current value of errno.  */
	void
efaterror(s)
	char const *s;
{
	enfaterror(errno, s);
}
1229 
1230 	void
enfaterror(e,s)1231 enfaterror(e,s)
1232 	int e;
1233 	char const *s;
1234 {
1235 	fatsay((char const*)0);
1236 	errno = e;
1237 	perror(s);
1238 	fatcleanup(true);
1239 }
1240 
1241 #if has_prototypes
1242 	void
error(char const * format,...)1243 error(char const *format,...)
1244 #else
1245 	/*VARARGS1*/ void error(format, va_alist) char const *format; va_dcl
1246 #endif
1247 /* non-fatal error */
1248 {
1249 	va_list args;
1250 	errsay((char const*)0);
1251 	vararg_start(args, format);
1252 	fvfprintf(stderr, format, args);
1253 	va_end(args);
1254 	afputc('\n',stderr);
1255 	eflush();
1256 }
1257 
1258 #if has_prototypes
1259 	void
rcserror(char const * format,...)1260 rcserror(char const *format,...)
1261 #else
1262 	/*VARARGS1*/ void rcserror(format, va_alist) char const *format; va_dcl
1263 #endif
1264 /* non-fatal RCS file error */
1265 {
1266 	va_list args;
1267 	errsay(RCSname);
1268 	vararg_start(args, format);
1269 	fvfprintf(stderr, format, args);
1270 	va_end(args);
1271 	afputc('\n',stderr);
1272 	eflush();
1273 }
1274 
1275 #if has_prototypes
1276 	void
workerror(char const * format,...)1277 workerror(char const *format,...)
1278 #else
1279 	/*VARARGS1*/ void workerror(format, va_alist) char const *format; va_dcl
1280 #endif
1281 /* non-fatal working file error */
1282 {
1283 	va_list args;
1284 	errsay(workname);
1285 	vararg_start(args, format);
1286 	fvfprintf(stderr, format, args);
1287 	va_end(args);
1288 	afputc('\n',stderr);
1289 	eflush();
1290 }
1291 
1292 #if has_prototypes
1293 	void
fatserror(char const * format,...)1294 fatserror(char const *format,...)
1295 #else
1296 	/*VARARGS1*/ void
1297 	fatserror(format, va_alist) char const *format; va_dcl
1298 #endif
1299 /* fatal RCS file syntax error */
1300 {
1301 	va_list args;
1302 	oflush();
1303 	VOID fprintf(stderr, "%s: %s:%ld: ", cmdid, RCSname, rcsline);
1304 	vararg_start(args, format);
1305 	fvfprintf(stderr, format, args);
1306 	va_end(args);
1307 	fatcleanup(false);
1308 }
1309 
1310 #if has_prototypes
1311 	void
faterror(char const * format,...)1312 faterror(char const *format,...)
1313 #else
1314 	/*VARARGS1*/ void faterror(format, va_alist)
1315 	char const *format; va_dcl
1316 #endif
1317 /* fatal error, terminates program after cleanup */
1318 {
1319 	va_list args;
1320 	fatsay((char const*)0);
1321 	vararg_start(args, format);
1322 	fvfprintf(stderr, format, args);
1323 	va_end(args);
1324 	fatcleanup(false);
1325 }
1326 
1327 #if has_prototypes
1328 	void
rcsfaterror(char const * format,...)1329 rcsfaterror(char const *format,...)
1330 #else
1331 	/*VARARGS1*/ void rcsfaterror(format, va_alist)
1332 	char const *format; va_dcl
1333 #endif
1334 /* fatal RCS file error, terminates program after cleanup */
1335 {
1336 	va_list args;
1337 	fatsay(RCSname);
1338 	vararg_start(args, format);
1339 	fvfprintf(stderr, format, args);
1340 	va_end(args);
1341 	fatcleanup(false);
1342 }
1343 
1344 #if has_prototypes
1345 	void
warn(char const * format,...)1346 warn(char const *format,...)
1347 #else
1348 	/*VARARGS1*/ void warn(format, va_alist) char const *format; va_dcl
1349 #endif
1350 /* warning */
1351 {
1352 	va_list args;
1353 	if (!quietflag) {
1354 		warnsay((char *)0);
1355 		vararg_start(args, format);
1356 		fvfprintf(stderr, format, args);
1357 		va_end(args);
1358 		afputc('\n', stderr);
1359 		eflush();
1360 	}
1361 }
1362 
1363 #if has_prototypes
1364 	void
rcswarn(char const * format,...)1365 rcswarn(char const *format,...)
1366 #else
1367 	/*VARARGS1*/ void rcswarn(format, va_alist) char const *format; va_dcl
1368 #endif
1369 /* RCS file warning */
1370 {
1371 	va_list args;
1372 	if (!quietflag) {
1373 		warnsay(RCSname);
1374 		vararg_start(args, format);
1375 		fvfprintf(stderr, format, args);
1376 		va_end(args);
1377 		afputc('\n', stderr);
1378 		eflush();
1379 	}
1380 }
1381 
1382 #if has_prototypes
1383 	void
workwarn(char const * format,...)1384 workwarn(char const *format,...)
1385 #else
1386 	/*VARARGS1*/ void workwarn(format, va_alist) char const *format; va_dcl
1387 #endif
1388 /* working file warning */
1389 {
1390 	va_list args;
1391 	if (!quietflag) {
1392 		warnsay(workname);
1393 		vararg_start(args, format);
1394 		fvfprintf(stderr, format, args);
1395 		va_end(args);
1396 		afputc('\n', stderr);
1397 		eflush();
1398 	}
1399 }
1400 
1401 	void
redefined(c)1402 redefined(c)
1403 	int c;
1404 {
1405 	warn("redefinition of -%c option", c);
1406 }
1407 
1408 #if has_prototypes
1409 	void
diagnose(char const * format,...)1410 diagnose(char const *format,...)
1411 #else
1412 	/*VARARGS1*/ void diagnose(format, va_alist) char const *format; va_dcl
1413 #endif
1414 /* prints a diagnostic message */
1415 /* Unlike the other routines, it does not append a newline. */
1416 /* This lets some callers suppress the newline, and is faster */
1417 /* in implementations that flush stderr just at the end of each printf. */
1418 {
1419 	va_list args;
1420         if (!quietflag) {
1421 		oflush();
1422 		vararg_start(args, format);
1423 		fvfprintf(stderr, format, args);
1424 		va_end(args);
1425 		eflush();
1426         }
1427 }
1428 
1429 
1430 
1431 	void
afputc(c,f)1432 afputc(c, f)
1433 /* afputc(c,f); acts like aputc_(c,f) but is smaller and slower.  */
1434 	int c;
1435 	register FILE *f;
1436 {
1437 	aputc_(c,f)
1438 }
1439 
1440 
1441 	void
aputs(s,iop)1442 aputs(s, iop)
1443 	char const *s;
1444 	FILE *iop;
1445 /* Function: Put string s on file iop, abort on error.
1446  */
1447 {
1448 #if has_fputs
1449 	if (fputs(s, iop) < 0)
1450 		Oerror();
1451 #else
1452 	awrite(s, strlen(s), iop);
1453 #endif
1454 }
1455 
1456 
1457 
1458 	void
1459 #if has_prototypes
fvfprintf(FILE * stream,char const * format,va_list args)1460 fvfprintf(FILE *stream, char const *format, va_list args)
1461 #else
1462 	fvfprintf(stream,format,args) FILE *stream; char *format; va_list args;
1463 #endif
1464 /* like vfprintf, except abort program on error */
1465 {
1466 #if has_vfprintf
1467 	if (vfprintf(stream, format, args) < 0)
1468 		Oerror();
1469 #else
1470 #	if has__doprintf
1471 		_doprintf(stream, format, args);
1472 #	else
1473 #	if has__doprnt
1474 		_doprnt(format, args, stream);
1475 #	else
1476 		int *a = (int *)args;
1477 		VOID fprintf(stream, format,
1478 			a[0], a[1], a[2], a[3], a[4],
1479 			a[5], a[6], a[7], a[8], a[9]
1480 		);
1481 #	endif
1482 #	endif
1483 	if (ferror(stream))
1484 		Oerror();
1485 #endif
1486 }
1487 
1488 #if has_prototypes
1489 	void
aprintf(FILE * iop,char const * fmt,...)1490 aprintf(FILE *iop, char const *fmt, ...)
1491 #else
1492 	/*VARARGS2*/ void
1493 aprintf(iop, fmt, va_alist)
1494 FILE *iop;
1495 char const *fmt;
1496 va_dcl
1497 #endif
1498 /* Function: formatted output. Same as fprintf in stdio,
1499  * but aborts program on error
1500  */
1501 {
1502 	va_list ap;
1503 	vararg_start(ap, fmt);
1504 	fvfprintf(iop, fmt, ap);
1505 	va_end(ap);
1506 }
1507 
1508 
1509 
1510 #ifdef LEXDB
1511 /* test program reading a stream of lexemes and printing the tokens.
1512  */
1513 
1514 
1515 
1516 	int
main(argc,argv)1517 main(argc,argv)
1518 int argc; char * argv[];
1519 {
1520         cmdid="lextest";
1521         if (argc<2) {
1522 		aputs("No input file\n",stderr);
1523 		exitmain(EXIT_FAILURE);
1524         }
1525 	if (!(finptr=Iopen(argv[1], FOPEN_R, (struct stat*)0))) {
1526 		faterror("can't open input file %s",argv[1]);
1527         }
1528         Lexinit();
1529 	while (!eoflex()) {
1530         switch (nexttok) {
1531 
1532         case ID:
1533                 VOID printf("ID: %s",NextString);
1534                 break;
1535 
1536         case NUM:
1537 		if (hshenter)
1538                    VOID printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab);
1539                 else
1540                    VOID printf("NUM, unentered: %s",NextString);
1541                 hshenter = !hshenter; /*alternate between dates and numbers*/
1542                 break;
1543 
1544         case COLON:
1545                 VOID printf("COLON"); break;
1546 
1547         case SEMI:
1548                 VOID printf("SEMI"); break;
1549 
1550         case STRING:
1551                 readstring();
1552                 VOID printf("STRING"); break;
1553 
1554         case UNKN:
1555                 VOID printf("UNKN"); break;
1556 
1557         default:
1558                 VOID printf("DEFAULT"); break;
1559         }
1560         VOID printf(" | ");
1561         nextlex();
1562         }
1563 	exitmain(EXIT_SUCCESS);
1564 }
1565 
/* Test-program version of exiterr: leave at once with EXIT_FAILURE.  */
	void
exiterr()
{
	_exit(EXIT_FAILURE);
}
1567 
1568 
1569 #endif
1570