1 /****************************************************************************
2  * Copyright (c) 1998,1999,2000,2001 Free Software Foundation, Inc.         *
3  *                                                                          *
4  * Permission is hereby granted, free of charge, to any person obtaining a  *
5  * copy of this software and associated documentation files (the            *
6  * "Software"), to deal in the Software without restriction, including      *
7  * without limitation the rights to use, copy, modify, merge, publish,      *
8  * distribute, distribute with modifications, sublicense, and/or sell       *
9  * copies of the Software, and to permit persons to whom the Software is    *
10  * furnished to do so, subject to the following conditions:                 *
11  *                                                                          *
12  * The above copyright notice and this permission notice shall be included  *
13  * in all copies or substantial portions of the Software.                   *
14  *                                                                          *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS  *
16  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF               *
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.   *
18  * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,   *
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR    *
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR    *
21  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.                               *
22  *                                                                          *
23  * Except as contained in this notice, the name(s) of the above copyright   *
24  * holders shall not be used in advertising or otherwise to promote the     *
25  * sale, use or other dealings in this Software without prior written       *
26  * authorization.                                                           *
27  ****************************************************************************/
28 
29 /****************************************************************************
30  *  Author: Zeyd M. Ben-Halim <zmbenhal@netcom.com> 1992,1995               *
31  *     and: Eric S. Raymond <esr@snark.thyrsus.com>                         *
32  ****************************************************************************/
33 
34 /* $FreeBSD$ */
35 
36 /*
37  *	comp_scan.c --- Lexical scanner for terminfo compiler.
38  *
39  *	_nc_reset_input()
40  *	_nc_get_token()
41  *	_nc_panic_mode()
42  *	int _nc_syntax;
43  *	int _nc_curr_line;
44  *	long _nc_curr_file_pos;
45  *	long _nc_comment_start;
46  *	long _nc_comment_end;
47  */
48 
49 #include <curses.priv.h>
50 
51 #include <ctype.h>
52 #include <term_entry.h>
53 #include <tic.h>
54 
55 MODULE_ID("$Id: comp_scan.c,v 1.59 2001/09/23 00:56:29 tom Exp $")
56 
/*
 * Length of a string capability beyond which we warn that a separator is
 * probably missing.  Yes, there is a real capability in /etc/termcap this
 * long, an "is".
 */
#define MAXCAPLEN	600

/*
 * True when 'ch' is a blank or a tab.  The argument is parenthesized so that
 * an expression argument keeps its meaning.  It is evaluated twice, so it
 * must not have side effects.
 */
#define iswhite(ch)	((ch) == ' '  ||  (ch) == '\t')
64 
65 NCURSES_EXPORT_VAR(int)
66 _nc_syntax = 0;			/* termcap or terminfo? */
67 NCURSES_EXPORT_VAR(long)
68 _nc_curr_file_pos = 0;		/* file offset of current line */
69 NCURSES_EXPORT_VAR(long)
70 _nc_comment_start = 0;		/* start of comment range before name */
71 NCURSES_EXPORT_VAR(long)
72 _nc_comment_end = 0;		/* end of comment range before name */
73 NCURSES_EXPORT_VAR(long)
74 _nc_start_line = 0;		/* start line of current entry */
75 
76 NCURSES_EXPORT_VAR(struct token)
77 _nc_curr_token =
78 {
79     0, 0, 0
80 };
81 
82 /*****************************************************************************
83  *
84  * Token-grabbing machinery
85  *
86  *****************************************************************************/
87 
88 static bool first_column;	/* See 'next_char()' below */
89 static char separator;		/* capability separator */
90 static int pushtype;		/* type of pushback token */
91 static char *pushname;
92 
93 #if NCURSES_EXT_FUNCS
94 NCURSES_EXPORT_VAR(bool)
95 _nc_disable_period = FALSE;	/* used by tic -a option */
96 #endif
97 
98 static int last_char(void);
99 static int next_char(void);
100 static long stream_pos(void);
101 static bool end_of_stream(void);
102 static void push_back(char c);
103 
/*
 * If 'ch' is a backslash, assume it starts a termcap-style continuation:
 * discard it together with the newlines, blanks and tabs that follow, and
 * return the first character after them.  Any other character is returned
 * unchanged.
 */
static inline int
eat_escaped_newline(int ch)
{
    if (ch != '\\')
	return ch;

    do {
	ch = next_char();
    } while (ch == '\n' || iswhite(ch));

    return ch;
}
113 
114 /*
115  *	int
116  *	get_token()
117  *
118  *	Scans the input for the next token, storing the specifics in the
119  *	global structure 'curr_token' and returning one of the following:
120  *
121  *		NAMES		A line beginning in column 1.  'name'
122  *				will be set to point to everything up to but
123  *				not including the first separator on the line.
124  *		BOOLEAN		An entry consisting of a name followed by
125  *				a separator.  'name' will be set to point to
126  *				the name of the capability.
127  *		NUMBER		An entry of the form
128  *					name#digits,
129  *				'name' will be set to point to the capability
130  *				name and 'valnumber' to the number given.
131  *		STRING		An entry of the form
132  *					name=characters,
133  *				'name' is set to the capability name and
134  *				'valstring' to the string of characters, with
135  *				input translations done.
136  *		CANCEL		An entry of the form
137  *					name@,
138  *				'name' is set to the capability name and
139  *				'valnumber' to -1.
140  *		EOF		The end of the file has been reached.
141  *
142  *	A `separator' is either a comma or a semicolon, depending on whether
143  *	we are in termcap or terminfo mode.
144  *
145  */
146 
147 NCURSES_EXPORT(int)
148 _nc_get_token(bool silent)
149 {
150     static const char terminfo_punct[] = "@%&*!#";
151     static char *buffer;
152 
153     char *numchk;
154     char *ptr;
155     char numbuf[80];
156     int ch;
157     int dot_flag = FALSE;
158     int type;
159     long number;
160     long token_start;
161     unsigned found;
162 
163     if (pushtype != NO_PUSHBACK) {
164 	int retval = pushtype;
165 
166 	_nc_set_type(pushname != 0 ? pushname : "");
167 	DEBUG(3, ("pushed-back token: `%s', class %d",
168 		  _nc_curr_token.tk_name, pushtype));
169 
170 	pushtype = NO_PUSHBACK;
171 	if (pushname != 0)
172 	    pushname[0] = '\0';
173 
174 	/* currtok wasn't altered by _nc_push_token() */
175 	return (retval);
176     }
177 
178     if (end_of_stream())
179 	return (EOF);
180 
181   start_token:
182     token_start = stream_pos();
183     while ((ch = next_char()) == '\n' || iswhite(ch))
184 	continue;
185 
186     ch = eat_escaped_newline(ch);
187 
188     if (ch == EOF)
189 	type = EOF;
190     else {
191 	/* if this is a termcap entry, skip a leading separator */
192 	if (separator == ':' && ch == ':')
193 	    ch = next_char();
194 
195 	if (ch == '.'
196 #if NCURSES_EXT_FUNCS
197 	    && !_nc_disable_period
198 #endif
199 	    ) {
200 	    dot_flag = TRUE;
201 	    DEBUG(8, ("dot-flag set"));
202 
203 	    while ((ch = next_char()) == '.' || iswhite(ch))
204 		continue;
205 	}
206 
207 	if (ch == EOF) {
208 	    type = EOF;
209 	    goto end_of_token;
210 	}
211 
212 	/* have to make some punctuation chars legal for terminfo */
213 	if (!isalnum(ch)
214 #if NCURSES_EXT_FUNCS
215 	    && !(ch == '.' && _nc_disable_period)
216 #endif
217 	    && !strchr(terminfo_punct, (char) ch)) {
218 	    if (!silent)
219 		_nc_warning("Illegal character (expected alphanumeric or %s) - %s",
220 			    terminfo_punct, unctrl((chtype) ch));
221 	    _nc_panic_mode(separator);
222 	    goto start_token;
223 	}
224 
225 	if (buffer == 0)
226 	    buffer = _nc_doalloc(buffer, MAX_ENTRY_SIZE);
227 
228 	ptr = buffer;
229 	*(ptr++) = ch;
230 
231 	if (first_column) {
232 	    char *desc;
233 
234 	    _nc_comment_start = token_start;
235 	    _nc_comment_end = _nc_curr_file_pos;
236 	    _nc_start_line = _nc_curr_line;
237 
238 	    _nc_syntax = ERR;
239 	    while ((ch = next_char()) != '\n') {
240 		if (ch == EOF)
241 		    _nc_err_abort("premature EOF");
242 		else if (ch == ':' && last_char() != ',') {
243 		    _nc_syntax = SYN_TERMCAP;
244 		    separator = ':';
245 		    break;
246 		} else if (ch == ',') {
247 		    _nc_syntax = SYN_TERMINFO;
248 		    separator = ',';
249 		    /*
250 		     * Fall-through here is not an accident.  The idea is that
251 		     * if we see a comma, we figure this is terminfo unless we
252 		     * subsequently run into a colon -- but we don't stop
253 		     * looking for that colon until hitting a newline.  This
254 		     * allows commas to be embedded in description fields of
255 		     * either syntax.
256 		     */
257 		    /* FALLTHRU */
258 		} else
259 		    ch = eat_escaped_newline(ch);
260 
261 		*ptr++ = ch;
262 	    }
263 	    ptr[0] = '\0';
264 	    if (_nc_syntax == ERR) {
265 		/*
266 		 * Grrr...what we ought to do here is barf, complaining that
267 		 * the entry is malformed.  But because a couple of name fields
268 		 * in the 8.2 termcap file end with |\, we just have to assume
269 		 * it's termcap syntax.
270 		 */
271 		_nc_syntax = SYN_TERMCAP;
272 		separator = ':';
273 	    } else if (_nc_syntax == SYN_TERMINFO) {
274 		/* throw away trailing /, *$/ */
275 		for (--ptr; iswhite(*ptr) || *ptr == ','; ptr--)
276 		    continue;
277 		ptr[1] = '\0';
278 	    }
279 
280 	    /*
281 	     * This is the soonest we have the terminal name fetched.  Set up
282 	     * for following warning messages.
283 	     */
284 	    ptr = strchr(buffer, '|');
285 	    if (ptr == (char *) NULL)
286 		ptr = buffer + strlen(buffer);
287 	    ch = *ptr;
288 	    *ptr = '\0';
289 	    _nc_set_type(buffer);
290 	    *ptr = ch;
291 
292 	    /*
293 	     * Compute the boundary between the aliases and the description
294 	     * field for syntax-checking purposes.
295 	     */
296 	    desc = strrchr(buffer, '|');
297 	    if (!silent && desc) {
298 		if (*desc == '\0')
299 		    _nc_warning("empty longname field");
300 #ifndef FREEBSD_NATIVE
301 		else if (strchr(desc, ' ') == (char *) NULL)
302 		    _nc_warning("older tic versions may treat the description field as an alias");
303 #endif
304 	    }
305 	    if (!desc)
306 		desc = buffer + strlen(buffer);
307 
308 	    /*
309 	     * Whitespace in a name field other than the long name can confuse
310 	     * rdist and some termcap tools.  Slashes are a no-no.  Other
311 	     * special characters can be dangerous due to shell expansion.
312 	     */
313 	    for (ptr = buffer; ptr < desc; ptr++) {
314 		if (isspace(UChar(*ptr))) {
315 		    if (!silent)
316 			_nc_warning("whitespace in name or alias field");
317 		    break;
318 		} else if (*ptr == '/') {
319 		    if (!silent)
320 			_nc_warning("slashes aren't allowed in names or aliases");
321 		    break;
322 		} else if (strchr("$[]!*?", *ptr)) {
323 		    if (!silent)
324 			_nc_warning("dubious character `%c' in name or alias field", *ptr);
325 		    break;
326 		}
327 	    }
328 
329 	    ptr = buffer;
330 
331 	    _nc_curr_token.tk_name = buffer;
332 	    type = NAMES;
333 	} else {
334 	    while ((ch = next_char()) != EOF) {
335 		if (!isalnum(ch)) {
336 		    if (_nc_syntax == SYN_TERMINFO) {
337 			if (ch != '_')
338 			    break;
339 		    } else {	/* allow ';' for "k;" */
340 			if (ch != ';')
341 			    break;
342 		    }
343 		}
344 		*(ptr++) = ch;
345 	    }
346 
347 	    *ptr++ = '\0';
348 	    switch (ch) {
349 	    case ',':
350 	    case ':':
351 		if (ch != separator)
352 		    _nc_err_abort("Separator inconsistent with syntax");
353 		_nc_curr_token.tk_name = buffer;
354 		type = BOOLEAN;
355 		break;
356 	    case '@':
357 		if ((ch = next_char()) != separator && !silent)
358 		    _nc_warning("Missing separator after `%s', have %s",
359 				buffer, unctrl((chtype) ch));
360 		_nc_curr_token.tk_name = buffer;
361 		type = CANCEL;
362 		break;
363 
364 	    case '#':
365 		found = 0;
366 		while (isalnum(ch = next_char())) {
367 		    numbuf[found++] = ch;
368 		    if (found >= sizeof(numbuf) - 1)
369 			break;
370 		}
371 		numbuf[found] = '\0';
372 		number = strtol(numbuf, &numchk, 0);
373 		if (!silent) {
374 		    if (numchk == numbuf)
375 			_nc_warning("no value given for `%s'", buffer);
376 		    if ((*numchk != '\0') || (ch != separator))
377 			_nc_warning("Missing separator");
378 		}
379 		_nc_curr_token.tk_name = buffer;
380 		_nc_curr_token.tk_valnumber = number;
381 		type = NUMBER;
382 		break;
383 
384 	    case '=':
385 		ch = _nc_trans_string(ptr, buffer + MAX_ENTRY_SIZE);
386 		if (!silent && ch != separator)
387 		    _nc_warning("Missing separator");
388 		_nc_curr_token.tk_name = buffer;
389 		_nc_curr_token.tk_valstring = ptr;
390 		type = STRING;
391 		break;
392 
393 	    case EOF:
394 		type = EOF;
395 		break;
396 	    default:
397 		/* just to get rid of the compiler warning */
398 		type = UNDEF;
399 		if (!silent)
400 		    _nc_warning("Illegal character - %s", unctrl((chtype) ch));
401 	    }
402 	}			/* end else (first_column == FALSE) */
403     }				/* end else (ch != EOF) */
404 
405   end_of_token:
406 
407 #ifdef TRACE
408     if (dot_flag == TRUE)
409 	DEBUG(8, ("Commented out "));
410 
411     if (_nc_tracing >= DEBUG_LEVEL(7)) {
412 	switch (type) {
413 	case BOOLEAN:
414 	    _tracef("Token: Boolean; name='%s'",
415 		    _nc_curr_token.tk_name);
416 	    break;
417 
418 	case NUMBER:
419 	    _tracef("Token: Number;  name='%s', value=%d",
420 		    _nc_curr_token.tk_name,
421 		    _nc_curr_token.tk_valnumber);
422 	    break;
423 
424 	case STRING:
425 	    _tracef("Token: String;  name='%s', value=%s",
426 		    _nc_curr_token.tk_name,
427 		    _nc_visbuf(_nc_curr_token.tk_valstring));
428 	    break;
429 
430 	case CANCEL:
431 	    _tracef("Token: Cancel; name='%s'",
432 		    _nc_curr_token.tk_name);
433 	    break;
434 
435 	case NAMES:
436 
437 	    _tracef("Token: Names; value='%s'",
438 		    _nc_curr_token.tk_name);
439 	    break;
440 
441 	case EOF:
442 	    _tracef("Token: End of file");
443 	    break;
444 
445 	default:
446 	    _nc_warning("Bad token type");
447 	}
448     }
449 #endif
450 
451     if (dot_flag == TRUE)	/* if commented out, use the next one */
452 	type = _nc_get_token(silent);
453 
454     DEBUG(3, ("token: `%s', class %d",
455 	      _nc_curr_token.tk_name != 0 ? _nc_curr_token.tk_name :
456 	      "<null>",
457 	      type));
458 
459     return (type);
460 }
461 
462 /*
463  *	char
464  *	trans_string(ptr)
465  *
466  *	Reads characters using next_char() until encountering a separator, nl,
467  *	or end-of-file.  The returned value is the character which caused
468  *	reading to stop.  The following translations are done on the input:
469  *
470  *		^X  goes to  ctrl-X (i.e. X & 037)
471  *		{\E,\n,\r,\b,\t,\f}  go to
472  *			{ESCAPE,newline,carriage-return,backspace,tab,formfeed}
473  *		{\^,\\}  go to  {carat,backslash}
474  *		\ddd (for ddd = up to three octal digits)  goes to the character ddd
475  *
476  *		\e == \E
477  *		\0 == \200
478  *
479  */
480 
481 NCURSES_EXPORT(char)
482 _nc_trans_string(char *ptr, char *last)
483 {
484     int count = 0;
485     int number = 0;
486     int i, c;
487     chtype ch, last_ch = '\0';
488     bool ignored = FALSE;
489     bool long_warning = FALSE;
490 
491     while ((ch = c = next_char()) != (chtype) separator && c != EOF) {
492 	if (ptr == (last - 1))
493 	    break;
494 	if ((_nc_syntax == SYN_TERMCAP) && c == '\n')
495 	    break;
496 	if (ch == '^' && last_ch != '%') {
497 	    ch = c = next_char();
498 	    if (c == EOF)
499 		_nc_err_abort("Premature EOF");
500 
501 	    if (!(is7bits(ch) && isprint(ch))) {
502 		_nc_warning("Illegal ^ character - %s", unctrl(ch));
503 	    }
504 	    if (ch == '?') {
505 		*(ptr++) = '\177';
506 		if (_nc_tracing)
507 		    _nc_warning("Allow ^? as synonym for \\177");
508 	    } else {
509 		if ((ch &= 037) == 0)
510 		    ch = 128;
511 		*(ptr++) = (char) (ch);
512 	    }
513 	} else if (ch == '\\') {
514 	    ch = c = next_char();
515 	    if (c == EOF)
516 		_nc_err_abort("Premature EOF");
517 
518 	    if (ch >= '0' && ch <= '7') {
519 		number = ch - '0';
520 		for (i = 0; i < 2; i++) {
521 		    ch = c = next_char();
522 		    if (c == EOF)
523 			_nc_err_abort("Premature EOF");
524 
525 		    if (c < '0' || c > '7') {
526 			if (isdigit(c)) {
527 			    _nc_warning("Non-octal digit `%c' in \\ sequence", c);
528 			    /* allow the digit; it'll do less harm */
529 			} else {
530 			    push_back((char) c);
531 			    break;
532 			}
533 		    }
534 
535 		    number = number * 8 + c - '0';
536 		}
537 
538 		if (number == 0)
539 		    number = 0200;
540 		*(ptr++) = (char) number;
541 	    } else {
542 		switch (c) {
543 		case 'E':
544 		case 'e':
545 		    *(ptr++) = '\033';
546 		    break;
547 
548 		case 'a':
549 		    *(ptr++) = '\007';
550 		    break;
551 
552 		case 'l':
553 		case 'n':
554 		    *(ptr++) = '\n';
555 		    break;
556 
557 		case 'r':
558 		    *(ptr++) = '\r';
559 		    break;
560 
561 		case 'b':
562 		    *(ptr++) = '\010';
563 		    break;
564 
565 		case 's':
566 		    *(ptr++) = ' ';
567 		    break;
568 
569 		case 'f':
570 		    *(ptr++) = '\014';
571 		    break;
572 
573 		case 't':
574 		    *(ptr++) = '\t';
575 		    break;
576 
577 		case '\\':
578 		    *(ptr++) = '\\';
579 		    break;
580 
581 		case '^':
582 		    *(ptr++) = '^';
583 		    break;
584 
585 		case ',':
586 		    *(ptr++) = ',';
587 		    break;
588 
589 		case ':':
590 		    *(ptr++) = ':';
591 		    break;
592 
593 		case '\n':
594 		    continue;
595 
596 		default:
597 		    _nc_warning("Illegal character %s in \\ sequence",
598 				unctrl(ch));
599 		    *(ptr++) = (char) ch;
600 		}		/* endswitch (ch) */
601 	    }			/* endelse (ch < '0' ||  ch > '7') */
602 	}
603 	/* end else if (ch == '\\') */
604 	else if (ch == '\n' && (_nc_syntax == SYN_TERMINFO)) {
605 	    /* newlines embedded in a terminfo string are ignored */
606 	    ignored = TRUE;
607 	} else {
608 	    *(ptr++) = (char) ch;
609 	}
610 
611 	if (!ignored) {
612 	    last_ch = ch;
613 	    count++;
614 	}
615 	ignored = FALSE;
616 
617 	if (count > MAXCAPLEN && !long_warning) {
618 	    _nc_warning("Very long string found.  Missing separator?");
619 	    long_warning = TRUE;
620 	}
621     }				/* end while */
622 
623     *ptr = '\0';
624 
625     return (ch);
626 }
627 
628 /*
629  *	_nc_push_token()
630  *
631  *	Push a token of given type so that it will be reread by the next
632  *	get_token() call.
633  */
634 
635 NCURSES_EXPORT(void)
636 _nc_push_token(int tokclass)
637 {
638     /*
639      * This implementation is kind of bogus, it will fail if we ever do more
640      * than one pushback at a time between get_token() calls.  It relies on the
641      * fact that _nc_curr_token is static storage that nothing but
642      * _nc_get_token() touches.
643      */
644     pushtype = tokclass;
645     if (pushname == 0)
646 	pushname = _nc_doalloc(pushname, MAX_NAME_SIZE + 1);
647     _nc_get_type(pushname);
648 
649     DEBUG(3, ("pushing token: `%s', class %d",
650 	      _nc_curr_token.tk_name, pushtype));
651 }
652 
653 /*
654  * Panic mode error recovery - skip everything until a "ch" is found.
655  */
656 NCURSES_EXPORT(void)
657 _nc_panic_mode(char ch)
658 {
659     int c;
660 
661     for (;;) {
662 	c = next_char();
663 	if (c == ch)
664 	    return;
665 	if (c == EOF)
666 	    return;
667     }
668 }
669 
670 /*****************************************************************************
671  *
672  * Character-stream handling
673  *
674  *****************************************************************************/
675 
/* Unit by which next_char() sizes and grows its line buffer. */
#define LEXBUFSIZ	1024

/* Scanner's input stream; null when scanning an in-memory string. */
static FILE *yyin;

/* Start of the text being scanned, kept so that offsets can be computed. */
static char *bufstart;

/* Next character to be handed out from the text being scanned. */
static char *bufptr;
681 
682 /*
683  *	_nc_reset_input()
684  *
685  *	Resets the input-reading routines.  Used on initialization,
686  *	or after a seek has been done.  Exactly one argument must be
687  *	non-null.
688  */
689 
690 NCURSES_EXPORT(void)
691 _nc_reset_input(FILE * fp, char *buf)
692 {
693     pushtype = NO_PUSHBACK;
694     if (pushname != 0)
695 	pushname[0] = '\0';
696     yyin = fp;
697     bufstart = bufptr = buf;
698     _nc_curr_file_pos = 0L;
699     if (fp != 0)
700 	_nc_curr_line = 0;
701     _nc_curr_col = 0;
702 }
703 
704 /*
705  *	int last_char()
706  *
707  *	Returns the final nonblank character on the current input buffer
708  */
709 static int
710 last_char(void)
711 {
712     size_t len = strlen(bufptr);
713     while (len--) {
714 	if (!isspace(UChar(bufptr[len])))
715 	    return bufptr[len];
716     }
717     return 0;
718 }
719 
720 /*
721  *	int next_char()
722  *
723  *	Returns the next character in the input stream.  Comments and leading
724  *	white space are stripped.
725  *
726  *	The global state variable 'firstcolumn' is set TRUE if the character
727  *	returned is from the first column of the input line.
728  *
729  *	The global variable _nc_curr_line is incremented for each new line.
730  *	The global variable _nc_curr_file_pos is set to the file offset of the
731  *	beginning of each line.
732  */
733 
734 static int
735 next_char(void)
736 {
737     if (!yyin) {
738 	/*
739 	 * An string with an embedded null will truncate the input.  This is
740 	 * intentional (we don't read binary files here).
741 	 */
742 	if (*bufptr == '\0')
743 	    return (EOF);
744 	if (*bufptr == '\n') {
745 	    _nc_curr_line++;
746 	    _nc_curr_col = 0;
747 	}
748     } else if (!bufptr || !*bufptr) {
749 	/*
750 	 * In theory this could be recoded to do its I/O one character at a
751 	 * time, saving the buffer space.  In practice, this turns out to be
752 	 * quite hard to get completely right.  Try it and see.  If you
753 	 * succeed, don't forget to hack push_back() correspondingly.
754 	 */
755 	static char *result;
756 	static size_t allocated;
757 	size_t used;
758 	size_t len;
759 
760 	do {
761 	    bufstart = 0;
762 	    used = 0;
763 	    do {
764 		if (used + (LEXBUFSIZ / 4) >= allocated) {
765 		    allocated += (allocated + LEXBUFSIZ);
766 		    result = _nc_doalloc(result, allocated);
767 		    if (result == 0)
768 			return (EOF);
769 		}
770 		if (used == 0)
771 		    _nc_curr_file_pos = ftell(yyin);
772 
773 		if (fgets(result + used, allocated - used, yyin) != NULL) {
774 		    bufstart = result;
775 		    if (used == 0) {
776 			_nc_curr_line++;
777 			_nc_curr_col = 0;
778 		    }
779 		} else {
780 		    if (used != 0)
781 			strcat(result, "\n");
782 		}
783 		if ((bufptr = bufstart) != 0) {
784 		    used = strlen(bufptr);
785 		    while (iswhite(*bufptr))
786 			bufptr++;
787 
788 		    /*
789 		     * Treat a trailing <cr><lf> the same as a <newline> so we
790 		     * can read files on OS/2, etc.
791 		     */
792 		    if ((len = strlen(bufptr)) > 1) {
793 			if (bufptr[len - 1] == '\n'
794 			    && bufptr[len - 2] == '\r') {
795 			    len--;
796 			    bufptr[len - 1] = '\n';
797 			    bufptr[len] = '\0';
798 			}
799 		    }
800 		} else {
801 		    return (EOF);
802 		}
803 	    } while (bufptr[len - 1] != '\n');	/* complete a line */
804 	} while (result[0] == '#');	/* ignore comments */
805     }
806 
807     first_column = (bufptr == bufstart);
808 
809     _nc_curr_col++;
810     return (*bufptr++);
811 }
812 
813 static void
814 push_back(char c)
815 /* push a character back onto the input stream */
816 {
817     if (bufptr == bufstart)
818 	_nc_syserr_abort("Can't backspace off beginning of line");
819     *--bufptr = c;
820 }
821 
822 static long
823 stream_pos(void)
824 /* return our current character position in the input stream */
825 {
826     return (yyin ? ftell(yyin) : (bufptr ? bufptr - bufstart : 0));
827 }
828 
829 static bool
830 end_of_stream(void)
831 /* are we at end of input? */
832 {
833     return ((yyin ? feof(yyin) : (bufptr && *bufptr == '\0'))
834 	    ? TRUE : FALSE);
835 }
836