xref: /freebsd/contrib/ntp/ntpd/ntp_scanner.c (revision 5b9c547c)
1 
2 /* ntp_scanner.c
3  *
4  * The source code for a simple lexical analyzer.
5  *
6  * Written By:	Sachin Kamboj
7  *		University of Delaware
8  *		Newark, DE 19711
9  * Copyright (c) 2006
10  */
11 
12 #ifdef HAVE_CONFIG_H
13 # include <config.h>
14 #endif
15 
16 #include <stdio.h>
17 #include <ctype.h>
18 #include <stdlib.h>
19 #include <errno.h>
20 #include <string.h>
21 
22 #include "ntpd.h"
23 #include "ntp_config.h"
24 #include "ntpsim.h"
25 #include "ntp_scanner.h"
26 #include "ntp_parser.h"
27 
28 /* ntp_keyword.h declares finite state machine and token text */
29 #include "ntp_keyword.h"
30 
31 
32 
33 /* SCANNER GLOBAL VARIABLES
34  * ------------------------
35  */
36 
37 #define MAX_LEXEME (1024 + 1)	/* Size of yytext[]: up to 1024 lexeme characters plus the terminating NUL */
38 char yytext[MAX_LEXEME];	/* Buffer holding the text of the lexeme most recently scanned by yylex() */
39 u_int32 conf_file_sum;		/* Simple sum of characters read from file input; FGETC() adds, UNGETC() subtracts */
40 
41 
42 
43 
44 /* CONSTANTS
45  * ---------
46  */
47 
48 
49 /* SCANNER GLOBAL VARIABLES
50  * ------------------------
51  */
52 const char special_chars[] = "{}(),;|=";	/* characters tested by is_special() and quote_if_needed() */
53 
54 
55 /* FUNCTIONS
56  * ---------
57  */
58 
59 static int is_keyword(char *lexeme, follby *pfollowedby);
60 
61 
62 /*
63  * keyword() - Return the keyword associated with token T_ identifier.
64  *	       See also token_name() for the string-ized T_ identifier.
65  *	       Example: keyword(T_Server) returns "server"
66  *			token_name(T_Server) returns "T_Server"
67  */
68 const char *
69 keyword(
70 	int token
71 	)
72 {
73 	size_t i;
74 	const char *text;
75 
76 	i = token - LOWEST_KEYWORD_ID;
77 
78 	if (i < COUNTOF(keyword_text))
79 		text = keyword_text[i];
80 	else
81 		text = NULL;
82 
83 	return (text != NULL)
84 		   ? text
85 		   : "(keyword not found)";
86 }
87 
88 
89 /* FILE INTERFACE
90  * --------------
91  * We define a couple of wrapper functions around the standard C fgetc
92  * and ungetc functions in order to include positional bookkeeping
93  */
94 
95 struct FILE_INFO *
96 F_OPEN(
97 	const char *path,
98 	const char *mode
99 	)
100 {
101 	struct FILE_INFO *my_info;
102 
103 	my_info = emalloc(sizeof *my_info);
104 
105 	my_info->line_no = 1;
106 	my_info->col_no = 0;
107 	my_info->prev_line_col_no = 0;
108 	my_info->prev_token_col_no = 0;
109 	my_info->fname = path;
110 
111 	my_info->fd = fopen(path, mode);
112 	if (NULL == my_info->fd) {
113 		free(my_info);
114 		return NULL;
115 	}
116 	return my_info;
117 }
118 
119 int
120 FGETC(
121 	struct FILE_INFO *stream
122 	)
123 {
124 	int ch;
125 
126 	do
127 		ch = fgetc(stream->fd);
128 	while (EOF != ch && (CHAR_MIN > ch || ch > CHAR_MAX));
129 
130 	if (EOF != ch) {
131 		if (input_from_file)
132 			conf_file_sum += (u_char)ch;
133 		++stream->col_no;
134 		if (ch == '\n') {
135 			stream->prev_line_col_no = stream->col_no;
136 			++stream->line_no;
137 			stream->col_no = 1;
138 		}
139 	}
140 
141 	return ch;
142 }
143 
144 /* BUGS: 1. Function will fail on more than one line of pushback
145  *       2. No error checking is done to see if ungetc fails
146  * SK: I don't think its worth fixing these bugs for our purposes ;-)
147  */
148 int
149 UNGETC(
150 	int ch,
151 	struct FILE_INFO *stream
152 	)
153 {
154 	if (input_from_file)
155 		conf_file_sum -= (u_char)ch;
156 	if (ch == '\n') {
157 		stream->col_no = stream->prev_line_col_no;
158 		stream->prev_line_col_no = -1;
159 		--stream->line_no;
160 	}
161 	--stream->col_no;
162 	return ungetc(ch, stream->fd);
163 }
164 
165 int
166 FCLOSE(
167 	struct FILE_INFO *stream
168 	)
169 {
170 	int ret_val = fclose(stream->fd);
171 
172 	if (!ret_val)
173 		free(stream);
174 	return ret_val;
175 }
176 
177 /* STREAM INTERFACE
178  * ----------------
179  * Provide a wrapper for the stream functions so that the
180  * stream can either read from a file or from a character
181  * array.
182  * NOTE: This is not very efficient for reading from character
183  * arrays, but needed to allow remote configuration where the
184  * configuration command is provided through ntpq.
185  *
186  * The behavior of these two functions is determined by the
187  * input_from_file flag.
188  */
189 
190 static int
191 get_next_char(
192 	struct FILE_INFO *ip_file
193 	)
194 {
195 	char ch;
196 
197 	if (input_from_file)
198 		return FGETC(ip_file);
199 	else {
200 		if (remote_config.buffer[remote_config.pos] == '\0')
201 			return EOF;
202 		else {
203 			ip_file->col_no++;
204 			ch = remote_config.buffer[remote_config.pos++];
205 			if (ch == '\n') {
206 				ip_file->prev_line_col_no = ip_file->col_no;
207 				++ip_file->line_no;
208 				ip_file->col_no = 1;
209 			}
210 			return ch;
211 		}
212 	}
213 }
214 
215 static void
216 push_back_char(
217 	struct FILE_INFO *ip_file,
218 	int ch
219 	)
220 {
221 	if (input_from_file)
222 		UNGETC(ch, ip_file);
223 	else {
224 		if (ch == '\n') {
225 			ip_file->col_no = ip_file->prev_line_col_no;
226 			ip_file->prev_line_col_no = -1;
227 			--ip_file->line_no;
228 		}
229 		--ip_file->col_no;
230 
231 		remote_config.pos--;
232 	}
233 }
234 
235 
236 
237 /* STATE MACHINES
238  * --------------
239  */
240 
241 /* Keywords */
242 static int
243 is_keyword(
244 	char *lexeme,
245 	follby *pfollowedby
246 	)
247 {
248 	follby fb;
249 	int curr_s;		/* current state index */
250 	int token;
251 	int i;
252 
253 	curr_s = SCANNER_INIT_S;
254 	token = 0;
255 
256 	for (i = 0; lexeme[i]; i++) {
257 		while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
258 			curr_s = SS_OTHER_N(sst[curr_s]);
259 
260 		if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
261 			if ('\0' == lexeme[i + 1]
262 			    && FOLLBY_NON_ACCEPTING
263 			       != SS_FB(sst[curr_s])) {
264 				fb = SS_FB(sst[curr_s]);
265 				*pfollowedby = fb;
266 				token = curr_s;
267 				break;
268 			}
269 			curr_s = SS_MATCH_N(sst[curr_s]);
270 		} else
271 			break;
272 	}
273 
274 	return token;
275 }
276 
277 
/*
 * is_integer() - test whether a lexeme is a decimal integer that fits
 *		  in a signed int.
 *
 * Accepted form: an optional leading '-' followed by one or more
 * decimal digits and nothing else.  The value must lie in
 * INT_MIN..INT_MAX; larger non-negative values are rejected here so
 * that they can be classified as unsigned by is_u_int().
 *
 * Returns 1 if the lexeme qualifies, 0 otherwise.  errno is modified.
 */
static int
is_integer(
	char *lexeme
	)
{
	const char *	digits;
	const char *	pch;
	long		val;

	/* Allow a leading minus sign */
	digits = lexeme;
	if ('-' == *digits)
		digits++;

	/* A sign alone (or an empty lexeme) is not a number */
	if ('\0' == *digits)
		return 0;

	/* Check that all the remaining characters are digits */
	for (pch = digits; '\0' != *pch; pch++) {
		if (!isdigit((unsigned char)*pch))
			return 0;
	}

	/*
	 * Range-check with strtol(), which reports overflow through
	 * errno, for both signs.  Where long is wider than int the
	 * explicit comparison does the work; where they are the same
	 * width ERANGE does.
	 */
	errno = 0;
	val = strtol(lexeme, NULL, 10);
	if (ERANGE == errno)
		return 0;

	return (val >= INT_MIN && val <= INT_MAX);
}
313 
314 
/*
 * is_u_int() - test whether a lexeme is an unsigned integer that fits
 *		in an unsigned int.
 *
 * Intended to be tried after is_integer() has returned false.
 * Accepted forms: one or more decimal digits, or "0x"/"0X" followed
 * by one or more hexadecimal digits.  A bare "0x" and the empty
 * string are rejected, as are values above UINT_MAX.
 *
 * Returns 1 if the lexeme qualifies, 0 otherwise.  errno is modified.
 */
static int
is_u_int(
	char *lexeme
	)
{
	const char *	pch;
	int		is_hex;
	unsigned long	val;

	pch = lexeme;
	is_hex = ('0' == pch[0] && 'x' == tolower((unsigned char)pch[1]));
	if (is_hex)
		pch += 2;

	/* At least one digit is required after the optional prefix */
	if ('\0' == *pch)
		return 0;

	/* Check that all the remaining characters are digits */
	for (; '\0' != *pch; pch++) {
		if (is_hex) {
			if (!isxdigit((unsigned char)*pch))
				return 0;
		} else if (!isdigit((unsigned char)*pch)) {
			return 0;
		}
	}

	/*
	 * Reject values that cannot be stored in an unsigned int.
	 * strtoul() with base 16 accepts the "0x" prefix itself.
	 */
	errno = 0;
	val = strtoul(lexeme, NULL, is_hex ? 16 : 10);
	if (ERANGE == errno)
		return 0;

	return (val <= UINT_MAX);
}
342 
343 
/*
 * is_double() - test whether a lexeme is a floating-point number.
 *
 * Accepted form:
 *	[+|-] digits [. digits] [(e|E) [+|-] digits]
 * where the integer and fraction parts together must contain at
 * least one digit (so "5.", ".5" and "5" qualify but "." does not),
 * and an exponent, when present, must contain at least one digit.
 *
 * Returns 1 if the whole lexeme matches, 0 otherwise.
 */
static int
is_double(
	char *lexeme
	)
{
	const char *	pch;
	int		have_digits;	/* digit seen in integer/fraction */

	pch = lexeme;
	have_digits = 0;

	/* Check for an optional '+' or '-' */
	if ('+' == *pch || '-' == *pch)
		pch++;

	/* Read the integer part */
	while (isdigit((unsigned char)*pch)) {
		have_digits = 1;
		pch++;
	}

	/* Check for the optional decimal point and fraction digits */
	if ('.' == *pch) {
		pch++;
		while (isdigit((unsigned char)*pch)) {
			have_digits = 1;
			pch++;
		}
	}

	/* The integer and fraction parts must not both be empty */
	if (!have_digits)
		return 0;

	/* Check if we are done */
	if ('\0' == *pch)
		return 1;

	/* There is still more input; it has to be an exponent */
	if ('e' != tolower((unsigned char)*pch))
		return 0;
	pch++;

	/* Read an optional sign */
	if ('+' == *pch || '-' == *pch)
		pch++;

	/* An exponent marker must be followed by at least one digit */
	if (!isdigit((unsigned char)*pch))
		return 0;
	while (isdigit((unsigned char)*pch))
		pch++;

	/* Anything left over means this is not a number */
	return ('\0' == *pch);
}
402 
403 
404 /* is_special() - Test whether a character is a token */
405 static inline int
406 is_special(
407 	int ch
408 	)
409 {
410 	return strchr(special_chars, ch) != NULL;
411 }
412 
413 
414 static int
415 is_EOC(
416 	int ch
417 	)
418 {
419 	if ((old_config_style && (ch == '\n')) ||
420 	    (!old_config_style && (ch == ';')))
421 		return 1;
422 	return 0;
423 }
424 
425 
426 char *
427 quote_if_needed(char *str)
428 {
429 	char *ret;
430 	size_t len;
431 	size_t octets;
432 
433 	len = strlen(str);
434 	octets = len + 2 + 1;
435 	ret = emalloc(octets);
436 	if ('"' != str[0]
437 	    && (strcspn(str, special_chars) < len
438 		|| strchr(str, ' ') != NULL)) {
439 		snprintf(ret, octets, "\"%s\"", str);
440 	} else
441 		strlcpy(ret, str, octets);
442 
443 	return ret;
444 }
445 
446 
447 static int
448 create_string_token(
449 	char *lexeme
450 	)
451 {
452 	char *pch;
453 
454 	/*
455 	 * ignore end of line whitespace
456 	 */
457 	pch = lexeme;
458 	while (*pch && isspace((unsigned char)*pch))
459 		pch++;
460 
461 	if (!*pch) {
462 		yylval.Integer = T_EOC;
463 		return yylval.Integer;
464 	}
465 
466 	yylval.String = estrdup(lexeme);
467 	return T_String;
468 }
469 
470 
471 /*
472  * yylex() - function that does the actual scanning.
473  * Bison expects this function to be called yylex and for it to take no
474  * input and return an int.
475  * Conceptually yylex "returns" yylval as well as the actual return
476  * value representing the token or type.
477  */
478 int
479 yylex(
480 	struct FILE_INFO *ip_file
481 	)
482 {
483 	static follby	followedby = FOLLBY_TOKEN;
484 	size_t		i;
485 	int		instring;
486 	int		yylval_was_set;
487 	int		converted;
488 	int		token;		/* The return value */
489 	int		ch;
490 
491 	if (input_from_file)
492 		ip_file = fp[curr_include_level];
493 	instring = FALSE;
494 	yylval_was_set = FALSE;
495 
496 	do {
497 		/* Ignore whitespace at the beginning */
498 		while (EOF != (ch = get_next_char(ip_file)) &&
499 		       isspace(ch) &&
500 		       !is_EOC(ch))
501 			; /* Null Statement */
502 
503 		if (EOF == ch) {
504 
505 			if (!input_from_file || curr_include_level <= 0)
506 				return 0;
507 
508 			FCLOSE(fp[curr_include_level]);
509 			ip_file = fp[--curr_include_level];
510 			token = T_EOC;
511 			goto normal_return;
512 
513 		} else if (is_EOC(ch)) {
514 
515 			/* end FOLLBY_STRINGS_TO_EOC effect */
516 			followedby = FOLLBY_TOKEN;
517 			token = T_EOC;
518 			goto normal_return;
519 
520 		} else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
521 			/* special chars are their own token values */
522 			token = ch;
523 			/*
524 			 * '=' outside simulator configuration implies
525 			 * a single string following as in:
526 			 * setvar Owner = "The Boss" default
527 			 */
528 			if ('=' == ch && old_config_style)
529 				followedby = FOLLBY_STRING;
530 			yytext[0] = (char)ch;
531 			yytext[1] = '\0';
532 			goto normal_return;
533 		} else
534 			push_back_char(ip_file, ch);
535 
536 		/* save the position of start of the token */
537 		ip_file->prev_token_line_no = ip_file->line_no;
538 		ip_file->prev_token_col_no = ip_file->col_no;
539 
540 		/* Read in the lexeme */
541 		i = 0;
542 		while (EOF != (ch = get_next_char(ip_file))) {
543 
544 			yytext[i] = (char)ch;
545 
546 			/* Break on whitespace or a special character */
547 			if (isspace(ch) || is_EOC(ch)
548 			    || '"' == ch
549 			    || (FOLLBY_TOKEN == followedby
550 				&& is_special(ch)))
551 				break;
552 
553 			/* Read the rest of the line on reading a start
554 			   of comment character */
555 			if ('#' == ch) {
556 				while (EOF != (ch = get_next_char(ip_file))
557 				       && '\n' != ch)
558 					; /* Null Statement */
559 				break;
560 			}
561 
562 			i++;
563 			if (i >= COUNTOF(yytext))
564 				goto lex_too_long;
565 		}
566 		/* Pick up all of the string inside between " marks, to
567 		 * end of line.  If we make it to EOL without a
568 		 * terminating " assume it for them.
569 		 *
570 		 * XXX - HMS: I'm not sure we want to assume the closing "
571 		 */
572 		if ('"' == ch) {
573 			instring = TRUE;
574 			while (EOF != (ch = get_next_char(ip_file)) &&
575 			       ch != '"' && ch != '\n') {
576 				yytext[i++] = (char)ch;
577 				if (i >= COUNTOF(yytext))
578 					goto lex_too_long;
579 			}
580 			/*
581 			 * yytext[i] will be pushed back as not part of
582 			 * this lexeme, but any closing quote should
583 			 * not be pushed back, so we read another char.
584 			 */
585 			if ('"' == ch)
586 				ch = get_next_char(ip_file);
587 		}
588 		/* Pushback the last character read that is not a part
589 		 * of this lexeme.
590 		 * If the last character read was an EOF, pushback a
591 		 * newline character. This is to prevent a parse error
592 		 * when there is no newline at the end of a file.
593 		 */
594 		if (EOF == ch)
595 			push_back_char(ip_file, '\n');
596 		else
597 			push_back_char(ip_file, ch);
598 		yytext[i] = '\0';
599 	} while (i == 0);
600 
601 	/* Now return the desired token */
602 
603 	/* First make sure that the parser is *not* expecting a string
604 	 * as the next token (based on the previous token that was
605 	 * returned) and that we haven't read a string.
606 	 */
607 
608 	if (followedby == FOLLBY_TOKEN && !instring) {
609 		token = is_keyword(yytext, &followedby);
610 		if (token) {
611 			/*
612 			 * T_Server is exceptional as it forces the
613 			 * following token to be a string in the
614 			 * non-simulator parts of the configuration,
615 			 * but in the simulator configuration section,
616 			 * "server" is followed by "=" which must be
617 			 * recognized as a token not a string.
618 			 */
619 			if (T_Server == token && !old_config_style)
620 				followedby = FOLLBY_TOKEN;
621 			goto normal_return;
622 		} else if (is_integer(yytext)) {
623 			yylval_was_set = TRUE;
624 			errno = 0;
625 			if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
626 			    && ((errno == EINVAL) || (errno == ERANGE))) {
627 				msyslog(LOG_ERR,
628 					"Integer cannot be represented: %s",
629 					yytext);
630 				if (input_from_file) {
631 					exit(1);
632 				} else {
633 					/* force end of parsing */
634 					yylval.Integer = 0;
635 					return 0;
636 				}
637 			}
638 			token = T_Integer;
639 			goto normal_return;
640 		} else if (is_u_int(yytext)) {
641 			yylval_was_set = TRUE;
642 			if ('0' == yytext[0] &&
643 			    'x' == tolower((unsigned char)yytext[1]))
644 				converted = sscanf(&yytext[2], "%x",
645 						   &yylval.U_int);
646 			else
647 				converted = sscanf(yytext, "%u",
648 						   &yylval.U_int);
649 			if (1 != converted) {
650 				msyslog(LOG_ERR,
651 					"U_int cannot be represented: %s",
652 					yytext);
653 				if (input_from_file) {
654 					exit(1);
655 				} else {
656 					/* force end of parsing */
657 					yylval.Integer = 0;
658 					return 0;
659 				}
660 			}
661 			token = T_U_int;
662 			goto normal_return;
663 		} else if (is_double(yytext)) {
664 			yylval_was_set = TRUE;
665 			errno = 0;
666 			if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
667 				msyslog(LOG_ERR,
668 					"Double too large to represent: %s",
669 					yytext);
670 				exit(1);
671 			} else {
672 				token = T_Double;
673 				goto normal_return;
674 			}
675 		} else {
676 			/* Default: Everything is a string */
677 			yylval_was_set = TRUE;
678 			token = create_string_token(yytext);
679 			goto normal_return;
680 		}
681 	}
682 
683 	/*
684 	 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
685 	 * of a string.  Hence, we need to return T_String.
686 	 *
687 	 * _Except_ we might have a -4 or -6 flag on a an association
688 	 * configuration line (server, peer, pool, etc.).
689 	 *
690 	 * This is a terrible hack, but the grammar is ambiguous so we
691 	 * don't have a choice.  [SK]
692 	 *
693 	 * The ambiguity is in the keyword scanner, not ntp_parser.y.
694 	 * We do not require server addresses be quoted in ntp.conf,
695 	 * complicating the scanner's job.  To avoid trying (and
696 	 * failing) to match an IP address or DNS name to a keyword,
697 	 * the association keywords use FOLLBY_STRING in the keyword
698 	 * table, which tells the scanner to force the next token to be
699 	 * a T_String, so it does not try to match a keyword but rather
700 	 * expects a string when -4/-6 modifiers to server, peer, etc.
701 	 * are encountered.
702 	 * restrict -4 and restrict -6 parsing works correctly without
703 	 * this hack, as restrict uses FOLLBY_TOKEN.  [DH]
704 	 */
705 	if ('-' == yytext[0]) {
706 		if ('4' == yytext[1]) {
707 			token = T_Ipv4_flag;
708 			goto normal_return;
709 		} else if ('6' == yytext[1]) {
710 			token = T_Ipv6_flag;
711 			goto normal_return;
712 		}
713 	}
714 
715 	instring = FALSE;
716 	if (FOLLBY_STRING == followedby)
717 		followedby = FOLLBY_TOKEN;
718 
719 	yylval_was_set = TRUE;
720 	token = create_string_token(yytext);
721 
722 normal_return:
723 	if (T_EOC == token)
724 		DPRINTF(4,("\t<end of command>\n"));
725 	else
726 		DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
727 			    token_name(token)));
728 
729 	if (!yylval_was_set)
730 		yylval.Integer = token;
731 
732 	return token;
733 
734 lex_too_long:
735 	yytext[min(sizeof(yytext) - 1, 50)] = 0;
736 	msyslog(LOG_ERR,
737 		"configuration item on line %d longer than limit of %lu, began with '%s'",
738 		ip_file->line_no, (u_long)min(sizeof(yytext) - 1, 50),
739 		yytext);
740 
741 	/*
742 	 * If we hit the length limit reading the startup configuration
743 	 * file, abort.
744 	 */
745 	if (input_from_file)
746 		exit(sizeof(yytext) - 1);
747 
748 	/*
749 	 * If it's runtime configuration via ntpq :config treat it as
750 	 * if the configuration text ended before the too-long lexeme,
751 	 * hostname, or string.
752 	 */
753 	yylval.Integer = 0;
754 	return 0;
755 }
756