1 /* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file
2  */
3 
4 #include "lib.h"
5 #include "istream.h"
6 #include "failures.h"
7 
8 #include "sieve-common.h"
9 #include "sieve-limits.h"
10 #include "sieve-script.h"
11 #include "sieve-lexer.h"
12 #include "sieve-parser.h"
13 #include "sieve-error.h"
14 #include "sieve-ast.h"
15 
16 /*
17  * Forward declarations
18  */
19 
20 static int
21 sieve_parser_recover(struct sieve_parser *parser,
22 		     enum sieve_token_type end_token);
23 
24 /*
25  * Parser object
26  */
27 
28 struct sieve_parser {
29 	pool_t pool;
30 
31 	bool valid;
32 
33 	struct sieve_script *script;
34 
35 	struct sieve_error_handler *ehandler;
36 
37 	const struct sieve_lexer *lexer;
38 	struct sieve_ast *ast;
39 };
40 
41 struct sieve_parser *
sieve_parser_create(struct sieve_script * script,struct sieve_error_handler * ehandler,enum sieve_error * error_r)42 sieve_parser_create(struct sieve_script *script,
43 		    struct sieve_error_handler *ehandler,
44 		    enum sieve_error *error_r)
45 {
46 	struct sieve_parser *parser;
47 	const struct sieve_lexer *lexer;
48 
49 	lexer = sieve_lexer_create(script, ehandler, error_r);
50 	if (lexer != NULL) {
51 		pool_t pool = pool_alloconly_create("sieve_parser", 4096);
52 
53 		parser = p_new(pool, struct sieve_parser, 1);
54 		parser->pool = pool;
55 		parser->valid = TRUE;
56 
57 		parser->ehandler = ehandler;
58 		sieve_error_handler_ref(ehandler);
59 
60 		parser->script = script;
61 		sieve_script_ref(script);
62 
63 		parser->lexer = lexer;
64 		parser->ast = NULL;
65 
66 		return parser;
67 	}
68 
69 	return NULL;
70 }
71 
sieve_parser_free(struct sieve_parser ** parser)72 void sieve_parser_free(struct sieve_parser **parser)
73 {
74 	if ((*parser)->ast != NULL)
75 		sieve_ast_unref(&(*parser)->ast);
76 
77 	sieve_lexer_free(&(*parser)->lexer);
78 	sieve_script_unref(&(*parser)->script);
79 
80 	sieve_error_handler_unref(&(*parser)->ehandler);
81 
82 	pool_unref(&(*parser)->pool);
83 
84 	*parser = NULL;
85 }
86 
87 /*
88  * Internal error handling
89  */
90 
91 inline static void ATTR_FORMAT(4, 5)
sieve_parser_error(struct sieve_parser * parser,const char * csrc_filename,unsigned int csrc_linenum,const char * fmt,...)92 sieve_parser_error(struct sieve_parser *parser,
93 		   const char *csrc_filename, unsigned int csrc_linenum,
94 		   const char *fmt, ...)
95 {
96 	struct sieve_error_params params = {
97 		.log_type = LOG_TYPE_ERROR,
98 		.csrc = {
99 			.filename = csrc_filename,
100 			.linenum = csrc_linenum,
101 		},
102 	};
103 	va_list args;
104 
105 	va_start(args, fmt);
106 
107 	/* Don't report a parse error if the lexer complained already */
108 	if (sieve_lexer_token_type(parser->lexer) != STT_ERROR) {
109 		T_BEGIN {
110 			params.location = sieve_error_script_location(
111 				parser->script,
112 				sieve_lexer_token_line(parser->lexer));
113 			sieve_logv(parser->ehandler, &params, fmt, args);
114 		} T_END;
115 	}
116 
117 	parser->valid = FALSE;
118 
119 	va_end(args);
120 }
121 #define sieve_parser_error(parser, ...) \
122 	sieve_parser_error(parser, __FILE__, __LINE__, __VA_ARGS__)
123 
124 /*
125  * Sieve grammar parsing
126  */
127 
128 /* sieve_parse_arguments():
129 
130    Parses both command arguments and sub-tests:
131      arguments = *argument [test / test-list]
132      argument = string-list / number / tag
133      string = quoted-string / multi-line   [[implicitly handled in lexer]]
134      string-list = "[" string *("," string) "]" / string         ;; if
135        there is only a single string, the brackets are optional
136      test-list = "(" test *("," test) ")"
137      test = identifier arguments
138  */
139 static int
sieve_parse_arguments(struct sieve_parser * parser,struct sieve_ast_node * node,unsigned int depth)140 sieve_parse_arguments(struct sieve_parser *parser, struct sieve_ast_node *node,
141 		      unsigned int depth)
142 {
143 	const struct sieve_lexer *lexer = parser->lexer;
144 	struct sieve_ast_node *test = NULL;
145 	bool test_present = TRUE;
146 	bool arg_present = TRUE;
147 	int result = 1; /* Indicates whether the parser is in a defined, not
148 	                    necessarily error-free state */
149 
150 	/* Parse arguments */
151 	while (arg_present && result > 0) {
152 		struct sieve_ast_argument *arg;
153 
154 		if (!parser->valid &&
155 		    !sieve_errors_more_allowed(parser->ehandler)) {
156 			result = 0;
157 			break;
158 		}
159 
160 		switch (sieve_lexer_token_type(lexer)) {
161 		/* String list */
162 		case STT_LSQUARE:
163 			/* Create stinglist object */
164 			arg = sieve_ast_argument_stringlist_create(
165 				node, sieve_lexer_token_line(parser->lexer));
166 			if (arg == NULL) break;
167 			sieve_lexer_skip_token(lexer);
168 
169 			if (sieve_lexer_token_type(lexer) == STT_STRING) {
170 				bool add_failed = FALSE;
171 
172 				/* Add the string to the list */
173 				if (!sieve_ast_stringlist_add(
174 					arg, sieve_lexer_token_str(lexer),
175 					sieve_lexer_token_line(parser->lexer)))
176 					add_failed = TRUE;
177 				sieve_lexer_skip_token(lexer);
178 
179 				while (!add_failed &&
180 				       sieve_lexer_token_type(lexer) == STT_COMMA) {
181 					sieve_lexer_skip_token(lexer);
182 
183 					/* Check parser status */
184 					if (!parser->valid &&
185 					    !sieve_errors_more_allowed(parser->ehandler)) {
186 						result = sieve_parser_recover(parser, STT_RSQUARE);
187 						break;
188 					}
189 
190 					if (sieve_lexer_token_type(lexer) == STT_STRING) {
191 						/* Add the string to the list */
192 						if (!sieve_ast_stringlist_add(
193 							arg, sieve_lexer_token_str(lexer),
194 							sieve_lexer_token_line(parser->lexer)))
195 							add_failed = TRUE;
196 
197 						sieve_lexer_skip_token(lexer);
198 					} else {
199 						sieve_parser_error(parser,
200 							"expecting string after ',' in string list, "
201 							"but found %s",
202 							sieve_lexer_token_description(lexer));
203 						result = sieve_parser_recover(parser, STT_RSQUARE);
204 						break;
205 					}
206 				}
207 
208 				if (add_failed) {
209 					sieve_parser_error(parser,
210 						"failed to accept more items in string list");
211 					return -1;
212 				}
213 			} else {
214 				sieve_parser_error(parser,
215 					"expecting string after '[' in string list, "
216 					"but found %s",
217 					sieve_lexer_token_description(lexer));
218 				result = sieve_parser_recover(parser, STT_RSQUARE);
219 			}
220 
221 			/* Finish the string list */
222 			if (sieve_lexer_token_type(lexer) == STT_RSQUARE) {
223 				sieve_lexer_skip_token(lexer);
224 			} else {
225 				sieve_parser_error(parser,
226 					"expecting ',' or end of string list ']', "
227 					"but found %s",
228 					sieve_lexer_token_description(lexer));
229 
230 				if ((result = sieve_parser_recover(parser, STT_RSQUARE)) > 0)
231 					sieve_lexer_skip_token(lexer);
232 			}
233 			break;
234 		/* Single string */
235 		case STT_STRING:
236 			arg = sieve_ast_argument_string_create(
237 				node, sieve_lexer_token_str(lexer),
238 				sieve_lexer_token_line(parser->lexer));
239 			sieve_lexer_skip_token(lexer);
240 			break;
241 		/* Number */
242 		case STT_NUMBER:
243 			arg = sieve_ast_argument_number_create(
244 				node, sieve_lexer_token_int(lexer),
245 				sieve_lexer_token_line(parser->lexer));
246 			sieve_lexer_skip_token(lexer);
247 			break;
248 		/* Tag */
249 		case STT_TAG:
250 			arg = sieve_ast_argument_tag_create(
251 				node, sieve_lexer_token_ident(lexer),
252 				sieve_lexer_token_line(parser->lexer));
253 			sieve_lexer_skip_token(lexer);
254 			break;
255 		/* End of argument list, continue with tests */
256 		default:
257 			arg_present = FALSE;
258 			break;
259 		}
260 
261 		if (arg_present && arg == NULL) {
262 			sieve_parser_error(parser,
263 				"failed to accept more arguments for command '%s'",
264 				node->identifier);
265 			return -1;
266 		}
267 
268 		if (sieve_ast_argument_count(node) > SIEVE_MAX_COMMAND_ARGUMENTS) {
269 			sieve_parser_error(parser,
270 				"too many arguments for command '%s'",
271 				node->identifier);
272 			return 0;
273 		}
274 	}
275 
276 	if (result <= 0)
277 		return result; /* Defer recovery to caller */
278 
279 	/* --> [ test / test-list ]
280  	   test-list = "(" test *("," test) ")"
281 	   test = identifier arguments
282 	 */
283 	switch (sieve_lexer_token_type(lexer)) {
284 	/* Single test */
285 	case STT_IDENTIFIER:
286 		if ((depth + 1) > SIEVE_MAX_TEST_NESTING) {
287 			sieve_parser_error(parser,
288 				"cannot nest tests deeper than %u levels",
289 				SIEVE_MAX_TEST_NESTING);
290 			return 0;
291 		}
292 
293 		test = sieve_ast_test_create(
294 			node, sieve_lexer_token_ident(lexer),
295 			sieve_lexer_token_line(parser->lexer));
296 		sieve_lexer_skip_token(lexer);
297 
298 		/* Theoretically, test can be NULL */
299 		if (test == NULL)
300 			break;
301 
302 		/* Parse test arguments, which may include more tests (recurse) */
303 		if (sieve_parse_arguments(parser, test, depth + 1) <= 0) {
304 			return 0; /* Defer recovery to caller */
305 		}
306 
307 		break;
308 
309 	/* Test list */
310 	case STT_LBRACKET:
311 		sieve_lexer_skip_token(lexer);
312 
313 		if (depth+1 > SIEVE_MAX_TEST_NESTING) {
314 			sieve_parser_error(parser,
315 				"cannot nest tests deeper than %u levels",
316 				SIEVE_MAX_TEST_NESTING);
317 			result = sieve_parser_recover(parser, STT_RBRACKET);
318 
319 			if (result > 0)
320 				sieve_lexer_skip_token(lexer);
321 			return result;
322 		}
323 
324 		node->test_list = TRUE;
325 
326 		/* Test starts with identifier */
327 		if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
328 			test = sieve_ast_test_create(
329 				node, sieve_lexer_token_ident(lexer),
330 				sieve_lexer_token_line(parser->lexer));
331 			sieve_lexer_skip_token(lexer);
332 
333 			if (test == NULL)
334 				break;
335 
336 			/* Parse test arguments, which may include more tests (recurse) */
337 			if ((result = sieve_parse_arguments(parser, test, depth+1)) > 0) {
338 
339 				/* More tests ? */
340 				while (sieve_lexer_token_type(lexer) == STT_COMMA) {
341 					sieve_lexer_skip_token(lexer);
342 
343 					/* Check parser status */
344 					if (!parser->valid &&
345 					    !sieve_errors_more_allowed(parser->ehandler)) {
346 						result = sieve_parser_recover(parser, STT_RBRACKET);
347 						break;
348 					}
349 
350 					/* Test starts with identifier */
351 					if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
352 						test = sieve_ast_test_create(
353 							node, sieve_lexer_token_ident(lexer),
354 							sieve_lexer_token_line(parser->lexer));
355 						sieve_lexer_skip_token(lexer);
356 
357 						if (test == NULL)
358 							break;
359 
360 						/* Parse test arguments, which may include more tests (recurse) */
361 						if ((result = sieve_parse_arguments(parser, test, depth+1)) <= 0) {
362 							if (result < 0)
363 								return result;
364 							result = sieve_parser_recover(parser, STT_RBRACKET);
365 							break;
366 						}
367 					} else {
368 						sieve_parser_error(parser,
369 							"expecting test identifier after ',' in test list, "
370 							"but found %s",
371 							sieve_lexer_token_description(lexer));
372 						result = sieve_parser_recover(parser, STT_RBRACKET);
373 						break;
374 					}
375 				}
376 				if (test == NULL)
377 					break;
378 			} else {
379 				if (result < 0)
380 					return result;
381 
382 				result = sieve_parser_recover(parser, STT_RBRACKET);
383 			}
384 		} else {
385 			sieve_parser_error(parser,
386 				"expecting test identifier after '(' in test list, "
387 				"but found %s",
388 				sieve_lexer_token_description(lexer));
389 
390 			result = sieve_parser_recover(parser, STT_RBRACKET);
391 		}
392 
393 		/* The next token should be a ')', indicating the end of the
394 		   test list
395 		     --> previous sieve_parser_recover calls try to restore this
396 			 situation after parse errors.
397 		 */
398  		if (sieve_lexer_token_type(lexer) == STT_RBRACKET) {
399 			sieve_lexer_skip_token(lexer);
400 		} else {
401 			sieve_parser_error(parser,
402 				"expecting ',' or end of test list ')', "
403 				"but found %s",
404 				sieve_lexer_token_description(lexer));
405 
406 			/* Recover function tries to make next token equal to
407 			   ')'. If it succeeds we need to skip it.
408 			 */
409 			if ((result = sieve_parser_recover(parser, STT_RBRACKET)) > 0)
410 				sieve_lexer_skip_token(lexer);
411 		}
412 		break;
413 
414 	default:
415 		/* Not an error: test / test-list is optional
416 		     --> any errors are detected by the caller
417 		 */
418 		test_present = FALSE;
419 		break;
420 	}
421 
422 	if (test_present && test == NULL) {
423 		sieve_parser_error(parser,
424 			"failed to accept more tests for command '%s'",
425 			node->identifier);
426 		return -1;
427 	}
428 
429 	return result;
430 }
431 
432 /* commands = *command
433    command = identifier arguments ( ";" / block )
434    block = "{" commands "}"
435  */
436 static int
sieve_parse_commands(struct sieve_parser * parser,struct sieve_ast_node * block,unsigned int depth)437 sieve_parse_commands(struct sieve_parser *parser, struct sieve_ast_node *block,
438 		     unsigned int depth)
439 {
440 	const struct sieve_lexer *lexer = parser->lexer;
441 	int result = 1;
442 
443 	while (result > 0 &&
444 	       sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
445 		struct sieve_ast_node *command;
446 
447 		/* Check parser status */
448 		if (!parser->valid &&
449 		    !sieve_errors_more_allowed(parser->ehandler)) {
450 			result = sieve_parser_recover(parser, STT_SEMICOLON);
451 			break;
452 		}
453 
454 		/* Create command node */
455 		command = sieve_ast_command_create(
456 			block, sieve_lexer_token_ident(lexer),
457 			sieve_lexer_token_line(parser->lexer));
458 		sieve_lexer_skip_token(lexer);
459 
460 		if (command == NULL) {
461 			sieve_parser_error(parser,
462 				"failed to accept more commands inside the block of command '%s'",
463 				block->identifier);
464 			return -1;
465 		}
466 
467 		result = sieve_parse_arguments(parser, command, 1);
468 
469 		/* Check whether the command is properly terminated
470 		   (i.e. with ; or a new block)
471 		 */
472 		if (result > 0 &&
473 		    sieve_lexer_token_type(lexer) != STT_SEMICOLON &&
474 		    sieve_lexer_token_type(lexer) != STT_LCURLY) {
475 
476 			sieve_parser_error(parser,
477 				"expected end of command ';' or the beginning of a compound block '{', "
478 				"but found %s",
479 				sieve_lexer_token_description(lexer));
480 			result = 0;
481 		}
482 
483 		/* Try to recover from parse errors to reacquire a defined state
484 		 */
485 		if (result == 0)
486 			result = sieve_parser_recover(parser, STT_SEMICOLON);
487 
488 		/* Don't bother to continue if we are not in a defined state */
489 		if (result <= 0)
490 			return result;
491 
492 		switch (sieve_lexer_token_type(lexer)) {
493 		/* End of the command */
494 		case STT_SEMICOLON:
495 			sieve_lexer_skip_token(lexer);
496 			break;
497 		/* Command has a block {...} */
498 		case STT_LCURLY:
499 			sieve_lexer_skip_token(lexer);
500 
501 			/* Check current depth first */
502 			if ((depth + 1) > SIEVE_MAX_BLOCK_NESTING) {
503 				sieve_parser_error(parser,
504 					"cannot nest command blocks deeper than %u levels",
505 					SIEVE_MAX_BLOCK_NESTING);
506 				result = sieve_parser_recover(parser, STT_RCURLY);
507 
508 				if (result > 0)
509 					sieve_lexer_skip_token(lexer);
510 				break;
511 			}
512 
513 			command->block = TRUE;
514 
515 			if ((result = sieve_parse_commands(parser, command, depth + 1)) > 0) {
516 				if (sieve_lexer_token_type(lexer) != STT_RCURLY) {
517 					sieve_parser_error(parser,
518 						"expected end of compound block '}', "
519 						"but found %s",
520 						sieve_lexer_token_description(lexer));
521 					result = sieve_parser_recover(parser, STT_RCURLY);
522 				} else {
523 					sieve_lexer_skip_token(lexer);
524 				}
525 			} else {
526 				if (result < 0)
527 					return result;
528 
529 				if ((result = sieve_parser_recover(parser, STT_RCURLY)) > 0)
530 					sieve_lexer_skip_token(lexer);
531 			}
532 
533 			break;
534 
535 		default:
536 			/* Recovered previously, so this cannot happen */
537 			i_unreached();
538 		}
539 	}
540 
541 	return result;
542 }
543 
sieve_parser_run(struct sieve_parser * parser,struct sieve_ast ** ast)544 bool sieve_parser_run(struct sieve_parser *parser, struct sieve_ast **ast)
545 {
546 	if (parser->ast != NULL)
547 		sieve_ast_unref(&parser->ast);
548 
549 	/* Create AST object if none is provided */
550 	if (*ast == NULL)
551 		*ast = sieve_ast_create(parser->script);
552 	else
553 		sieve_ast_ref(*ast);
554 
555 	parser->ast = *ast;
556 
557 	/* Scan first token */
558 	sieve_lexer_skip_token(parser->lexer);
559 
560 	/* Parse */
561 	if (sieve_parse_commands(parser, sieve_ast_root(parser->ast), 1) > 0 &&
562 	    parser->valid) {
563 		/* Parsed right to EOF ? */
564 		if (sieve_lexer_token_type(parser->lexer) != STT_EOF) {
565 			sieve_parser_error(parser,
566 				"unexpected %s found at (the presumed) end of file",
567 				sieve_lexer_token_description(parser->lexer));
568 			parser->valid = FALSE;
569 		}
570 	} else {
571 		parser->valid = FALSE;
572 	}
573 
574 	/* Clean up AST if parse failed */
575 	if (!parser->valid) {
576 		parser->ast = NULL;
577 		sieve_ast_unref(ast);
578 	}
579 
580 	return parser->valid;
581 }
582 
583 /* Error recovery:
584      To continue parsing after an error it is important to find the next
585      parsible item in the stream. The recover function skips over the remaining
586      garbage after an error. It tries  to find the end of the failed syntax
587      structure and takes nesting of structures into account.
588  */
589 
590 /* Assign useful names to priorities for readability */
591 enum sieve_grammatical_prio {
592 	SGP_BLOCK = 3,
593 	SGP_COMMAND = 2,
594 	SGP_TEST_LIST = 1,
595 	SGP_STRING_LIST = 0,
596 
597 	SGP_OTHER = -1
598 };
599 
600 static inline enum sieve_grammatical_prio
__get_token_priority(enum sieve_token_type token)601 __get_token_priority(enum sieve_token_type token)
602 {
603 	switch (token) {
604 	case STT_LCURLY:
605 	case STT_RCURLY:
606 		return SGP_BLOCK;
607 	case STT_SEMICOLON:
608 		return SGP_COMMAND;
609 	case STT_LBRACKET:
610 	case STT_RBRACKET:
611 		return SGP_TEST_LIST;
612 	case STT_LSQUARE:
613 	case STT_RSQUARE:
614 		return SGP_STRING_LIST;
615 	default:
616 		break;
617 	}
618 
619 	return SGP_OTHER;
620 }
621 
622 static int
sieve_parser_recover(struct sieve_parser * parser,enum sieve_token_type end_token)623 sieve_parser_recover(struct sieve_parser *parser,
624 		     enum sieve_token_type end_token)
625 {
626 	/* The tokens that begin/end a specific block/command/list in order
627  	   of ascending grammatical priority.
628  	 */
629  	static const enum sieve_token_type begin_tokens[4] = {
630 		STT_LSQUARE, STT_LBRACKET, STT_NONE, STT_LCURLY };
631 	static const enum sieve_token_type end_tokens[4] = {
632 		STT_RSQUARE, STT_RBRACKET, STT_SEMICOLON, STT_RCURLY};
633 	const struct sieve_lexer *lexer = parser->lexer;
634 	int nesting = 1;
635 	enum sieve_grammatical_prio end_priority =
636 		__get_token_priority(end_token);
637 
638 	i_assert(end_priority != SGP_OTHER);
639 
640 	while (sieve_lexer_token_type(lexer) != STT_EOF &&
641 	       __get_token_priority(sieve_lexer_token_type(lexer))
642 		<= end_priority) {
643 		if (sieve_lexer_token_type(lexer) ==
644 			begin_tokens[end_priority]) {
645 			nesting++;
646 			sieve_lexer_skip_token(lexer);
647 			continue;
648 		}
649 		if (sieve_lexer_token_type(lexer) ==
650 			end_tokens[end_priority]) {
651 			nesting--;
652 
653 			if (nesting == 0) {
654 				/* Next character is the end */
655 				return 1;
656 			}
657 		}
658 		sieve_lexer_skip_token(lexer);
659 	}
660 
661 	/* Special case: COMMAND */
662 	if (end_token == STT_SEMICOLON &&
663 	    sieve_lexer_token_type(lexer) == STT_LCURLY) {
664 		return 1;
665 	}
666 
667 	/* End not found before eof or end of surrounding grammatical structure
668 	 */
669 	return 0;
670 }
671