1 /* Copyright (c) 2002-2018 Pigeonhole authors, see the included COPYING file
2 */
3
4 #include "lib.h"
5 #include "istream.h"
6 #include "failures.h"
7
8 #include "sieve-common.h"
9 #include "sieve-limits.h"
10 #include "sieve-script.h"
11 #include "sieve-lexer.h"
12 #include "sieve-parser.h"
13 #include "sieve-error.h"
14 #include "sieve-ast.h"
15
16 /*
17 * Forward declarations
18 */
19
20 static int
21 sieve_parser_recover(struct sieve_parser *parser,
22 enum sieve_token_type end_token);
23
24 /*
25 * Parser object
26 */
27
28 struct sieve_parser {
29 pool_t pool;
30
31 bool valid;
32
33 struct sieve_script *script;
34
35 struct sieve_error_handler *ehandler;
36
37 const struct sieve_lexer *lexer;
38 struct sieve_ast *ast;
39 };
40
41 struct sieve_parser *
sieve_parser_create(struct sieve_script * script,struct sieve_error_handler * ehandler,enum sieve_error * error_r)42 sieve_parser_create(struct sieve_script *script,
43 struct sieve_error_handler *ehandler,
44 enum sieve_error *error_r)
45 {
46 struct sieve_parser *parser;
47 const struct sieve_lexer *lexer;
48
49 lexer = sieve_lexer_create(script, ehandler, error_r);
50 if (lexer != NULL) {
51 pool_t pool = pool_alloconly_create("sieve_parser", 4096);
52
53 parser = p_new(pool, struct sieve_parser, 1);
54 parser->pool = pool;
55 parser->valid = TRUE;
56
57 parser->ehandler = ehandler;
58 sieve_error_handler_ref(ehandler);
59
60 parser->script = script;
61 sieve_script_ref(script);
62
63 parser->lexer = lexer;
64 parser->ast = NULL;
65
66 return parser;
67 }
68
69 return NULL;
70 }
71
sieve_parser_free(struct sieve_parser ** parser)72 void sieve_parser_free(struct sieve_parser **parser)
73 {
74 if ((*parser)->ast != NULL)
75 sieve_ast_unref(&(*parser)->ast);
76
77 sieve_lexer_free(&(*parser)->lexer);
78 sieve_script_unref(&(*parser)->script);
79
80 sieve_error_handler_unref(&(*parser)->ehandler);
81
82 pool_unref(&(*parser)->pool);
83
84 *parser = NULL;
85 }
86
87 /*
88 * Internal error handling
89 */
90
91 inline static void ATTR_FORMAT(4, 5)
sieve_parser_error(struct sieve_parser * parser,const char * csrc_filename,unsigned int csrc_linenum,const char * fmt,...)92 sieve_parser_error(struct sieve_parser *parser,
93 const char *csrc_filename, unsigned int csrc_linenum,
94 const char *fmt, ...)
95 {
96 struct sieve_error_params params = {
97 .log_type = LOG_TYPE_ERROR,
98 .csrc = {
99 .filename = csrc_filename,
100 .linenum = csrc_linenum,
101 },
102 };
103 va_list args;
104
105 va_start(args, fmt);
106
107 /* Don't report a parse error if the lexer complained already */
108 if (sieve_lexer_token_type(parser->lexer) != STT_ERROR) {
109 T_BEGIN {
110 params.location = sieve_error_script_location(
111 parser->script,
112 sieve_lexer_token_line(parser->lexer));
113 sieve_logv(parser->ehandler, ¶ms, fmt, args);
114 } T_END;
115 }
116
117 parser->valid = FALSE;
118
119 va_end(args);
120 }
121 #define sieve_parser_error(parser, ...) \
122 sieve_parser_error(parser, __FILE__, __LINE__, __VA_ARGS__)
123
124 /*
125 * Sieve grammar parsing
126 */
127
128 /* sieve_parse_arguments():
129
130 Parses both command arguments and sub-tests:
131 arguments = *argument [test / test-list]
132 argument = string-list / number / tag
133 string = quoted-string / multi-line [[implicitly handled in lexer]]
134 string-list = "[" string *("," string) "]" / string ;; if
135 there is only a single string, the brackets are optional
136 test-list = "(" test *("," test) ")"
137 test = identifier arguments
138 */
139 static int
sieve_parse_arguments(struct sieve_parser * parser,struct sieve_ast_node * node,unsigned int depth)140 sieve_parse_arguments(struct sieve_parser *parser, struct sieve_ast_node *node,
141 unsigned int depth)
142 {
143 const struct sieve_lexer *lexer = parser->lexer;
144 struct sieve_ast_node *test = NULL;
145 bool test_present = TRUE;
146 bool arg_present = TRUE;
147 int result = 1; /* Indicates whether the parser is in a defined, not
148 necessarily error-free state */
149
150 /* Parse arguments */
151 while (arg_present && result > 0) {
152 struct sieve_ast_argument *arg;
153
154 if (!parser->valid &&
155 !sieve_errors_more_allowed(parser->ehandler)) {
156 result = 0;
157 break;
158 }
159
160 switch (sieve_lexer_token_type(lexer)) {
161 /* String list */
162 case STT_LSQUARE:
163 /* Create stinglist object */
164 arg = sieve_ast_argument_stringlist_create(
165 node, sieve_lexer_token_line(parser->lexer));
166 if (arg == NULL) break;
167 sieve_lexer_skip_token(lexer);
168
169 if (sieve_lexer_token_type(lexer) == STT_STRING) {
170 bool add_failed = FALSE;
171
172 /* Add the string to the list */
173 if (!sieve_ast_stringlist_add(
174 arg, sieve_lexer_token_str(lexer),
175 sieve_lexer_token_line(parser->lexer)))
176 add_failed = TRUE;
177 sieve_lexer_skip_token(lexer);
178
179 while (!add_failed &&
180 sieve_lexer_token_type(lexer) == STT_COMMA) {
181 sieve_lexer_skip_token(lexer);
182
183 /* Check parser status */
184 if (!parser->valid &&
185 !sieve_errors_more_allowed(parser->ehandler)) {
186 result = sieve_parser_recover(parser, STT_RSQUARE);
187 break;
188 }
189
190 if (sieve_lexer_token_type(lexer) == STT_STRING) {
191 /* Add the string to the list */
192 if (!sieve_ast_stringlist_add(
193 arg, sieve_lexer_token_str(lexer),
194 sieve_lexer_token_line(parser->lexer)))
195 add_failed = TRUE;
196
197 sieve_lexer_skip_token(lexer);
198 } else {
199 sieve_parser_error(parser,
200 "expecting string after ',' in string list, "
201 "but found %s",
202 sieve_lexer_token_description(lexer));
203 result = sieve_parser_recover(parser, STT_RSQUARE);
204 break;
205 }
206 }
207
208 if (add_failed) {
209 sieve_parser_error(parser,
210 "failed to accept more items in string list");
211 return -1;
212 }
213 } else {
214 sieve_parser_error(parser,
215 "expecting string after '[' in string list, "
216 "but found %s",
217 sieve_lexer_token_description(lexer));
218 result = sieve_parser_recover(parser, STT_RSQUARE);
219 }
220
221 /* Finish the string list */
222 if (sieve_lexer_token_type(lexer) == STT_RSQUARE) {
223 sieve_lexer_skip_token(lexer);
224 } else {
225 sieve_parser_error(parser,
226 "expecting ',' or end of string list ']', "
227 "but found %s",
228 sieve_lexer_token_description(lexer));
229
230 if ((result = sieve_parser_recover(parser, STT_RSQUARE)) > 0)
231 sieve_lexer_skip_token(lexer);
232 }
233 break;
234 /* Single string */
235 case STT_STRING:
236 arg = sieve_ast_argument_string_create(
237 node, sieve_lexer_token_str(lexer),
238 sieve_lexer_token_line(parser->lexer));
239 sieve_lexer_skip_token(lexer);
240 break;
241 /* Number */
242 case STT_NUMBER:
243 arg = sieve_ast_argument_number_create(
244 node, sieve_lexer_token_int(lexer),
245 sieve_lexer_token_line(parser->lexer));
246 sieve_lexer_skip_token(lexer);
247 break;
248 /* Tag */
249 case STT_TAG:
250 arg = sieve_ast_argument_tag_create(
251 node, sieve_lexer_token_ident(lexer),
252 sieve_lexer_token_line(parser->lexer));
253 sieve_lexer_skip_token(lexer);
254 break;
255 /* End of argument list, continue with tests */
256 default:
257 arg_present = FALSE;
258 break;
259 }
260
261 if (arg_present && arg == NULL) {
262 sieve_parser_error(parser,
263 "failed to accept more arguments for command '%s'",
264 node->identifier);
265 return -1;
266 }
267
268 if (sieve_ast_argument_count(node) > SIEVE_MAX_COMMAND_ARGUMENTS) {
269 sieve_parser_error(parser,
270 "too many arguments for command '%s'",
271 node->identifier);
272 return 0;
273 }
274 }
275
276 if (result <= 0)
277 return result; /* Defer recovery to caller */
278
279 /* --> [ test / test-list ]
280 test-list = "(" test *("," test) ")"
281 test = identifier arguments
282 */
283 switch (sieve_lexer_token_type(lexer)) {
284 /* Single test */
285 case STT_IDENTIFIER:
286 if ((depth + 1) > SIEVE_MAX_TEST_NESTING) {
287 sieve_parser_error(parser,
288 "cannot nest tests deeper than %u levels",
289 SIEVE_MAX_TEST_NESTING);
290 return 0;
291 }
292
293 test = sieve_ast_test_create(
294 node, sieve_lexer_token_ident(lexer),
295 sieve_lexer_token_line(parser->lexer));
296 sieve_lexer_skip_token(lexer);
297
298 /* Theoretically, test can be NULL */
299 if (test == NULL)
300 break;
301
302 /* Parse test arguments, which may include more tests (recurse) */
303 if (sieve_parse_arguments(parser, test, depth + 1) <= 0) {
304 return 0; /* Defer recovery to caller */
305 }
306
307 break;
308
309 /* Test list */
310 case STT_LBRACKET:
311 sieve_lexer_skip_token(lexer);
312
313 if (depth+1 > SIEVE_MAX_TEST_NESTING) {
314 sieve_parser_error(parser,
315 "cannot nest tests deeper than %u levels",
316 SIEVE_MAX_TEST_NESTING);
317 result = sieve_parser_recover(parser, STT_RBRACKET);
318
319 if (result > 0)
320 sieve_lexer_skip_token(lexer);
321 return result;
322 }
323
324 node->test_list = TRUE;
325
326 /* Test starts with identifier */
327 if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
328 test = sieve_ast_test_create(
329 node, sieve_lexer_token_ident(lexer),
330 sieve_lexer_token_line(parser->lexer));
331 sieve_lexer_skip_token(lexer);
332
333 if (test == NULL)
334 break;
335
336 /* Parse test arguments, which may include more tests (recurse) */
337 if ((result = sieve_parse_arguments(parser, test, depth+1)) > 0) {
338
339 /* More tests ? */
340 while (sieve_lexer_token_type(lexer) == STT_COMMA) {
341 sieve_lexer_skip_token(lexer);
342
343 /* Check parser status */
344 if (!parser->valid &&
345 !sieve_errors_more_allowed(parser->ehandler)) {
346 result = sieve_parser_recover(parser, STT_RBRACKET);
347 break;
348 }
349
350 /* Test starts with identifier */
351 if (sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
352 test = sieve_ast_test_create(
353 node, sieve_lexer_token_ident(lexer),
354 sieve_lexer_token_line(parser->lexer));
355 sieve_lexer_skip_token(lexer);
356
357 if (test == NULL)
358 break;
359
360 /* Parse test arguments, which may include more tests (recurse) */
361 if ((result = sieve_parse_arguments(parser, test, depth+1)) <= 0) {
362 if (result < 0)
363 return result;
364 result = sieve_parser_recover(parser, STT_RBRACKET);
365 break;
366 }
367 } else {
368 sieve_parser_error(parser,
369 "expecting test identifier after ',' in test list, "
370 "but found %s",
371 sieve_lexer_token_description(lexer));
372 result = sieve_parser_recover(parser, STT_RBRACKET);
373 break;
374 }
375 }
376 if (test == NULL)
377 break;
378 } else {
379 if (result < 0)
380 return result;
381
382 result = sieve_parser_recover(parser, STT_RBRACKET);
383 }
384 } else {
385 sieve_parser_error(parser,
386 "expecting test identifier after '(' in test list, "
387 "but found %s",
388 sieve_lexer_token_description(lexer));
389
390 result = sieve_parser_recover(parser, STT_RBRACKET);
391 }
392
393 /* The next token should be a ')', indicating the end of the
394 test list
395 --> previous sieve_parser_recover calls try to restore this
396 situation after parse errors.
397 */
398 if (sieve_lexer_token_type(lexer) == STT_RBRACKET) {
399 sieve_lexer_skip_token(lexer);
400 } else {
401 sieve_parser_error(parser,
402 "expecting ',' or end of test list ')', "
403 "but found %s",
404 sieve_lexer_token_description(lexer));
405
406 /* Recover function tries to make next token equal to
407 ')'. If it succeeds we need to skip it.
408 */
409 if ((result = sieve_parser_recover(parser, STT_RBRACKET)) > 0)
410 sieve_lexer_skip_token(lexer);
411 }
412 break;
413
414 default:
415 /* Not an error: test / test-list is optional
416 --> any errors are detected by the caller
417 */
418 test_present = FALSE;
419 break;
420 }
421
422 if (test_present && test == NULL) {
423 sieve_parser_error(parser,
424 "failed to accept more tests for command '%s'",
425 node->identifier);
426 return -1;
427 }
428
429 return result;
430 }
431
432 /* commands = *command
433 command = identifier arguments ( ";" / block )
434 block = "{" commands "}"
435 */
436 static int
sieve_parse_commands(struct sieve_parser * parser,struct sieve_ast_node * block,unsigned int depth)437 sieve_parse_commands(struct sieve_parser *parser, struct sieve_ast_node *block,
438 unsigned int depth)
439 {
440 const struct sieve_lexer *lexer = parser->lexer;
441 int result = 1;
442
443 while (result > 0 &&
444 sieve_lexer_token_type(lexer) == STT_IDENTIFIER) {
445 struct sieve_ast_node *command;
446
447 /* Check parser status */
448 if (!parser->valid &&
449 !sieve_errors_more_allowed(parser->ehandler)) {
450 result = sieve_parser_recover(parser, STT_SEMICOLON);
451 break;
452 }
453
454 /* Create command node */
455 command = sieve_ast_command_create(
456 block, sieve_lexer_token_ident(lexer),
457 sieve_lexer_token_line(parser->lexer));
458 sieve_lexer_skip_token(lexer);
459
460 if (command == NULL) {
461 sieve_parser_error(parser,
462 "failed to accept more commands inside the block of command '%s'",
463 block->identifier);
464 return -1;
465 }
466
467 result = sieve_parse_arguments(parser, command, 1);
468
469 /* Check whether the command is properly terminated
470 (i.e. with ; or a new block)
471 */
472 if (result > 0 &&
473 sieve_lexer_token_type(lexer) != STT_SEMICOLON &&
474 sieve_lexer_token_type(lexer) != STT_LCURLY) {
475
476 sieve_parser_error(parser,
477 "expected end of command ';' or the beginning of a compound block '{', "
478 "but found %s",
479 sieve_lexer_token_description(lexer));
480 result = 0;
481 }
482
483 /* Try to recover from parse errors to reacquire a defined state
484 */
485 if (result == 0)
486 result = sieve_parser_recover(parser, STT_SEMICOLON);
487
488 /* Don't bother to continue if we are not in a defined state */
489 if (result <= 0)
490 return result;
491
492 switch (sieve_lexer_token_type(lexer)) {
493 /* End of the command */
494 case STT_SEMICOLON:
495 sieve_lexer_skip_token(lexer);
496 break;
497 /* Command has a block {...} */
498 case STT_LCURLY:
499 sieve_lexer_skip_token(lexer);
500
501 /* Check current depth first */
502 if ((depth + 1) > SIEVE_MAX_BLOCK_NESTING) {
503 sieve_parser_error(parser,
504 "cannot nest command blocks deeper than %u levels",
505 SIEVE_MAX_BLOCK_NESTING);
506 result = sieve_parser_recover(parser, STT_RCURLY);
507
508 if (result > 0)
509 sieve_lexer_skip_token(lexer);
510 break;
511 }
512
513 command->block = TRUE;
514
515 if ((result = sieve_parse_commands(parser, command, depth + 1)) > 0) {
516 if (sieve_lexer_token_type(lexer) != STT_RCURLY) {
517 sieve_parser_error(parser,
518 "expected end of compound block '}', "
519 "but found %s",
520 sieve_lexer_token_description(lexer));
521 result = sieve_parser_recover(parser, STT_RCURLY);
522 } else {
523 sieve_lexer_skip_token(lexer);
524 }
525 } else {
526 if (result < 0)
527 return result;
528
529 if ((result = sieve_parser_recover(parser, STT_RCURLY)) > 0)
530 sieve_lexer_skip_token(lexer);
531 }
532
533 break;
534
535 default:
536 /* Recovered previously, so this cannot happen */
537 i_unreached();
538 }
539 }
540
541 return result;
542 }
543
sieve_parser_run(struct sieve_parser * parser,struct sieve_ast ** ast)544 bool sieve_parser_run(struct sieve_parser *parser, struct sieve_ast **ast)
545 {
546 if (parser->ast != NULL)
547 sieve_ast_unref(&parser->ast);
548
549 /* Create AST object if none is provided */
550 if (*ast == NULL)
551 *ast = sieve_ast_create(parser->script);
552 else
553 sieve_ast_ref(*ast);
554
555 parser->ast = *ast;
556
557 /* Scan first token */
558 sieve_lexer_skip_token(parser->lexer);
559
560 /* Parse */
561 if (sieve_parse_commands(parser, sieve_ast_root(parser->ast), 1) > 0 &&
562 parser->valid) {
563 /* Parsed right to EOF ? */
564 if (sieve_lexer_token_type(parser->lexer) != STT_EOF) {
565 sieve_parser_error(parser,
566 "unexpected %s found at (the presumed) end of file",
567 sieve_lexer_token_description(parser->lexer));
568 parser->valid = FALSE;
569 }
570 } else {
571 parser->valid = FALSE;
572 }
573
574 /* Clean up AST if parse failed */
575 if (!parser->valid) {
576 parser->ast = NULL;
577 sieve_ast_unref(ast);
578 }
579
580 return parser->valid;
581 }
582
583 /* Error recovery:
584 To continue parsing after an error it is important to find the next
585 parsible item in the stream. The recover function skips over the remaining
586 garbage after an error. It tries to find the end of the failed syntax
587 structure and takes nesting of structures into account.
588 */
589
590 /* Assign useful names to priorities for readability */
591 enum sieve_grammatical_prio {
592 SGP_BLOCK = 3,
593 SGP_COMMAND = 2,
594 SGP_TEST_LIST = 1,
595 SGP_STRING_LIST = 0,
596
597 SGP_OTHER = -1
598 };
599
600 static inline enum sieve_grammatical_prio
__get_token_priority(enum sieve_token_type token)601 __get_token_priority(enum sieve_token_type token)
602 {
603 switch (token) {
604 case STT_LCURLY:
605 case STT_RCURLY:
606 return SGP_BLOCK;
607 case STT_SEMICOLON:
608 return SGP_COMMAND;
609 case STT_LBRACKET:
610 case STT_RBRACKET:
611 return SGP_TEST_LIST;
612 case STT_LSQUARE:
613 case STT_RSQUARE:
614 return SGP_STRING_LIST;
615 default:
616 break;
617 }
618
619 return SGP_OTHER;
620 }
621
622 static int
sieve_parser_recover(struct sieve_parser * parser,enum sieve_token_type end_token)623 sieve_parser_recover(struct sieve_parser *parser,
624 enum sieve_token_type end_token)
625 {
626 /* The tokens that begin/end a specific block/command/list in order
627 of ascending grammatical priority.
628 */
629 static const enum sieve_token_type begin_tokens[4] = {
630 STT_LSQUARE, STT_LBRACKET, STT_NONE, STT_LCURLY };
631 static const enum sieve_token_type end_tokens[4] = {
632 STT_RSQUARE, STT_RBRACKET, STT_SEMICOLON, STT_RCURLY};
633 const struct sieve_lexer *lexer = parser->lexer;
634 int nesting = 1;
635 enum sieve_grammatical_prio end_priority =
636 __get_token_priority(end_token);
637
638 i_assert(end_priority != SGP_OTHER);
639
640 while (sieve_lexer_token_type(lexer) != STT_EOF &&
641 __get_token_priority(sieve_lexer_token_type(lexer))
642 <= end_priority) {
643 if (sieve_lexer_token_type(lexer) ==
644 begin_tokens[end_priority]) {
645 nesting++;
646 sieve_lexer_skip_token(lexer);
647 continue;
648 }
649 if (sieve_lexer_token_type(lexer) ==
650 end_tokens[end_priority]) {
651 nesting--;
652
653 if (nesting == 0) {
654 /* Next character is the end */
655 return 1;
656 }
657 }
658 sieve_lexer_skip_token(lexer);
659 }
660
661 /* Special case: COMMAND */
662 if (end_token == STT_SEMICOLON &&
663 sieve_lexer_token_type(lexer) == STT_LCURLY) {
664 return 1;
665 }
666
667 /* End not found before eof or end of surrounding grammatical structure
668 */
669 return 0;
670 }
671