1 /*------------------------------------------------------------------------------
2 *
3 * Copyright (c) 2011-2021, EURid vzw. All rights reserved.
4 * The YADIFA TM software product is provided under the BSD 3-clause license:
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of EURid nor the names of its contributors may be
16 * used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *------------------------------------------------------------------------------
32 *
33 */
34
35 #include "dnscore/dnscore-config.h"
36 #include <unistd.h>
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/time.h>
42 //#include <sys/resource.h>
43 #include <arpa/inet.h>
44 #include <fcntl.h>
45
46 #include "dnscore/timems.h"
47 #include "dnscore/logger.h"
48 #include "dnscore/file_output_stream.h"
49 #include "dnscore/file_input_stream.h"
50 #include "dnscore/logger_channel_stream.h"
51 #include "dnscore/buffer_input_stream.h"
52
53 #include "dnscore/base16.h"
54 #include "dnscore/base32hex.h"
55 #include "dnscore/base64.h"
56
57 #include "dnscore/parser.h"
58 #include "dnscore/ptr_set.h"
59 #include "dnscore/mutex.h"
60
61
62 #define DO_PRINT 0
63 #define DO_BUFFERIZE 1
64
65 #define PARSER_STREAM_BUFFER_SIZE 4096
66
67 static const char eol_park_needle[2] = {' ', '\0'};
68 static bool parser_init_error_codes_done = FALSE;
69
70 static inline ya_result
parser_set_couples(parser_s * parser,const char * input,u8 kind,u8 closer_kind)71 parser_set_couples(parser_s *parser, const char* input, u8 kind, u8 closer_kind)
72 {
73 u32 n = strlen(input);
74
75 if((n & 1) != 0)
76 {
77 return PARSER_ODD_CHAR_NUMBER;
78 }
79
80 for(u32 i = 0; i < n; i+= 2)
81 {
82 parser->char_type[(u8)input[i]] = kind;
83 if(closer_kind != PARSER_CHAR_TYPE_IGNORE)
84 {
85 parser->char_type[(u8)input[i + 1]] = closer_kind;
86 }
87 parser->delimiter_close[(u8)input[i]] = input[i + 1];
88 }
89
90 return n>>1;
91 }
92
93 static inline u32
parser_set_singleton(parser_s * parser,const char * input,u8 kind)94 parser_set_singleton(parser_s *parser, const char* input, u8 kind)
95 {
96 u32 n = strlen(input);
97
98 for(u32 i = 0; i < n; i++)
99 {
100 parser->char_type[(u8)input[i]] = kind;
101 }
102
103 return n;
104 }
105
106 void
parser_init_error_codes()107 parser_init_error_codes()
108 {
109 if(parser_init_error_codes_done)
110 {
111 return;
112 }
113
114 parser_init_error_codes_done = TRUE;
115
116 error_register(PARSER_SYNTAX_ERROR_MULTILINE,"PARSER_SYNTAX_ERROR_MULTILINE");
117 error_register(PARSER_SYNTAX_ERROR_EXPECTED_EOL,"PARSER_SYNTAX_ERROR_EXPECTED_EOL");
118 error_register(PARSER_SYNTAX_ERROR_LINE_TOO_BIG,"PARSER_SYNTAX_ERROR_LINE_TOO_BIG");
119 error_register(PARSER_BUFFER_TOO_SMALL,"PARSER_BUFFER_TOO_SMALL");
120 error_register(PARSER_NO_INPUT,"PARSER_NO_INPUT");
121 error_register(PARSER_ODD_CHAR_NUMBER,"PARSER_ODD_CHAR_NUMBER");
122 error_register(PARSER_LINE_ENDED_WITH_ESCAPE,"PARSER_LINE_ENDED_WITH_ESCAPE");
123 error_register(PARSER_UNEXPECTED_STRING_DELIMITER,"PARSER_UNEXPECTED_STRING_DELIMITER");
124 error_register(PARSER_EXPECTED_STRING_END_DELIMITER,"PARSER_EXPECTED_STRING_END_DELIMITER");
125 error_register(PARSER_INCLUDE_DEPTH_TOO_BIG,"PARSER_INCLUDE_DEPTH_TOO_BIG");
126 error_register(PARSER_UNKNOWN_TIME_UNIT,"PARSER_UNKNOWN_TIME_UNIT");
127 error_register(PARSER_NO_MARK_SET,"PARSER_NO_MARK_SET");
128 error_register(PARSER_REACHED_END_OF_LINE,"PARSER_REACHED_END_OF_LINE");
129 error_register(PARSER_FOUND_WORD,"PARSER_FOUND_WORD");
130 error_register(PARSER_REACHED_END_OF_FILE, "PARSER_REACHED_END_OF_FILE");
131 error_register(PARSER_INVALID_ESCAPED_FORMAT, "PARSER_INVALID_ESCAPED_FORMAT");
132 }
133
134 ya_result
parser_init(parser_s * parser,const char * string_delimiters,const char * multiline_delimiters,const char * comment_markers,const char * blank_makers,const char * escape_characters)135 parser_init(parser_s *parser,
136 const char *string_delimiters, // by 2
137 const char *multiline_delimiters, // by 2
138 const char *comment_markers, // by 1
139 const char *blank_makers, // by 1
140 const char *escape_characters // by 1
141 )
142 {
143 ya_result return_code = SUCCESS;
144
145 /// @note may be improved if we spawn parser a lot
146
147 ZEROMEMORY(parser, sizeof(parser_s));
148
149 //
150
151 if(ISOK(return_code = parser_set_couples(parser, string_delimiters, PARSER_CHAR_TYPE_STRING_DELIMITER, PARSER_CHAR_TYPE_IGNORE)))
152 {
153 parser->string_delimiters_count = return_code;
154
155 if(ISOK(return_code = parser_set_couples(parser, multiline_delimiters, PARSER_CHAR_TYPE_MULTILINE_DELIMITER, PARSER_CHAR_TYPE_MULTILINE_DELIMITER_END)))
156 {
157 parser->multiline_delimiters_count = return_code;
158
159 parser->comment_marker_count = parser_set_singleton(parser, comment_markers, PARSER_CHAR_TYPE_COMMENT_MARKER);
160 parser->comment_marker = comment_markers;
161 parser->blank_marker_count = parser_set_singleton(parser, blank_makers, PARSER_CHAR_TYPE_BLANK_MARKER);
162 parser->blank_marker = blank_makers;
163 parser->escape_characters_count =parser_set_singleton(parser, escape_characters, PARSER_CHAR_TYPE_ESCAPE_CHARACTER);
164 parser->escape_characters = escape_characters;
165 parser->close_last_stream = TRUE;
166 parser_set_singleton(parser, "\n", PARSER_CHAR_TYPE_EOL);
167 }
168 }
169
170 return return_code;
171 }
172
173 ya_result
parser_finalize(parser_s * parser)174 parser_finalize(parser_s *parser)
175 {
176 for(;;)
177 {
178 input_stream *is = parser_pop_stream(parser);
179 if(is == NULL)
180 {
181 break;
182 }
183 if((parser->input_stream_stack_size == 0) && !parser->close_last_stream)
184 {
185 break;
186 }
187 input_stream_close(is);
188 input_stream_set_void(is);
189 }
190
191 return SUCCESS;
192 }
193
194 static inline u32
parser_line_size(parser_s * parser)195 parser_line_size(parser_s *parser)
196 {
197 return (u32)(parser->limit - parser->needle);
198 }
199
200 static inline ya_result
parser_clear_escape_codes(char ** startp,int * lenp,char escape_char,char * new_start)201 parser_clear_escape_codes(char **startp, int *lenp, char escape_char, char *new_start)
202 {
203 char *start = *startp;
204 char *escape_char_ptr;
205 int len = *lenp;
206
207 if((escape_char_ptr = memchr(start, escape_char, len)) != NULL)
208 {
209 char *op = new_start;
210
211 for(;;)
212 {
213 size_t n = escape_char_ptr - start;
214
215 // is the escape code is at the last position ?
216
217 if(n + 1 == (size_t)len)
218 {
219 // oops
220 return PARSER_LINE_ENDED_WITH_ESCAPE;
221 }
222
223 memcpy(op, start, n);
224
225 char c = escape_char_ptr[1];
226
227 if((c >= '0') && (c <= '2'))
228 {
229 if(n + 3 < (size_t)len)
230 {
231 u32 decimal_char = (c - '0') * 100;
232 c = escape_char_ptr[2];
233 if((c >= '0') && (c <= '9'))
234 {
235 decimal_char += (c - '0') * 10;
236 c = escape_char_ptr[3];
237 if((c >= '0') && (c <= '9'))
238 {
239 decimal_char += (c - '0');
240 if(decimal_char <= 255)
241 {
242 op[n] = (u8)decimal_char;
243 op += n + 1;
244 start = escape_char_ptr + 4;
245 len -= n + 4;
246 }
247 else
248 {
249 return PARSER_INVALID_ESCAPED_FORMAT;
250 }
251 }
252 else
253 {
254 return PARSER_INVALID_ESCAPED_FORMAT;
255 }
256 }
257 else
258 {
259 return PARSER_INVALID_ESCAPED_FORMAT;
260 }
261 }
262 else
263 {
264 return PARSER_INVALID_ESCAPED_FORMAT;
265 }
266 }
267 else
268 {
269 op[n] = c;
270 op += n + 1;
271 start = escape_char_ptr + 2;
272 len -= n + 2;
273 }
274
275 yassert(len >= 0);
276
277 if(len == 0)
278 {
279 break;
280 }
281
282 if((escape_char_ptr = memchr(start, escape_char, len)) == NULL)
283 {
284 // copy the remaining bytes
285
286 memcpy(op, start, len);
287 op += len;
288 break;
289 }
290 }
291
292 *startp = new_start;
293 *lenp = op - new_start;
294 }
295 // else we have nothing more to do
296
297 return len;
298 }
299
300 /**
301 *
302 * returns the token type
303 *
304 * @param parser
305 * @return
306 */
307
308 static inline ya_result
parser_read_line(parser_s * parser)309 parser_read_line(parser_s *parser)
310 {
311 ya_result return_code;
312
313 if(parser_line_size(parser) == 0)
314 {
315 // read next line
316
317 if(parser->input_stream_stack_size == 0)
318 {
319 return_code = PARSER_NO_INPUT; // no input file/stream
320 return return_code;
321 }
322
323 char *buffer = parser->line_buffer;
324 char *limit = &parser->line_buffer[sizeof(parser->line_buffer)];
325
326 for(;;)
327 {
328 if(limit - buffer == 0)
329 {
330 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
331 }
332
333 #if DO_BUFFERIZE
334 return_code = buffer_input_stream_read_line(parser->input_stream_stack[parser->input_stream_stack_size - 1],
335 buffer,
336 limit - buffer);
337 #else
338 return_code = input_stream_read_line(parser->input_stream_stack[parser->input_stream_stack_size - 1],
339 buffer,
340 limit - buffer);
341 #endif
342
343 if(return_code > 0)
344 {
345 // one line has been read (maybe)
346
347 buffer += return_code;
348
349 if(return_code > 1)
350 {
351 if(buffer[-2] == parser->escape_characters[0])
352 {
353 // the EOL was escaped, concat the next line ...
354 // do NOT remove the escape code now
355
356 continue;
357 }
358 }
359
360 parser->limit = buffer;
361 parser->needle = parser->line_buffer;
362 parser->line_number++;
363 }
364 else
365 {
366 // error or end of stream
367
368 parser->limit = parser->line_buffer;
369 parser->needle = parser->line_buffer;
370
371 if(return_code == 0)
372 {
373 if(parser->multiline != 0)
374 {
375 return_code = PARSER_SYNTAX_ERROR_MULTILINE;
376 }
377 }
378 }
379
380 return return_code;
381 }
382 }
383
384 return PARSER_EOF;
385 }
386
387 ya_result
parser_next_token(parser_s * parser)388 parser_next_token(parser_s *parser)
389 {
390 ya_result return_code;
391
392 for(;;)
393 {
394 if((return_code = parser_read_line(parser)) <= 0)
395 {
396
397 if(return_code == 0)
398 {
399
400
401 return PARSER_EOF;
402 }
403
404 return return_code;
405 }
406
407 // there are bytes
408
409 return_code = 0;
410
411 for(char *needle = parser->needle; needle < parser->limit; needle++)
412 {
413 u8 b = (u8)*needle;
414
415 // test for multiline close
416
417 bool has_escapes = FALSE;
418
419 switch(parser->char_type[b])
420 {
421 #if DNSCORE_HAS_FULL_ASCII7
422 case PARSER_CHAR_TYPE_TO_TRANSLATE:
423 *needle = parser->translation_table[b];
424 --needle;
425 #endif
426 FALLTHROUGH // fall through
427 case PARSER_CHAR_TYPE_ESCAPE_CHARACTER:
428 // the text starts after the next char, whatever it is
429 if(++needle < parser->limit)
430 {
431 if((*needle >= '0') && (*needle <= '2'))
432 {
433 // octal byte
434 if(needle + 2 < parser->limit)
435 {
436 //u8 octal_char = ((*needle) - '0') * 100;
437 ++needle;
438 if((*needle >= '0') && (*needle <= '9'))
439 {
440 //octal_char |= ((*needle) - '0') * 10;
441 ++needle;
442 if((*needle >= '0') && (*needle <= '9'))
443 {
444 //octal_char |= ((*needle) - '0');
445 needle -= 3;
446 has_escapes = TRUE;
447 // the buffer needs to be copied
448 }
449 else
450 {
451 // octal parse error
452
453 return PARSER_INVALID_ESCAPED_FORMAT;
454 }
455 }
456 else
457 {
458 // octal parse error
459
460 return PARSER_INVALID_ESCAPED_FORMAT;
461 }
462 }
463 else
464 {
465 // octal parse error
466
467 return PARSER_INVALID_ESCAPED_FORMAT;
468 }
469 }
470 }
471
472 FALLTHROUGH // fall through
473
474 case PARSER_CHAR_TYPE_NORMAL:
475 {
476 // BLANK or MULTI => done
477 // STRING => error
478 // COMMENT => CUT
479
480 parser->text = needle++;
481
482 for(; needle < parser->limit; needle++)
483 {
484 b = (u8)*needle;
485
486 switch(parser->char_type[b])
487 {
488 case PARSER_CHAR_TYPE_MULTILINE_DELIMITER_END:
489 {
490 if((parser->multiline) != 0 && (b == parser->multiline))
491 {
492 b = ' ';
493 *needle = b;
494 parser->multiline = 0;
495 }
496 else
497 {
498 return PARSER_SYNTAX_ERROR_MULTILINE;
499 }
500
501 // we got the whole word
502
503 parser->text_length = needle - parser->text;
504 parser->needle = needle + 1;
505 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
506 }
507
508 case PARSER_CHAR_TYPE_MULTILINE_DELIMITER:
509 {
510 if(parser->multiline == 0)
511 {
512 parser->multiline = parser->delimiter_close[b];
513 }
514 else
515 {
516 return PARSER_SYNTAX_ERROR_MULTILINE;
517 }
518
519 *needle = ' ';
520
521 // we got the whole word
522
523 parser->text_length = needle - parser->text;
524 parser->needle = needle + 1;
525 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
526 }
527
528 case PARSER_CHAR_TYPE_EOL:
529 {
530 // only tell we got an EOL if we are not on "multiline"
531
532 if(parser->multiline != 0)
533 {
534 *needle = ' ';
535 }
536
537 // we got the whole word
538
539 parser->text_length = needle - parser->text;
540 parser->needle = needle;
541 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
542 }
543
544 case PARSER_CHAR_TYPE_BLANK_MARKER:
545 {
546 // we got the whole word
547
548 parser->text_length = needle - parser->text;
549 parser->needle = needle + 1;
550 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
551 }
552
553 case PARSER_CHAR_TYPE_ESCAPE_CHARACTER:
554 {
555 needle++;
556
557 has_escapes = TRUE;
558
559 break;
560 }
561
562 case PARSER_CHAR_TYPE_COMMENT_MARKER:
563 {
564 // we got the whole word
565
566 parser->text_length = needle - parser->text;
567 parser->needle = needle;
568 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
569 }
570
571 case PARSER_CHAR_TYPE_STRING_DELIMITER:
572 {
573 // parse error
574 if(!parser->tokenize_on_string)
575 {
576 return PARSER_UNEXPECTED_STRING_DELIMITER;
577 }
578
579 parser->text_length = needle - parser->text;
580 parser->needle = needle;
581 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
582 }
583 #if DNSCORE_HAS_FULL_ASCII7
584 case PARSER_CHAR_TYPE_TO_TRANSLATE:
585 {
586 *needle = parser->translation_table[b];
587 break;
588 }
589 #endif
590
591 //case PARSER_CHAR_TYPE_NORMAL:
592 default:
593 {
594 break;
595 }
596 } // end switch char type
597 } // end for needle
598
599 parser_next_token_end_of_token_found: ;
600
601 // at this point we have a full token (maybe still escaped)
602
603 int token_len = needle - parser->text;
604
605 if(has_escapes)
606 {
607 yassert(parser->escape_characters_count <= 1);
608
609 if(parser->escape_characters_count == 1)
610 {
611 ya_result err;
612
613 char escape_char = parser->escape_characters[0];
614
615 if(FAIL(err = parser_clear_escape_codes(&parser->text, &token_len, escape_char, parser->extra_buffer)))
616 {
617 return err;
618 }
619 }
620 }
621
622 parser->text_length = token_len;
623 parser->needle = needle;
624
625 return return_code | PARSER_WORD;
626 }
627 case PARSER_CHAR_TYPE_COMMENT_MARKER:
628 {
629 // cut line
630
631 parser->text = needle;
632 parser->text_length = parser->limit - needle;
633
634 parser->needle = parser->limit;
635
636 if(parser->multiline == 0)
637 {
638 parser->needle_mark = NULL;
639 return return_code | PARSER_COMMENT | PARSER_EOL;
640 }
641 else
642 {
643 return return_code | PARSER_COMMENT;
644 }
645 }
646 case PARSER_CHAR_TYPE_MULTILINE_DELIMITER_END:
647 {
648 if((parser->multiline) != 0 && (b == parser->multiline))
649 {
650 /*b = ' ';
651 *needle = b;*/
652 parser->multiline = 0;
653 }
654 else
655 {
656 return PARSER_SYNTAX_ERROR_MULTILINE;
657 }
658
659 break;
660 }
661 case PARSER_CHAR_TYPE_STRING_DELIMITER:
662 {
663 // find the end char ...
664 // note: see strpbrk
665
666 char end_char = parser->delimiter_close[b];
667
668 char *string_start = ++needle;
669 char *string_end;
670 for(;;)
671 {
672 string_end = memchr(needle, end_char, parser->limit - needle);
673
674 if(string_end != NULL)
675 {
676 // this one may have been escaped
677
678 /// @note 20190917 edf -- Patch submitted trough github by JZerf
679 /// This fixes the case of escaped escapes as well as an incorrect limit test
680 /// The patch has been slightly adapted in 2.4.x but may be kept as it is in 2.3.x
681
682 /* Check if the string delimiter that was found was escaped. Keep in
683 * mind that if there was an escape character in front of the string
684 * delimiter, the escape character itself could have also been escaped
685 * (and the one before that and the one before that...). What we can do
686 * is check to see how many consecutive preceding escape characters
687 * there are (by finding the first preceding nonescape character or the
688 * opening string delimiter if there isn't one) and if it's an even
689 * number then the string delimiter we found is unescaped but if it's an
690 * odd number then it is escaped. Note that this will need to be revised
691 * if YADIDA later adds support for using \DDD type escape sequences
692 * between string delimiters.
693 */
694
695 /// @note 20190917 edf -- while => do-while : I've kept the first if out of the loop to avoid needlessly
696 /// testing for the needle. (Which should be the most common case)
697
698 const char *prior_nonescape_character = string_end - 1;
699
700 do
701 {
702 if(parser->char_type[(u8)*prior_nonescape_character] != PARSER_CHAR_TYPE_ESCAPE_CHARACTER)
703 {
704 break;
705 }
706 }
707 while(--prior_nonescape_character >= needle);
708
709 // this one was escaped ...
710 if(((string_end - prior_nonescape_character) & 1) == 1)
711 {
712 break; /* String delimiter was not escaped if we got here. */
713 }
714
715 string_end++;
716
717 // needle = string_end + 1 and try again ?
718
719 if(string_end >= parser->limit)
720 {
721 return PARSER_EXPECTED_STRING_END_DELIMITER;
722 }
723
724 needle = string_end;
725 }
726 else
727 {
728 // syntax error
729
730 return PARSER_EXPECTED_STRING_END_DELIMITER;
731 }
732 }
733
734 int token_len = string_end - string_start;
735
736 yassert(parser->escape_characters_count <= 1);
737
738 for(u32 escape_index = 0; escape_index < parser->escape_characters_count; escape_index++)
739 {
740 ya_result err;
741 char escape_char = parser->escape_characters[escape_index];
742
743 if(FAIL(err = parser_clear_escape_codes(&string_start, &token_len, escape_char, parser->extra_buffer)))
744 {
745 return err;
746 }
747 }
748
749 parser->text = string_start;
750 parser->text_length = token_len;
751
752 parser->needle = string_end + 1;
753
754 // end of token ... return ?
755
756 return return_code | PARSER_WORD;
757 }
758 case PARSER_CHAR_TYPE_MULTILINE_DELIMITER:
759 {
760 if(parser->multiline == 0)
761 {
762 parser->multiline = parser->delimiter_close[b];
763 }
764 else
765 {
766 return PARSER_SYNTAX_ERROR_MULTILINE;
767 }
768 *needle = ' ';
769 break;
770 }
771 case PARSER_CHAR_TYPE_EOL:
772 {
773 // only tell we got an EOL if we are not on "multiline"
774
775 if(parser->multiline == 0)
776 {
777 parser->needle = parser->limit;
778 parser->text_length = 0;
779 parser->needle_mark = NULL;
780 return PARSER_EOL;
781 }
782
783 *needle = ' ';
784 }
785 FALLTHROUGH // fall through
786
787 case PARSER_CHAR_TYPE_BLANK_MARKER:
788 {
789 return_code |= PARSER_BLANK_START;
790 break;
791 }
792 }
793 }
794
795 // reached the end of line without a token : EOL
796 // if we are not on a multiline: return EOL
797
798 parser->needle = parser->limit;
799 parser->text_length = 0;
800
801 if(parser->multiline == 0)
802 {
803
804 return PARSER_EOL;
805 }
806
807 // else read the next line (loop)
808 }
809
810 // never reached
811
812 // return 0;
813 }
814
815 void
parser_set_eol(parser_s * parser)816 parser_set_eol(parser_s *parser)
817 {
818 parser->needle = (char*)&eol_park_needle[0];
819 parser->limit = (char*)&eol_park_needle[1];
820 }
821
#if DNSCORE_HAS_FULL_ASCII7
/**
 * Makes the tokenizer replace a character by another one.
 *
 * @param parser          the parser
 * @param character       the character to replace
 * @param translates_into the character it is replaced with
 */
void
parser_add_translation(parser_s *parser, u8 character, u8 translates_into)
{
    parser->char_type[character] = PARSER_CHAR_TYPE_TO_TRANSLATE;
    parser->translation_table[character] = translates_into;
}

/**
 * Stops translating a character: its type is set back to
 * PARSER_CHAR_TYPE_NORMAL. Its translation table entry is not modified.
 *
 * @param parser    the parser
 * @param character the character
 */
void
parser_del_translation(parser_s *parser, u8 character)
{
    parser->char_type[character] = PARSER_CHAR_TYPE_NORMAL;
}
#endif
836
837 ya_result
parser_next_characters(parser_s * parser)838 parser_next_characters(parser_s *parser)
839 {
840 parser->text = parser->needle;
841 parser->text_length = parser->limit - parser->needle;
842
843 if(parser->multiline != 0)
844 {
845 u32 offset = parser->text_length;
846
847 memcpy(parser->additional_buffer, parser->text, offset);
848 parser->additional_buffer[offset++] = ' ';
849
850 ya_result ret;
851 do
852 {
853 ret = parser_next_token(parser);
854
855 const char *text = parser_text(parser);
856 size_t text_length = parser_text_length(parser);
857
858 size_t new_length = offset + text_length + 1;
859 if(new_length > sizeof(parser->additional_buffer))
860 {
861 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
862 }
863
864 memcpy(&parser->additional_buffer[offset], text, text_length);
865 offset = new_length;
866 parser->additional_buffer[offset - 1] = ' ';
867 }
868 while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
869
870 parser->text = parser->additional_buffer;
871 parser->text_length = offset - 1;
872 }
873
874 parser->needle = (char*)&eol_park_needle[0];
875 parser->limit = (char*)&eol_park_needle[1];
876
877 return parser->text_length;
878 }
879
880 ya_result
parser_next_characters_nospace(parser_s * parser)881 parser_next_characters_nospace(parser_s *parser)
882 {
883 parser->text = parser->needle;
884 parser->text_length = parser->limit - parser->needle;
885
886 if(parser->multiline != 0)
887 {
888 u32 offset = parser->text_length;
889
890 memcpy(parser->additional_buffer, parser->text, offset);
891
892 ya_result ret;
893 do
894 {
895 ret = parser_next_token(parser);
896
897 const char *text = parser_text(parser);
898 size_t text_length = parser_text_length(parser);
899 size_t new_length = offset + text_length;
900 if(new_length > sizeof(parser->additional_buffer))
901 {
902 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
903 }
904
905 memcpy(&parser->additional_buffer[offset], text, text_length);
906 offset = new_length;
907 }
908 while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
909
910 parser->text = parser->additional_buffer;
911 parser->text_length = offset;
912 }
913
914 char* text = parser->text;
915 while(parser->char_type[(u8)*text] == PARSER_CHAR_TYPE_BLANK_MARKER)
916 {
917 text++;
918 }
919 parser->text_length -= text - parser->text;
920 parser->text = text;
921
922 parser->needle = (char*)&eol_park_needle[0];
923 parser->limit = (char*)&eol_park_needle[1];
924
925 return parser->text_length;
926 }
927
928
929 ya_result
parser_concat_next_tokens(parser_s * parser)930 parser_concat_next_tokens(parser_s *parser)
931 {
932 ya_result ret;
933 size_t offset = 0;
934
935 // char space = parser->blank_marker[0];
936 char space = ' ';
937 do
938 {
939 ret = parser_next_token(parser);
940
941 if(ret & PARSER_WORD)
942 {
943 // if((ret & PARSER_COMMENT) != 0)
944 // {
945 // continue;
946 // }
947
948 const char *text = parser_text(parser);
949 size_t text_length = parser_text_length(parser);
950 size_t new_length = offset + text_length;
951 if(new_length > sizeof(parser->additional_buffer))
952 {
953 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
954 }
955
956 memcpy(&parser->additional_buffer[offset], text, text_length); // VS false positive: overflow is chercked right before
957 offset = new_length;
958
959 parser->additional_buffer[offset] = space;
960 offset++;
961 }
962 }
963 while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
964
965
966 // remove the last space, because we always add a space
967 offset--;
968
969
970 char *text = parser->additional_buffer;
971
972 parser->text_length = offset - (text - parser->additional_buffer);
973 parser->text = text;
974 parser->needle = (char*)&eol_park_needle[0];
975 parser->limit = (char*)&eol_park_needle[1];
976
977 return parser->text_length;
978 }
979
980 ya_result
parser_concat_current_and_next_tokens_nospace(parser_s * parser)981 parser_concat_current_and_next_tokens_nospace(parser_s *parser)
982 {
983 ya_result ret;
984 size_t offset;
985
986 if(parser->text_length > sizeof(parser->additional_buffer))
987 {
988 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
989 }
990
991 memcpy(&parser->additional_buffer[0], parser->text, parser->text_length);
992 offset = parser->text_length;
993
994 do
995 {
996 ret = parser_next_token(parser);
997
998 if((ret & PARSER_COMMENT) != 0)
999 {
1000 continue;
1001 }
1002
1003 const char *text = parser_text(parser);
1004 size_t text_length = parser_text_length(parser);
1005 size_t new_length = offset + text_length;
1006 if(new_length > sizeof(parser->additional_buffer))
1007 {
1008 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
1009 }
1010
1011 memcpy(&parser->additional_buffer[offset], text, text_length);
1012 offset = new_length;
1013 }
1014 while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
1015
1016 char* text = parser->additional_buffer;
1017 while(parser->char_type[(u8)*text] == PARSER_CHAR_TYPE_BLANK_MARKER)
1018 {
1019 text++;
1020 }
1021 parser->text_length = offset - (text - parser->additional_buffer);
1022 parser->text = text;
1023 parser->needle = (char*)&eol_park_needle[0];
1024 parser->limit = (char*)&eol_park_needle[1];
1025
1026 return parser->text_length;
1027 }
1028
1029 ya_result
parser_concat_next_tokens_nospace(parser_s * parser)1030 parser_concat_next_tokens_nospace(parser_s *parser)
1031 {
1032 ya_result ret;
1033 size_t offset = 0;
1034 do
1035 {
1036 ret = parser_next_token(parser);
1037
1038 if((ret & PARSER_COMMENT) != 0)
1039 {
1040 continue;
1041 }
1042
1043 if((ret & PARSER_WORD) != 0)
1044 {
1045 const char *text = parser_text(parser);
1046 size_t text_length = parser_text_length(parser);
1047 size_t new_length = offset + text_length;
1048 if(new_length > sizeof(parser->additional_buffer))
1049 {
1050 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
1051 }
1052
1053 memcpy(&parser->additional_buffer[offset], text, text_length);
1054 offset = new_length;
1055 }
1056 }
1057 while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
1058
1059 char* text = parser->additional_buffer;
1060 while(parser->char_type[(u8)*text] == PARSER_CHAR_TYPE_BLANK_MARKER)
1061 {
1062 text++;
1063 }
1064 parser->text_length = offset - (text - parser->additional_buffer);
1065 parser->text = text;
1066 parser->needle = (char*)&eol_park_needle[0];
1067 parser->limit = (char*)&eol_park_needle[1];
1068
1069 return parser->text_length;
1070 }
1071
1072 ya_result
parser_push_stream(parser_s * p,input_stream * is)1073 parser_push_stream(parser_s *p, input_stream *is)
1074 {
1075 if(p->input_stream_stack_size < PARSER_INCLUDE_DEPTH_MAX )
1076 {
1077 #if DO_BUFFERIZE
1078 buffer_input_stream_init(is, is, PARSER_STREAM_BUFFER_SIZE);
1079 #endif
1080 p->input_stream_stack[p->input_stream_stack_size] = is;
1081 p->line_number_stack[p->input_stream_stack_size] = p->line_number;
1082
1083 ++p->input_stream_stack_size;
1084
1085 return p->input_stream_stack_size;
1086 }
1087
1088 return PARSER_INCLUDE_DEPTH_TOO_BIG;
1089 }
1090
1091 /**
1092 * @param p
1093 * @return the popped stream or NULL if the stack is empty
1094 */
1095
1096 input_stream *
parser_pop_stream(parser_s * p)1097 parser_pop_stream(parser_s *p)
1098 {
1099 input_stream *is = NULL;
1100
1101 if(p->input_stream_stack_size > 0)
1102 {
1103 is = p->input_stream_stack[--p->input_stream_stack_size];
1104 #if DEBUG
1105 p->input_stream_stack[p->input_stream_stack_size] = NULL;
1106 #endif
1107 p->line_number = p->line_number_stack[p->input_stream_stack_size];
1108 }
1109
1110 return is;
1111 }
1112
1113 ///////////////////////////////////////////////////////////////////////////////
1114
1115 ya_result
parser_copy_next_ttl(parser_s * p,s32 * out_value)1116 parser_copy_next_ttl(parser_s *p, s32 *out_value)
1117 {
1118 ya_result return_code = parser_next_word(p);
1119
1120 if(ISOK(return_code))
1121 {
1122 const char *text = parser_text(p);
1123 u32 text_len = parser_text_length(p);
1124
1125 char lc = text[text_len - 1];
1126
1127 if(isdigit(lc))
1128 {
1129 return_code = parse_s32_check_range_len_base10(text, text_len, out_value, 0, MAX_S32);
1130 }
1131 else
1132 {
1133 s64 mult = 1;
1134 text_len--;
1135
1136 switch(lc)
1137 {
1138 case 'w':
1139 case 'W':
1140 mult = 60 * 60 * 24 * 7;
1141 break;
1142 case 'd':
1143 case 'D':
1144 mult = 60 * 60 * 24;
1145 break;
1146 case 'h':
1147 case 'H':
1148 mult = 60 * 60;
1149 break;
1150 case 'm':
1151 case 'M':
1152 mult = 60;
1153 break;
1154 case 's':
1155 case 'S':
1156 break;
1157 default:
1158 {
1159 return PARSER_UNKNOWN_TIME_UNIT;
1160 }
1161 }
1162
1163 s32 ttl32;
1164
1165 if(ISOK(return_code = parse_s32_check_range_len_base10(text, text_len, &ttl32, 0, MAX_S32)))
1166 {
1167 mult *= ttl32;
1168
1169 if(mult <= MAX_S32)
1170 {
1171 *out_value = (s32)mult;
1172 }
1173 else
1174 {
1175 return_code = PARSEINT_ERROR;
1176 }
1177 }
1178 }
1179 }
1180
1181 return return_code;
1182 }
1183
1184 ya_result
parser_type_bit_maps_initialise(parser_s * p,type_bit_maps_context * context)1185 parser_type_bit_maps_initialise(parser_s *p, type_bit_maps_context* context)
1186 {
1187 u16 type;
1188
1189 u8 *type_bitmap_field = context->type_bitmap_field;
1190 u8 *window_size = context->window_size;
1191
1192 u32 type_bit_maps_size = 0;
1193 u8 ws;
1194
1195 /* ------------------------------------------------------------ */
1196
1197 ZEROMEMORY(window_size, sizeof(context->window_size));
1198 context->last_type_window = -1;
1199 ZEROMEMORY(type_bitmap_field, sizeof(context->type_bitmap_field));
1200
1201 ya_result return_code;
1202
1203 do
1204 {
1205 if(FAIL(return_code = parser_next_token(p)))
1206 {
1207 return return_code;
1208 }
1209
1210 if((return_code & PARSER_WORD) != 0)
1211 {
1212 const char *text = parser_text(p);
1213 u32 text_len = parser_text_length(p);
1214
1215 ya_result ret; // MUST use another return variable than return_code
1216 if(FAIL(ret = dns_type_from_case_name_length(text, text_len, &type)))
1217 {
1218 return ret;
1219 }
1220
1221 type = ntohs(type); /* types are now stored in NETWORK order */
1222
1223 /* Network bit order */
1224 type_bitmap_field[type >> 3] |= 1 << (7 - (type & 7));
1225 window_size[type >> 8] = ((type & 0xf8) >> 3) + 1;
1226
1227 context->last_type_window = MAX(type >> 8, context->last_type_window);
1228 }
1229
1230 }
1231 while((return_code & (PARSER_EOF|PARSER_EOL)) == 0);
1232
1233 for(s32 i = 0; i <= context->last_type_window; i++)
1234 {
1235 ws = window_size[i];
1236
1237 if(ws > 0)
1238 {
1239 type_bit_maps_size += 1 + 1 + ws;
1240 }
1241 }
1242
1243 context->type_bit_maps_size = type_bit_maps_size;
1244
1245 return type_bit_maps_size;
1246 }
1247
1248 ya_result
parser_get_network_protocol_from_next_word(parser_s * p,int * out_value)1249 parser_get_network_protocol_from_next_word(parser_s *p, int *out_value)
1250 {
1251 char protocol_token[64];
1252
1253 ya_result ret = parser_copy_next_word(p, protocol_token, sizeof(protocol_token));
1254
1255 if(ISOK(ret))
1256 {
1257 ret = protocol_name_to_id(protocol_token, out_value);
1258 }
1259
1260 return ret;
1261 }
1262
1263 ya_result
parser_get_network_service_port_from_next_word(parser_s * p,int * out_value)1264 parser_get_network_service_port_from_next_word(parser_s *p, int *out_value)
1265 {
1266 char service_token[64];
1267
1268 ya_result ret = parser_copy_next_word(p, service_token, sizeof(service_token));
1269
1270 if(ISOK(ret))
1271 {
1272 ret = server_name_to_port(service_token, out_value);
1273 }
1274
1275 return ret;
1276 }
1277
1278 /** @} */
1279
1280