1 /*------------------------------------------------------------------------------
2  *
3  * Copyright (c) 2011-2021, EURid vzw. All rights reserved.
4  * The YADIFA TM software product is provided under the BSD 3-clause license:
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *        * Redistributions in binary form must reproduce the above copyright
13  *          notice, this list of conditions and the following disclaimer in the
14  *          documentation and/or other materials provided with the distribution.
15  *        * Neither the name of EURid nor the names of its contributors may be
16  *          used to endorse or promote products derived from this software
17  *          without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  *------------------------------------------------------------------------------
32  *
33  */
34 
35 #include "dnscore/dnscore-config.h"
36 #include <unistd.h>
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/time.h>
42 //#include <sys/resource.h>
43 #include <arpa/inet.h>
44 #include <fcntl.h>
45 
46 #include "dnscore/timems.h"
47 #include "dnscore/logger.h"
48 #include "dnscore/file_output_stream.h"
49 #include "dnscore/file_input_stream.h"
50 #include "dnscore/logger_channel_stream.h"
51 #include "dnscore/buffer_input_stream.h"
52 
53 #include "dnscore/base16.h"
54 #include "dnscore/base32hex.h"
55 #include "dnscore/base64.h"
56 
57 #include "dnscore/parser.h"
58 #include "dnscore/ptr_set.h"
59 #include "dnscore/mutex.h"
60 
61 
// Not referenced in the visible part of this file.
// NOTE(review): presumably a debug-trace switch -- confirm against the rest of the file.
#define DO_PRINT 0
// When non-zero, parser_read_line() reads its lines with buffer_input_stream_read_line(),
// otherwise it uses input_stream_read_line().
#define DO_BUFFERIZE 1

// Not referenced in the visible part of this file.
// NOTE(review): presumably the size of the buffer put in front of pushed streams -- confirm.
#define PARSER_STREAM_BUFFER_SIZE 4096

// One-byte range used by parser_set_eol(): needle is set on the first byte (a space)
// and limit on the second one.
static const char eol_park_needle[2] = {' ', '\0'};
// Set to TRUE by the first call to parser_init_error_codes() so the codes are registered only once.
static bool parser_init_error_codes_done = FALSE;
69 
70 static inline ya_result
parser_set_couples(parser_s * parser,const char * input,u8 kind,u8 closer_kind)71 parser_set_couples(parser_s *parser, const char* input, u8 kind, u8 closer_kind)
72 {
73     u32 n = strlen(input);
74 
75     if((n & 1) != 0)
76     {
77         return PARSER_ODD_CHAR_NUMBER;
78     }
79 
80     for(u32 i = 0; i < n; i+= 2)
81     {
82         parser->char_type[(u8)input[i]] = kind;
83         if(closer_kind != PARSER_CHAR_TYPE_IGNORE)
84         {
85             parser->char_type[(u8)input[i + 1]] = closer_kind;
86         }
87         parser->delimiter_close[(u8)input[i]] = input[i + 1];
88     }
89 
90     return n>>1;
91 }
92 
93 static inline u32
parser_set_singleton(parser_s * parser,const char * input,u8 kind)94 parser_set_singleton(parser_s *parser, const char* input, u8 kind)
95 {
96     u32 n = strlen(input);
97 
98     for(u32 i = 0; i < n; i++)
99     {
100         parser->char_type[(u8)input[i]] = kind;
101     }
102 
103     return n;
104 }
105 
106 void
parser_init_error_codes()107 parser_init_error_codes()
108 {
109     if(parser_init_error_codes_done)
110     {
111         return;
112     }
113 
114     parser_init_error_codes_done = TRUE;
115 
116     error_register(PARSER_SYNTAX_ERROR_MULTILINE,"PARSER_SYNTAX_ERROR_MULTILINE");
117     error_register(PARSER_SYNTAX_ERROR_EXPECTED_EOL,"PARSER_SYNTAX_ERROR_EXPECTED_EOL");
118     error_register(PARSER_SYNTAX_ERROR_LINE_TOO_BIG,"PARSER_SYNTAX_ERROR_LINE_TOO_BIG");
119     error_register(PARSER_BUFFER_TOO_SMALL,"PARSER_BUFFER_TOO_SMALL");
120     error_register(PARSER_NO_INPUT,"PARSER_NO_INPUT");
121     error_register(PARSER_ODD_CHAR_NUMBER,"PARSER_ODD_CHAR_NUMBER");
122     error_register(PARSER_LINE_ENDED_WITH_ESCAPE,"PARSER_LINE_ENDED_WITH_ESCAPE");
123     error_register(PARSER_UNEXPECTED_STRING_DELIMITER,"PARSER_UNEXPECTED_STRING_DELIMITER");
124     error_register(PARSER_EXPECTED_STRING_END_DELIMITER,"PARSER_EXPECTED_STRING_END_DELIMITER");
125     error_register(PARSER_INCLUDE_DEPTH_TOO_BIG,"PARSER_INCLUDE_DEPTH_TOO_BIG");
126     error_register(PARSER_UNKNOWN_TIME_UNIT,"PARSER_UNKNOWN_TIME_UNIT");
127     error_register(PARSER_NO_MARK_SET,"PARSER_NO_MARK_SET");
128     error_register(PARSER_REACHED_END_OF_LINE,"PARSER_REACHED_END_OF_LINE");
129     error_register(PARSER_FOUND_WORD,"PARSER_FOUND_WORD");
130     error_register(PARSER_REACHED_END_OF_FILE, "PARSER_REACHED_END_OF_FILE");
131     error_register(PARSER_INVALID_ESCAPED_FORMAT, "PARSER_INVALID_ESCAPED_FORMAT");
132 }
133 
134 ya_result
parser_init(parser_s * parser,const char * string_delimiters,const char * multiline_delimiters,const char * comment_markers,const char * blank_makers,const char * escape_characters)135 parser_init(parser_s *parser,
136             const char *string_delimiters,      // by 2
137             const char *multiline_delimiters,   // by 2
138             const char *comment_markers,        // by 1
139             const char *blank_makers,           // by 1
140             const char *escape_characters       // by 1
141         )
142 {
143     ya_result return_code = SUCCESS;
144 
145     /// @note may be improved if we spawn parser a lot
146 
147     ZEROMEMORY(parser, sizeof(parser_s));
148 
149     //
150 
151     if(ISOK(return_code = parser_set_couples(parser, string_delimiters, PARSER_CHAR_TYPE_STRING_DELIMITER, PARSER_CHAR_TYPE_IGNORE)))
152     {
153         parser->string_delimiters_count = return_code;
154 
155         if(ISOK(return_code = parser_set_couples(parser, multiline_delimiters, PARSER_CHAR_TYPE_MULTILINE_DELIMITER, PARSER_CHAR_TYPE_MULTILINE_DELIMITER_END)))
156         {
157             parser->multiline_delimiters_count = return_code;
158 
159             parser->comment_marker_count = parser_set_singleton(parser, comment_markers, PARSER_CHAR_TYPE_COMMENT_MARKER);
160             parser->comment_marker = comment_markers;
161             parser->blank_marker_count = parser_set_singleton(parser, blank_makers, PARSER_CHAR_TYPE_BLANK_MARKER);
162             parser->blank_marker = blank_makers;
163             parser->escape_characters_count =parser_set_singleton(parser, escape_characters, PARSER_CHAR_TYPE_ESCAPE_CHARACTER);
164             parser->escape_characters = escape_characters;
165             parser->close_last_stream = TRUE;
166             parser_set_singleton(parser, "\n", PARSER_CHAR_TYPE_EOL);
167         }
168     }
169 
170     return return_code;
171 }
172 
173 ya_result
parser_finalize(parser_s * parser)174 parser_finalize(parser_s *parser)
175 {
176     for(;;)
177     {
178         input_stream *is = parser_pop_stream(parser);
179         if(is == NULL)
180         {
181             break;
182         }
183         if((parser->input_stream_stack_size == 0) && !parser->close_last_stream)
184         {
185             break;
186         }
187         input_stream_close(is);
188         input_stream_set_void(is);
189     }
190 
191     return SUCCESS;
192 }
193 
194 static inline u32
parser_line_size(parser_s * parser)195 parser_line_size(parser_s *parser)
196 {
197     return (u32)(parser->limit - parser->needle);
198 }
199 
200 static inline ya_result
parser_clear_escape_codes(char ** startp,int * lenp,char escape_char,char * new_start)201 parser_clear_escape_codes(char **startp, int *lenp, char escape_char, char *new_start)
202 {
203     char *start = *startp;
204     char *escape_char_ptr;
205     int len = *lenp;
206 
207     if((escape_char_ptr = memchr(start, escape_char, len)) != NULL)
208     {
209         char *op = new_start;
210 
211         for(;;)
212         {
213             size_t n = escape_char_ptr - start;
214 
215             // is the escape code is at the last position ?
216 
217             if(n + 1 == (size_t)len)
218             {
219                 // oops
220                 return PARSER_LINE_ENDED_WITH_ESCAPE;
221             }
222 
223             memcpy(op, start, n);
224 
225             char c = escape_char_ptr[1];
226 
227             if((c >= '0') && (c <= '2'))
228             {
229                 if(n + 3 < (size_t)len)
230                 {
231                     u32 decimal_char = (c - '0') * 100;
232                     c = escape_char_ptr[2];
233                     if((c >= '0') && (c <= '9'))
234                     {
235                         decimal_char += (c - '0') * 10;
236                         c = escape_char_ptr[3];
237                         if((c >= '0') && (c <= '9'))
238                         {
239                             decimal_char += (c - '0');
240                             if(decimal_char <= 255)
241                             {
242                                 op[n] = (u8)decimal_char;
243                                 op += n + 1;
244                                 start = escape_char_ptr + 4;
245                                 len -= n + 4;
246                             }
247                             else
248                             {
249                                 return PARSER_INVALID_ESCAPED_FORMAT;
250                             }
251                         }
252                         else
253                         {
254                             return PARSER_INVALID_ESCAPED_FORMAT;
255                         }
256                     }
257                     else
258                     {
259                         return PARSER_INVALID_ESCAPED_FORMAT;
260                     }
261                 }
262                 else
263                 {
264                     return PARSER_INVALID_ESCAPED_FORMAT;
265                 }
266             }
267             else
268             {
269                 op[n] = c;
270                 op += n + 1;
271                 start = escape_char_ptr + 2;
272                 len -= n + 2;
273             }
274 
275             yassert(len >= 0);
276 
277             if(len == 0)
278             {
279                 break;
280             }
281 
282             if((escape_char_ptr = memchr(start, escape_char, len)) == NULL)
283             {
284                 // copy the remaining bytes
285 
286                 memcpy(op, start, len);
287                 op += len;
288                 break;
289             }
290         }
291 
292         *startp = new_start;
293         *lenp = op - new_start;
294     }
295     // else we have nothing more to do
296 
297     return len;
298 }
299 
300 /**
301  *
302  * returns the token type
303  *
304  * @param parser
305  * @return
306  */
307 
308 static inline ya_result
parser_read_line(parser_s * parser)309 parser_read_line(parser_s *parser)
310 {
311     ya_result return_code;
312 
313     if(parser_line_size(parser) == 0)
314     {
315         // read next line
316 
317         if(parser->input_stream_stack_size == 0)
318         {
319             return_code = PARSER_NO_INPUT; // no input file/stream
320             return return_code;
321         }
322 
323         char *buffer = parser->line_buffer;
324         char *limit = &parser->line_buffer[sizeof(parser->line_buffer)];
325 
326         for(;;)
327         {
328             if(limit - buffer == 0)
329             {
330                 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
331             }
332 
333 #if DO_BUFFERIZE
334             return_code = buffer_input_stream_read_line(parser->input_stream_stack[parser->input_stream_stack_size - 1],
335                                                             buffer,
336                                                             limit - buffer);
337 #else
338             return_code = input_stream_read_line(parser->input_stream_stack[parser->input_stream_stack_size - 1],
339                                                     buffer,
340                                                     limit - buffer);
341 #endif
342 
343             if(return_code > 0)
344             {
345                 // one line has been read (maybe)
346 
347                 buffer += return_code;
348 
349                 if(return_code > 1)
350                 {
351                     if(buffer[-2] == parser->escape_characters[0])
352                     {
353                         // the EOL was escaped, concat the next line ...
354                         // do NOT remove the escape code now
355 
356                         continue;
357                     }
358                 }
359 
360                 parser->limit = buffer;
361                 parser->needle = parser->line_buffer;
362                 parser->line_number++;
363             }
364             else
365             {
366                 // error or end of stream
367 
368                 parser->limit = parser->line_buffer;
369                 parser->needle = parser->line_buffer;
370 
371                 if(return_code == 0)
372                 {
373                     if(parser->multiline != 0)
374                     {
375                         return_code = PARSER_SYNTAX_ERROR_MULTILINE;
376                     }
377                 }
378             }
379 
380             return return_code;
381         }
382     }
383 
384     return PARSER_EOF;
385 }
386 
387 ya_result
parser_next_token(parser_s * parser)388 parser_next_token(parser_s *parser)
389 {
390     ya_result return_code;
391 
392     for(;;)
393     {
394         if((return_code = parser_read_line(parser)) <= 0)
395         {
396 
397             if(return_code == 0)
398             {
399 
400 
401                 return PARSER_EOF;
402             }
403 
404             return return_code;
405         }
406 
407         // there are bytes
408 
409         return_code = 0;
410 
411         for(char *needle = parser->needle; needle < parser->limit; needle++)
412         {
413             u8 b = (u8)*needle;
414 
415             // test for multiline close
416 
417             bool has_escapes = FALSE;
418 
419             switch(parser->char_type[b])
420             {
421 #if DNSCORE_HAS_FULL_ASCII7
422                 case PARSER_CHAR_TYPE_TO_TRANSLATE:
423                     *needle = parser->translation_table[b];
424                     --needle;
425 #endif
426                 FALLTHROUGH // fall through
427                 case PARSER_CHAR_TYPE_ESCAPE_CHARACTER:
428                     // the text starts after the next char, whatever it is
429                     if(++needle < parser->limit)
430                     {
431                         if((*needle >= '0') && (*needle <= '2'))
432                         {
433                             // octal byte
434                             if(needle + 2 < parser->limit)
435                             {
436                                 //u8 octal_char = ((*needle) - '0') * 100;
437                                 ++needle;
438                                 if((*needle >= '0') && (*needle <= '9'))
439                                 {
440                                     //octal_char |= ((*needle) - '0') * 10;
441                                     ++needle;
442                                     if((*needle >= '0') && (*needle <= '9'))
443                                     {
444                                         //octal_char |= ((*needle) - '0');
445                                         needle -= 3;
446                                         has_escapes = TRUE;
447                                         // the buffer needs to be copied
448                                     }
449                                     else
450                                     {
451                                         // octal parse error
452 
453                                         return PARSER_INVALID_ESCAPED_FORMAT;
454                                     }
455                                 }
456                                 else
457                                 {
458                                     // octal parse error
459 
460                                     return PARSER_INVALID_ESCAPED_FORMAT;
461                                 }
462                             }
463                             else
464                             {
465                                 // octal parse error
466 
467                                 return PARSER_INVALID_ESCAPED_FORMAT;
468                             }
469                         }
470                     }
471 
472                     FALLTHROUGH // fall through
473 
474                 case PARSER_CHAR_TYPE_NORMAL:
475                 {
476                     // BLANK or MULTI => done
477                     // STRING => error
478                     // COMMENT => CUT
479 
480                     parser->text = needle++;
481 
482                     for(; needle < parser->limit; needle++)
483                     {
484                         b = (u8)*needle;
485 
486                         switch(parser->char_type[b])
487                         {
488                             case PARSER_CHAR_TYPE_MULTILINE_DELIMITER_END:
489                             {
490                                 if((parser->multiline) != 0 && (b == parser->multiline))
491                                 {
492                                     b = ' ';
493                                     *needle = b;
494                                     parser->multiline = 0;
495                                 }
496                                 else
497                                 {
498                                     return PARSER_SYNTAX_ERROR_MULTILINE;
499                                 }
500 
501                                 // we got the whole word
502 
503                                 parser->text_length = needle - parser->text;
504                                 parser->needle = needle + 1;
505                                 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
506                             }
507 
508                             case PARSER_CHAR_TYPE_MULTILINE_DELIMITER:
509                             {
510                                 if(parser->multiline == 0)
511                                 {
512                                     parser->multiline = parser->delimiter_close[b];
513                                 }
514                                 else
515                                 {
516                                     return PARSER_SYNTAX_ERROR_MULTILINE;
517                                 }
518 
519                                 *needle = ' ';
520 
521                                 // we got the whole word
522 
523                                 parser->text_length = needle - parser->text;
524                                 parser->needle = needle + 1;
525                                 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
526                             }
527 
528                             case PARSER_CHAR_TYPE_EOL:
529                             {
530                                 // only tell we got an EOL if we are not on "multiline"
531 
532                                 if(parser->multiline != 0)
533                                 {
534                                     *needle = ' ';
535                                 }
536 
537                                 // we got the whole word
538 
539                                 parser->text_length = needle - parser->text;
540                                 parser->needle = needle;
541                                 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
542                             }
543 
544                             case PARSER_CHAR_TYPE_BLANK_MARKER:
545                             {
546                                 // we got the whole word
547 
548                                 parser->text_length = needle - parser->text;
549                                 parser->needle = needle + 1;
550                                 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
551                             }
552 
553                             case PARSER_CHAR_TYPE_ESCAPE_CHARACTER:
554                             {
555                                 needle++;
556 
557                                 has_escapes = TRUE;
558 
559                                 break;
560                             }
561 
562                             case PARSER_CHAR_TYPE_COMMENT_MARKER:
563                             {
564                                 // we got the whole word
565 
566                                 parser->text_length = needle - parser->text;
567                                 parser->needle = needle;
568                                 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
569                             }
570 
571                             case PARSER_CHAR_TYPE_STRING_DELIMITER:
572                             {
573                                 // parse error
574                                 if(!parser->tokenize_on_string)
575                                 {
576                                     return PARSER_UNEXPECTED_STRING_DELIMITER;
577                                 }
578 
579                                 parser->text_length = needle - parser->text;
580                                 parser->needle = needle;
581                                 goto parser_next_token_end_of_token_found; /********* GOTO G O T O GOTO **********/
582                             }
583 #if DNSCORE_HAS_FULL_ASCII7
584                             case PARSER_CHAR_TYPE_TO_TRANSLATE:
585                             {
586                                 *needle = parser->translation_table[b];
587                                 break;
588                             }
589 #endif
590 
591                             //case PARSER_CHAR_TYPE_NORMAL:
592                             default:
593                             {
594                                 break;
595                             }
596                         } // end switch char type
597                     } // end for needle
598 
599                     parser_next_token_end_of_token_found: ;
600 
601                     // at this point we have a full token (maybe still escaped)
602 
603                     int token_len = needle - parser->text;
604 
605                     if(has_escapes)
606                     {
607                         yassert(parser->escape_characters_count <= 1);
608 
609                         if(parser->escape_characters_count == 1)
610                         {
611                             ya_result err;
612 
613                             char escape_char = parser->escape_characters[0];
614 
615                             if(FAIL(err = parser_clear_escape_codes(&parser->text, &token_len, escape_char, parser->extra_buffer)))
616                             {
617                                 return err;
618                             }
619                         }
620                     }
621 
622                     parser->text_length = token_len;
623                     parser->needle = needle;
624 
625                     return return_code | PARSER_WORD;
626                 }
627                 case PARSER_CHAR_TYPE_COMMENT_MARKER:
628                 {
629                     // cut line
630 
631                     parser->text = needle;
632                     parser->text_length = parser->limit - needle;
633 
634                     parser->needle = parser->limit;
635 
636                     if(parser->multiline == 0)
637                     {
638                         parser->needle_mark = NULL;
639                         return return_code | PARSER_COMMENT | PARSER_EOL;
640                     }
641                     else
642                     {
643                         return return_code | PARSER_COMMENT;
644                     }
645                 }
646                 case PARSER_CHAR_TYPE_MULTILINE_DELIMITER_END:
647                 {
648                     if((parser->multiline) != 0 && (b == parser->multiline))
649                     {
650                         /*b = ' ';
651                         *needle = b;*/
652                         parser->multiline = 0;
653                     }
654                     else
655                     {
656                         return PARSER_SYNTAX_ERROR_MULTILINE;
657                     }
658 
659                     break;
660                 }
661                 case PARSER_CHAR_TYPE_STRING_DELIMITER:
662                 {
663                     // find the end char ...
664                     // note: see strpbrk
665 
666                     char end_char = parser->delimiter_close[b];
667 
668                     char *string_start = ++needle;
669                     char *string_end;
670                     for(;;)
671                     {
672                         string_end = memchr(needle, end_char, parser->limit - needle);
673 
674                         if(string_end != NULL)
675                         {
676                             // this one may have been escaped
677 
678                             /// @note 20190917 edf -- Patch submitted trough github by JZerf
679                             ///                       This fixes the case of escaped escapes as well as an incorrect limit test
680                             ///                       The patch has been slightly adapted in 2.4.x but may be kept as it is in 2.3.x
681 
682                             /* Check if the string delimiter that was found was escaped. Keep in
683                              * mind that if there was an escape character in front of the string
684                              * delimiter, the escape character itself could have also been escaped
685                              * (and the one before that and the one before that...). What we can do
686                              * is check to see how many consecutive preceding escape characters
687                              * there are (by finding the first preceding nonescape character or the
688                              * opening string delimiter if there isn't one) and if it's an even
689                              * number then the string delimiter we found is unescaped but if it's an
690                              * odd number then it is escaped. Note that this will need to be revised
691                              * if YADIDA later adds support for using \DDD type escape sequences
692                              * between string delimiters.
693                              */
694 
695                             /// @note 20190917 edf -- while => do-while : I've kept the first if out of the loop to avoid needlessly
696                             ///                       testing for the needle. (Which should be the most common case)
697 
698                             const char *prior_nonescape_character = string_end - 1;
699 
700                             do
701                             {
702                                 if(parser->char_type[(u8)*prior_nonescape_character] != PARSER_CHAR_TYPE_ESCAPE_CHARACTER)
703                                 {
704                                     break;
705                                 }
706                             }
707                             while(--prior_nonescape_character >= needle);
708 
709                             // this one was escaped ...
710                             if(((string_end - prior_nonescape_character) & 1) == 1)
711                             {
712                                 break; /* String delimiter was not escaped if we got here. */
713                             }
714 
715                             string_end++;
716 
717                             // needle = string_end + 1 and try again ?
718 
719                             if(string_end >= parser->limit)
720                             {
721                                 return PARSER_EXPECTED_STRING_END_DELIMITER;
722                             }
723 
724                             needle = string_end;
725                         }
726                         else
727                         {
728                             // syntax error
729 
730                             return PARSER_EXPECTED_STRING_END_DELIMITER;
731                         }
732                     }
733 
734                     int token_len = string_end - string_start;
735 
736                     yassert(parser->escape_characters_count <= 1);
737 
738                     for(u32 escape_index = 0; escape_index < parser->escape_characters_count; escape_index++)
739                     {
740                         ya_result err;
741                         char escape_char = parser->escape_characters[escape_index];
742 
743                         if(FAIL(err = parser_clear_escape_codes(&string_start, &token_len, escape_char, parser->extra_buffer)))
744                         {
745                             return err;
746                         }
747                     }
748 
749                     parser->text = string_start;
750                     parser->text_length = token_len;
751 
752                     parser->needle = string_end + 1;
753 
754                     // end of token ... return ?
755 
756                     return return_code | PARSER_WORD;
757                 }
758                 case PARSER_CHAR_TYPE_MULTILINE_DELIMITER:
759                 {
760                     if(parser->multiline == 0)
761                     {
762                         parser->multiline = parser->delimiter_close[b];
763                     }
764                     else
765                     {
766                         return PARSER_SYNTAX_ERROR_MULTILINE;
767                     }
768                     *needle = ' ';
769                     break;
770                 }
771                 case PARSER_CHAR_TYPE_EOL:
772                 {
773                     // only tell we got an EOL if we are not on "multiline"
774 
775                     if(parser->multiline == 0)
776                     {
777                         parser->needle = parser->limit;
778                         parser->text_length = 0;
779                         parser->needle_mark = NULL;
780                         return PARSER_EOL;
781                     }
782 
783                     *needle = ' ';
784                 }
785                 FALLTHROUGH // fall through
786 
787                 case PARSER_CHAR_TYPE_BLANK_MARKER:
788                 {
789                     return_code |= PARSER_BLANK_START;
790                     break;
791                 }
792             }
793         }
794 
795         // reached the end of line without a token : EOL
796         // if we are not on a multiline: return EOL
797 
798         parser->needle = parser->limit;
799         parser->text_length = 0;
800 
801         if(parser->multiline == 0)
802         {
803 
804             return PARSER_EOL;
805         }
806 
807         // else read the next line (loop)
808     }
809 
810     // never reached
811 
812     // return 0;
813 }
814 
815 void
parser_set_eol(parser_s * parser)816 parser_set_eol(parser_s *parser)
817 {
818     parser->needle = (char*)&eol_park_needle[0];
819     parser->limit = (char*)&eol_park_needle[1];
820 }
821 
822 #if DNSCORE_HAS_FULL_ASCII7
823 void
parser_add_translation(parser_s * parser,u8 character,u8 translates_into)824 parser_add_translation(parser_s *parser, u8 character, u8 translates_into)
825 {
826     parser->translation_table[character] = translates_into;
827     parser->char_type[character] = PARSER_CHAR_TYPE_TO_TRANSLATE;
828 }
829 
830 void
parser_del_translation(parser_s * parser,u8 character)831 parser_del_translation(parser_s *parser, u8 character)
832 {
833     parser->char_type[character] = PARSER_CHAR_TYPE_NORMAL;
834 }
835 #endif
836 
837 ya_result
parser_next_characters(parser_s * parser)838 parser_next_characters(parser_s *parser)
839 {
840     parser->text = parser->needle;
841     parser->text_length = parser->limit - parser->needle;
842 
843     if(parser->multiline != 0)
844     {
845         u32 offset = parser->text_length;
846 
847         memcpy(parser->additional_buffer, parser->text, offset);
848         parser->additional_buffer[offset++] = ' ';
849 
850         ya_result ret;
851         do
852         {
853             ret = parser_next_token(parser);
854 
855             const char *text = parser_text(parser);
856             size_t text_length = parser_text_length(parser);
857 
858             size_t new_length = offset + text_length + 1;
859             if(new_length > sizeof(parser->additional_buffer))
860             {
861                 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
862             }
863 
864             memcpy(&parser->additional_buffer[offset], text, text_length);
865             offset = new_length;
866             parser->additional_buffer[offset - 1] = ' ';
867         }
868         while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
869 
870         parser->text = parser->additional_buffer;
871         parser->text_length = offset - 1;
872     }
873 
874     parser->needle = (char*)&eol_park_needle[0];
875     parser->limit = (char*)&eol_park_needle[1];
876 
877     return parser->text_length;
878 }
879 
880 ya_result
parser_next_characters_nospace(parser_s * parser)881 parser_next_characters_nospace(parser_s *parser)
882 {
883     parser->text = parser->needle;
884     parser->text_length = parser->limit - parser->needle;
885 
886     if(parser->multiline != 0)
887     {
888         u32 offset = parser->text_length;
889 
890         memcpy(parser->additional_buffer, parser->text, offset);
891 
892         ya_result ret;
893         do
894         {
895             ret = parser_next_token(parser);
896 
897             const char *text = parser_text(parser);
898             size_t text_length = parser_text_length(parser);
899             size_t new_length = offset + text_length;
900             if(new_length > sizeof(parser->additional_buffer))
901             {
902                 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
903             }
904 
905             memcpy(&parser->additional_buffer[offset], text, text_length);
906             offset = new_length;
907         }
908         while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
909 
910         parser->text = parser->additional_buffer;
911         parser->text_length = offset;
912     }
913 
914     char* text = parser->text;
915     while(parser->char_type[(u8)*text] == PARSER_CHAR_TYPE_BLANK_MARKER)
916     {
917         text++;
918     }
919     parser->text_length -= text - parser->text;
920     parser->text = text;
921 
922     parser->needle = (char*)&eol_park_needle[0];
923     parser->limit = (char*)&eol_park_needle[1];
924 
925     return parser->text_length;
926 }
927 
928 
929 ya_result
parser_concat_next_tokens(parser_s * parser)930 parser_concat_next_tokens(parser_s *parser)
931 {
932     ya_result ret;
933     size_t offset = 0;
934 
935 //    char space = parser->blank_marker[0];
936     char space = ' ';
937     do
938     {
939         ret = parser_next_token(parser);
940 
941         if(ret & PARSER_WORD)
942         {
943             //   if((ret & PARSER_COMMENT) != 0)
944             //  {
945             //     continue;
946             //  }
947 
948             const char *text = parser_text(parser);
949             size_t text_length = parser_text_length(parser);
950             size_t new_length = offset + text_length;
951             if(new_length > sizeof(parser->additional_buffer))
952             {
953                 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
954             }
955 
956             memcpy(&parser->additional_buffer[offset], text, text_length); // VS false positive: overflow is chercked right before
957             offset = new_length;
958 
959             parser->additional_buffer[offset] = space;
960             offset++;
961         }
962     }
963     while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
964 
965 
966     // remove the last space, because we always add a space
967     offset--;
968 
969 
970     char *text = parser->additional_buffer;
971 
972     parser->text_length = offset - (text - parser->additional_buffer);
973     parser->text = text;
974     parser->needle = (char*)&eol_park_needle[0];
975     parser->limit = (char*)&eol_park_needle[1];
976 
977     return parser->text_length;
978 }
979 
980 ya_result
parser_concat_current_and_next_tokens_nospace(parser_s * parser)981 parser_concat_current_and_next_tokens_nospace(parser_s *parser)
982 {
983     ya_result ret;
984     size_t offset;
985 
986     if(parser->text_length > sizeof(parser->additional_buffer))
987     {
988         return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
989     }
990 
991     memcpy(&parser->additional_buffer[0], parser->text, parser->text_length);
992     offset = parser->text_length;
993 
994     do
995     {
996         ret = parser_next_token(parser);
997 
998         if((ret & PARSER_COMMENT) != 0)
999         {
1000             continue;
1001         }
1002 
1003         const char *text = parser_text(parser);
1004         size_t text_length = parser_text_length(parser);
1005         size_t new_length = offset + text_length;
1006         if(new_length > sizeof(parser->additional_buffer))
1007         {
1008             return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
1009         }
1010 
1011         memcpy(&parser->additional_buffer[offset], text, text_length);
1012         offset = new_length;
1013     }
1014     while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
1015 
1016     char* text = parser->additional_buffer;
1017     while(parser->char_type[(u8)*text] == PARSER_CHAR_TYPE_BLANK_MARKER)
1018     {
1019         text++;
1020     }
1021     parser->text_length = offset - (text - parser->additional_buffer);
1022     parser->text = text;
1023     parser->needle = (char*)&eol_park_needle[0];
1024     parser->limit = (char*)&eol_park_needle[1];
1025 
1026     return parser->text_length;
1027 }
1028 
1029 ya_result
parser_concat_next_tokens_nospace(parser_s * parser)1030 parser_concat_next_tokens_nospace(parser_s *parser)
1031 {
1032     ya_result ret;
1033     size_t offset = 0;
1034     do
1035     {
1036         ret = parser_next_token(parser);
1037 
1038         if((ret & PARSER_COMMENT) != 0)
1039         {
1040             continue;
1041         }
1042 
1043         if((ret & PARSER_WORD) != 0)
1044         {
1045             const char *text = parser_text(parser);
1046             size_t text_length = parser_text_length(parser);
1047             size_t new_length = offset + text_length;
1048             if(new_length > sizeof(parser->additional_buffer))
1049             {
1050                 return PARSER_SYNTAX_ERROR_LINE_TOO_BIG;
1051             }
1052 
1053             memcpy(&parser->additional_buffer[offset], text, text_length);
1054             offset = new_length;
1055         }
1056     }
1057     while((ret & (PARSER_EOF|PARSER_EOL)) == 0);
1058 
1059     char* text = parser->additional_buffer;
1060     while(parser->char_type[(u8)*text] == PARSER_CHAR_TYPE_BLANK_MARKER)
1061     {
1062         text++;
1063     }
1064     parser->text_length = offset - (text - parser->additional_buffer);
1065     parser->text = text;
1066     parser->needle = (char*)&eol_park_needle[0];
1067     parser->limit = (char*)&eol_park_needle[1];
1068 
1069     return parser->text_length;
1070 }
1071 
1072 ya_result
parser_push_stream(parser_s * p,input_stream * is)1073 parser_push_stream(parser_s *p, input_stream *is)
1074 {
1075     if(p->input_stream_stack_size < PARSER_INCLUDE_DEPTH_MAX )
1076     {
1077 #if DO_BUFFERIZE
1078         buffer_input_stream_init(is, is, PARSER_STREAM_BUFFER_SIZE);
1079 #endif
1080         p->input_stream_stack[p->input_stream_stack_size] = is;
1081         p->line_number_stack[p->input_stream_stack_size] = p->line_number;
1082 
1083         ++p->input_stream_stack_size;
1084 
1085         return p->input_stream_stack_size;
1086     }
1087 
1088     return PARSER_INCLUDE_DEPTH_TOO_BIG;
1089 }
1090 
1091 /**
1092  * @param p
1093  * @return the popped stream or NULL if the stack is empty
1094  */
1095 
1096 input_stream *
parser_pop_stream(parser_s * p)1097 parser_pop_stream(parser_s *p)
1098 {
1099     input_stream *is = NULL;
1100 
1101     if(p->input_stream_stack_size > 0)
1102     {
1103          is = p->input_stream_stack[--p->input_stream_stack_size];
1104 #if DEBUG
1105          p->input_stream_stack[p->input_stream_stack_size] = NULL;
1106 #endif
1107          p->line_number = p->line_number_stack[p->input_stream_stack_size];
1108     }
1109 
1110     return is;
1111 }
1112 
1113 ///////////////////////////////////////////////////////////////////////////////
1114 
1115 ya_result
parser_copy_next_ttl(parser_s * p,s32 * out_value)1116 parser_copy_next_ttl(parser_s *p, s32 *out_value)
1117 {
1118     ya_result return_code = parser_next_word(p);
1119 
1120     if(ISOK(return_code))
1121     {
1122         const char *text = parser_text(p);
1123         u32 text_len = parser_text_length(p);
1124 
1125         char lc = text[text_len - 1];
1126 
1127         if(isdigit(lc))
1128         {
1129             return_code = parse_s32_check_range_len_base10(text, text_len, out_value, 0, MAX_S32);
1130         }
1131         else
1132         {
1133             s64 mult = 1;
1134             text_len--;
1135 
1136             switch(lc)
1137             {
1138                 case 'w':
1139                 case 'W':
1140                     mult = 60 * 60 * 24 * 7;
1141                     break;
1142                 case 'd':
1143                 case 'D':
1144                     mult = 60 * 60 * 24;
1145                     break;
1146                 case 'h':
1147                 case 'H':
1148                     mult = 60 * 60;
1149                     break;
1150                 case 'm':
1151                 case 'M':
1152                     mult = 60;
1153                     break;
1154                 case 's':
1155                 case 'S':
1156                     break;
1157                 default:
1158                 {
1159                     return PARSER_UNKNOWN_TIME_UNIT;
1160                 }
1161             }
1162 
1163             s32 ttl32;
1164 
1165             if(ISOK(return_code = parse_s32_check_range_len_base10(text, text_len, &ttl32, 0, MAX_S32)))
1166             {
1167                 mult *= ttl32;
1168 
1169                 if(mult <= MAX_S32)
1170                 {
1171                     *out_value = (s32)mult;
1172                 }
1173                 else
1174                 {
1175                     return_code = PARSEINT_ERROR;
1176                 }
1177             }
1178         }
1179     }
1180 
1181     return return_code;
1182 }
1183 
1184 ya_result
parser_type_bit_maps_initialise(parser_s * p,type_bit_maps_context * context)1185 parser_type_bit_maps_initialise(parser_s *p, type_bit_maps_context* context)
1186 {
1187     u16                                                                type;
1188 
1189     u8                      *type_bitmap_field = context->type_bitmap_field;
1190     u8                                  *window_size = context->window_size;
1191 
1192     u32                                              type_bit_maps_size = 0;
1193     u8                                                                   ws;
1194 
1195     /*    ------------------------------------------------------------    */
1196 
1197     ZEROMEMORY(window_size, sizeof(context->window_size));
1198     context->last_type_window = -1;
1199     ZEROMEMORY(type_bitmap_field, sizeof(context->type_bitmap_field));
1200 
1201     ya_result return_code;
1202 
1203     do
1204     {
1205         if(FAIL(return_code = parser_next_token(p)))
1206         {
1207             return return_code;
1208         }
1209 
1210         if((return_code & PARSER_WORD) != 0)
1211         {
1212             const char *text = parser_text(p);
1213             u32 text_len = parser_text_length(p);
1214 
1215             ya_result ret; // MUST use another return variable than return_code
1216             if(FAIL(ret = dns_type_from_case_name_length(text, text_len, &type)))
1217             {
1218                 return ret;
1219             }
1220 
1221             type = ntohs(type); /* types are now stored in NETWORK order */
1222 
1223             /* Network bit order */
1224             type_bitmap_field[type >> 3] |= 1 << (7 - (type & 7));
1225             window_size[type >> 8] = ((type & 0xf8) >> 3) + 1;
1226 
1227             context->last_type_window = MAX(type >> 8, context->last_type_window);
1228         }
1229 
1230     }
1231     while((return_code & (PARSER_EOF|PARSER_EOL)) == 0);
1232 
1233     for(s32 i = 0; i <= context->last_type_window; i++)
1234     {
1235         ws = window_size[i];
1236 
1237         if(ws > 0)
1238         {
1239             type_bit_maps_size += 1 + 1 + ws;
1240         }
1241     }
1242 
1243     context->type_bit_maps_size = type_bit_maps_size;
1244 
1245     return type_bit_maps_size;
1246 }
1247 
1248 ya_result
parser_get_network_protocol_from_next_word(parser_s * p,int * out_value)1249 parser_get_network_protocol_from_next_word(parser_s *p, int *out_value)
1250 {
1251     char protocol_token[64];
1252 
1253     ya_result ret = parser_copy_next_word(p, protocol_token, sizeof(protocol_token));
1254 
1255     if(ISOK(ret))
1256     {
1257         ret = protocol_name_to_id(protocol_token, out_value);
1258     }
1259 
1260     return ret;
1261 }
1262 
1263 ya_result
parser_get_network_service_port_from_next_word(parser_s * p,int * out_value)1264 parser_get_network_service_port_from_next_word(parser_s *p, int *out_value)
1265 {
1266     char service_token[64];
1267 
1268     ya_result ret = parser_copy_next_word(p, service_token, sizeof(service_token));
1269 
1270     if(ISOK(ret))
1271     {
1272         ret = server_name_to_port(service_token, out_value);
1273     }
1274 
1275     return ret;
1276 }
1277 
1278 /** @} */
1279 
1280