1 /*------------------------------------------------------------------------------
2  *
3  * Copyright (c) 2011-2021, EURid vzw. All rights reserved.
4  * The YADIFA TM software product is provided under the BSD 3-clause license:
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *        * Redistributions in binary form must reproduce the above copyright
13  *          notice, this list of conditions and the following disclaimer in the
14  *          documentation and/or other materials provided with the distribution.
15  *        * Neither the name of EURid nor the names of its contributors may be
16  *          used to endorse or promote products derived from this software
17  *          without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  *------------------------------------------------------------------------------
32  *
33  */
34 
35 #ifndef PARSER_H
36 #define	PARSER_H
37 
38 #include <dnscore/sys_types.h>
39 #include <dnscore/parsing.h>
40 #include <dnscore/typebitmap.h>
41 #include <dnscore/input_stream.h>
42 
43 #ifdef	__cplusplus
44 extern "C" {
45 #endif
46 
/**
 * One pair of delimiter characters: the character that opens a delimited
 * section and the character that closes it.
 */

struct parser_delimiter_s
{
    char begin; // opening character, e.g. " ' <
    char end;   // closing character, e.g. " ' >
    u8 id;      // NOTE(review): meaning not visible in this header, confirm in parser.c
    u8 reserved;
};

typedef struct parser_delimiter_s parser_delimiter_s;
56 
// Number of slots in parser_s::input_stream_stack and parser_s::line_number_stack
#define PARSER_INCLUDE_DEPTH_MAX 256
// Size, in bytes, of each of the line/extra/additional buffers of parser_s
#define PARSER_LINE_LENGTH_MAX 65535
59 
// Character classes.
// NOTE(review): presumably the values stored in parser_s::char_type[], indexed
// by character; confirm in parser.c.
#define PARSER_CHAR_TYPE_NORMAL                  0
#define PARSER_CHAR_TYPE_ESCAPE_CHARACTER        1
#define PARSER_CHAR_TYPE_COMMENT_MARKER          2
#define PARSER_CHAR_TYPE_STRING_DELIMITER        3
#define PARSER_CHAR_TYPE_MULTILINE_DELIMITER     4
#define PARSER_CHAR_TYPE_MULTILINE_DELIMITER_END 5
#define PARSER_CHAR_TYPE_BLANK_MARKER            6
#define PARSER_CHAR_TYPE_EOL                     7
// Only defined when the library is built with DNSCORE_HAS_FULL_ASCII7
#if DNSCORE_HAS_FULL_ASCII7
#define PARSER_CHAR_TYPE_TO_TRANSLATE            8
#endif

// NOTE(review): parser_s::char_type is an array of plain char; where char is
// signed, 255 does not round-trip unchanged. Verify how this value is compared.
#define PARSER_CHAR_TYPE_IGNORE                255
73 
// Parser states.
// NOTE(review): not referenced in this header; presumably the tokenizer's
// state while in plain text, in a string or in a multiline section. Confirm.
#define PARSER_STATUS_NORMAL                 0
#define PARSER_STATUS_STRING                 1
#define PARSER_STATUS_MULTILINE              2
77 
// Bit flags. The inline helpers of this header test the non-error result of
// parser_next_token() against PARSER_WORD, PARSER_EOL and PARSER_EOF with '&'.
#define PARSER_EOF          1
#define PARSER_EOL          2
#define PARSER_COMMENT      4
#define PARSER_WORD         8
#define PARSER_BLANK_START 16
83 
// Parser error codes: PARSER_ERROR_BASE plus a small code, converted to s32.
// The base has its top bit set.
#define PARSER_ERROR_BASE                       0x800D0000
#define PARSER_ERROR_CODE(code_)                ((s32)(PARSER_ERROR_BASE+(code_)))

#define PARSER_SYNTAX_ERROR_MULTILINE           PARSER_ERROR_CODE(0x0001)
#define PARSER_SYNTAX_ERROR_EXPECTED_EOL        PARSER_ERROR_CODE(0x0002)
#define PARSER_SYNTAX_ERROR_LINE_TOO_BIG        PARSER_ERROR_CODE(0x0003)
#define PARSER_BUFFER_TOO_SMALL                 PARSER_ERROR_CODE(0x0004) // returned by parser_copy_word / parser_copy_next_word
#define PARSER_NO_INPUT                         PARSER_ERROR_CODE(0x0005)
#define PARSER_ODD_CHAR_NUMBER                  PARSER_ERROR_CODE(0x0006)
#define PARSER_LINE_ENDED_WITH_ESCAPE           PARSER_ERROR_CODE(0x0007)
#define PARSER_UNEXPECTED_STRING_DELIMITER      PARSER_ERROR_CODE(0x0008)
#define PARSER_EXPECTED_STRING_END_DELIMITER    PARSER_ERROR_CODE(0x0009)
#define PARSER_INCLUDE_DEPTH_TOO_BIG            PARSER_ERROR_CODE(0x000A)
#define PARSER_UNKNOWN_TIME_UNIT                PARSER_ERROR_CODE(0x000B)
#define PARSER_NO_MARK_SET                      PARSER_ERROR_CODE(0x000C) // returned by parser_rewind when no mark is set
#define PARSER_REACHED_END_OF_LINE              PARSER_ERROR_CODE(0x000D) // returned by parser_next_word
#define PARSER_FOUND_WORD                       PARSER_ERROR_CODE(0x000E) // returned by parser_expect_eol
#define PARSER_REACHED_END_OF_FILE              PARSER_ERROR_CODE(0x000F) // returned by parser_next_word
#define PARSER_INVALID_ESCAPED_FORMAT           PARSER_ERROR_CODE(0x0010)
103 
/**
 * A token described as a pointer to its text and a length.
 * Not referenced by any other declaration visible in this header.
 */

struct parser_token_s
{
    const char *word;   // text of the token
    u32 word_len;       // length of the text
};

typedef struct parser_token_s parser_token_s;
111 
/**
 * Complete state of a parser: its configured special characters, the position
 * in the current line, the stack of input streams and the working buffers.
 *
 * The structure embeds three buffers of PARSER_LINE_LENGTH_MAX bytes each.
 */

struct parser_s
{
    // SETTINGS

    // ie: "" '' <> []
    parser_delimiter_s *string_delimiters;


    // ie: ()
    parser_delimiter_s *multiline_delimiters;

    // ie: # ;
    const char *comment_marker;

    // ie: SPACE TAB
    const char *blank_marker;

    // ie: BACKSLASH
    const char *escape_characters;

    // STATE MACHINE

    char *needle;       // current position; saved by parser_mark(), restored by parser_rewind()
    char *needle_mark;  // position saved by parser_mark(); parser_rewind() fails while it is NULL
    char *limit;        // NOTE(review): presumably the end of the valid data, confirm in parser.c
    char *text;         // current text, as returned by parser_text(); not necessarily NUL-terminated
    u32 text_length;    // length of the current text, as returned by parser_text_length()
    u32 string_delimiters_count;

    u32 multiline_delimiters_count;
    u32 comment_marker_count;

    u32 blank_marker_count;
    u32 escape_characters_count;

    u32 line_number;                // as returned by parser_get_line_number()
    u32 input_stream_stack_size;    // as returned by parser_stream_count()

    char multiline;     // TODO: stack of multilines
    char cutchar;       // character replaced by parser_text_asciiz(); '\0' when nothing is saved
    bool tokenize_on_string;
    bool close_last_stream;

    input_stream *input_stream_stack[PARSER_INCLUDE_DEPTH_MAX];
    u32 line_number_stack[PARSER_INCLUDE_DEPTH_MAX];

    char char_type[256];
    char delimiter_close[256];
#if DNSCORE_HAS_FULL_ASCII7
    char translation_table[256];
#endif

    // NOTE(review): each *_zero member presumably provides the byte that follows
    // a completely filled buffer, so a terminator can be written there. Confirm.
    char line_buffer[PARSER_LINE_LENGTH_MAX];
    char line_buffer_zero;
    char extra_buffer[PARSER_LINE_LENGTH_MAX];
    char extra_buffer_zero;
    char additional_buffer[PARSER_LINE_LENGTH_MAX];
    char additional_buffer_zero;
};

typedef struct parser_s parser_s;
173 
// Declared with an explicit (void): in C an empty parameter list does not
// declare a prototype, so calls with arguments would not be diagnosed.
// NOTE(review): presumably registers the PARSER_* error codes, confirm in parser.c
void parser_init_error_codes(void);
175 
/**
 * Initialises a parser with its sets of special characters.
 *
 * NOTE(review): the "by 2" / "by 1" annotations presumably give the number of
 * characters making one entry: (begin, end) pairs for the delimiters, a single
 * character for the other sets. Confirm in parser.c.
 *
 * @param parser the parser to initialise
 * @param string_delimiters string delimiter characters
 * @param multiline_delimiters multiline delimiter characters
 * @param comment_markers comment marker characters
 * @param blank_makers blank characters
 * @param escape_characters escape characters
 * @return a ya_result code
 */

ya_result parser_init(parser_s *parser,
            const char *string_delimiters,      // by 2
            const char *multiline_delimiters,   // by 2
            const char *comment_markers,        // by 1
            const char *blank_makers,           // by 1
            const char *escape_characters       // by 1
        );
183 
// NOTE(review): presumably releases what parser_init() set up, confirm in parser.c
ya_result parser_finalize(parser_s *parser);

/**
 * Reads the next token.
 *
 * The inline helpers of this header treat a FAIL() result as an error and
 * otherwise test the result against PARSER_WORD, PARSER_EOL and PARSER_EOF.
 */

ya_result parser_next_token(parser_s *parser);

// NOTE(review): the functions below are implemented outside this header;
// their exact semantics must be read from parser.c.
ya_result parser_next_characters(parser_s *parser);
ya_result parser_next_characters_nospace(parser_s *parser);

ya_result parser_concat_next_tokens(parser_s *parser);
ya_result parser_concat_next_tokens_nospace(parser_s *parser);

ya_result parser_concat_current_and_next_tokens_nospace(parser_s *parser);

void parser_set_eol(parser_s *parser);
197 
// Character translation, only available when built with DNSCORE_HAS_FULL_ASCII7.
// NOTE(review): presumably backed by parser_s::translation_table, confirm in parser.c
#if DNSCORE_HAS_FULL_ASCII7
void parser_add_translation(parser_s *parser, u8 character, u8 translates_into);
void parser_del_translation(parser_s *parser, u8 character);
#endif
202 
203 static inline u32
parser_text_length(const parser_s * parser)204 parser_text_length(const parser_s *parser)
205 {
206     return parser->text_length;
207 }
208 
209 static inline const char *
parser_text(const parser_s * parser)210 parser_text(const parser_s *parser)
211 {
212     return parser->text;
213 }
214 
215 /**
216  *
217  * sets a terminating zero at the end of the current text returned by parser_text(parser)
218  * can only work once
219  * parser_text_unasciiz(parser) MUST be called before parsing the remaining of the input
220  *
221  * @param parser
222  * @return
223  */
224 
225 static inline bool
parser_text_asciiz(parser_s * parser)226 parser_text_asciiz(parser_s *parser)
227 {
228     if(parser->cutchar == '\0')
229     {
230         parser->cutchar = parser->text[parser->text_length];
231         parser->text[parser->text_length] = '\0';
232 
233         return TRUE;
234     }
235 
236     return FALSE;
237 }
238 
239 /**
240  *
241  * see parser_text_unasciiz
242  *
243  * @param parser
244  * @return
245  */
246 
247 static inline bool
parser_text_unasciiz(parser_s * parser)248 parser_text_unasciiz(parser_s *parser)
249 {
250     if(parser->cutchar != '\0')
251     {
252         parser->text[parser->text_length] = parser->cutchar;
253         parser->cutchar = '\0';
254 
255         return TRUE;
256     }
257 
258     return FALSE;
259 }
260 
261 static inline u8
parser_text_delimiter(const parser_s * parser)262 parser_text_delimiter(const parser_s *parser)
263 {
264     (void)parser;
265     return 0; // not implemented
266 }
267 
// NOTE(review): presumably pushes 'is' on parser_s::input_stream_stack, which
// holds at most PARSER_INCLUDE_DEPTH_MAX streams. Confirm in parser.c.
ya_result parser_push_stream(parser_s *p, input_stream *is);

// NOTE(review): presumably removes and returns the most recently pushed stream. Confirm in parser.c.
input_stream *parser_pop_stream(parser_s *p);
271 
272 static inline u32
parser_stream_count(const parser_s * p)273 parser_stream_count(const parser_s *p)
274 {
275     return p->input_stream_stack_size;
276 }
277 
278 /**
279  *
280  * Set the rewind position in the parser
281  *
282  * @param p
283  */
284 
285 static inline void
parser_mark(parser_s * p)286 parser_mark(parser_s *p)
287 {
288     p->needle_mark = p->needle;
289 }
290 
291 static inline ya_result
parser_rewind(parser_s * p)292 parser_rewind(parser_s *p)
293 {
294     if(p->needle_mark != NULL)
295     {
296         p->needle = p->needle_mark;
297         return SUCCESS;
298     }
299     else
300     {
301         return PARSER_NO_MARK_SET;
302     }
303 }
304 
305 static inline u32
parser_get_line_number(const parser_s * p)306 parser_get_line_number(const parser_s *p)
307 {
308     return p->line_number;
309 }
310 
311 ///////////////////////////////////////////////////////////////////////////////
312 
313 static inline ya_result
parser_next_word(parser_s * p)314 parser_next_word(parser_s *p)
315 {
316     ya_result ret;
317 
318     for(;;)
319     {
320         if(FAIL(ret = parser_next_token(p)))
321         {
322             return ret;
323         }
324 
325         if(ret & PARSER_WORD)
326         {
327             return 1;
328         }
329 
330         if(ret & (PARSER_EOL|PARSER_EOF))
331         {
332             if(ret & PARSER_EOL)
333             {
334                 return PARSER_REACHED_END_OF_LINE;
335             }
336             else
337             {
338                 return PARSER_REACHED_END_OF_FILE;
339             }
340         }
341     }
342 }
343 
344 static inline ya_result
parser_get_u16(const char * text,u32 text_len,u16 * out_value)345 parser_get_u16(const char *text, u32 text_len, u16 *out_value)
346 {
347     u32 tmp_u32;
348     ya_result return_code = parse_u32_check_range_len_base10(text, text_len, &tmp_u32, 0, MAX_U16);
349     *out_value = (u16)tmp_u32;
350 
351     return return_code;
352 }
353 
354 static inline ya_result
parser_copy_next_u16(parser_s * p,u16 * out_value)355 parser_copy_next_u16(parser_s *p, u16 *out_value)
356 {
357     ya_result return_code = parser_next_word(p);
358 
359     if(ISOK(return_code))
360     {
361         const char *text = parser_text(p);
362         u32 text_len = parser_text_length(p);
363         u32 tmp_u32;
364         return_code = parse_u32_check_range_len_base10(text, text_len, &tmp_u32, 0, MAX_U16);
365         *out_value = (u16)tmp_u32;
366     }
367 
368     return return_code;
369 }
370 
371 static inline ya_result
parser_copy_next_u8(parser_s * p,u8 * out_value)372 parser_copy_next_u8(parser_s *p, u8 *out_value)
373 {
374     ya_result return_code = parser_next_word(p);
375 
376     if(ISOK(return_code))
377     {
378         const char *text = parser_text(p);
379         u32 text_len = parser_text_length(p);
380         u32 tmp_u32;
381         return_code = parse_u32_check_range_len_base10(text, text_len, &tmp_u32, 0, MAX_U8);
382         *out_value = (u8)tmp_u32;
383     }
384 
385     return return_code;
386 }
387 
388 static inline ya_result
parser_get_u8(const char * text,u32 text_len,u8 * out_value)389 parser_get_u8(const char *text, u32 text_len, u8 *out_value)
390 {
391     u32 tmp_u32;
392     ya_result return_code = parse_u32_check_range_len_base10(text, text_len, &tmp_u32, 0, MAX_U8);
393     *out_value = (u8)tmp_u32;
394 
395     return return_code;
396 }
397 
398 static inline ya_result
parser_get_s8(const char * text,u32 text_len,s8 * out_value)399 parser_get_s8(const char *text, u32 text_len, s8 *out_value)
400 {
401     s32 tmp_s32;
402     ya_result return_code = parse_s32_check_range_len_base10(text, text_len, &tmp_s32, (s32)MIN_S8, (s32)MAX_S8);
403     *out_value = (s8)tmp_s32;
404 
405     return return_code;
406 }
407 
408 static inline ya_result
parser_expect_eol(parser_s * p)409 parser_expect_eol(parser_s *p)
410 {
411     ya_result return_code;
412 
413     for(;;)
414     {
415         return_code = parser_next_token(p);
416 
417         if(return_code & PARSER_WORD)
418         {
419             return PARSER_FOUND_WORD;
420         }
421 
422         if(return_code & (PARSER_EOL|PARSER_EOF))
423         {
424             return SUCCESS;
425         }
426     }
427 }
428 
429 static inline bool
parse_word_match(const char * text,u32 text_len,const char * match,u32 match_len)430 parse_word_match(const char *text, u32 text_len, const char *match, u32 match_len)
431 {
432     if(text_len == match_len)
433     {
434         bool ret = (memcmp(text, match, text_len) == 0);
435 
436         return ret;
437     }
438 
439     return FALSE;
440 }
441 
442 static inline bool
parse_word_case_match(const char * text,u32 text_len,const char * match,u32 match_len)443 parse_word_case_match(const char *text, u32 text_len, const char *match, u32 match_len)
444 {
445     if(text_len == match_len)
446     {
447         for(u32 i = 0; i < text_len; ++i)
448         {
449             if(tolower(text[i]) != tolower(match[i]))
450             {
451                 return FALSE;
452             }
453         }
454 
455         return TRUE;
456     }
457 
458     return FALSE;
459 }
460 
461 static inline ya_result
parser_copy_word(parser_s * p,char * out_text,u32 out_text_len)462 parser_copy_word(parser_s *p, char *out_text, u32 out_text_len)
463 {
464     u32 len = parser_text_length(p);
465     if(len < out_text_len)
466     {
467         memcpy(out_text, parser_text(p), len);
468         out_text[len] = '\0';
469 
470         return len;
471     }
472     else
473     {
474         return PARSER_BUFFER_TOO_SMALL;
475     }
476 }
477 
478 static inline ya_result
parser_copy_next_word(parser_s * p,char * out_text,u32 out_text_len)479 parser_copy_next_word(parser_s *p, char *out_text, u32 out_text_len)
480 {
481     ya_result return_code = parser_next_word(p);
482 
483     if(ISOK(return_code))
484     {
485         u32 len = parser_text_length(p);
486         if(len < out_text_len)
487         {
488             memcpy(out_text, parser_text(p), len);
489             out_text[len] = '\0';
490 
491             return_code = len;
492         }
493         else
494         {
495             return_code = PARSER_BUFFER_TOO_SMALL;
496         }
497     }
498 
499     return return_code;
500 }
501 
502 static inline ya_result
parser_copy_next_class(parser_s * p,u16 * out_value)503 parser_copy_next_class(parser_s *p, u16 *out_value)
504 {
505     ya_result return_code;
506 
507     char text[32];
508 
509     if(ISOK(return_code = parser_copy_next_word(p, text, sizeof(text))))
510     {
511         return_code = dns_class_from_name(text, out_value);
512     }
513 
514     return return_code;
515 }
516 
517 static inline ya_result
parser_copy_next_type(parser_s * p,u16 * out_value)518 parser_copy_next_type(parser_s *p, u16 *out_value)
519 {
520     ya_result return_code;
521 
522     char text[32];
523 
524     if(ISOK(return_code = parser_copy_next_word(p, text, sizeof(text))))
525     {
526         return_code = dns_type_from_name(text, out_value);
527     }
528 
529     return return_code;
530 }
531 
// NOTE(review): implemented outside this header; presumably reads the next
// word as a TTL into *out_value (PARSER_UNKNOWN_TIME_UNIT suggests unit
// suffixes are accepted). Confirm in parser.c.
ya_result parser_copy_next_ttl(parser_s *p, s32 *out_value);
533 
534 static inline ya_result
parser_copy_next_fqdn(parser_s * p,u8 * out_value)535 parser_copy_next_fqdn(parser_s *p, u8 *out_value)
536 {
537     ya_result return_code = parser_next_word(p);
538 
539     if(ISOK(return_code))
540     {
541         const char *text = parser_text(p);
542         u32 text_len = parser_text_length(p);
543 
544         return_code = cstr_to_dnsname_with_check_len(out_value, text, text_len);
545     }
546 
547     return return_code;
548 }
549 
550 static inline ya_result
parser_copy_next_fqdn_with_origin(parser_s * p,u8 * out_value,const u8 * origin)551 parser_copy_next_fqdn_with_origin(parser_s *p, u8 *out_value, const u8 *origin)
552 {
553     ya_result return_code = parser_next_word(p);
554 
555     if(ISOK(return_code))
556     {
557         const char *text = parser_text(p);
558         u32 text_len = parser_text_length(p);
559 
560         return_code = cstr_to_dnsname_with_check_len_with_origin(out_value, text, text_len, origin);
561     }
562 
563     return return_code;
564 }
565 
566 static inline ya_result
parser_copy_next_fqdn_locase_with_origin(parser_s * p,u8 * out_value,const u8 * origin)567 parser_copy_next_fqdn_locase_with_origin(parser_s *p, u8 *out_value, const u8 *origin)
568 {
569     ya_result return_code = parser_next_word(p);
570 
571     if(ISOK(return_code))
572     {
573         const char *text = parser_text(p);
574         u32 text_len = parser_text_length(p);
575 
576         return_code = cstr_to_locase_dnsname_with_check_len_with_origin(out_value, text, text_len, origin);
577     }
578 
579     return return_code;
580 }
581 
582 static inline ya_result
parser_copy_next_yyyymmddhhmmss(parser_s * p,u32 * out_value)583 parser_copy_next_yyyymmddhhmmss(parser_s *p, u32 *out_value)
584 {
585     ya_result return_code = parser_next_word(p);
586 
587     if(ISOK(return_code))
588     {
589         const char *text = parser_text(p);
590         u32 text_len = parser_text_length(p);
591         time_t t;
592         return_code = parse_yyyymmddhhmmss_check_range_len(text, text_len, &t);
593         *out_value = (u32)t;
594     }
595 
596     return return_code;
597 }
598 
599 static inline ya_result
parser_get_s16(const char * text,u32 text_len,s16 * out_value)600 parser_get_s16(const char *text, u32 text_len, s16 *out_value)
601 {
602     s32 tmp_s32;
603     ya_result return_code = parse_s32_check_range_len_base10(text, text_len, &tmp_s32, MIN_S16, MAX_S16);
604     *out_value = (s16)tmp_s32;
605 
606     return return_code;
607 }
608 
609 static inline ya_result
parser_copy_next_s16(parser_s * p,s16 * out_value)610 parser_copy_next_s16(parser_s *p, s16 *out_value)
611 {
612     ya_result return_code = parser_next_word(p);
613 
614     if(ISOK(return_code))
615     {
616         const char *text = parser_text(p);
617         u32 text_len = parser_text_length(p);
618 
619         s32 tmp_s32;
620         return_code = parse_s32_check_range_len_base10(text, text_len, &tmp_s32, MIN_S16, MAX_S16);
621         *out_value = (s16)tmp_s32;
622     }
623 
624     return return_code;
625 }
626 
627 static inline ya_result
parser_get_u32(const char * text,u32 text_len,u32 * out_value)628 parser_get_u32(const char *text, u32 text_len, u32 *out_value)
629 {
630     ya_result return_code = parse_u32_check_range_len_base10(text, text_len, out_value, 0, MAX_U32);
631 
632     return return_code;
633 }
634 
635 static inline ya_result
parser_get_s32(const char * text,u32 text_len,s32 * out_value)636 parser_get_s32(const char *text, u32 text_len, s32 *out_value)
637 {
638     ya_result return_code = parse_s32_check_range_len_base10(text, text_len, out_value, MIN_S32, MAX_S32);
639 
640     return return_code;
641 }
642 
643 static inline ya_result
parser_copy_next_s32(parser_s * p,s32 * out_value)644 parser_copy_next_s32(parser_s *p, s32 *out_value)
645 {
646     ya_result return_code = parser_next_word(p);
647 
648     if(ISOK(return_code))
649     {
650         const char *text = parser_text(p);
651         u32 text_len = parser_text_length(p);
652 
653         return_code = parse_s32_check_range_len_base10(text, text_len, out_value, MIN_S32, MAX_S32);
654     }
655 
656     return return_code;
657 }
658 
659 static inline ya_result
parser_copy_next_u32(parser_s * p,u32 * out_value)660 parser_copy_next_u32(parser_s *p, u32 *out_value)
661 {
662     ya_result return_code = parser_next_word(p);
663 
664     if(ISOK(return_code))
665     {
666         const char *text = parser_text(p);
667         u32 text_len = parser_text_length(p);
668 
669         return_code = parse_u32_check_range_len_base10(text, text_len, out_value, 0, MAX_U32);
670     }
671 
672     return return_code;
673 }
674 
675 static inline ya_result
parser_get_u64(const char * text,u32 text_len,u64 * out_value)676 parser_get_u64(const char *text, u32 text_len, u64 *out_value)
677 {
678     ya_result return_code = parse_u64_check_range_len_base10(text, text_len, out_value, 0, MAX_U64);
679 
680     return return_code;
681 }
682 
683 static inline ya_result
parser_copy_next_u64(parser_s * p,u64 * out_value)684 parser_copy_next_u64(parser_s *p, u64 *out_value)
685 {
686     ya_result return_code = parser_next_word(p);
687 
688     if(ISOK(return_code))
689     {
690         const char *text = parser_text(p);
691         u32 text_len = parser_text_length(p);
692 
693         return_code = parse_u64_check_range_len_base10(text, text_len, out_value, 0, MAX_U64);
694     }
695 
696     return return_code;
697 }
698 
// NOTE(review): the three functions below are implemented outside this header;
// the descriptions are inferred from their names and must be confirmed in parser.c.

// Presumably reads the next word as a network protocol and stores its number in *out_value.
ya_result parser_get_network_protocol_from_next_word(parser_s *p, int *out_value);

// Presumably reads the next word as a network service and stores its port in *out_value.
ya_result parser_get_network_service_port_from_next_word(parser_s *p, int *out_value);

// Presumably fills a type bit maps context from the words that follow.
ya_result parser_type_bit_maps_initialise(parser_s *p, type_bit_maps_context* context);
704 
705 #ifdef	__cplusplus
706 }
707 #endif
708 
709 #endif	/* PARSER_H */
710