1 /*------------------------------------------------------------------------------
2 *
3 * Copyright (c) 2011-2021, EURid vzw. All rights reserved.
4 * The YADIFA TM software product is provided under the BSD 3-clause license:
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of EURid nor the names of its contributors may be
16 * used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *------------------------------------------------------------------------------
32 *
33 */
34
35 #ifndef PARSER_H
36 #define PARSER_H
37
38 #include <dnscore/sys_types.h>
39 #include <dnscore/parsing.h>
40 #include <dnscore/typebitmap.h>
41 #include <dnscore/input_stream.h>
42
43 #ifdef __cplusplus
44 extern "C" {
45 #endif
46
47 struct parser_delimiter_s
48 {
49 char begin; // " ' <
50 char end; // " ' >
51 u8 id;
52 u8 reserved;
53 };
54
55 typedef struct parser_delimiter_s parser_delimiter_s;
56
// Capacity limits: these size the stream stack and the line buffers of struct parser_s.

#define PARSER_INCLUDE_DEPTH_MAX                    256
#define PARSER_LINE_LENGTH_MAX                      65535

// Character classes (presumably the values stored in parser_s::char_type -- confirm in parser.c).

#define PARSER_CHAR_TYPE_NORMAL                     0
#define PARSER_CHAR_TYPE_ESCAPE_CHARACTER           1
#define PARSER_CHAR_TYPE_COMMENT_MARKER             2
#define PARSER_CHAR_TYPE_STRING_DELIMITER           3
#define PARSER_CHAR_TYPE_MULTILINE_DELIMITER        4
#define PARSER_CHAR_TYPE_MULTILINE_DELIMITER_END    5
#define PARSER_CHAR_TYPE_BLANK_MARKER               6
#define PARSER_CHAR_TYPE_EOL                        7
#if DNSCORE_HAS_FULL_ASCII7
#define PARSER_CHAR_TYPE_TO_TRANSLATE               8
#endif

#define PARSER_CHAR_TYPE_IGNORE                     255

// Parser status values.

#define PARSER_STATUS_NORMAL                        0
#define PARSER_STATUS_STRING                        1
#define PARSER_STATUS_MULTILINE                     2

// Token flags: each one is a distinct bit, tested with '&' on the value
// returned by parser_next_token().

#define PARSER_EOF                                  1
#define PARSER_EOL                                  2
#define PARSER_COMMENT                              4
#define PARSER_WORD                                 8
#define PARSER_BLANK_START                          16
83
// Parser error codes: PARSER_ERROR_BASE plus a small code, converted to s32.

#define PARSER_ERROR_BASE                       0x800D0000
#define PARSER_ERROR_CODE(code_)                ((s32)(PARSER_ERROR_BASE+(code_)))

#define PARSER_SYNTAX_ERROR_MULTILINE           PARSER_ERROR_CODE(0x0001)
#define PARSER_SYNTAX_ERROR_EXPECTED_EOL        PARSER_ERROR_CODE(0x0002)
#define PARSER_SYNTAX_ERROR_LINE_TOO_BIG        PARSER_ERROR_CODE(0x0003)
#define PARSER_BUFFER_TOO_SMALL                 PARSER_ERROR_CODE(0x0004)
#define PARSER_NO_INPUT                         PARSER_ERROR_CODE(0x0005)
#define PARSER_ODD_CHAR_NUMBER                  PARSER_ERROR_CODE(0x0006)
#define PARSER_LINE_ENDED_WITH_ESCAPE           PARSER_ERROR_CODE(0x0007)
#define PARSER_UNEXPECTED_STRING_DELIMITER      PARSER_ERROR_CODE(0x0008)
#define PARSER_EXPECTED_STRING_END_DELIMITER    PARSER_ERROR_CODE(0x0009)
#define PARSER_INCLUDE_DEPTH_TOO_BIG            PARSER_ERROR_CODE(0x000A)
#define PARSER_UNKNOWN_TIME_UNIT                PARSER_ERROR_CODE(0x000B)
#define PARSER_NO_MARK_SET                      PARSER_ERROR_CODE(0x000C)   // returned by parser_rewind()
#define PARSER_REACHED_END_OF_LINE              PARSER_ERROR_CODE(0x000D)   // returned by parser_next_word()
#define PARSER_FOUND_WORD                       PARSER_ERROR_CODE(0x000E)   // returned by parser_expect_eol()
#define PARSER_REACHED_END_OF_FILE              PARSER_ERROR_CODE(0x000F)   // returned by parser_next_word()
#define PARSER_INVALID_ESCAPED_FORMAT           PARSER_ERROR_CODE(0x0010)
103
104 struct parser_token_s
105 {
106 const char *word;
107 u32 word_len;
108 };
109
110 typedef struct parser_token_s parser_token_s;
111
112 struct parser_s
113 {
114 // SETTINGS
115
116 // ie: "" '' <> []
117 parser_delimiter_s *string_delimiters;
118
119
120 // ie: ()
121 parser_delimiter_s *multiline_delimiters;
122
123 // ie: # ;
124 const char *comment_marker;
125
126 // ie: SPACE TAB
127 const char *blank_marker;
128
129 // ie: BACKSLASH
130 const char *escape_characters;
131
132 // STATE MACHINE
133
134 char *needle;
135 char *needle_mark;
136 char *limit;
137 char *text;
138 u32 text_length;
139 u32 string_delimiters_count;
140
141 u32 multiline_delimiters_count;
142 u32 comment_marker_count;
143
144 u32 blank_marker_count;
145 u32 escape_characters_count;
146
147 u32 line_number;
148 u32 input_stream_stack_size;
149
150 char multiline; // TODO: stack of multilines
151 char cutchar; //
152 bool tokenize_on_string;
153 bool close_last_stream;
154
155 input_stream *input_stream_stack[PARSER_INCLUDE_DEPTH_MAX];
156 u32 line_number_stack[PARSER_INCLUDE_DEPTH_MAX];
157
158 char char_type[256];
159 char delimiter_close[256];
160 #if DNSCORE_HAS_FULL_ASCII7
161 char translation_table[256];
162 #endif
163
164 char line_buffer[PARSER_LINE_LENGTH_MAX];
165 char line_buffer_zero;
166 char extra_buffer[PARSER_LINE_LENGTH_MAX];
167 char extra_buffer_zero;
168 char additional_buffer[PARSER_LINE_LENGTH_MAX];
169 char additional_buffer_zero;
170 };
171
172 typedef struct parser_s parser_s;
173
/**
 * Presumably registers the PARSER_* error codes -- confirm in parser.c.
 *
 * Declared with (void) so that this is a real prototype in C: an empty
 * parameter list would leave the arguments unchecked.
 */
void parser_init_error_codes(void);
175
176 ya_result parser_init(parser_s *parser,
177 const char *string_delimiters, // by 2
178 const char *multiline_delimiters, // by 2
179 const char *comment_markers, // by 1
180 const char *blank_makers, // by 1
181 const char *escape_characters // by 1
182 );
183
184 ya_result parser_finalize(parser_s *parser);
185
186 ya_result parser_next_token(parser_s *parser);
187
188 ya_result parser_next_characters(parser_s *parser);
189 ya_result parser_next_characters_nospace(parser_s *parser);
190
191 ya_result parser_concat_next_tokens(parser_s *parser);
192 ya_result parser_concat_next_tokens_nospace(parser_s *parser);
193
194 ya_result parser_concat_current_and_next_tokens_nospace(parser_s *parser);
195
196 void parser_set_eol(parser_s *parser);
197
198 #if DNSCORE_HAS_FULL_ASCII7
199 void parser_add_translation(parser_s *parser, u8 character, u8 translates_into);
200 void parser_del_translation(parser_s *parser, u8 character);
201 #endif
202
203 static inline u32
parser_text_length(const parser_s * parser)204 parser_text_length(const parser_s *parser)
205 {
206 return parser->text_length;
207 }
208
209 static inline const char *
parser_text(const parser_s * parser)210 parser_text(const parser_s *parser)
211 {
212 return parser->text;
213 }
214
215 /**
216 *
217 * sets a terminating zero at the end of the current text returned by parser_text(parser)
218 * can only work once
219 * parser_text_unasciiz(parser) MUST be called before parsing the remaining of the input
220 *
221 * @param parser
222 * @return
223 */
224
225 static inline bool
parser_text_asciiz(parser_s * parser)226 parser_text_asciiz(parser_s *parser)
227 {
228 if(parser->cutchar == '\0')
229 {
230 parser->cutchar = parser->text[parser->text_length];
231 parser->text[parser->text_length] = '\0';
232
233 return TRUE;
234 }
235
236 return FALSE;
237 }
238
239 /**
240 *
241 * see parser_text_unasciiz
242 *
243 * @param parser
244 * @return
245 */
246
247 static inline bool
parser_text_unasciiz(parser_s * parser)248 parser_text_unasciiz(parser_s *parser)
249 {
250 if(parser->cutchar != '\0')
251 {
252 parser->text[parser->text_length] = parser->cutchar;
253 parser->cutchar = '\0';
254
255 return TRUE;
256 }
257
258 return FALSE;
259 }
260
261 static inline u8
parser_text_delimiter(const parser_s * parser)262 parser_text_delimiter(const parser_s *parser)
263 {
264 (void)parser;
265 return 0; // not implemented
266 }
267
268 ya_result parser_push_stream(parser_s *p, input_stream *is);
269
270 input_stream *parser_pop_stream(parser_s *p);
271
272 static inline u32
parser_stream_count(const parser_s * p)273 parser_stream_count(const parser_s *p)
274 {
275 return p->input_stream_stack_size;
276 }
277
278 /**
279 *
280 * Set the rewind position in the parser
281 *
282 * @param p
283 */
284
285 static inline void
parser_mark(parser_s * p)286 parser_mark(parser_s *p)
287 {
288 p->needle_mark = p->needle;
289 }
290
291 static inline ya_result
parser_rewind(parser_s * p)292 parser_rewind(parser_s *p)
293 {
294 if(p->needle_mark != NULL)
295 {
296 p->needle = p->needle_mark;
297 return SUCCESS;
298 }
299 else
300 {
301 return PARSER_NO_MARK_SET;
302 }
303 }
304
305 static inline u32
parser_get_line_number(const parser_s * p)306 parser_get_line_number(const parser_s *p)
307 {
308 return p->line_number;
309 }
310
311 ///////////////////////////////////////////////////////////////////////////////
312
313 static inline ya_result
parser_next_word(parser_s * p)314 parser_next_word(parser_s *p)
315 {
316 ya_result ret;
317
318 for(;;)
319 {
320 if(FAIL(ret = parser_next_token(p)))
321 {
322 return ret;
323 }
324
325 if(ret & PARSER_WORD)
326 {
327 return 1;
328 }
329
330 if(ret & (PARSER_EOL|PARSER_EOF))
331 {
332 if(ret & PARSER_EOL)
333 {
334 return PARSER_REACHED_END_OF_LINE;
335 }
336 else
337 {
338 return PARSER_REACHED_END_OF_FILE;
339 }
340 }
341 }
342 }
343
344 static inline ya_result
parser_get_u16(const char * text,u32 text_len,u16 * out_value)345 parser_get_u16(const char *text, u32 text_len, u16 *out_value)
346 {
347 u32 tmp_u32;
348 ya_result return_code = parse_u32_check_range_len_base10(text, text_len, &tmp_u32, 0, MAX_U16);
349 *out_value = (u16)tmp_u32;
350
351 return return_code;
352 }
353
354 static inline ya_result
parser_copy_next_u16(parser_s * p,u16 * out_value)355 parser_copy_next_u16(parser_s *p, u16 *out_value)
356 {
357 ya_result return_code = parser_next_word(p);
358
359 if(ISOK(return_code))
360 {
361 const char *text = parser_text(p);
362 u32 text_len = parser_text_length(p);
363 u32 tmp_u32;
364 return_code = parse_u32_check_range_len_base10(text, text_len, &tmp_u32, 0, MAX_U16);
365 *out_value = (u16)tmp_u32;
366 }
367
368 return return_code;
369 }
370
371 static inline ya_result
parser_copy_next_u8(parser_s * p,u8 * out_value)372 parser_copy_next_u8(parser_s *p, u8 *out_value)
373 {
374 ya_result return_code = parser_next_word(p);
375
376 if(ISOK(return_code))
377 {
378 const char *text = parser_text(p);
379 u32 text_len = parser_text_length(p);
380 u32 tmp_u32;
381 return_code = parse_u32_check_range_len_base10(text, text_len, &tmp_u32, 0, MAX_U8);
382 *out_value = (u8)tmp_u32;
383 }
384
385 return return_code;
386 }
387
388 static inline ya_result
parser_get_u8(const char * text,u32 text_len,u8 * out_value)389 parser_get_u8(const char *text, u32 text_len, u8 *out_value)
390 {
391 u32 tmp_u32;
392 ya_result return_code = parse_u32_check_range_len_base10(text, text_len, &tmp_u32, 0, MAX_U8);
393 *out_value = (u8)tmp_u32;
394
395 return return_code;
396 }
397
398 static inline ya_result
parser_get_s8(const char * text,u32 text_len,s8 * out_value)399 parser_get_s8(const char *text, u32 text_len, s8 *out_value)
400 {
401 s32 tmp_s32;
402 ya_result return_code = parse_s32_check_range_len_base10(text, text_len, &tmp_s32, (s32)MIN_S8, (s32)MAX_S8);
403 *out_value = (s8)tmp_s32;
404
405 return return_code;
406 }
407
408 static inline ya_result
parser_expect_eol(parser_s * p)409 parser_expect_eol(parser_s *p)
410 {
411 ya_result return_code;
412
413 for(;;)
414 {
415 return_code = parser_next_token(p);
416
417 if(return_code & PARSER_WORD)
418 {
419 return PARSER_FOUND_WORD;
420 }
421
422 if(return_code & (PARSER_EOL|PARSER_EOF))
423 {
424 return SUCCESS;
425 }
426 }
427 }
428
429 static inline bool
parse_word_match(const char * text,u32 text_len,const char * match,u32 match_len)430 parse_word_match(const char *text, u32 text_len, const char *match, u32 match_len)
431 {
432 if(text_len == match_len)
433 {
434 bool ret = (memcmp(text, match, text_len) == 0);
435
436 return ret;
437 }
438
439 return FALSE;
440 }
441
442 static inline bool
parse_word_case_match(const char * text,u32 text_len,const char * match,u32 match_len)443 parse_word_case_match(const char *text, u32 text_len, const char *match, u32 match_len)
444 {
445 if(text_len == match_len)
446 {
447 for(u32 i = 0; i < text_len; ++i)
448 {
449 if(tolower(text[i]) != tolower(match[i]))
450 {
451 return FALSE;
452 }
453 }
454
455 return TRUE;
456 }
457
458 return FALSE;
459 }
460
461 static inline ya_result
parser_copy_word(parser_s * p,char * out_text,u32 out_text_len)462 parser_copy_word(parser_s *p, char *out_text, u32 out_text_len)
463 {
464 u32 len = parser_text_length(p);
465 if(len < out_text_len)
466 {
467 memcpy(out_text, parser_text(p), len);
468 out_text[len] = '\0';
469
470 return len;
471 }
472 else
473 {
474 return PARSER_BUFFER_TOO_SMALL;
475 }
476 }
477
478 static inline ya_result
parser_copy_next_word(parser_s * p,char * out_text,u32 out_text_len)479 parser_copy_next_word(parser_s *p, char *out_text, u32 out_text_len)
480 {
481 ya_result return_code = parser_next_word(p);
482
483 if(ISOK(return_code))
484 {
485 u32 len = parser_text_length(p);
486 if(len < out_text_len)
487 {
488 memcpy(out_text, parser_text(p), len);
489 out_text[len] = '\0';
490
491 return_code = len;
492 }
493 else
494 {
495 return_code = PARSER_BUFFER_TOO_SMALL;
496 }
497 }
498
499 return return_code;
500 }
501
502 static inline ya_result
parser_copy_next_class(parser_s * p,u16 * out_value)503 parser_copy_next_class(parser_s *p, u16 *out_value)
504 {
505 ya_result return_code;
506
507 char text[32];
508
509 if(ISOK(return_code = parser_copy_next_word(p, text, sizeof(text))))
510 {
511 return_code = dns_class_from_name(text, out_value);
512 }
513
514 return return_code;
515 }
516
517 static inline ya_result
parser_copy_next_type(parser_s * p,u16 * out_value)518 parser_copy_next_type(parser_s *p, u16 *out_value)
519 {
520 ya_result return_code;
521
522 char text[32];
523
524 if(ISOK(return_code = parser_copy_next_word(p, text, sizeof(text))))
525 {
526 return_code = dns_type_from_name(text, out_value);
527 }
528
529 return return_code;
530 }
531
532 ya_result parser_copy_next_ttl(parser_s *p, s32 *out_value);
533
534 static inline ya_result
parser_copy_next_fqdn(parser_s * p,u8 * out_value)535 parser_copy_next_fqdn(parser_s *p, u8 *out_value)
536 {
537 ya_result return_code = parser_next_word(p);
538
539 if(ISOK(return_code))
540 {
541 const char *text = parser_text(p);
542 u32 text_len = parser_text_length(p);
543
544 return_code = cstr_to_dnsname_with_check_len(out_value, text, text_len);
545 }
546
547 return return_code;
548 }
549
550 static inline ya_result
parser_copy_next_fqdn_with_origin(parser_s * p,u8 * out_value,const u8 * origin)551 parser_copy_next_fqdn_with_origin(parser_s *p, u8 *out_value, const u8 *origin)
552 {
553 ya_result return_code = parser_next_word(p);
554
555 if(ISOK(return_code))
556 {
557 const char *text = parser_text(p);
558 u32 text_len = parser_text_length(p);
559
560 return_code = cstr_to_dnsname_with_check_len_with_origin(out_value, text, text_len, origin);
561 }
562
563 return return_code;
564 }
565
566 static inline ya_result
parser_copy_next_fqdn_locase_with_origin(parser_s * p,u8 * out_value,const u8 * origin)567 parser_copy_next_fqdn_locase_with_origin(parser_s *p, u8 *out_value, const u8 *origin)
568 {
569 ya_result return_code = parser_next_word(p);
570
571 if(ISOK(return_code))
572 {
573 const char *text = parser_text(p);
574 u32 text_len = parser_text_length(p);
575
576 return_code = cstr_to_locase_dnsname_with_check_len_with_origin(out_value, text, text_len, origin);
577 }
578
579 return return_code;
580 }
581
582 static inline ya_result
parser_copy_next_yyyymmddhhmmss(parser_s * p,u32 * out_value)583 parser_copy_next_yyyymmddhhmmss(parser_s *p, u32 *out_value)
584 {
585 ya_result return_code = parser_next_word(p);
586
587 if(ISOK(return_code))
588 {
589 const char *text = parser_text(p);
590 u32 text_len = parser_text_length(p);
591 time_t t;
592 return_code = parse_yyyymmddhhmmss_check_range_len(text, text_len, &t);
593 *out_value = (u32)t;
594 }
595
596 return return_code;
597 }
598
599 static inline ya_result
parser_get_s16(const char * text,u32 text_len,s16 * out_value)600 parser_get_s16(const char *text, u32 text_len, s16 *out_value)
601 {
602 s32 tmp_s32;
603 ya_result return_code = parse_s32_check_range_len_base10(text, text_len, &tmp_s32, MIN_S16, MAX_S16);
604 *out_value = (s16)tmp_s32;
605
606 return return_code;
607 }
608
609 static inline ya_result
parser_copy_next_s16(parser_s * p,s16 * out_value)610 parser_copy_next_s16(parser_s *p, s16 *out_value)
611 {
612 ya_result return_code = parser_next_word(p);
613
614 if(ISOK(return_code))
615 {
616 const char *text = parser_text(p);
617 u32 text_len = parser_text_length(p);
618
619 s32 tmp_s32;
620 return_code = parse_s32_check_range_len_base10(text, text_len, &tmp_s32, MIN_S16, MAX_S16);
621 *out_value = (s16)tmp_s32;
622 }
623
624 return return_code;
625 }
626
627 static inline ya_result
parser_get_u32(const char * text,u32 text_len,u32 * out_value)628 parser_get_u32(const char *text, u32 text_len, u32 *out_value)
629 {
630 ya_result return_code = parse_u32_check_range_len_base10(text, text_len, out_value, 0, MAX_U32);
631
632 return return_code;
633 }
634
635 static inline ya_result
parser_get_s32(const char * text,u32 text_len,s32 * out_value)636 parser_get_s32(const char *text, u32 text_len, s32 *out_value)
637 {
638 ya_result return_code = parse_s32_check_range_len_base10(text, text_len, out_value, MIN_S32, MAX_S32);
639
640 return return_code;
641 }
642
643 static inline ya_result
parser_copy_next_s32(parser_s * p,s32 * out_value)644 parser_copy_next_s32(parser_s *p, s32 *out_value)
645 {
646 ya_result return_code = parser_next_word(p);
647
648 if(ISOK(return_code))
649 {
650 const char *text = parser_text(p);
651 u32 text_len = parser_text_length(p);
652
653 return_code = parse_s32_check_range_len_base10(text, text_len, out_value, MIN_S32, MAX_S32);
654 }
655
656 return return_code;
657 }
658
659 static inline ya_result
parser_copy_next_u32(parser_s * p,u32 * out_value)660 parser_copy_next_u32(parser_s *p, u32 *out_value)
661 {
662 ya_result return_code = parser_next_word(p);
663
664 if(ISOK(return_code))
665 {
666 const char *text = parser_text(p);
667 u32 text_len = parser_text_length(p);
668
669 return_code = parse_u32_check_range_len_base10(text, text_len, out_value, 0, MAX_U32);
670 }
671
672 return return_code;
673 }
674
675 static inline ya_result
parser_get_u64(const char * text,u32 text_len,u64 * out_value)676 parser_get_u64(const char *text, u32 text_len, u64 *out_value)
677 {
678 ya_result return_code = parse_u64_check_range_len_base10(text, text_len, out_value, 0, MAX_U64);
679
680 return return_code;
681 }
682
683 static inline ya_result
parser_copy_next_u64(parser_s * p,u64 * out_value)684 parser_copy_next_u64(parser_s *p, u64 *out_value)
685 {
686 ya_result return_code = parser_next_word(p);
687
688 if(ISOK(return_code))
689 {
690 const char *text = parser_text(p);
691 u32 text_len = parser_text_length(p);
692
693 return_code = parse_u64_check_range_len_base10(text, text_len, out_value, 0, MAX_U64);
694 }
695
696 return return_code;
697 }
698
699 ya_result parser_get_network_protocol_from_next_word(parser_s *p, int *out_value);
700
701 ya_result parser_get_network_service_port_from_next_word(parser_s *p, int *out_value);
702
703 ya_result parser_type_bit_maps_initialise(parser_s *p, type_bit_maps_context* context);
704
705 #ifdef __cplusplus
706 }
707 #endif
708
709 #endif /* PARSER_H */
710