1 /*------------------------------------------------------------------------------
2  *
3  * Copyright (c) 2011-2021, EURid vzw. All rights reserved.
4  * The YADIFA TM software product is provided under the BSD 3-clause license:
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *        * Redistributions in binary form must reproduce the above copyright
13  *          notice, this list of conditions and the following disclaimer in the
14  *          documentation and/or other materials provided with the distribution.
15  *        * Neither the name of EURid nor the names of its contributors may be
16  *          used to endorse or promote products derived from this software
17  *          without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  *------------------------------------------------------------------------------
32  *
33  */
34 
35 /** @defgroup dnscoretools Generic Tools
36  *  @ingroup dnscore
37  *  @brief
38  *
39  * @{
40  */
41 
42 #include "dnscore/dnscore-config.h"
43 
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <time.h>
47 #include <ctype.h>
48 
49 #include <arpa/inet.h>
50 #include <sys/socket.h>
51 #include <dnscore/timems.h>
52 #include <dnscore/config_settings.h>
53 
54 #include "dnscore/parsing.h"
55 
#if !HAVE_TIMEGM && !HAS_TIMEGM
/**
 * Replacement timegm(), compiled only when neither HAVE_TIMEGM nor HAS_TIMEGM
 * is set. It simply forwards to timegm_internal().
 */
static inline time_t timegm(struct tm *tv)
{
    time_t epoch = timegm_internal(tv);

    return epoch;
}
#endif
62 
63 /** \brief A string will be checked
64  *
65  *  The number will be extracted from the string if present. This number can
66  *  be 10-based, or hex-based, or...\n
67  *  The base must be between 2 and 36 and the number must be be between the min
68  *  values and max value
69  *
70  *  @param[in]  src  string with number part in it
71  *  @param[out] dst  number found
72  *  @param[in]  min
73  *  @param[in]  max
74  *  @param[in]  base
75  *
76  *  @retval OK
77  *  @retval NOK, if no digits found, or number not in the range
78  */
79 ya_result
parse_u32_check_range(const char * src,u32 * dst,u32 min,u32 max,u8 base)80 parse_u32_check_range(const char *src, u32 *dst, u32 min, u32 max, u8 base)
81 {
82     long long int val;
83     char *endptr;
84     int err;
85 
86     /** @note sizeof(long long int) > sizeof(u32) */
87 
88     /*    ------------------------------------------------------------    */
89 
90     errno = 0;
91 
92     /** @note strtol returns a 64 bits integer on 64 bits architectures
93      *        strtoll should be 64 bits on both 32 and 64 bits architectures
94      *	      so for portability 64 bits has to be handled
95      */
96 
97     val = strtoll(src, &endptr, base); /* stroll = 64 bits : dst is 32 */
98 
99     err = errno; /* in case errno is a macro */
100 
101     if((endptr == src) || (err == EINVAL) || (err == ERANGE) || (val < (long long int)min) || (val > (long long int)max))
102     {
103         return PARSEINT_ERROR;
104     }
105 
106     *dst = (u32)val;
107 
108     return OK;
109 }
110 
111 
112 
113 ya_result
parse_u32_check_range_len_base10(const char * src,u32 src_len,u32 * dst,u32 min,u32 max)114 parse_u32_check_range_len_base10(const char *src, u32 src_len, u32 *dst, u32 min, u32 max)
115 {
116     // 0......N
117     // 67612321
118 
119     if(src_len > 10)
120     {
121         return PARSEINT_ERROR; // out of range
122     }
123 
124     --src_len;
125 
126     u64 output_value = ((u64)src[src_len]) - '0';
127 
128     if((u64)output_value > 9)
129     {
130         return PARSEINT_ERROR;
131     }
132 
133     u32 base_multiplier = 10;
134 
135     while(src_len > 0)
136     {
137         --src_len;
138 
139         u64 value = ((u64)src[src_len]) - '0';
140 
141         if(value > 9)
142         {
143             return PARSEINT_ERROR;
144         }
145 
146         value *= base_multiplier;
147 
148         output_value += value;
149 
150         base_multiplier *= 10;
151     }
152 
153     if((output_value < min) || (output_value > max))
154     {
155         return PARSEINT_ERROR;
156     }
157 
158     *dst = (u32)output_value;
159 
160     return SUCCESS;
161 }
162 
163 ya_result
parse_s32_check_range_len_base10(const char * src,u32 src_len,s32 * dst,s32 min,s32 max)164 parse_s32_check_range_len_base10(const char *src, u32 src_len, s32 *dst, s32 min, s32 max)
165 {
166     // 0......N
167     // 67612321
168 
169     --src_len;
170 
171     if(src_len > 10)
172     {
173         return PARSEINT_ERROR; // out of range
174     }
175 
176     bool minus;
177 
178     if((minus = (src[0] == '-')))
179     {
180         src++;
181         --src_len;
182     }
183 
184     u32 base_multiplier = 10;
185 
186     s64 output_value = ((s64)src[src_len]) - '0';
187 
188     if((u64)output_value > 9)
189     {
190         return PARSEINT_ERROR;
191     }
192 
193     while(src_len > 0)
194     {
195         --src_len;
196 
197         s64 value = ((s64)src[src_len]) - '0';
198 
199         if((u64)value > 9)
200         {
201             return PARSEINT_ERROR;
202         }
203 
204         value *= base_multiplier;
205 
206         output_value += value;
207 
208         base_multiplier *= 10;
209     }
210 
211     if(minus)
212     {
213         output_value = -output_value;
214     }
215 
216     if((output_value < min) || (output_value > max))
217     {
218         return PARSEINT_ERROR;
219     }
220 
221     *dst = (s32)output_value;
222 
223     return SUCCESS;
224 }
225 
226 ya_result
parse_u64_check_range_len_base10(const char * src,u32 src_len,u64 * dst,u64 min,u64 max)227 parse_u64_check_range_len_base10(const char *src, u32 src_len, u64 *dst, u64 min, u64 max)
228 {
229     // 0......N
230     // 18446744073709551615
231 
232     if(src_len > 20)
233     {
234         return PARSEINT_ERROR; // out of range
235     }
236 
237     --src_len; // 19
238 
239     u64 output_value = ((u64)src[src_len]) - '0';
240 
241     if((u64)output_value > 9)
242     {
243         return PARSEINT_ERROR;
244     }
245 
246     if(src_len < 19) // if no risk of overflow
247     {
248         u64 base_multiplier = 10;
249 
250         while(src_len > 0)
251         {
252             --src_len;
253 
254             u64 value = ((u64)src[src_len]) - '0';
255 
256             if(value > 9)
257             {
258                 return PARSEINT_ERROR;
259             }
260 
261             output_value += value * base_multiplier;
262 
263             base_multiplier *= 10;
264         }
265     }
266     else // the only case with possible overflow at the last iteration of the loop
267     {
268         u64 base_multiplier = 10;
269 
270         while(src_len-- > 1)
271         {
272             u64 value = ((u64)src[src_len]) - '0';
273 
274             if(value > 9)
275             {
276                 return PARSEINT_ERROR;
277             }
278 
279             output_value += value * base_multiplier;
280 
281             base_multiplier *= 10;
282         }
283 
284         if(src_len == 0)
285         {
286             u64 max_div_10 = max / 10;
287 
288             if(output_value > max_div_10)   // check before multiplication there will be no 64 bits overflow
289             {                               // this only should be tested for the last iteration of the loop
290                 return PARSEINT_ERROR;      // => the last pass should happen out of this loop
291             }
292 
293             u64 value = ((u64)src[0]) - '0';
294 
295             if(value > 9)
296             {
297                 return PARSEINT_ERROR;
298             }
299 
300             value *= base_multiplier;
301 
302             if(output_value > max - value)  // check before addition there will be no 64 bits overflow
303             {
304                 return PARSEINT_ERROR;
305             }
306 
307             output_value += value;
308         }
309     }
310 
311     if((output_value < min) || (output_value > max)) // the second half of the test could probably get rid of, with a slight modification
312     {
313         return PARSEINT_ERROR;
314     }
315 
316     *dst = output_value;
317 
318     return SUCCESS;
319 }
320 
321 
322 
323 /** \brief Converts a string to an epoch
324  *
325  *  Converts a string to an epoch
326  *
327  *  @param[in]  src  string in the form YYYYMMDDhhmmss
328  *  @param[out] dst  value of the source converted into GMT epoch
329  *
330  *  @retval OK
331  *  @retval NOK, if no digits found, or number not in the range
332  */
333 ya_result
parse_yyyymmddhhmmss_check_range_len(const char * src,u32 src_len,time_t * dst)334 parse_yyyymmddhhmmss_check_range_len(const char *src, u32 src_len, time_t *dst)
335 {
336     struct tm thetime;
337 
338     if(src_len != 14)
339     {
340         return PARSEDATE_ERROR;
341     }
342 
343 #if DEBUG
344     memset(&thetime, 0xff, sizeof(thetime));
345 #endif
346 
347     u32 tmp_u32;
348 
349     if(FAIL(parse_u32_check_range_len_base10(src, 4, &tmp_u32, 1970, 2106/*2038*/)))
350     {
351         return PARSEDATE_ERROR;
352     }
353     thetime.tm_year = tmp_u32;
354     src += 4;
355 
356     if(FAIL(parse_u32_check_range_len_base10(src, 2, &tmp_u32, 1, 12)))
357     {
358         return PARSEDATE_ERROR;
359     }
360     thetime.tm_mon = tmp_u32;
361     src += 2;
362 
363     if(FAIL(parse_u32_check_range_len_base10(src, 2, &tmp_u32, 1, 31)))
364     {
365         return PARSEDATE_ERROR;
366     }
367     thetime.tm_mday = tmp_u32;
368     src += 2;
369 
370     if(FAIL(parse_u32_check_range_len_base10(src, 2, &tmp_u32, 0, 23)))
371     {
372         return PARSEDATE_ERROR;
373     }
374     thetime.tm_hour = tmp_u32;
375     src += 2;
376 
377     if(FAIL(parse_u32_check_range_len_base10(src, 2, &tmp_u32, 0, 59)))
378     {
379         return PARSEDATE_ERROR;
380     }
381     thetime.tm_min = tmp_u32;
382     src += 2;
383 
384     if(FAIL(parse_u32_check_range_len_base10(src, 2, &tmp_u32, 0, 61)))
385     {
386         return PARSEDATE_ERROR;
387     }
388     thetime.tm_sec = tmp_u32;
389 
390     thetime.tm_year -= 1900;
391     thetime.tm_mon--;
392 
393     time_t t = timegm(&thetime);
394 
395     if(t < 0)
396     {
397         return PARSEDATE_ERROR;
398     }
399 
400     *dst = (u32)t;
401 
402     return OK;
403 }
404 
405 ya_result
parse_yyyymmddhhmmss_check_range(const char * src,time_t * dst)406 parse_yyyymmddhhmmss_check_range(const char *src, time_t *dst)
407 {
408     ya_result return_code;
409 
410     return_code = parse_yyyymmddhhmmss_check_range_len(src, strlen(src), dst);
411 
412     return return_code;
413 }
414 
415 /** \brief Converts a chain of pascal strings to a string
416  *
417  *  Converts a chain of pascal strings to a string
418  *
419  *  @param[in]  src  string in the form [len+chars]*
420  *  @param[out] dst  string
421  *
422  *  @retval OK
423  *  @retval NOK, if something is broken
424  */
425 ya_result
parse_pstring(char ** srcp,size_t src_len,u8 * dst,size_t dst_len)426 parse_pstring(char **srcp, size_t src_len, u8 *dst, size_t dst_len)
427 {
428     char *s = *srcp;
429     const char * const limit = &s[src_len];
430     u8 *p;
431     const u8 *dst_limit;
432     bool quoted;
433 
434     if(src_len == 0 || dst_len < 256)
435     {
436         return PARSESTRING_ERROR;
437     }
438 
439     p = &dst[1];
440     dst_limit = &dst[dst_len];
441 
442     quoted = FALSE;
443     if(s[0] == '"')
444     {
445         quoted = TRUE;
446         s++;
447     }
448 
449     for(; s < limit; s++)
450     {
451         char c = *s;
452 
453         if((c < 32))
454         {
455             return PARSE_INVALID_CHARACTER;
456         }
457 
458         // If unescaped '\' go on otherwise set escape = 1
459         if(c == '\\')
460         {
461             // grab next char IF there is one
462 
463             s++;
464 
465             if(s < limit)
466             {
467                 if((c < 32))
468                 {
469                     return PARSE_INVALID_CHARACTER;
470                 }
471 
472                 if(p == dst_limit)
473                 {
474                     return PARSE_BUFFER_TOO_SMALL_ERROR;
475                 }
476 
477                 *p++ = *s;
478             }
479             else
480             {
481                 return PARSESTRING_ERROR;
482             }
483 
484             continue;
485         }
486 
487         // only "
488 
489         if(c == '"')
490         {
491             if(!quoted)
492             {
493                 return PARSESTRING_ERROR;
494             }
495 
496             quoted = FALSE;
497 
498             break;
499         }
500 
501         if(!quoted)
502         {
503             if(isspace(c))
504             {
505                 break;
506             }
507         }
508 
509         if(p == dst_limit)
510         {
511             return PARSE_BUFFER_TOO_SMALL_ERROR;
512         }
513 
514         /* add character to temporary variable */
515 
516         *p++    = c;
517     }
518 
519     /* if unbalanaced qoutes --> stop */
520     if(quoted)
521     {
522         return PARSESTRING_ERROR;
523     }
524 
525     ya_result len    = p - dst;
526 
527     dst[0] = len - 1;
528 
529     /* Now it is really done the parsing */
530 
531     *srcp = s + 1;
532 
533     return len;
534 }
535 
536 /** \brief Copies and trim a string
537  *
538  *  Copies a string while remove head & tail spaces and reducing any blank run to a single space
539  *  The source does not need to be asciiz
540  *  The destination will be asciiz
541  *
542  *  @param[in] src      string
543  *  @param[in] src_len  size of the string (the zero sentinel is not checked)
544  *  @param[in] dst      buffer that will receive the output string
545  *  @param[in] dst_len  size of the buffer
546  *
547  *  @retval >= 0, the length of the dst string
548  *  @retval ERROR, dst_len was too small
549  */
550 
551 ya_result
parse_copy_trim_spaces(const char * src,u32 src_len,char * dst,u32 dst_len)552 parse_copy_trim_spaces(const char *src, u32 src_len, char *dst, u32 dst_len)
553 {
554     yassert(src != NULL && dst != NULL && dst_len > 0);
555 
556     const char *src_limit = src + src_len;
557     const char *dst_limit = dst + dst_len - 1;
558     const char *dst_org = dst;
559 
560     bool has_space = FALSE;
561 
562     *dst = '\0';
563 
564     while(src < src_limit && isspace(*src))
565     {
566         src++;
567     }
568 
569     while(src < src_limit)
570     {
571         char c = *src++;
572 
573         if(isspace(c))
574         {
575             has_space = TRUE;
576             continue;
577         }
578 
579         if(has_space)
580         {
581             *dst++ = ' ';
582 
583             if(dst == dst_limit)
584             {
585                 return PARSE_BUFFER_TOO_SMALL_ERROR;       /* buffer too small */
586             }
587         }
588 
589         has_space = FALSE;
590 
591         *dst++ = c;
592 
593         if(dst == dst_limit)
594         {
595             return PARSE_BUFFER_TOO_SMALL_ERROR;       /* buffer too small */
596         }
597     }
598 
599     *dst++ = '\0';
600 
601     return dst - dst_org;
602 }
603 
604 ya_result
parse_remove_spaces(char * inout_txt)605 parse_remove_spaces(char *inout_txt)
606 {
607     char *p = inout_txt;
608     char c;
609 
610     while((c = *inout_txt++) != '\0')
611     {
612         if(isspace(c))
613         {
614             continue;
615         }
616 
617         *p++ = c;
618     }
619 
620     *p = '\0';
621 
622     return p - inout_txt;
623 }
624 
625 /** \brief Skips a specific keyword from a string, case insensitive
626  *
627  *  Skips a specific keyword from a string,  case insensitive, skips white spaces before and after the match
628  *
629  *  @param[in] src          string
630  *  @param[in] src_len      size of the string (the zero sentinel is not checked)
631  *  @param[in] words        array of strings that will be looked for
632  *  @param[in] word_count   the size of the array
633  *  @param[in] matched_word a pointer to an integer that will hold the matched word index or -1 (can be NULL)
634  *
635  *  @retval >= 0, the number of bytes until the next word
636  *  @retval ERROR, dst_len was too small
637  */
638 
639 ya_result
parse_skip_word_specific(const char * src,u32 src_len,const char ** words,u32 word_count,s32 * matched_word)640 parse_skip_word_specific(const char *src, u32 src_len, const char **words, u32 word_count, s32 *matched_word)
641 {
642     const char *src_org = src;
643     const char *src_limit = src + src_len;
644 
645     // skip spaces
646 
647     src = parse_skip_spaces(src);
648 
649     // get the non-space
650 
651     const char *p = src;
652     while(p < src_limit && !isspace(*p))
653     {
654         p++;
655     }
656     // p == src_limit OR p is at the first blank after the word
657 
658     src_limit = p;
659 
660     src_len = src_limit - src;
661 
662     for(u32 i = 0; i < word_count; i++)
663     {
664         const char *ptr = src;
665         const char *word = words[i];
666 
667         u32 word_len = strlen(word);
668 
669         if(word_len != src_len)
670         {
671             continue;
672         }
673 
674         const char *word_limit = word + word_len;
675 
676         // lengths are the same
677 
678         while(word < word_limit)
679         {
680             if(tolower(*ptr++) != tolower(*word++))
681             {
682                 break;
683             }
684         }
685 
686         if(word == word_limit)
687         {
688             /* match */
689             if(matched_word != NULL)
690             {
691                 *matched_word = i;
692             }
693 
694             return src_limit - src_org;
695         }
696     }
697 
698     if(matched_word != NULL)
699     {
700         *matched_word = -1;
701     }
702 
703     return PARSEWORD_NOMATCH_ERROR; /* no match */
704 }
705 
706 const char *
parse_skip_until_chars(const char * src,const char * chars,u32 chars_len)707 parse_skip_until_chars(const char *src, const char *chars, u32 chars_len)
708 {
709 
710     for(;;)
711     {
712         char c = *src;
713 
714         if(c == '\0')
715         {
716             return src;
717         }
718 
719         for(u32 i = 0; i < chars_len; i++)
720         {
721             if(c == chars[i])
722             {
723                 return src;
724             }
725         }
726 
727         src++;
728     }
729 }
730 
731 /** \brief Skips a specific keyword from a string, case insensitive
732  *
733  *  Skips a specific keyword from a string,  case insensitive, skips white spaces before and after the match
734  *
735  *  @param[in] src          string
736  *  @param[in] src_len      size of the string (the zero sentinel is not checked)
737  *  @param[in] dst          buffer that will receive the binary version of the ip
738  *  @param[in] dst_len      the size of the buffer, minimum 4 for ipv4 and minimum 16 for ipv6
739  *
740  *  @retval >= 0, the number of bytes written (4 for ipv4 and 16 for ipv6)
741  *  @retval ERROR, dst_len was too small or the src was not a valid ip
742  */
743 
744 ya_result
parse_ip_address(const char * src,u32 src_len_,u8 * dst,u32 dst_len)745 parse_ip_address(const char *src, u32 src_len_, u8 *dst, u32 dst_len)
746 {
747     const char *new_src = parse_skip_spaces(src);
748     s32 src_len = (s32)src_len_;
749     src_len -= new_src - src;
750     bool expect_v6_or_more = FALSE;
751 
752     if(src_len <= 0)
753     {
754         return PARSEIP_ERROR;
755     }
756 
757     if(*new_src == '[') /// @note handle RFC 3986, section 3.2.2
758     {
759         expect_v6_or_more = TRUE;
760 
761         new_src++;
762         // IPv6+ delimiter
763         char *end = strchr(new_src, ']');
764         if(end == NULL)
765         {
766             return PARSEIP_ERROR;
767         }
768         src_len = end - new_src;
769     }
770 
771     char tmp[64];
772     src_len = MIN((size_t)src_len, sizeof(tmp)-1);
773     memcpy(tmp, src, src_len);
774     tmp[src_len] = '\0';
775 
776     if(dst_len < 4)
777     {
778         return PARSE_BUFFER_TOO_SMALL_ERROR;   /* dst too small */
779     }
780 
781     if(inet_pton(AF_INET, tmp, dst) == 1)
782     {
783         if(expect_v6_or_more)
784         {
785             return PARSEIP_ERROR;
786         }
787 
788         return 4;
789     }
790 
791     if(dst_len < 16)
792     {
793         return PARSE_BUFFER_TOO_SMALL_ERROR;   /* dst too small */
794     }
795 
796     if(inet_pton(AF_INET6, tmp, dst) == 1)
797     {
798         return 16;
799     }
800 
801     return PARSEIP_ERROR;
802 }
803 
804 s32
parse_next_token(char * dest,size_t dest_size,const char * from,const char * delim)805 parse_next_token(char *dest, size_t dest_size, const char *from, const char *delim)
806 {
807     const char *to = from;
808     for(;;)
809     {
810         char c = *to;
811 
812         if(c == '\0')
813         {
814             size_t len = to - from;
815 
816             if(len > dest_size)
817             {
818                 return PARSE_BUFFER_TOO_SMALL_ERROR;
819             }
820 
821             memcpy(dest, from, len);
822             dest[len] = '\0';
823             return len;
824         }
825 
826         // for every delimiter, test if c if such a delimiter
827         // if it is, then
828 
829         for(const char *d = delim; *d != 0; d++)
830         {
831             if(*d == c)
832             {
833                 // end of word
834                 size_t len = to - from;
835 
836                 if(len > dest_size)
837                 {
838                     return PARSE_BUFFER_TOO_SMALL_ERROR;
839                 }
840 
841                 memcpy(dest, from, len);
842                 dest[len] = '\0';
843                 return len;
844             }
845         }
846         ++to;
847     }
848 }
849 
850 /** @} */
851