1 /*
2  *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3  *  Copyright (C) 2007-2013 Sourcefire, Inc.
4  *
5  *  Authors: Tomasz Kojm, Nigel Horne, Török Edvin
6  *
7  *  Acknowledgements: cli_strcasestr() contains a public domain code from:
8  *                    http://unixpapa.com/incnote/string.html
9  *
10  *  This program is free software; you can redistribute it and/or modify
11  *  it under the terms of the GNU General Public License version 2 as
12  *  published by the Free Software Foundation.
13  *
14  *  This program is distributed in the hope that it will be useful,
15  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *  GNU General Public License for more details.
18  *
19  *  You should have received a copy of the GNU General Public License
20  *  along with this program; if not, write to the Free Software
21  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
22  *  MA 02110-1301, USA.
23  */
24 
25 #if HAVE_CONFIG_H
26 #include "clamav-config.h"
27 #endif
28 
29 #include "str.h"
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <limits.h>
35 #ifdef HAVE_STRINGS_H
36 #include <strings.h>
37 #endif
38 #include <ctype.h>
39 #include <sys/types.h>
40 
41 #include "clamav.h"
42 #include "others.h"
43 #include "matcher.h"
44 #include "jsparse/textbuf.h"
45 #include "platform.h"
46 
47 // clang-format off
48 
/*
 * Lookup table indexed by byte value. The entries for ASCII '0'-'9' hold 0-9
 * and the entries for 'A'-'F' and 'a'-'f' hold 10-15; every other entry is -1.
 */
static const int hex_chars[256] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
67 
68 // clang-format on
69 
cli_hex2int(const char c)70 static inline int cli_hex2int(const char c)
71 {
72     return hex_chars[(const unsigned char)c];
73 }
74 
cli_realhex2ui(const char * hex,uint16_t * ptr,unsigned int len)75 int cli_realhex2ui(const char *hex, uint16_t *ptr, unsigned int len)
76 {
77     uint16_t val;
78     unsigned int i;
79     int c;
80 
81     for (i = 0; i < len; i += 2) {
82         val = 0;
83 
84         if (hex[i] == '?' && hex[i + 1] == '?') {
85             val |= CLI_MATCH_IGNORE;
86 
87         } else if (hex[i + 1] == '?') {
88             if ((c = cli_hex2int(hex[i])) >= 0) {
89                 val = c << 4;
90             } else {
91                 return 0;
92             }
93             val |= CLI_MATCH_NIBBLE_HIGH;
94 
95         } else if (hex[i] == '?') {
96             if ((c = cli_hex2int(hex[i + 1])) >= 0) {
97                 val = c;
98             } else {
99                 return 0;
100             }
101             val |= CLI_MATCH_NIBBLE_LOW;
102 
103         } else if (hex[i] == '(') {
104             val |= CLI_MATCH_SPECIAL;
105 
106         } else {
107             if ((c = cli_hex2int(hex[i])) >= 0) {
108                 val = c;
109                 if ((c = cli_hex2int(hex[i + 1])) >= 0) {
110                     val = (val << 4) + c;
111                 } else {
112                     return 0;
113                 }
114             } else {
115                 return 0;
116             }
117         }
118 
119         *ptr++ = val;
120     }
121     return 1;
122 }
123 
/*
 * Allocate an array and fill it with the pattern values produced by
 * cli_realhex2ui() for the given hex string.
 *
 * Returns the array (to be released with free()), or NULL if the string has
 * odd length, the allocation fails, or the conversion fails.
 */
uint16_t *cli_hex2ui(const char *hex)
{
    unsigned int len = strlen(hex);
    uint16_t *out;

    if ((len % 2) != 0) {
        cli_errmsg("cli_hex2ui(): Malformed hexstring: %s (length: %u)\n", hex,
                   len);
        return NULL;
    }

    /* One value per character pair, plus one spare zeroed element. */
    out = cli_calloc((len / 2) + 1, sizeof(uint16_t));
    if (out == NULL)
        return NULL;

    if (!cli_realhex2ui(hex, out, len)) {
        free(out);
        return NULL;
    }

    return out;
}
147 
/*
 * Decode a hex string into a newly allocated byte string.
 *
 * Returns the decoded buffer (zero-terminated, to be released with free()),
 * or NULL if the input has odd length, the allocation fails, or the input
 * contains a non-hex character.
 */
char *cli_hex2str(const char *hex)
{
    const size_t len = strlen(hex);
    char *decoded;

    if ((len % 2) != 0) {
        cli_errmsg("cli_hex2str(): Malformed hexstring: %s (length: %u)\n", hex,
                   (unsigned)len);
        return NULL;
    }

    /* len / 2 decoded bytes plus a terminating zero (calloc zero-fills). */
    decoded = cli_calloc((len / 2) + 1, sizeof(char));
    if (decoded == NULL)
        return NULL;

    if (cli_hex2str_to(hex, decoded, len) != -1)
        return decoded;

    free(decoded);
    return NULL;
}
171 
/*
 * Decode len hex characters from hex into ptr, writing one byte for every
 * two input characters. No terminator is written.
 *
 * Returns 0 on success, -1 as soon as a non-hex character is met.
 */
int cli_hex2str_to(const char *hex, char *ptr, size_t len)
{
    size_t pos = 0;

    while (pos < len) {
        char byte;
        int low;
        const int high = cli_hex2int(hex[pos]);

        if (high < 0)
            return -1;

        low = cli_hex2int(hex[pos + 1]);
        if (low < 0)
            return -1;

        byte   = high;
        byte   = (byte << 4) + low;
        *ptr++ = byte;

        pos += 2;
    }

    return 0;
}
195 
/*
 * Convert an even-length hex string to an int.
 *
 * Digits are accumulated left to right; conversion stops silently at the
 * first non-hex character and the value gathered so far is returned.
 * Returns -1 (after logging) if the string length is odd.
 */
int cli_hex2num(const char *hex)
{
    int len    = strlen(hex);
    int result = 0;
    int pos;

    if ((len % 2) != 0) {
        cli_errmsg("cli_hex2num(): Malformed hexstring: %s (length: %d)\n", hex,
                   len);
        return -1;
    }

    for (pos = 0; pos < len; pos++) {
        const int digit = cli_hex2int(hex[pos]);

        if (digit < 0)
            break;

        result = (result << 4) | digit;
    }

    return result;
}
216 
/*
 * Convert a hex string of any length to an int.
 *
 * Even-length input is handed directly to cli_hex2num(). Odd-length input is
 * copied into a temporary buffer behind a leading '0' digit, which gives it
 * the even length cli_hex2num() requires without changing its value.
 *
 * Returns the result of cli_hex2num(), or -1 if the temporary buffer cannot
 * be allocated.
 */
int cli_xtoi(const char *hex)
{
    int len, val;
    char *hexbuf;

    len = strlen(hex);

    if (len % 2 == 0)
        return cli_hex2num(hex);

    /* len digits + the padding '0' + terminating zero (calloc zero-fills). */
    hexbuf = cli_calloc(len + 2, sizeof(char));
    if (hexbuf == NULL) {
        cli_errmsg("cli_xtoi(): cli_malloc fails.\n");
        return -1;
    }

    /*
     * The padding digit must be written explicitly: leaving hexbuf[0] as the
     * zero byte from calloc would make the buffer an empty string and every
     * odd-length input would convert to 0.
     */
    hexbuf[0] = '0';
    memcpy(hexbuf + 1, hex, len);

    val = cli_hex2num(hexbuf);
    free(hexbuf);
    return val;
}
239 
/*
 * Encode len bytes of string as lowercase hexadecimal text.
 *
 * Returns a newly allocated, zero-terminated string of 2 * len characters,
 * or NULL if the allocation fails.
 */
char *cli_str2hex(const char *string, unsigned int len)
{
    const char *digits = "0123456789abcdef";
    char *encoded;
    char *out;
    unsigned int pos;

    encoded = cli_calloc(2 * len + 1, sizeof(char));
    if (encoded == NULL)
        return NULL;

    out = encoded;
    for (pos = 0; pos < len; pos++) {
        /* High nibble first, then low nibble. */
        *out++ = digits[(string[pos] >> 4) & 0xf];
        *out++ = digits[string[pos] & 0xf];
    }

    return encoded;
}
257 
/*
 * Case-insensitive "ends with" test.
 *
 * Returns 1 if haystack ends with needle (ignoring case), 0 otherwise,
 * including when needle is longer than haystack.
 */
int cli_strbcasestr(const char *haystack, const char *needle)
{
    const int hay_len    = strlen(haystack);
    const int needle_len = strlen(needle);

    if (needle_len > hay_len)
        return 0;

    /* Compare the needle against the tail of the haystack. */
    return strcasecmp(haystack + (hay_len - needle_len), needle) == 0;
}
273 
274 /**
275  * @brief Remove trailing NL and CR characters from the end of the given string.
276  *
277  * @param string    string input
278  * @return int      the new length of the string (ala strlen)
279  * @return int      -1 if string was NULL.
280  */
int cli_chomp(char *string)
{
    int last;

    if (string == NULL)
        return -1;

    /* Walk back from the final character, erasing every trailing CR/LF. */
    for (last = (int)strlen(string) - 1; last >= 0; last--) {
        if (string[last] != '\n' && string[last] != '\r')
            break;
        string[last] = '\0';
    }

    /* last now indexes the final kept character (or is -1 if none remain). */
    return last + 1;
}
300 
301 /*
 * char *cli_strtok(const char *line, int fieldno, const char *delim)
303  * Return a copy of field <fieldno> from the string <line>, where
304  * fields are delimited by any char from <delim>, or NULL if <line>
305  * doesn't have <fieldno> fields or not enough memory is available.
306  * The caller has to free() the result afterwards.
307  */
char *cli_strtok(const char *line, int fieldno, const char *delim)
{
    int seen = 0;
    int start, end;
    size_t field_len;
    char *copy;

    /* Advance to the first character of field number <fieldno>; a run of
     * consecutive delimiters counts as a single separator. */
    start = 0;
    while (line[start] && seen != fieldno) {
        if (strchr(delim, line[start])) {
            seen++;
            while (line[start + 1] && strchr(delim, line[start + 1]))
                start++;
        }
        start++;
    }

    if (line[start] == '\0') {
        /* Ran out of input before reaching the requested field. */
        return NULL;
    }

    /* Find where the field ends. */
    end = start;
    while (line[end] && !strchr(delim, line[end]))
        end++;

    if (end == start)
        return NULL;

    field_len = end - start;
    copy      = cli_malloc(field_len + 1);
    if (copy == NULL) {
        cli_errmsg("cli_strtok: Unable to allocate memory for buffer\n");
        return NULL;
    }

    memcpy(copy, line + start, field_len);
    copy[field_len] = '\0';

    return copy;
}
345 
346 /*
347  * Like cli_strtok, but this puts the output into a given argument, rather
348  * than allocating fresh memory
349  * Returns NULL for error, or a pointer to output
350  * njh@bandsman.co.uk
351  */
char *cli_strtokbuf(const char *input, int fieldno, const char *delim,
                    char *output)
{
    int seen = 0;
    int start, end;
    size_t field_len;

    /* Advance to the first character of field number <fieldno>; a run of
     * consecutive delimiters counts as a single separator. */
    start = 0;
    while (input[start] && seen != fieldno) {
        if (strchr(delim, input[start])) {
            seen++;
            while (input[start + 1] && strchr(delim, input[start + 1]))
                start++;
        }
        start++;
    }

    if (!input[start]) {
        /* Ran out of input before reaching the requested field. */
        return NULL;
    }

    /* Find where the field ends. */
    end = start;
    while (input[end] && !strchr(delim, input[end]))
        end++;

    if (end == start)
        return NULL;

    /* The caller's buffer must have room for the field plus a terminator. */
    field_len = end - start;
    memcpy(output, input + start, field_len);
    output[field_len] = '\0';

    return output;
}
384 
/*
 * Find the first occurrence of the ns-byte needle inside the hs-byte
 * haystack (both are raw memory, not zero-terminated strings).
 *
 * Returns a pointer to the match inside haystack, or NULL if there is none,
 * if either length is zero, or if the needle is longer than the haystack.
 */
const char *cli_memstr(const char *haystack, size_t hs, const char *needle, size_t ns)
{
    size_t pos, miss_step, hit_step, last_start;

    if (hs == 0 || ns == 0 || ns > hs)
        return NULL;

    if (haystack == needle)
        return haystack;

    if (ns == 1)
        return memchr(haystack, needle[0], hs);

    /*
     * The scan keys on the needle's second byte. How far it may advance after
     * a mismatch or a failed candidate depends on whether the needle's first
     * two bytes are equal.
     */
    if (needle[0] == needle[1]) {
        miss_step = 2;
        hit_step  = 1;
    } else {
        miss_step = 1;
        hit_step  = 2;
    }

    last_start = hs - ns;
    pos        = 0;
    while (pos <= last_start) {
        if (haystack[pos + 1] != needle[1]) {
            pos += miss_step;
            continue;
        }

        if (haystack[pos] == needle[0] &&
            memcmp(needle + 2, haystack + pos + 2, ns - 2) == 0)
            return &haystack[pos];

        pos += hit_step;
    }

    return NULL;
}
418 
/*
 * Copy source (including its terminator) to dest and return a pointer to the
 * terminating zero written in dest, so further text can be appended there.
 * Returns NULL, after logging, if either argument is NULL. (by NJH)
 */
char *cli_strrcpy(char *dest, const char *source)
{
    if (dest == NULL || source == NULL) {
        cli_errmsg("cli_strrcpy: NULL argument\n");
        return NULL;
    }

    while (*source != '\0')
        *dest++ = *source++;

    *dest = '\0';

    return dest;
}
432 
/*
 * Case-insensitive substring search.
 *
 * Candidate positions are found with strcspn(), looking for either case of
 * the needle's first character; each candidate is then checked with
 * strncasecmp().
 *
 * Returns a pointer to the first match inside haystack, or NULL if there is
 * none. An empty needle yields NULL, because the candidate set is then empty.
 */
const char *__cli_strcasestr(const char *haystack, const char *needle)
{
    size_t l;
    char f[3];
    const size_t strlen_a = strlen(haystack);
    const size_t strlen_b = strlen(needle);

    /*
     * The <ctype.h> functions require a value representable as unsigned char
     * (or EOF); passing a plain char that happens to be negative is undefined.
     */
    f[0] = tolower((unsigned char)*needle);
    f[1] = toupper((unsigned char)*needle);
    f[2] = '\0';

    for (l = strcspn(haystack, f); l != strlen_a; l += strcspn(haystack + l + 1, f) + 1) {
        if (strncasecmp(haystack + l, needle, strlen_b) == 0)
            return (haystack + l);
    }

    return (NULL);
}
448 
/*
 * Duplicate at most n characters of s into a newly malloc'ed,
 * zero-terminated string.
 *
 * Returns the copy (to be released with free()), or NULL if s is NULL or the
 * allocation fails.
 */
char *__cli_strndup(const char *s, size_t n)
{
    size_t copy_len;
    char *dup;

    if (s == NULL)
        return NULL;

    copy_len = CLI_STRNLEN(s, n);

    dup = malloc(copy_len + 1);
    if (dup == NULL)
        return NULL;

    memcpy(dup, s, copy_len);
    dup[copy_len] = '\0';

    return dup;
}
469 
/*
 * Length of s, looking at no more than the first n bytes.
 * Returns the number of bytes before the first zero byte, or n if none is
 * found within the first n bytes.
 */
size_t __cli_strnlen(const char *s, size_t n)
{
    const char *p = s;

    while (n > 0 && *p != '\0') {
        p++;
        n--;
    }

    return (size_t)(p - s);
}
477 
478 /*
479  * @brief Find the first occurrence of find in s.
480  *
481  * The search is limited to the first slen characters of s.
482  *
483  * Copyright (c) 2001 Mike Barcroft <mike@FreeBSD.org>
484  * Copyright (c) 1990, 1993
485  * The Regents of the University of California.  All rights reserved.
486  *
487  * This code is derived from software contributed to Berkeley by
488  * Chris Torek.
489  *
490  * Copyright (c) 1990 The Regents of the University of California.
491  * All rights reserved.
492  *
493  * @param s      haystack
494  * @param find   needle
495  * @param slen   haystack length
496  * @return char* Address of the needle, if found, else NULL.
497  */
char *__cli_strnstr(const char *s, const char *find, size_t slen)
{
    const char first = *find;
    size_t rest_len;

    /* An empty needle matches at the start of the haystack. */
    if (first == '\0')
        return ((char *)s);

    /* Everything after the needle's first character. */
    find++;
    rest_len = strlen(find);

    for (;;) {
        /* Scan forward to the next occurrence of the first character,
         * giving up at the end of the string or of the slen window. */
        for (;;) {
            char cur;

            if (slen < 1)
                return (NULL);
            slen--;

            cur = *s++;
            if (cur == '\0')
                return (NULL);
            if (cur == first)
                break;
        }

        /* The rest of the needle must fit in what remains of the window. */
        if (rest_len > slen)
            return (NULL);

        /* s already points one past the matched first character. */
        if (strncmp(s, find, rest_len) == 0)
            return ((char *)(s - 1));
    }
}
517 
/*
 * Split buffer in place on delim into at most token_count tokens.
 *
 * Each delimiter that ends a token is overwritten with '\0' and tokens[]
 * receives pointers into buffer. If the input runs out before token_count
 * tokens are found, the unused tokens[] slots are set to NULL.
 *
 * Returns the number of tokens stored.
 */
size_t cli_strtokenize(char *buffer, const char delim, const size_t token_count,
                       const char **tokens)
{
    size_t found = 0;
    char *cursor = buffer;

    while (found < token_count) {
        char *sep;

        tokens[found++] = cursor;

        sep = strchr(cursor, delim);
        if (sep == NULL) {
            /* Last token reached: blank out the remaining slots. */
            size_t rest;

            for (rest = found; rest < token_count; rest++)
                tokens[rest] = NULL;
            break;
        }

        *sep   = '\0';
        cursor = sep + 1;
    }

    return found;
}
538 
539 /**
540  * @brief The strntol() function converts the string in str to a long value.
541  * Modifications made to validate the length of the string for non-null term
542  * strings.
543  *
544  * Copyright (c) 1990 The Regents of the University of California.
545  * All rights reserved.
546  *
547  * @param nptr          Pointer to start of string.
548  * @param n             Max length of buffer in bytes.
549  * @param[out] endptr   [optional] If endptr is not NULL, strtol() stores the
550  * address of the first invalid character in *endptr. If there were no digits at
551  * all, however, strtol() stores the original value of str in *endptr. Nota
552  * Bene:  If the buffer is non-null terminated and the number comprises the
553  * entire buffer, endptr will point past the end of the buffer, and the caller
554  * should check if endptr >= nptr + n.
555  *
 * @param base          The conversion is done according to the given base,
557  * which must be between 2 and 36 inclusive, or be the special value 0.
558  * @return long         The signed long value.
559  */
long cli_strntol(const char *nptr, size_t n, char **endptr, register int base)
{
    register const char *s     = nptr;
    register unsigned long acc = 0;
    register int c;
    register unsigned long cutoff;
    register int neg = 0, any = 0, cutlim;

    if (0 == n) {
        goto done;
    }

    /*
     * Skip white space and pick up leading +/- sign if any.
     * If base is 0, allow 0x for hex and 0 for octal, else
     * assume decimal; if base is already 16, allow 0x.
     *
     * Characters are read as unsigned char because the <ctype.h> functions
     * are undefined for negative arguments other than EOF.
     */
    do {
        c = (unsigned char)*s;
    } while (isspace(c) && (++s < nptr + n));

    if (s >= nptr + n) {
        goto done;
    }

    if (c == '-' || c == '+') {
        neg = (c == '-');
        s++;
        if (s >= nptr + n) {
            goto done;
        }
        /*
         * Reload c with the character after the sign; the prefix checks
         * below look at c, and would never see a leading "0" on signed
         * input if c still held the sign.
         */
        c = (unsigned char)*s;
    }

    if (base == 0 || base == 16) {
        if (c == '0' && (s + 1 < nptr + n) &&
            (*(s + 1) == 'x' || *(s + 1) == 'X')) {
            if (s + 2 >= nptr + n) {
                goto done;
            }
            s += 2;
            base = 16;
        }
    }

    if (base == 0)
        base = c == '0' ? 8 : 10;

    /*
     * Compute the cutoff value between legal numbers and illegal
     * numbers.  That is the largest legal value, divided by the
     * base.  An input number that is greater than this value, if
     * followed by a legal input character, is too big.  One that
     * is equal to this value may be valid or not; the limit
     * between valid and invalid numbers is then based on the last
     * digit.  For instance, if the range for longs is
     * [-2147483648..2147483647] and the input base is 10,
     * cutoff will be set to 214748364 and cutlim to either
     * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
     * a value > 214748364, or equal but the next digit is > 7 (or 8),
     * the number is too big, and we will return a range error.
     *
     * Set any if any `digits' consumed; make it negative to indicate
     * overflow.
     */
    cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
    cutlim = cutoff % (unsigned long)base;
    cutoff /= (unsigned long)base;
    for (acc = 0, any = 0; s < nptr + n; s++) {
        c = (unsigned char)*s;

        if (isdigit(c))
            c -= '0';
        else if (isalpha(c))
            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
        else
            break;
        if (c >= base)
            break;
        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
            any = -1;
        else {
            any = 1;
            acc *= base;
            acc += c;
        }
    }
    if (any < 0) {
        acc   = neg ? LONG_MIN : LONG_MAX;
        errno = ERANGE;
    } else if (neg)
        acc = -acc;

done:
    /* With no digits consumed, report the original start of the input. */
    if (endptr != 0)
        *endptr = (char *)(any ? s : nptr);
    return (acc);
}
662 
663 /**
664  * @brief The strntoul() function converts the string in str to an unsigned long
665  * value. Modifications made to validate the length of the string for non-null
666  * term strings.
667  *
668  * Copyright (c) 1990 The Regents of the University of California.
669  * All rights reserved.
670  *
671  * @param nptr          Pointer to start of string.
672  * @param n             Max length of buffer in bytes.
673  * @param[out] endptr   [optional] If endptr is not NULL, strtol() stores the
674  * address of the first invalid character in *endptr. If there were no digits at
675  * all, however, strtol() stores the original value of str in *endptr. Nota
676  * Bene:  If the buffer is non-null terminated and the number comprises the
677  * entire buffer, endptr will point past the end of the buffer, and the caller
678  * should check if endptr >= nptr + n.
679  *
 * @param base          The conversion is done according to the given base,
681  * which must be between 2 and 36 inclusive, or be the special value 0.
682  * @return unsigned long The unsigned long value.
683  */
unsigned long cli_strntoul(const char *nptr, size_t n, char **endptr,
                           register int base)
{
    register const char *s     = nptr;
    register unsigned long acc = 0;
    register int c;
    register unsigned long cutoff;
    register int neg = 0, any = 0, cutlim;

    /* An empty buffer holds nothing to read; do not touch *nptr. */
    if (0 == n) {
        goto done;
    }

    /*
     * See cli_strntol for comments as to the logic used.
     *
     * Characters are read as unsigned char because the <ctype.h> functions
     * are undefined for negative arguments other than EOF.
     */
    do {
        c = (unsigned char)*s;
    } while (isspace(c) && (++s < nptr + n));

    if (s >= nptr + n) {
        goto done;
    }

    if (c == '-' || c == '+') {
        neg = (c == '-');
        s++;
        if (s >= nptr + n) {
            goto done;
        }
        /*
         * Reload c with the character after the sign; the prefix checks
         * below look at c, and would never see a leading "0" on signed
         * input if c still held the sign.
         */
        c = (unsigned char)*s;
    }

    if (base == 0 || base == 16) {
        if (c == '0' && (s + 1 < nptr + n) &&
            (*(s + 1) == 'x' || *(s + 1) == 'X')) {
            if (s + 2 >= nptr + n) {
                goto done;
            }
            s += 2;
            base = 16;
        }
    }
    if (base == 0)
        base = c == '0' ? 8 : 10;

    /* Largest accumulator value that can still take one more digit, and the
     * largest final digit allowed when the accumulator equals it. */
    cutoff = (unsigned long)ULONG_MAX / (unsigned long)base;
    cutlim = (unsigned long)ULONG_MAX % (unsigned long)base;
    for (acc = 0, any = 0; s < nptr + n; s++) {
        c = (unsigned char)*s;

        if (isdigit(c))
            c -= '0';
        else if (isalpha(c))
            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
        else
            break;
        if (c >= base)
            break;
        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
            any = -1;
        else {
            any = 1;
            acc *= base;
            acc += c;
        }
    }
    if (any < 0) {
        acc   = ULONG_MAX;
        errno = ERANGE;
    } else if (neg)
        acc = -acc;

done:
    /* With no digits consumed, report the original start of the input. */
    if (endptr != 0)
        *endptr = (char *)(any ? s : nptr);
    return (acc);
}
763 
764 /**
765  * @brief 	cli_strntol_wrap() converts the string in str to a long value.
766  *
767  * Wrapper for cli_strntol() that provides incentive to check for failure.
768  *
769  * @param buf               Pointer to start of string.
770  * @param buf_size 			Max length of buffer to convert to
771  * integer.
 * @param fail_at_nondigit  If 1, fail out if a non-digit character is found
773  * before the end of the buffer. If 0, non-digit character represents end of
774  * number and is not a failure.
775  * @param base              The conversion is done according to the given base,
776  * which must be between 2 and 36 inclusive, or be the special value 0.
777  * @param[out] result 	    Long integer value of ascii number.
778  * @return CL_SUCCESS       Success
779  * @return CL_EPARSE        Failure
780  */
cli_strntol_wrap(const char * buf,size_t buf_size,int fail_at_nondigit,int base,long * result)781 cl_error_t cli_strntol_wrap(const char *buf, size_t buf_size,
782                             int fail_at_nondigit, int base, long *result)
783 {
784     char *endptr = NULL;
785     long num;
786 
787     if (buf_size == 0 || !buf || !result) {
788         /* invalid parameter */
789         return CL_EPARSE;
790     }
791     errno = 0;
792     num   = cli_strntol(buf, buf_size, &endptr, base);
793     if ((num == LONG_MIN || num == LONG_MAX) && errno == ERANGE) {
794         /* under- or overflow */
795         return CL_EPARSE;
796     }
797     if (endptr == buf) {
798         /* no digits */
799         return CL_EPARSE;
800     }
801     if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
802         /* non-digit encountered */
803         return CL_EPARSE;
804     }
805     /* success */
806     *result = num;
807     return CL_SUCCESS;
808 }
809 
810 /**
 * @brief 	cli_strntoul_wrap() converts the string in str to an unsigned long value.
812  *
813  * Wrapper for cli_strntoul() that provides incentive to check for failure.
814  *
815  * @param buf               Pointer to start of string.
816  * @param buf_size 			Max length of buffer to convert to
817  * integer.
 * @param fail_at_nondigit  If 1, fail out if a non-digit character is found
819  * before the end of the buffer. If 0, non-digit character represents end of
820  * number and is not a failure.
821  * @param base              The conversion is done according to the given base,
822  * which must be between 2 and 36 inclusive, or be the special value 0.
823  * @param[out] result 	    Unsigned long integer value of ascii number.
824  * @return CL_SUCCESS       Success
825  * @return CL_EPARSE        Failure
826  */
cli_strntoul_wrap(const char * buf,size_t buf_size,int fail_at_nondigit,int base,unsigned long * result)827 cl_error_t cli_strntoul_wrap(const char *buf, size_t buf_size,
828                              int fail_at_nondigit, int base,
829                              unsigned long *result)
830 {
831     char *endptr = NULL;
832     unsigned long num;
833 
834     if (buf_size == 0 || !buf || !result) {
835         /* invalid parameter */
836         return CL_EPARSE;
837     }
838     errno = 0;
839     num   = cli_strntoul(buf, buf_size, &endptr, base);
840     if ((num == ULONG_MAX) && (errno == ERANGE)) {
841         /* under- or overflow */
842         return CL_EPARSE;
843     }
844     if (endptr == buf) {
845         /* no digits */
846         return CL_EPARSE;
847     }
848     if (fail_at_nondigit && (endptr < (buf + buf_size)) && (*endptr != '\0')) {
849         /* non-digit encountered */
850         return CL_EPARSE;
851     }
852     /* success */
853     *result = num;
854     return CL_SUCCESS;
855 }
856 
/*
 * Split buffer in place on delim into at most token_count tokens, like
 * cli_strtokenize(), except that delimiters between a pair of unescaped '/'
 * characters are not treated as separators.
 *
 * '/' handling only starts once more than token_skip tokens have been
 * started, and a '/' directly preceded by a backslash does not toggle the
 * "inside /.../" state.
 *
 * Each delimiter that ends a token is overwritten with '\0' and tokens[]
 * receives pointers into buffer. If the input runs out before token_count
 * tokens are found, the unused tokens[] slots are set to NULL.
 *
 * Returns the number of tokens stored.
 */
size_t cli_ldbtokenize(char *buffer, const char delim, const size_t token_count,
                       const char **tokens, size_t token_skip)
{
    const char *const start = buffer;
    size_t tokens_found, i;
    int within_pcre = 0;

    for (tokens_found = 0; tokens_found < token_count;) {
        tokens[tokens_found++] = buffer;

        while (*buffer != '\0') {
            if (!within_pcre && (*buffer == delim))
                break;
            else if ((tokens_found > token_skip) && (*buffer == '/') &&
                     /* The first character has no predecessor to inspect;
                      * reading buffer[-1] there would be out of bounds. */
                     ((buffer == start) || (*(buffer - 1) != '\\')))
                within_pcre = !within_pcre;
            buffer++;
        }

        if (*buffer != '\0') {
            *buffer++ = '\0';
        } else {
            /* End of input: blank out the remaining slots. */
            i = tokens_found;
            while (i < token_count)
                tokens[i++] = NULL;
            return tokens_found;
        }
    }
    return tokens_found;
}
886 
/*
 * Check whether str consists solely of the ASCII digits '0'-'9'.
 * Returns 1 if so (including for an empty string), 0 if any other character
 * is present or str is NULL.
 */
int cli_isnumber(const char *str)
{
    const char *p;

    if (str == NULL)
        return 0;

    for (p = str; *p != '\0'; p++) {
        if (*p < '0' || *p > '9')
            return 0;
    }

    return 1;
}
899 
900 /* encodes the unicode character as utf-8 */
/*
 * Write the UTF-8 encoding of the 16-bit code unit u to dst and return the
 * number of bytes written (1 to 3). A zero code unit is written as the byte
 * 0x01 so that the output never contains an embedded terminator.
 */
static inline size_t output_utf8(uint16_t u, unsigned char *dst)
{
    if (u == 0) {
        dst[0] = 0x1; /* don't add \0, add \1 instead */
        return 1;
    }

    if (u < 0x80) {
        /* Single byte: 0zzzzzzz */
        dst[0] = u & 0xff;
        return 1;
    }

    if (u < 0x800) {
        /* Two bytes: 110yyyyy 10zzzzzz */
        dst[0] = 0xc0 | (u >> 6);
        dst[1] = 0x80 | (u & 0x3f);
        return 2;
    }

    /* Three bytes: 1110xxxx 10yyyyyy 10zzzzzz.
     * u < 0x10000 because only 16-bit units are handled; values in the range
     * 0xd800 - 0xdfff aren't valid, but that is not checked here. */
    dst[0] = 0xe0 | (u >> 12);
    dst[1] = 0x80 | ((u >> 6) & 0x3f);
    dst[2] = 0x80 | (u & 0x3f);
    return 3;
}
924 
925 /* javascript-like unescape() function */
/*
 * Decode "%XX" and "%uXXXX" escapes in str into a newly allocated string.
 *
 * "%uXXXX" (four hex digits) is written as the UTF-8 encoding produced by
 * output_utf8(); "%XX" (two hex digits) is written as the single byte 0xXX.
 * A '%' that starts neither form, and every other character, is copied
 * through unchanged. A decoded zero byte is stored as 0x01 so the result
 * never contains an embedded terminator.
 *
 * Returns the decoded, zero-terminated string (resized with cli_realloc2 to
 * the decoded length), or NULL if the initial allocation fails.
 */
char *cli_unescape(const char *str)
{
    char *R;
    size_t k, i = 0; /* k: read index into str, i: write index into R */
    const size_t len = strlen(str);
    /* unescaped string is at most as long as original,
     * it will usually be shorter */
    R = cli_malloc(len + 1);
    if (!R) {
        cli_errmsg("cli_unescape: Unable to allocate memory for string\n");
        return NULL;
    }
    for (k = 0; k < len; k++) {
        unsigned char c = str[k];
        if (str[k] == '%') {
            /* Not a complete "%uXXXX" sequence: fall back to trying "%XX". */
            if (k + 5 >= len || str[k + 1] != 'u' || !isxdigit(str[k + 2]) ||
                !isxdigit(str[k + 3]) || !isxdigit(str[k + 4]) ||
                !isxdigit(str[k + 5])) {
                if (k + 2 < len && isxdigit(str[k + 1]) && isxdigit(str[k + 2])) {
                    c = ((cli_hex2int(str[k + 1]) < 0 ? 0 : cli_hex2int(str[k + 1]))
                         << 4) |
                        cli_hex2int(str[k + 2]);
                    /* Skip the two hex digits just consumed. */
                    k += 2;
                }
            } else {
                /* "%uXXXX": assemble the 16-bit value from its four nibbles. */
                uint16_t u =
                    ((cli_hex2int(str[k + 2]) < 0 ? 0 : cli_hex2int(str[k + 2]))
                     << 12) |
                    ((cli_hex2int(str[k + 3]) < 0 ? 0 : cli_hex2int(str[k + 3])) << 8) |
                    ((cli_hex2int(str[k + 4]) < 0 ? 0 : cli_hex2int(str[k + 4])) << 4) |
                    cli_hex2int(str[k + 5]);
                i += output_utf8(u, (unsigned char *)&R[i]);
                /* Skip 'u' and the four hex digits. */
                k += 5;
                continue;
            }
        }
        if (!c)
            c = 1; /* don't add \0 */
        R[i++] = c;
    }
    R[i++] = '\0';
    /* Resize the buffer to the decoded length, terminator included. */
    R      = cli_realloc2(R, i);
    return R;
}
970 
971 /* handle javascript's escape sequences inside strings */
/*
 * Append len characters of str to buf, decoding backslash escape sequences.
 *
 * Recognized escapes: \0 \b \t \n \v \f \r, \xHH (two hex digits) and \uHHHH
 * (four hex digits, appended as the UTF-8 bytes produced by output_utf8()).
 * Any other escaped character is appended as itself. A resulting zero byte is
 * stored as 0x01 instead.
 *
 * Returns 0 on success, or -1 if textbuffer_ensure_capacity() or
 * textbuffer_putc() reports failure.
 */
int cli_textbuffer_append_normalize(struct text_buffer *buf, const char *str,
                                    size_t len)
{
    size_t i;
    for (i = 0; i < len; i++) {
        char c = str[i];
        /* A backslash that is the last character is appended literally. */
        if (c == '\\' && i + 1 < len) {
            i++;
            switch (str[i]) {
                case '0':
                    c = 0;
                    break;
                case 'b':
                    c = 8;
                    break;
                case 't':
                    c = 9;
                    break;
                case 'n':
                    c = 10;
                    break;
                case 'v':
                    c = 11;
                    break;
                case 'f':
                    c = 12;
                    break;
                case 'r':
                    c = 13;
                    break;
                case 'x':
                    /* Decode only when two more characters are available;
                     * otherwise c stays '\\'. The index advances by two in
                     * both cases. */
                    if (i + 2 < len)
                        c = ((cli_hex2int(str[i + 1]) < 0 ? 0 : cli_hex2int(str[i + 1]))
                             << 4) |
                            cli_hex2int(str[i + 2]);
                    i += 2;
                    break;
                case 'u':
                    if (i + 4 < len) {
                        /* Assemble the 16-bit value from its four nibbles. */
                        uint16_t u =
                            ((cli_hex2int(str[i + 1]) < 0 ? 0 : cli_hex2int(str[i + 1]))
                             << 12) |
                            ((cli_hex2int(str[i + 2]) < 0 ? 0 : cli_hex2int(str[i + 2]))
                             << 8) |
                            ((cli_hex2int(str[i + 3]) < 0 ? 0 : cli_hex2int(str[i + 3]))
                             << 4) |
                            cli_hex2int(str[i + 4]);
                        /* Reserve room before writing the UTF-8 bytes
                         * (output_utf8 writes at most 3) directly into the buffer. */
                        if (textbuffer_ensure_capacity(buf, 4) == -1)
                            return -1;
                        buf->pos += output_utf8(u, (unsigned char *)&buf->data[buf->pos]);
                        i += 4;
                        continue;
                    }
                    /* Too few characters left: c stays '\\' and is appended below. */
                    break;
                default:
                    c = str[i];
                    break;
            }
        }
        if (!c)
            c = 1; /* we don't insert \0 */
        if (textbuffer_putc(buf, c) == -1)
            return -1;
    }
    return 0;
}
1038 
/*
 * Replace, in place, each of the first len characters of str with the
 * numeric value (0-15) of the hex digit it represents.
 *
 * Returns 0 on success, or 1 at the first character that is not a hex digit
 * (characters before it have already been converted).
 */
int cli_hexnibbles(char *str, int len)
{
    int pos = 0;

    while (pos < len) {
        const int nibble = cli_hex2int(str[pos]);

        if (nibble < 0)
            return 1;

        str[pos++] = nibble;
    }

    return 0;
}
1050 
cli_basename(const char * filepath,size_t filepath_len,char ** filebase)1051 cl_error_t cli_basename(const char *filepath, size_t filepath_len,
1052                         char **filebase)
1053 {
1054     cl_error_t status = CL_EARG;
1055     const char *index = NULL;
1056 
1057     if (NULL == filepath || NULL == filebase || filepath_len == 0) {
1058         cli_dbgmsg("cli_basename: Invalid arguments.\n");
1059         goto done;
1060     }
1061 
1062     index = filepath + filepath_len - 1;
1063 
1064     while (index > filepath) {
1065         if (index[0] == PATHSEP[0])
1066             break;
1067         index--;
1068     }
1069     if ((index != filepath) || (index[0] == PATHSEP[0]))
1070         index++;
1071 
1072     if (0 == CLI_STRNLEN(index, filepath_len - (index - filepath))) {
1073         cli_dbgmsg("cli_basename: Provided path does not include a file name.\n");
1074         status = CL_EFORMAT;
1075         goto done;
1076     }
1077 
1078     *filebase = CLI_STRNDUP(index, filepath_len - (index - filepath));
1079     if (NULL == *filebase) {
1080         cli_errmsg("cli_basename: Failed to allocate memory for file basename.\n");
1081         status = CL_EMEM;
1082         goto done;
1083     }
1084 
1085     status = CL_SUCCESS;
1086 
1087 done:
1088     return status;
1089 }
1090