1 /*
2  * This file is part of mpv.
3  *
4  * mpv is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * mpv is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <string.h>
19 #include <strings.h>
20 #include <assert.h>
21 #include <stdarg.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 
25 #include "mpv_talloc.h"
26 
27 #include "common/common.h"
28 #include "misc/ctype.h"
29 #include "bstr.h"
30 
// Byte-wise comparison of two bstrs.
// The common prefix is compared with memcmp(); if it is equal, the shorter
// string sorts first.
// Returns 0 if both are equal, a negative value if str1 sorts before str2,
// and a positive value otherwise.
int bstrcmp(struct bstr str1, struct bstr str2)
{
    // memcmp() is skipped entirely if either side is empty.
    if (str1.len && str2.len) {
        int order = memcmp(str1.start, str2.start, MPMIN(str1.len, str2.len));
        if (order)
            return order;
    }

    // Common prefix is identical: decide by length.
    if (str1.len == str2.len)
        return 0;
    return str1.len > str2.len ? 1 : -1;
}
47 
// Like bstrcmp(), but the common prefix is compared with strncasecmp(),
// i.e. ignoring case.
// Returns 0 if both are equal, a negative value if str1 sorts before str2,
// and a positive value otherwise.
int bstrcasecmp(struct bstr str1, struct bstr str2)
{
    // strncasecmp() is skipped entirely if either side is empty.
    if (str1.len && str2.len) {
        int order = strncasecmp(str1.start, str2.start,
                                MPMIN(str1.len, str2.len));
        if (order)
            return order;
    }

    // Common prefix is identical: decide by length.
    if (str1.len == str2.len)
        return 0;
    return str1.len > str2.len ? 1 : -1;
}
64 
// Return the index of the first byte in str that equals c, or -1 if there is
// no such byte.
int bstrchr(struct bstr str, int c)
{
    int pos = 0;
    while (pos < str.len) {
        if (str.start[pos] == c)
            return pos;
        pos++;
    }
    return -1;
}
72 
// Return the index of the last byte in str that equals c, or -1 if there is
// no such byte.
int bstrrchr(struct bstr str, int c)
{
    int pos = str.len - 1;
    while (pos >= 0) {
        if (str.start[pos] == c)
            return pos;
        pos--;
    }
    return -1;
}
80 
// Return the length of the initial part of str that contains no byte for
// which strchr(reject, byte) succeeds. If no such byte exists, this is
// str.len.
int bstrcspn(struct bstr str, const char *reject)
{
    int n = 0;
    while (n < str.len && !strchr(reject, str.start[n]))
        n++;
    return n;
}
89 
// Return the length of the initial part of str that consists only of bytes
// for which strchr(accept, byte) succeeds. If all bytes qualify, this is
// str.len.
int bstrspn(struct bstr str, const char *accept)
{
    int n = 0;
    while (n < str.len && strchr(accept, str.start[n]))
        n++;
    return n;
}
98 
bstr_find(struct bstr haystack,struct bstr needle)99 int bstr_find(struct bstr haystack, struct bstr needle)
100 {
101     for (int i = 0; i < haystack.len; i++)
102         if (bstr_startswith(bstr_splice(haystack, i, haystack.len), needle))
103             return i;
104     return -1;
105 }
106 
bstr_lstrip(struct bstr str)107 struct bstr bstr_lstrip(struct bstr str)
108 {
109     while (str.len && mp_isspace(*str.start)) {
110         str.start++;
111         str.len--;
112     }
113     return str;
114 }
115 
bstr_strip(struct bstr str)116 struct bstr bstr_strip(struct bstr str)
117 {
118     str = bstr_lstrip(str);
119     while (str.len && mp_isspace(str.start[str.len - 1]))
120         str.len--;
121     return str;
122 }
123 
bstr_split(struct bstr str,const char * sep,struct bstr * rest)124 struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest)
125 {
126     int start;
127     for (start = 0; start < str.len; start++)
128         if (!strchr(sep, str.start[start]))
129             break;
130     str = bstr_cut(str, start);
131     int end = bstrcspn(str, sep);
132     if (rest) {
133         *rest = bstr_cut(str, end);
134     }
135     return bstr_splice(str, 0, end);
136 }
137 
138 // Unlike with bstr_split(), tok is a string, and not a set of char.
139 // If tok is in str, return true, and: concat(out_left, tok, out_right) == str
140 // Otherwise, return false, and set out_left==str, out_right==""
bstr_split_tok(bstr str,const char * tok,bstr * out_left,bstr * out_right)141 bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right)
142 {
143     bstr bsep = bstr0(tok);
144     int pos = bstr_find(str, bsep);
145     if (pos < 0)
146         pos = str.len;
147     *out_left = bstr_splice(str, 0, pos);
148     *out_right = bstr_cut(str, pos + bsep.len);
149     return pos != str.len;
150 }
151 
bstr_splice(struct bstr str,int start,int end)152 struct bstr bstr_splice(struct bstr str, int start, int end)
153 {
154     if (start < 0)
155         start += str.len;
156     if (end < 0)
157         end += str.len;
158     end = MPMIN(end, str.len);
159     start = MPMAX(start, 0);
160     end = MPMAX(end, start);
161     str.start += start;
162     str.len = end - start;
163     return str;
164 }
165 
bstrtoll(struct bstr str,struct bstr * rest,int base)166 long long bstrtoll(struct bstr str, struct bstr *rest, int base)
167 {
168     str = bstr_lstrip(str);
169     char buf[51];
170     int len = MPMIN(str.len, 50);
171     memcpy(buf, str.start, len);
172     buf[len] = 0;
173     char *endptr;
174     long long r = strtoll(buf, &endptr, base);
175     if (rest)
176         *rest = bstr_cut(str, endptr - buf);
177     return r;
178 }
179 
bstrtod(struct bstr str,struct bstr * rest)180 double bstrtod(struct bstr str, struct bstr *rest)
181 {
182     str = bstr_lstrip(str);
183     char buf[101];
184     int len = MPMIN(str.len, 100);
185     memcpy(buf, str.start, len);
186     buf[len] = 0;
187     char *endptr;
188     double r = strtod(buf, &endptr);
189     if (rest)
190         *rest = bstr_cut(str, endptr - buf);
191     return r;
192 }
193 
bstr_splitchar(struct bstr str,struct bstr * rest,const char c)194 struct bstr bstr_splitchar(struct bstr str, struct bstr *rest, const char c)
195 {
196     int pos = bstrchr(str, c);
197     if (pos < 0)
198         pos = str.len;
199     if (rest)
200         *rest = bstr_cut(str, pos + 1);
201     return bstr_splice(str, 0, pos + 1);
202 }
203 
bstr_strip_linebreaks(struct bstr str)204 struct bstr bstr_strip_linebreaks(struct bstr str)
205 {
206     if (bstr_endswith0(str, "\r\n")) {
207         str = bstr_splice(str, 0, str.len - 2);
208     } else if (bstr_endswith0(str, "\n")) {
209         str = bstr_splice(str, 0, str.len - 1);
210     }
211     return str;
212 }
213 
bstr_eatstart(struct bstr * s,struct bstr prefix)214 bool bstr_eatstart(struct bstr *s, struct bstr prefix)
215 {
216     if (!bstr_startswith(*s, prefix))
217         return false;
218     *s = bstr_cut(*s, prefix.len);
219     return true;
220 }
221 
bstr_eatend(struct bstr * s,struct bstr prefix)222 bool bstr_eatend(struct bstr *s, struct bstr prefix)
223 {
224     if (!bstr_endswith(*s, prefix))
225         return false;
226     s->len -= prefix.len;
227     return true;
228 }
229 
// Convert str to lower case in place: every byte is replaced by the result
// of mp_tolower(). The memory str points to is modified.
void bstr_lower(struct bstr str)
{
    int i = 0;
    while (i < str.len) {
        str.start[i] = mp_tolower(str.start[i]);
        i++;
    }
}
235 
// sscanf() for bstrs. The string is first duplicated with bstrdup0()
// (presumably to get a NUL-terminated copy), scanned with vsscanf(), and the
// temporary copy is freed again.
// Returns the return value of vsscanf().
int bstr_sscanf(struct bstr str, const char *format, ...)
{
    va_list args;
    char *copy = bstrdup0(NULL, str);
    va_start(args, format);
    int matched = vsscanf(copy, format, args);
    va_end(args);
    talloc_free(copy);
    return matched;
}
246 
// Given the first byte b of a UTF-8 sequence, return the total number of
// bytes in the sequence: 1 for b < 128, otherwise 2 to 4 as derived from
// mp_log2(b ^ 255). Returns -1 if the derived length is outside 2..4.
int bstr_parse_utf8_code_length(unsigned char b)
{
    if (b >= 128) {
        int bytes = 7 - mp_log2(b ^ 255);
        if (bytes < 2 || bytes > 4)
            return -1;
        return bytes;
    }
    return 1;
}
254 
bstr_decode_utf8(struct bstr s,struct bstr * out_next)255 int bstr_decode_utf8(struct bstr s, struct bstr *out_next)
256 {
257     if (s.len == 0)
258         return -1;
259     unsigned int codepoint = s.start[0];
260     s.start++; s.len--;
261     if (codepoint >= 128) {
262         int bytes = bstr_parse_utf8_code_length(codepoint);
263         if (bytes < 1 || s.len < bytes - 1)
264             return -1;
265         codepoint &= 127 >> bytes;
266         for (int n = 1; n < bytes; n++) {
267             int tmp = (unsigned char)s.start[0];
268             if ((tmp & 0xC0) != 0x80)
269                 return -1;
270             codepoint = (codepoint << 6) | (tmp & ~0xC0);
271             s.start++; s.len--;
272         }
273         if (codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF))
274             return -1;
275         // Overlong sequences - check taken from libavcodec.
276         // (The only reason we even bother with this is to make libavcodec's
277         //  retarded subtitle utf-8 check happy.)
278         unsigned int min = bytes == 2 ? 0x80 : 1 << (5 * bytes - 4);
279         if (codepoint < min)
280             return -1;
281     }
282     if (out_next)
283         *out_next = s;
284     return codepoint;
285 }
286 
bstr_split_utf8(struct bstr str,struct bstr * out_next)287 struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next)
288 {
289     bstr rest;
290     int code = bstr_decode_utf8(str, &rest);
291     if (code < 0)
292         return (bstr){0};
293     if (out_next)
294         *out_next = rest;
295     return bstr_splice(str, 0, str.len - rest.len);
296 }
297 
// Check whether s consists only of sequences bstr_decode_utf8() accepts.
// Returns:
//   0   s is valid
//  -8   s contains an invalid sequence
//  -N   (N = number of missing bytes) the first sequence that fails to
//       decode looks like a valid sequence that was merely cut off at the
//       end of s
int bstr_validate_utf8(struct bstr s)
{
    while (s.len) {
        if (bstr_decode_utf8(s, &s) >= 0)
            continue;

        // Decoding failed, and s still points at the offending sequence
        // (bstr_decode_utf8() writes its out parameter on success only).
        // Try to guess whether the sequence was just cut-off.
        unsigned int lead = (unsigned char)s.start[0];
        int bytes = bstr_parse_utf8_code_length(lead);
        if (bytes > 1 && s.len < 6) {
            // Manually check the validity of the bytes that are left.
            for (int n = 1; n < bytes; n++) {
                if (n >= s.len) {
                    // Everything valid until now - just cut off.
                    return -(bytes - s.len);
                }
                int cont = (unsigned char)s.start[n];
                if ((cont & 0xC0) != 0x80)
                    break;
            }
        }
        return -8;
    }
    return 0;
}
322 
bstr_sanitize_utf8_latin1(void * talloc_ctx,struct bstr s)323 struct bstr bstr_sanitize_utf8_latin1(void *talloc_ctx, struct bstr s)
324 {
325     bstr new = {0};
326     bstr left = s;
327     unsigned char *first_ok = s.start;
328     while (left.len) {
329         int r = bstr_decode_utf8(left, &left);
330         if (r < 0) {
331             bstr_xappend(talloc_ctx, &new, (bstr){first_ok, left.start - first_ok});
332             mp_append_utf8_bstr(talloc_ctx, &new, (unsigned char)left.start[0]);
333             left.start += 1;
334             left.len -= 1;
335             first_ok = left.start;
336         }
337     }
338     if (!new.start)
339         return s;
340     if (first_ok != left.start)
341         bstr_xappend(talloc_ctx, &new, (bstr){first_ok, left.start - first_ok});
342     return new;
343 }
344 
// Make sure the allocation behind s->start has room for at least append_min
// more bytes after the current s->len bytes. If not, the allocation is grown
// with talloc_realloc_size() by max(append_min, current size), so that it at
// least doubles. Calls abort() if the sizes get close to overflowing.
static void resize_append(void *talloc_ctx, bstr *s, size_t append_min)
{
    size_t size = talloc_get_size(s->start);
    assert(s->len <= size);
    if (append_min <= size - s->len)
        return; // enough spare room already

    if (append_min < size)
        append_min = size; // preallocate in power of 2s
    if (size >= SIZE_MAX / 2 || append_min >= SIZE_MAX / 2)
        abort(); // oom
    s->start = talloc_realloc_size(talloc_ctx, s->start, size + append_min);
}
357 
358 // Append the string, so that *s = *s + append. s->start is expected to be
359 // a talloc allocation (which can be realloced) or NULL.
360 // This function will always implicitly append a \0 after the new string for
361 // convenience.
362 // talloc_ctx will be used as parent context, if s->start is NULL.
bstr_xappend(void * talloc_ctx,bstr * s,bstr append)363 void bstr_xappend(void *talloc_ctx, bstr *s, bstr append)
364 {
365     if (!append.len)
366         return;
367     resize_append(talloc_ctx, s, append.len + 1);
368     memcpy(s->start + s->len, append.start, append.len);
369     s->len += append.len;
370     s->start[s->len] = '\0';
371 }
372 
// Variadic front-end for bstr_xappend_vasprintf(): format fmt with the
// given arguments and append the result to *s.
void bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...)
{
    va_list args;
    va_start(args, fmt);
    bstr_xappend_vasprintf(talloc_ctx, s, fmt, args);
    va_end(args);
}
380 
381 // Exactly as bstr_xappend(), but with a formatted string.
bstr_xappend_vasprintf(void * talloc_ctx,bstr * s,const char * fmt,va_list ap)382 void bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt,
383                             va_list ap)
384 {
385     int size;
386     va_list copy;
387     va_copy(copy, ap);
388     size_t avail = talloc_get_size(s->start) - s->len;
389     char *dest = s->start ? s->start + s->len : NULL;
390     char c;
391     if (avail < 1)
392         dest = &c;
393     size = vsnprintf(dest, MPMAX(avail, 1), fmt, copy);
394     va_end(copy);
395 
396     if (size < 0)
397         abort();
398 
399     if (avail < 1 || size + 1 > avail) {
400         resize_append(talloc_ctx, s, size + 1);
401         vsnprintf(s->start + s->len, size + 1, fmt, ap);
402     }
403     s->len += size;
404 }
405 
bstr_case_startswith(struct bstr s,struct bstr prefix)406 bool bstr_case_startswith(struct bstr s, struct bstr prefix)
407 {
408     struct bstr start = bstr_splice(s, 0, prefix.len);
409     return start.len == prefix.len && bstrcasecmp(start, prefix) == 0;
410 }
411 
bstr_case_endswith(struct bstr s,struct bstr suffix)412 bool bstr_case_endswith(struct bstr s, struct bstr suffix)
413 {
414     struct bstr end = bstr_cut(s, -suffix.len);
415     return end.len == suffix.len && bstrcasecmp(end, suffix) == 0;
416 }
417 
bstr_strip_ext(struct bstr str)418 struct bstr bstr_strip_ext(struct bstr str)
419 {
420     int dotpos = bstrrchr(str, '.');
421     if (dotpos < 0)
422         return str;
423     return (struct bstr){str.start, dotpos};
424 }
425 
bstr_get_ext(struct bstr s)426 struct bstr bstr_get_ext(struct bstr s)
427 {
428     int dotpos = bstrrchr(s, '.');
429     if (dotpos < 0)
430         return (struct bstr){NULL, 0};
431     return bstr_splice(s, dotpos + 1, s.len);
432 }
433 
// Convert one hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its value
// (0-15). Returns -1 for any other character.
static int h_to_i(unsigned char c)
{
    int digit = -1; // stays -1 for anything that is not a hex digit
    if ('0' <= c && c <= '9') {
        digit = c - '0';
    } else if ('a' <= c && c <= 'f') {
        digit = 10 + (c - 'a');
    } else if ('A' <= c && c <= 'F') {
        digit = 10 + (c - 'A');
    }
    return digit;
}
445 
bstr_decode_hex(void * talloc_ctx,struct bstr hex,struct bstr * out)446 bool bstr_decode_hex(void *talloc_ctx, struct bstr hex, struct bstr *out)
447 {
448     if (!out)
449         return false;
450 
451     char *arr = talloc_array(talloc_ctx, char, hex.len / 2);
452     int len = 0;
453 
454     while (hex.len >= 2) {
455         int a = h_to_i(hex.start[0]);
456         int b = h_to_i(hex.start[1]);
457         hex = bstr_splice(hex, 2, hex.len);
458 
459         if (a < 0 || b < 0) {
460             talloc_free(arr);
461             return false;
462         }
463 
464         arr[len++] = (a << 4) | b;
465     }
466 
467     *out = (struct bstr){ .start = arr, .len = len };
468     return true;
469 }
470