1 /*
2  * audstrings.c
3  * Copyright 2009-2012 John Lindgren and Ariadne Conill
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  *    this list of conditions, and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  *    this list of conditions, and the following disclaimer in the documentation
13  *    provided with the distribution.
14  *
15  * This software is provided "as is" and without any warranty, express or
16  * implied. In no event shall the authors be liable for any damages arising from
17  * the use of this software.
18  */
19 
20 #include "audstrings.h"
21 
22 #include <math.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include <new>
28 
29 #include <glib.h>
30 
31 #include "i18n.h"
32 #include "index.h"
33 #include "internal.h"
34 #include "runtime.h"
35 
/* Largest exponent available in the int_pow10 table below. */
#define MAX_POW10 9
/* Lookup table of powers of ten: int_pow10[n] == 10^n for 0 <= n <= MAX_POW10.
 * str_to_double() uses it to scale the digits following the decimal point. */
static const unsigned int_pow10[MAX_POW10 + 1] = {
    1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
39 
/* Maps a byte to the value of the hexadecimal digit it represents:
 * '0'-'9' give 0-9, 'A'-'F' and 'a'-'f' give 10-15.  Every other byte maps to
 * 0.  Only the first 128 entries are written out (16 per row); the rest of the
 * array is zero-initialized.  Accessed through FROM_HEX(). */
static const char ascii_to_hex[256] =
    "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"
    "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"
    "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"
    "\x0\x1\x2\x3\x4\x5\x6\x7\x8\x9\x0\x0\x0\x0\x0\x0" /* '0'-'9' */
    "\x0\xa\xb\xc\xd\xe\xf\x0\x0\x0\x0\x0\x0\x0\x0\x0" /* 'A'-'F' */
    "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"
    "\x0\xa\xb\xc\xd\xe\xf\x0\x0\x0\x0\x0\x0\x0\x0\x0" /* 'a'-'f' */
    "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0";
49 
/* Maps a value 0-15 to its uppercase hexadecimal digit.  Accessed through
 * TO_HEX(), which masks the index to four bits. */
static const char hex_to_ascii[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
                                      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
52 
/* Flags the bytes that str_encode_percent() copies through unescaped (via
 * IS_LEGAL()): '-', '.', '/', the ASCII digits and letters, '_' and '~', plus
 * ':' on Windows.  All other bytes, including everything from 128 up (left
 * zero-initialized), are percent-encoded.  Sixteen entries per row. */
static const char uri_legal_table[256] =
    "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"
    "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0"
    "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x1\x1\x1" // '-' '.' '/'
#ifdef _WIN32
    /* We assume ':' is used with a "reserved purpose" (i.e. drive letter).
     * This assumption might need to be reconsidered for non-file URIs. */
    "\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x0\x0\x0\x0\x0" // 0-9 ':'
#else
    "\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x0\x0\x0\x0\x0\x0" // 0-9
#endif
    "\x0\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1"  // A-O
    "\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x0\x0\x0\x0\x1"  // P-Z '_'
    "\x0\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1"  // a-o
    "\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x1\x0\x0\x0\x1\x0"; // p-z '~'
68 
/* Maps an ASCII letter to the same letter in the opposite case ('A' -> 'a',
 * 'a' -> 'A').  Every other byte maps to 0, including bytes from 128 up, which
 * are left zero-initialized.  Accessed through SWAP_CASE(). */
static const char swap_case[256] =
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
    "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"  /* entries 64-95: '@', 'A'-'Z' */
    "\0ABCDEFGHIJKLMNOPQRSTUVWXYZ\0\0\0\0\0"; /* entries 96-127: '`', 'a'-'z' */
74 
/* Table accessors.  The argument is cast to unsigned char so that bytes with
 * the high bit set index the 256-entry tables correctly even where plain char
 * is signed. */
#define FROM_HEX(c) (ascii_to_hex[(unsigned char)(c)]) /* hex digit -> 0-15 */
#define TO_HEX(i) (hex_to_ascii[(i)&15])               /* low 4 bits -> digit */
#define IS_LEGAL(c) (uri_legal_table[(unsigned char)(c)]) /* no escaping needed? */
#define SWAP_CASE(c) (swap_case[(unsigned char)(c)]) /* other-case letter, or 0 */

/* True if <c> is a path separator: '/' everywhere, and also '\' on Windows.
 * Note that the Windows variant evaluates its argument twice. */
#ifdef _WIN32
#define IS_SEP(c) ((c) == '/' || (c) == '\\')
#else
#define IS_SEP(c) ((c) == '/')
#endif
85 
/* Null-tolerant strcmp()/strncmp().  A null pointer sorts before any string
 * and two null pointers compare equal.  A negative <len> compares the whole
 * strings; otherwise at most <len> bytes are compared. */
EXPORT int strcmp_safe(const char * a, const char * b, int len)
{
    if (a && b)
    {
        if (len < 0)
            return strcmp(a, b);

        return strncmp(a, b, len);
    }

    if (a)
        return 1; /* only <b> is null */

    return b ? -1 : 0;
}
96 
97 /* ASCII version of strcasecmp, also handles nullptr safely */
strcmp_nocase(const char * a,const char * b,int len)98 EXPORT int strcmp_nocase(const char * a, const char * b, int len)
99 {
100     if (!a)
101         return b ? -1 : 0;
102     if (!b)
103         return 1;
104 
105     return len < 0 ? g_ascii_strcasecmp(a, b) : g_ascii_strncasecmp(a, b, len);
106 }
107 
/* Returns the length of <s>, looking at no more than <len> bytes.  A negative
 * <len> removes the limit, which makes this a plain strlen(). */
EXPORT int strlen_bounded(const char * s, int len)
{
    if (len >= 0)
    {
        /* look for the terminator within the first <len> bytes only */
        const char * end = (const char *)memchr(s, 0, len);
        return end ? end - s : len;
    }

    return strlen(s);
}
120 
str_copy(const char * s,int len)121 EXPORT StringBuf str_copy(const char * s, int len)
122 {
123     if (len < 0)
124         len = strlen(s);
125 
126     StringBuf str(len);
127     memcpy(str, s, len);
128     return str;
129 }
130 
str_concat(const std::initializer_list<const char * > & strings)131 EXPORT StringBuf str_concat(const std::initializer_list<const char *> & strings)
132 {
133     StringBuf str(-1);
134     char * set = str;
135     int left = str.len();
136 
137     for (const char * s : strings)
138     {
139         int len = strlen(s);
140         if (len > left)
141             throw std::bad_alloc();
142 
143         memcpy(set, s, len);
144 
145         set += len;
146         left -= len;
147     }
148 
149     str.resize(set - str);
150     return str;
151 }
152 
str_printf(const char * format,...)153 EXPORT StringBuf str_printf(const char * format, ...)
154 {
155     va_list args;
156     va_start(args, format);
157     StringBuf str = str_vprintf(format, args);
158     va_end(args);
159     return str;
160 }
161 
str_append_printf(StringBuf & str,const char * format,...)162 EXPORT void str_append_printf(StringBuf & str, const char * format, ...)
163 {
164     va_list args;
165     va_start(args, format);
166     str_append_vprintf(str, format, args);
167     va_end(args);
168 }
169 
str_vprintf(const char * format,va_list args)170 EXPORT StringBuf str_vprintf(const char * format, va_list args)
171 {
172     StringBuf str(-1);
173     int len = vsnprintf(str, str.len(), format, args);
174     str.resize(len);
175     return str;
176 }
177 
str_append_vprintf(StringBuf & str,const char * format,va_list args)178 EXPORT void str_append_vprintf(StringBuf & str, const char * format,
179                                va_list args)
180 {
181     int len0 = str.len();
182     str.resize(-1);
183     int len1 = vsnprintf(str + len0, str.len() - len0, format, args);
184     str.resize(len0 + len1);
185 }
186 
str_has_prefix_nocase(const char * str,const char * prefix)187 EXPORT bool str_has_prefix_nocase(const char * str, const char * prefix)
188 {
189     return !g_ascii_strncasecmp(str, prefix, strlen(prefix));
190 }
191 
str_has_suffix_nocase(const char * str,const char * suffix)192 EXPORT bool str_has_suffix_nocase(const char * str, const char * suffix)
193 {
194     int len1 = strlen(str);
195     int len2 = strlen(suffix);
196 
197     if (len2 > len1)
198         return false;
199 
200     return !g_ascii_strcasecmp(str + len1 - len2, suffix);
201 }
202 
/* Bernstein's hash function (unrolled version):
 *    h(0) = 5381
 *    h(n) = 33 * h(n-1) + c
 *
 * This function is more than twice as fast as g_str_hash (a simpler version of
 * Bernstein's hash) and even slightly faster than Murmur 3.
 *
 * The multipliers in the unrolled steps are the powers 33^1 ... 33^8 reduced
 * modulo 2^32.  They are written as unsigned literals so that every product
 * and sum is computed in (wrapping) unsigned arithmetic.  Without the suffix,
 * 3963737313 does not fit in an int and gets a wider signed type where long is
 * 64 bits, which makes the multiplication overflow for bytes >= 0x80 on
 * platforms where char is signed. */

EXPORT unsigned str_calc_hash(const char * s)
{
    unsigned h = 5381;

    int len = strlen(s);

    /* eight bytes per step */
    while (len >= 8)
    {
        h = h * 1954312449u + (unsigned)s[0] * 3963737313u +
            (unsigned)s[1] * 1291467969u + (unsigned)s[2] * 39135393u +
            (unsigned)s[3] * 1185921u + (unsigned)s[4] * 35937u +
            (unsigned)s[5] * 1089u + (unsigned)s[6] * 33u + (unsigned)s[7];

        s += 8;
        len -= 8;
    }

    /* four bytes in one step */
    if (len >= 4)
    {
        h = h * 1185921u + (unsigned)s[0] * 35937u + (unsigned)s[1] * 1089u +
            (unsigned)s[2] * 33u + (unsigned)s[3];

        s += 4;
        len -= 4;
    }

    /* remaining zero to three bytes, one at a time */
    switch (len)
    {
    case 3:
        h = h * 33u + (unsigned)(*s++);
        /* fall through */
    case 2:
        h = h * 33u + (unsigned)(*s++);
        /* fall through */
    case 1:
        h = h * 33u + (unsigned)(*s++);
    }

    return h;
}
248 
strstr_nocase(const char * haystack,const char * needle)249 EXPORT const char * strstr_nocase(const char * haystack, const char * needle)
250 {
251     while (1)
252     {
253         const char * ap = haystack;
254         const char * bp = needle;
255 
256         while (1)
257         {
258             char a = *ap++;
259             char b = *bp++;
260 
261             if (!b) /* all of needle matched */
262                 return (char *)haystack;
263             if (!a) /* end of haystack reached */
264                 return nullptr;
265 
266             if (a != b && a != SWAP_CASE(b))
267                 break;
268         }
269 
270         haystack++;
271     }
272 }
273 
strstr_nocase_utf8(const char * haystack,const char * needle)274 EXPORT const char * strstr_nocase_utf8(const char * haystack,
275                                        const char * needle)
276 {
277     while (1)
278     {
279         const char * ap = haystack;
280         const char * bp = needle;
281 
282         while (1)
283         {
284             gunichar a = g_utf8_get_char(ap);
285             gunichar b = g_utf8_get_char(bp);
286 
287             if (!b) /* all of needle matched */
288                 return (char *)haystack;
289             if (!a) /* end of haystack reached */
290                 return nullptr;
291 
292             if (a != b &&
293                 (a < 128 ? (gunichar)SWAP_CASE(a) != b
294                          : g_unichar_tolower(a) != g_unichar_tolower(b)))
295                 break;
296 
297             ap = g_utf8_next_char(ap);
298             bp = g_utf8_next_char(bp);
299         }
300 
301         haystack = g_utf8_next_char(haystack);
302     }
303 }
304 
str_tolower(const char * str)305 EXPORT StringBuf str_tolower(const char * str)
306 {
307     StringBuf buf(strlen(str));
308     char * set = buf;
309 
310     while (*str)
311         *set++ = g_ascii_tolower(*str++);
312 
313     return buf;
314 }
315 
str_tolower_utf8(const char * str)316 EXPORT StringBuf str_tolower_utf8(const char * str)
317 {
318     StringBuf buf(6 * strlen(str));
319     char * set = buf;
320     gunichar c;
321 
322     while ((c = g_utf8_get_char(str)))
323     {
324         if (c < 128)
325             *set++ = g_ascii_tolower(c);
326         else
327             set += g_unichar_to_utf8(g_unichar_tolower(c), set);
328 
329         str = g_utf8_next_char(str);
330     }
331 
332     buf.resize(set - buf);
333     return buf;
334 }
335 
str_toupper(const char * str)336 EXPORT StringBuf str_toupper(const char * str)
337 {
338     StringBuf buf(strlen(str));
339     char * set = buf;
340 
341     while (*str)
342         *set++ = g_ascii_toupper(*str++);
343 
344     return buf;
345 }
346 
str_toupper_utf8(const char * str)347 EXPORT StringBuf str_toupper_utf8(const char * str)
348 {
349     StringBuf buf(6 * strlen(str));
350     char * set = buf;
351     gunichar c;
352 
353     while ((c = g_utf8_get_char(str)))
354     {
355         if (c < 128)
356             *set++ = g_ascii_toupper(c);
357         else
358             set += g_unichar_to_utf8(g_unichar_toupper(c), set);
359 
360         str = g_utf8_next_char(str);
361     }
362 
363     buf.resize(set - buf);
364     return buf;
365 }
366 
/* Replaces every occurrence of <old_c> in <string> with <new_c>, in place.
 * Does nothing if <old_c> is the null character. */
EXPORT void str_replace_char(char * string, char old_c, char new_c)
{
    /* strchr() treats the terminator as part of the string, so searching for
     * '\0' would "find" it, overwrite it, and then continue past the end of
     * the buffer. */
    if (!old_c)
        return;

    for (char * p = strchr(string, old_c); p; p = strchr(p + 1, old_c))
        *p = new_c;
}
372 
373 /* Percent-decodes <len> bytes of <str>.  If <len> is negative, decodes all of
374  * <str>. */
375 
str_decode_percent(const char * str,int len)376 EXPORT StringBuf str_decode_percent(const char * str, int len)
377 {
378     if (len < 0)
379         len = strlen(str);
380 
381     StringBuf buf(len);
382     char * out = buf;
383 
384     while (1)
385     {
386         const char * p = (const char *)memchr(str, '%', len);
387         if (!p)
388             break;
389 
390         int block = p - str;
391         memcpy(out, str, block);
392 
393         str += block;
394         out += block;
395         len -= block;
396 
397         if (len < 3)
398             break;
399 
400         *out++ = (FROM_HEX(str[1]) << 4) | FROM_HEX(str[2]);
401 
402         str += 3;
403         len -= 3;
404     }
405 
406     memcpy(out, str, len);
407     buf.resize(out + len - buf);
408     return buf;
409 }
410 
411 /* Percent-encodes <len> bytes of <str>.  If <len> is negative, encodes all of
412  * <str>. */
413 
str_encode_percent(const char * str,int len)414 EXPORT StringBuf str_encode_percent(const char * str, int len)
415 {
416     if (len < 0)
417         len = strlen(str);
418 
419     StringBuf buf(3 * len);
420     char * out = buf;
421 
422     while (len--)
423     {
424         char c = *str++;
425 
426         if (IS_LEGAL(c))
427             *out++ = c;
428         else
429         {
430             *out++ = '%';
431             *out++ = TO_HEX((unsigned char)c >> 4);
432             *out++ = TO_HEX(c & 0xF);
433         }
434     }
435 
436     buf.resize(out - buf);
437     return buf;
438 }
439 
/* Normalizes <filename> in place and returns it:
 *   - on Windows, '/' is first replaced with '\';
 *   - "." path elements are removed;
 *   - ".." path elements are removed together with the element before them;
 *   - a single trailing separator is removed, except where it is part of the
 *     root ("/" or, on Windows, "C:\"). */
EXPORT StringBuf filename_normalize(StringBuf && filename)
{
    int len;
    char * s;

#ifdef _WIN32
    /* convert slash to backslash on Windows */
    str_replace_char(filename, '/', '\\');
#endif

    /* remove current directory (".") elements */
    /* <s> is set to the separator in front of the ".": either a trailing
     * "<sep>." or the first "<sep>.<sep>" in the string.  The removal starts
     * after that separator and covers the "." plus the separator following it,
     * if there is one. */
    while ((len = filename.len()) >= 2 &&
           (!strcmp((s = filename + len - 2), G_DIR_SEPARATOR_S ".") ||
            (s = strstr(filename, G_DIR_SEPARATOR_S "." G_DIR_SEPARATOR_S))))
        filename.remove(s + 1 - filename,
                        aud::min(s + 3, filename + len) - (s + 1));

    /* remove parent directory ("..") elements */
    /* <s> is set to the separator in front of the "..": either a trailing
     * "<sep>.." or the first "<sep>..<sep>" in the string. */
    while ((len = filename.len()) >= 3 &&
           (!strcmp((s = filename + len - 3), G_DIR_SEPARATOR_S "..") ||
            (s = strstr(filename, G_DIR_SEPARATOR_S ".." G_DIR_SEPARATOR_S))))
    {
        /* cut the string at <s> for a moment so that strrchr() finds the
         * separator in front of the preceding element */
        *s = 0;
        char * s2 = strrchr(filename, G_DIR_SEPARATOR);
        if (!s2)
            /* no preceding separator: put back the one at <s> and remove only
             * what follows it */
            *(s2 = s) = G_DIR_SEPARATOR;

        /* remove everything after <s2> up to and including the ".." and the
         * separator following it, if there is one */
        filename.remove(s2 + 1 - filename,
                        aud::min(s + 4, filename + len) - (s2 + 1));
    }

    /* remove trailing slash */
#ifdef _WIN32
    if ((len = filename.len()) > 3 &&
        filename[len - 1] == '\\') /* leave "C:\" */
#else
    if ((len = filename.len()) > 1 &&
        filename[len - 1] == '/') /* leave leading "/" */
#endif
        filename.resize(len - 1);

    return std::move(filename);
}
483 
484 /* note #1: recommended order is filename_contract(filename_normalize(f)) */
485 /* note #2: currently assumes filename is UTF-8 (intended for display) */
filename_contract(StringBuf && filename)486 EXPORT StringBuf filename_contract(StringBuf && filename)
487 {
488     /* replace home folder with '~' */
489     const char * home = get_home_utf8();
490     int homelen = home ? strlen(home) : 0;
491 
492     if (homelen && !strncmp(filename, home, homelen) &&
493         (!filename[homelen] || IS_SEP(filename[homelen])))
494     {
495         filename[0] = '~';
496         filename.remove(1, homelen - 1);
497     }
498 
499     return std::move(filename);
500 }
501 
502 /* note #1: recommended order is filename_normalize(filename_expand(f)) */
503 /* note #2: currently assumes filename is UTF-8 (intended for display) */
filename_expand(StringBuf && filename)504 EXPORT StringBuf filename_expand(StringBuf && filename)
505 {
506     /* expand leading '~' */
507     if (filename[0] == '~' && (!filename[1] || IS_SEP(filename[1])))
508     {
509         const char * home = get_home_utf8();
510 
511         if (home && home[0])
512         {
513             filename[0] = home[0];
514             filename.insert(1, home + 1, -1);
515         }
516     }
517 
518     return std::move(filename);
519 }
520 
filename_get_parent(const char * filename)521 EXPORT StringBuf filename_get_parent(const char * filename)
522 {
523     StringBuf buf = filename_normalize(str_copy(filename));
524     const char * base = last_path_element(buf);
525 
526     if (!base)
527         return StringBuf();
528 
529 #ifdef _WIN32
530     if (base - buf > 3) /* leave "C:\" */
531 #else
532     if (base - buf > 1)           /* leave leading "/" */
533 #endif
534         buf.resize(base - buf - 1);
535     else
536         buf.resize(base - buf);
537 
538     return buf;
539 }
540 
filename_get_base(const char * filename)541 EXPORT StringBuf filename_get_base(const char * filename)
542 {
543     StringBuf buf = filename_normalize(str_copy(filename));
544     const char * base = last_path_element(buf);
545 
546     if (base)
547         buf.remove(0, base - buf);
548 
549     return buf;
550 }
551 
552 EXPORT StringBuf
filename_build(const std::initializer_list<const char * > & elems)553 filename_build(const std::initializer_list<const char *> & elems)
554 {
555     StringBuf str(-1);
556     char * set = str;
557     int left = str.len();
558 
559     for (const char * s : elems)
560     {
561         if (set > str && !IS_SEP(set[-1]))
562         {
563             if (!left)
564                 throw std::bad_alloc();
565 
566             *set++ = G_DIR_SEPARATOR;
567             left--;
568         }
569 
570         int len = strlen(s);
571         if (len > left)
572             throw std::bad_alloc();
573 
574         memcpy(set, s, len);
575 
576         set += len;
577         left -= len;
578     }
579 
580     str.resize(set - str);
581     return str;
582 }
583 
/* Scheme prefix of local file URIs and its length in bytes.  The Windows
 * variant includes the extra '/' that precedes the drive letter.  Both macros
 * are #undef'd again after the URI conversion functions. */
#ifdef _WIN32
#define URI_PREFIX "file:///"
#define URI_PREFIX_LEN 8
#else
#define URI_PREFIX "file://"
#define URI_PREFIX_LEN 7
#endif
591 
/* Like g_filename_to_uri, but converts the filename from the system locale to
 * UTF-8 before percent-encoding (except on Windows, where filenames are assumed
 * to be UTF-8).  On Windows, replaces '\' with '/' and adds a leading '/'. */

EXPORT StringBuf filename_to_uri(const char * name)
{
#ifdef _WIN32
    /* work on a copy so the separators can be rewritten */
    StringBuf buf = str_copy(name);
    str_replace_char(buf, '\\', '/');
#else
    /* stays null unless a locale conversion is performed */
    StringBuf buf;

    /* convert from locale if:
     * 1) system locale is not UTF-8, and
     * 2) filename is not already valid UTF-8 */
    if (!g_get_charset(nullptr) && !g_utf8_validate(name, -1, nullptr))
        buf = str_from_locale(name);
#endif

    /* encode the converted name if there is one, otherwise <name> itself */
    buf = str_encode_percent(buf ? buf : name);
    buf.insert(0, URI_PREFIX);
    /* NOTE(review): settle() presumably finalizes the buffer's storage before
     * it is returned -- confirm against the StringBuf documentation */
    return buf.settle();
}
615 
/* Like g_filename_from_uri, but optionally converts the filename from UTF-8 to
 * the system locale after percent-decoding (except on Windows, where filenames
 * are assumed to be UTF-8).  On Windows, strips the leading '/' and replaces
 * '/' with '\'.  If the input is not a valid URI, it is assumed to be a local
 * filename already and is not percent-decoded.
 *
 * Returns a null StringBuf for a URI with a scheme other than file://, and
 * (when <use_locale> is false) when str_to_utf8() fails on the result.  The
 * returned filename has been passed through filename_normalize(). */

EXPORT StringBuf uri_to_filename(const char * uri, bool use_locale)
{
    StringBuf buf;

    if (!strncmp(uri, URI_PREFIX, URI_PREFIX_LEN))
        buf = str_decode_percent(uri + URI_PREFIX_LEN);
    else if (!strstr(uri, "://")) /* already a local filename? */
        buf = str_copy(uri);
    else
        return StringBuf(); /* some other URI scheme */

#ifndef _WIN32
    /* convert to locale if:
     * 1) use_locale flag was not set to false, and
     * 2) system locale is not UTF-8, and
     * 3) decoded URI is valid UTF-8 */
    if (use_locale && !g_get_charset(nullptr) &&
        g_utf8_validate(buf, buf.len(), nullptr))
    {
        /* keep the UTF-8 name if the conversion yields nothing */
        StringBuf locale = str_to_locale(buf);
        if (locale)
            buf = std::move(locale);
    }
#endif

    /* if UTF-8 was requested, make sure the result is valid */
    if (!use_locale)
    {
        buf = str_to_utf8(std::move(buf));
        if (!buf)
            return StringBuf();
    }

    return filename_normalize(buf.settle());
}
657 
658 /* Formats a URI for human-readable display.  Percent-decodes and, for file://
659  * URI's, converts to filename format, but in UTF-8. */
660 
uri_to_display(const char * uri)661 EXPORT StringBuf uri_to_display(const char * uri)
662 {
663     if (!strncmp(uri, "stdin://", 8))
664         return str_copy(_("Standard input"));
665     if (!strncmp(uri, "cdda://?", 8))
666         return str_printf(_("Audio CD, track %s"), uri + 8);
667 
668     StringBuf buf = str_to_utf8(str_decode_percent(uri));
669     if (!buf)
670         return str_copy(_("(character encoding error)"));
671 
672     if (!strncmp(buf, URI_PREFIX, URI_PREFIX_LEN))
673     {
674         buf.remove(0, URI_PREFIX_LEN);
675         return filename_contract(filename_normalize(std::move(buf)));
676     }
677 
678     return buf;
679 }
680 
681 #undef URI_PREFIX
682 #undef URI_PREFIX_LEN
683 
/* Looks for a subtune suffix at the end of <str>: the last '?' followed by a
 * decimal integer and nothing else (e.g. "?3").
 *
 * Returns a pointer to the '?' if a valid suffix is present, otherwise
 * nullptr.  If <isub_p> is not null, the subtune number is stored there; 0 is
 * stored when there is no valid suffix. */
static const char * parse_subtune(const char * str, int * isub_p)
{
    const char * c = strrchr(str, '?');
    int isub = 0;
    char junk;

    /* "%d%c" must convert exactly one item: the number, with no character
     * left over for <junk> */
    if (c && sscanf(c + 1, "%d%c", &isub, &junk) != 1)
    {
        c = nullptr;
        /* sscanf() may already have stored a number before hitting the
         * trailing junk (e.g. "?3x"); do not report it as a subtune */
        isub = 0;
    }

    if (isub_p)
        *isub_p = isub;

    return c;
}
697 
uri_parse(const char * uri,const char ** base_p,const char ** ext_p,const char ** sub_p,int * isub_p)698 EXPORT void uri_parse(const char * uri, const char ** base_p,
699                       const char ** ext_p, const char ** sub_p, int * isub_p)
700 {
701     const char * end = uri + strlen(uri);
702     const char *base, *ext, *sub, *c;
703 
704     if ((c = strrchr(uri, '/')))
705         base = c + 1;
706     else
707         base = end;
708 
709     if ((c = parse_subtune(base, isub_p)))
710         sub = c;
711     else
712         sub = end;
713 
714     if ((c = strrchr(base, '.')) && c < sub)
715         ext = c;
716     else
717         ext = sub;
718 
719     if (base_p)
720         *base_p = base;
721     if (ext_p)
722         *ext_p = ext;
723     if (sub_p)
724         *sub_p = sub;
725 }
726 
uri_get_scheme(const char * uri)727 EXPORT StringBuf uri_get_scheme(const char * uri)
728 {
729     const char * delim = strstr(uri, "://");
730     return delim ? str_copy(uri, delim - uri) : StringBuf();
731 }
732 
uri_get_extension(const char * uri)733 EXPORT StringBuf uri_get_extension(const char * uri)
734 {
735     const char * ext;
736     uri_parse(uri, nullptr, &ext, nullptr, nullptr);
737 
738     if (ext[0] != '.')
739         return StringBuf();
740 
741     ext++; // skip period
742 
743     // remove subtunes and HTTP query strings
744     const char * qmark = strchr(ext, '?');
745     return str_copy(ext, qmark ? qmark - ext : -1);
746 }
747 
uri_get_display_base(const char * uri)748 EXPORT StringBuf uri_get_display_base(const char * uri)
749 {
750     const char *base, *ext;
751     uri_parse(uri, &base, &ext, nullptr, nullptr);
752 
753     if (ext > base)
754         return str_to_utf8(str_decode_percent(base, ext - base));
755 
756     return StringBuf();
757 }
758 
/* Constructs a full URI given:
 *   1. path: one of the following:
 *     a. a full URI (returned unchanged)
 *     b. an absolute filename (in UTF-8 or the system locale)
 *     c. a relative path (character set detected according to user settings)
 *   2. reference: the full URI of the playlist containing <path>
 *
 * Valid subtune suffixes such as '?3' are preserved.
 *
 * Returns a null StringBuf for a relative path if <reference> contains no '/'
 * or if the path cannot be converted to UTF-8. */

EXPORT StringBuf uri_construct(const char * path, const char * reference)
{
    /* URI */
    if (strstr(path, "://"))
        return str_copy(path);

    StringBuf buf;
    /* <sub> points at the '?' within the original <path> argument, so it stays
     * valid after <path> is redirected below */
    auto sub = parse_subtune(path, nullptr);
    if (sub)
    {
        /* split out subtune suffix so it isn't percent-encoded */
        buf = str_copy(path, sub - path);
        path = buf;
    }

#ifdef _WIN32
    /* drive letter, colon, separator */
    if (path[0] && path[1] == ':' && IS_SEP(path[2]))
#else
    if (path[0] == '/')
#endif
    {
        /* absolute filename */
        buf = filename_to_uri(path);
    }
    else
    {
        /* relative path */
        /* resolved against the directory part of <reference>, i.e. everything
         * up to and including its last '/' */
        const char * slash = strrchr(reference, '/');
        if (!slash)
            return StringBuf();

        buf = str_to_utf8(path, -1);
        if (!buf)
            return StringBuf();

        /* optional setting: treat '\' in the path as a separator */
        if (aud_get_bool("convert_backslash"))
            str_replace_char(buf, '\\', '/');

        buf = str_encode_percent(buf);
        buf.insert(0, reference, slash + 1 - reference);
    }

    /* re-add subtune suffix */
    /* NOTE(review): position -1 is presumably "end of string" -- confirm
     * against StringBuf::insert() */
    if (sub)
        buf.insert(-1, sub);

    return buf.settle();
}
816 
817 /* Basically the reverse of uri_construct().
818  * First try to split off a relative path (if so configured).
819  * Failing that, try to convert to a local filename.
820  * Failing that, return the URI as-is.
821  *
822  * All output is UTF-8 for portability.
823  *
824  * Parameters:
825  *   1. uri: the full URI of a song file
826  *   2. reference: the full URI of the playlist being written */
827 
uri_deconstruct(const char * uri,const char * reference)828 EXPORT StringBuf uri_deconstruct(const char * uri, const char * reference)
829 {
830     if (aud_get_bool("export_relative_paths"))
831     {
832         const char * slash = strrchr(reference, '/');
833         if (slash && !strncmp(uri, reference, slash + 1 - reference))
834         {
835             StringBuf path =
836                 str_to_utf8(str_decode_percent(uri + (slash + 1 - reference)));
837             if (path)
838                 return path;
839         }
840     }
841 
842     StringBuf filename = uri_to_filename(uri, false);
843     if (filename)
844         return filename;
845 
846     return str_copy(uri);
847 }
848 
/* Like strcasecmp, but orders numbers correctly (2 before 10). */
/* Non-ASCII characters are treated exactly as is. */
/* Handles nullptr gracefully. */
/* Runs of digits are compared digit by digit rather than being converted to
 * an integer, so numbers of any length are ordered correctly and cannot
 * overflow.  Leading zeros do not affect the value ("007" equals "7"). */

EXPORT int str_compare(const char * ap, const char * bp)
{
    if (!ap)
        return bp ? -1 : 0;
    if (!bp)
        return 1;

    unsigned char a = *ap++, b = *bp++;
    for (; a || b; a = *ap++, b = *bp++)
    {
        if (a > '9' || b > '9' || a < '0' || b < '0')
        {
            /* at least one side is not a digit: compare as lowercase */
            if (a <= 'Z' && a >= 'A')
                a += 'a' - 'A';
            if (b <= 'Z' && b >= 'A')
                b += 'a' - 'A';

            if (a > b)
                return 1;
            if (a < b)
                return -1;
        }
        else
        {
            /* Both sides start a number.  <a>/<b> hold the current digit and
             * <ap>/<bp> point at the character after it. */

            /* skip leading zeros, keeping at least one digit */
            while (a == '0' && *ap >= '0' && *ap <= '9')
                a = *ap++;
            while (b == '0' && *bp >= '0' && *bp <= '9')
                b = *bp++;

            /* Walk both numbers in step.  The number with more digits is the
             * larger one; for equal lengths, the first differing digit
             * decides. */
            int diff = 0;
            while (1)
            {
                if (!diff)
                    diff = (int)a - (int)b;

                int a_more = (*ap >= '0' && *ap <= '9');
                int b_more = (*bp >= '0' && *bp <= '9');

                if (a_more != b_more)
                    return a_more ? 1 : -1;
                if (!a_more)
                    break;

                a = *ap++;
                b = *bp++;
            }

            if (diff)
                return diff > 0 ? 1 : -1;

            /* equal numbers: continue after them */
        }
    }

    return 0;
}
894 
895 /* Decodes percent-encoded strings, then compares them with str_compare. */
896 
str_compare_encoded(const char * ap,const char * bp)897 EXPORT int str_compare_encoded(const char * ap, const char * bp)
898 {
899     if (!ap)
900         return bp ? -1 : 0;
901     if (!bp)
902         return 1;
903 
904     unsigned char a = *ap++, b = *bp++;
905     for (; a || b; a = *ap++, b = *bp++)
906     {
907         if (a == '%' && ap[0] && ap[1])
908         {
909             a = (FROM_HEX(ap[0]) << 4) | FROM_HEX(ap[1]);
910             ap += 2;
911         }
912         if (b == '%' && bp[0] && bp[1])
913         {
914             b = (FROM_HEX(bp[0]) << 4) | FROM_HEX(bp[1]);
915             bp += 2;
916         }
917 
918         if (a > '9' || b > '9' || a < '0' || b < '0')
919         {
920             if (a <= 'Z' && a >= 'A')
921                 a += 'a' - 'A';
922             if (b <= 'Z' && b >= 'A')
923                 b += 'a' - 'A';
924 
925             if (a > b)
926                 return 1;
927             if (a < b)
928                 return -1;
929         }
930         else
931         {
932             int x = a - '0';
933             for (; (a = *ap) <= '9' && a >= '0'; ap++)
934                 x = 10 * x + (a - '0');
935 
936             int y = b - '0';
937             for (; (b = *bp) >= '0' && b <= '9'; bp++)
938                 y = 10 * y + (b - '0');
939 
940             if (x > y)
941                 return 1;
942             if (x < y)
943                 return -1;
944         }
945     }
946 
947     return 0;
948 }
949 
str_list_to_index(const char * list,const char * delims)950 EXPORT Index<String> str_list_to_index(const char * list, const char * delims)
951 {
952     char dmap[256] = {0};
953 
954     for (; *delims; delims++)
955         dmap[(unsigned char)(*delims)] = 1;
956 
957     Index<String> index;
958     const char * word = nullptr;
959 
960     for (; *list; list++)
961     {
962         if (dmap[(unsigned char)(*list)])
963         {
964             if (word)
965             {
966                 index.append(String(str_copy(word, list - word)));
967                 word = nullptr;
968             }
969         }
970         else
971         {
972             if (!word)
973             {
974                 word = list;
975             }
976         }
977     }
978 
979     if (word)
980         index.append(String(word));
981 
982     return index;
983 }
984 
index_to_str_list(const Index<String> & index,const char * sep)985 EXPORT StringBuf index_to_str_list(const Index<String> & index,
986                                    const char * sep)
987 {
988     StringBuf str(-1);
989     char * set = str;
990     int left = str.len();
991     int seplen = strlen(sep);
992 
993     for (const String & s : index)
994     {
995         int len = strlen(s);
996         if (len + seplen > left)
997             throw std::bad_alloc();
998 
999         if (set > str)
1000         {
1001             memcpy(set, sep, seplen);
1002 
1003             set += seplen;
1004             left -= seplen;
1005         }
1006 
1007         memcpy(set, s, len);
1008 
1009         set += len;
1010         left -= len;
1011     }
1012 
1013     str.resize(set - str);
1014     return str;
1015 }
1016 
1017 /*
1018  * Routines to convert numbers between string and binary representations.
1019  *
1020  * Goals:
1021  *
1022  *  - Accuracy, meaning that we can convert back and forth between string and
1023  *    binary without the number changing slightly each time.
1024  *  - Consistency, meaning that we get the same results no matter what
1025  *    architecture or locale we have to deal with.
1026  *  - Readability, meaning that the number one is rendered "1", not "1.000".
1027  *
1028  * Values between -1,000,000,000 and 1,000,000,000 (inclusive) are guaranteed to
1029  * have an accuracy of 6 decimal places.
1030  */
1031 
/* Parses a run of decimal digits at the start of <string> and returns its
 * value.  Parsing stops at the first non-digit or on reaching <stop>,
 * whichever comes first.  If <end> is not null, the position where parsing
 * stopped is stored there.  No digits at all gives 0. */
static unsigned str_to_uint(const char * string, const char ** end = nullptr,
                            const char * stop = nullptr)
{
    unsigned val = 0;
    const char * p = string;

    while (p != stop && *p >= '0' && *p <= '9')
    {
        val = val * 10 + (*p - '0');
        p++;
    }

    if (end)
        *end = p;

    return val;
}
1044 
/*
 * Returns the number of decimal digits needed to print <val>.
 * Zero is counted as needing one digit.
 */
static int digits_for(unsigned val)
{
    int count = 1;

    while (val >= 10)
    {
        val /= 10;
        count++;
    }

    return count;
}
1056 
/*
 * Writes exactly <digits> decimal characters of <val> into <buf>, filling from
 * the least significant digit backwards.  Shorter values are padded on the
 * left with '0'; higher-order digits that do not fit are dropped.  No
 * terminating null is written.
 */
static void uint_to_str(unsigned val, char * buf, int digits)
{
    for (int idx = digits - 1; idx >= 0; idx--)
    {
        buf[idx] = '0' + val % 10;
        val /= 10;
    }
}
1062 
str_to_int(const char * string)1063 EXPORT int str_to_int(const char * string)
1064 {
1065     bool neg = (string[0] == '-');
1066     if (neg || string[0] == '+')
1067         string++;
1068 
1069     unsigned val = str_to_uint(string);
1070     return neg ? -val : val;
1071 }
1072 
str_to_double(const char * string)1073 EXPORT double str_to_double(const char * string)
1074 {
1075     bool neg = (string[0] == '-');
1076     if (neg || string[0] == '+')
1077         string++;
1078 
1079     const char * p;
1080     double val = str_to_uint(string, &p);
1081 
1082     if (*(p++) == '.')
1083     {
1084         const char * end;
1085         double decimal = str_to_uint(p, &end, p + MAX_POW10);
1086         val += decimal / int_pow10[end - p];
1087     }
1088 
1089     return neg ? -val : val;
1090 }
1091 
str_insert_int(StringBuf & string,int pos,int val)1092 EXPORT void str_insert_int(StringBuf & string, int pos, int val)
1093 {
1094     bool neg = (val < 0);
1095     unsigned absval = neg ? -val : val;
1096 
1097     int digits = digits_for(absval);
1098     int len = (neg ? 1 : 0) + digits;
1099     char * set = string.insert(pos, nullptr, len);
1100 
1101     if (neg)
1102         *(set++) = '-';
1103 
1104     uint_to_str(absval, set, digits);
1105 }
1106 
str_insert_double(StringBuf & string,int pos,double val)1107 EXPORT void str_insert_double(StringBuf & string, int pos, double val)
1108 {
1109     bool neg = (val < 0);
1110     if (neg)
1111         val = -val;
1112 
1113     unsigned i = floor(val);
1114     unsigned f = round((val - i) * 1000000);
1115 
1116     if (f == 1000000)
1117     {
1118         i++;
1119         f = 0;
1120     }
1121 
1122     int decimals = f ? 6 : 0;
1123     for (; decimals && !(f % 10); f /= 10)
1124         decimals--;
1125 
1126     int digits = digits_for(i);
1127     int len = (neg ? 1 : 0) + digits + (decimals ? 1 : 0) + decimals;
1128     char * set = string.insert(pos, nullptr, len);
1129 
1130     if (neg)
1131         *(set++) = '-';
1132 
1133     uint_to_str(i, set, digits);
1134 
1135     if (decimals)
1136     {
1137         set += digits;
1138         *(set++) = '.';
1139         uint_to_str(f, set, decimals);
1140     }
1141 }
1142 
int_to_str(int val)1143 EXPORT StringBuf int_to_str(int val)
1144 {
1145     StringBuf buf;
1146     str_insert_int(buf, 0, val);
1147     return buf;
1148 }
1149 
double_to_str(double val)1150 EXPORT StringBuf double_to_str(double val)
1151 {
1152     StringBuf buf;
1153     str_insert_double(buf, 0, val);
1154     return buf;
1155 }
1156 
str_to_int_array(const char * string,int * array,int count)1157 EXPORT bool str_to_int_array(const char * string, int * array, int count)
1158 {
1159     Index<String> index = str_list_to_index(string, ", ");
1160 
1161     if (index.len() != count)
1162         return false;
1163 
1164     for (int i = 0; i < count; i++)
1165         array[i] = str_to_int(index[i]);
1166 
1167     return true;
1168 }
1169 
int_array_to_str(const int * array,int count)1170 EXPORT StringBuf int_array_to_str(const int * array, int count)
1171 {
1172     Index<String> index;
1173 
1174     for (int i = 0; i < count; i++)
1175         index.append(String(int_to_str(array[i])));
1176 
1177     return index_to_str_list(index, ",");
1178 }
1179 
str_to_double_array(const char * string,double * array,int count)1180 EXPORT bool str_to_double_array(const char * string, double * array, int count)
1181 {
1182     Index<String> index = str_list_to_index(string, ", ");
1183 
1184     if (index.len() != count)
1185         return false;
1186 
1187     for (int i = 0; i < count; i++)
1188         array[i] = str_to_double(index[i]);
1189 
1190     return true;
1191 }
1192 
double_array_to_str(const double * array,int count)1193 EXPORT StringBuf double_array_to_str(const double * array, int count)
1194 {
1195     Index<String> index;
1196 
1197     for (int i = 0; i < count; i++)
1198         index.append(String(double_to_str(array[i])));
1199 
1200     return index_to_str_list(index, ",");
1201 }
1202 
str_format_time(int64_t milliseconds)1203 EXPORT StringBuf str_format_time(int64_t milliseconds)
1204 {
1205     bool neg = milliseconds < 0;
1206 
1207     if (neg)
1208         milliseconds *= -1;
1209 
1210     int hours = milliseconds / 3600000;
1211     int minutes = milliseconds / 60000;
1212     int seconds = (milliseconds / 1000) % 60;
1213 
1214     if (hours && aud_get_bool("show_hours"))
1215         return str_printf("%s%d:%02d:%02d", neg ? "- " : "", hours,
1216                           minutes % 60, seconds);
1217     else
1218     {
1219         bool zero = aud_get_bool("leading_zero");
1220         return str_printf(zero ? "%s%02d:%02d" : "%s%d:%02d", neg ? "- " : "",
1221                           minutes, seconds);
1222     }
1223 }
1224