1 /*
2  * This file is part of mpv.
3  *
4  * mpv is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * mpv is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef MPLAYER_BSTR_H
19 #define MPLAYER_BSTR_H
20 
21 #include <stdint.h>
22 #include <stddef.h>
23 #include <string.h>
24 #include <stdbool.h>
25 #include <stdarg.h>
26 
27 #include "mpv_talloc.h"
28 #include "osdep/compiler.h"
29 
/* Counted byte string: 'start' points at the first byte, 'len' is the number
 * of bytes. 'start' may be NULL (bstr0(NULL) produces such a value).
 *
 * NOTE: 'len' is size_t, but most string-handling functions below assume
 * that input size has been sanity checked and len fits in an int.
 */
struct bstr {
    unsigned char *start;
    size_t len;
};
typedef struct bstr bstr;
37 
38 // If str.start is NULL, return NULL.
bstrdup0(void * talloc_ctx,struct bstr str)39 static inline char *bstrdup0(void *talloc_ctx, struct bstr str)
40 {
41     return talloc_strndup(talloc_ctx, (char *)str.start, str.len);
42 }
43 
44 // Like bstrdup0(), but always return a valid C-string.
bstrto0(void * talloc_ctx,struct bstr str)45 static inline char *bstrto0(void *talloc_ctx, struct bstr str)
46 {
47     return str.start ? bstrdup0(talloc_ctx, str) : talloc_strdup(talloc_ctx, "");
48 }
49 
50 // Return start = NULL iff that is true for the original.
bstrdup(void * talloc_ctx,struct bstr str)51 static inline struct bstr bstrdup(void *talloc_ctx, struct bstr str)
52 {
53     struct bstr r = { NULL, str.len };
54     if (str.start)
55         r.start = (unsigned char *)talloc_memdup(talloc_ctx, str.start, str.len);
56     return r;
57 }
58 
bstr0(const char * s)59 static inline struct bstr bstr0(const char *s)
60 {
61     return (struct bstr){(unsigned char *)s, s ? strlen(s) : 0};
62 }
63 
// Comparison and search helpers. They all return int, so (see the note on
// struct bstr) callers are expected to pass strings whose len fits in an int.
// NOTE(review): the exact return conventions (sign of the ordering result,
// value used for "not found") are set by the implementations, which are not
// part of this header -- confirm there before relying on them.
int bstrcmp(struct bstr str1, struct bstr str2);
int bstrcasecmp(struct bstr str1, struct bstr str2);
int bstrchr(struct bstr str, int c);
int bstrrchr(struct bstr str, int c);
int bstrspn(struct bstr str, const char *accept);
int bstrcspn(struct bstr str, const char *reject);

// Substring search, trimming, splitting and number parsing.
// NOTE(review): presumably the 'rest' / 'out_*' pointers receive the part of
// the input that was not consumed, and bstr_lower() rewrites the bytes behind
// str.start in place (it takes str by value and returns nothing) -- verify
// against the implementation.
int bstr_find(struct bstr haystack, struct bstr needle);
struct bstr bstr_lstrip(struct bstr str);
struct bstr bstr_strip(struct bstr str);
struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest);
bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right);
struct bstr bstr_splice(struct bstr str, int start, int end);
long long bstrtoll(struct bstr str, struct bstr *rest, int base);
double bstrtod(struct bstr str, struct bstr *rest);
void bstr_lower(struct bstr str);
int bstr_sscanf(struct bstr str, const char *format, ...);
81 
// Decode a string containing hexadecimal data. All whitespace will be silently
// ignored. When successful, this allocates a new array to store the output.
bool bstr_decode_hex(void *talloc_ctx, struct bstr hex, struct bstr *out);

// Decode the UTF-8 code point at the start of the string, and return the
// character.
// After calling this function, *out_next will point to the next character.
// out_next can be NULL.
// On error, -1 is returned, and *out_next is not modified.
int bstr_decode_utf8(struct bstr str, struct bstr *out_next);

// Return the UTF-8 code point at the start of the string.
// After calling this function, *out_next will point to the next character.
// out_next can be NULL.
// On error, an empty string is returned, and *out_next is not modified.
struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next);

// Return the length of the UTF-8 sequence that starts with the given byte.
// Given a string char *s, the next UTF-8 code point is to be expected at
//      s + bstr_parse_utf8_code_length(s[0])
// On error, -1 is returned. On success, it returns a value in the range [1, 4].
int bstr_parse_utf8_code_length(unsigned char b);

// Return >= 0 if the string is valid UTF-8, otherwise a negative error code.
// Embedded \0 bytes are considered valid.
// This returns -N if the UTF-8 string was likely just cut off in the middle of
// a UTF-8 sequence: -1 means 1 byte was missing, -5 means 5 bytes were missing.
// If the string was likely not cut off, -8 is returned.
// Use (return_value > -8) to check whether the string is valid UTF-8 or valid
// but cut-off UTF-8.
int bstr_validate_utf8(struct bstr s);

// Force the input string to valid UTF-8. If invalid UTF-8 encoding is
// encountered, the invalid bytes are interpreted as Latin-1.
// Embedded \0 bytes are considered valid.
// If replacement happens, a newly allocated string is returned (with a \0
// byte added past its end for convenience). The string is allocated via
// talloc, with talloc_ctx as parent.
struct bstr bstr_sanitize_utf8_latin1(void *talloc_ctx, struct bstr s);

// Return the text before the occurrence of the character c. Change *rest to
// point to the text following this character. (rest can be NULL.)
struct bstr bstr_splitchar(struct bstr str, struct bstr *rest, const char c);
125 
126 // Like bstr_splitchar. Trailing newlines are not stripped.
bstr_getline(struct bstr str,struct bstr * rest)127 static inline struct bstr bstr_getline(struct bstr str, struct bstr *rest)
128 {
129     return bstr_splitchar(str, rest, '\n');
130 }
131 
// Strip one trailing line break. This is intended for use with bstr_getline,
// and will remove the trailing \n or \r\n sequence.
struct bstr bstr_strip_linebreaks(struct bstr str);

// Append to *s. The two printf-style variants take a format string and are
// annotated with PRINTF_ATTRIBUTE so the compiler can check the arguments.
// NOTE(review): presumably *s is (re)allocated under talloc_ctx as needed --
// confirm against the implementation.
void bstr_xappend(void *talloc_ctx, bstr *s, bstr append);
void bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...)
    PRINTF_ATTRIBUTE(3, 4);
void bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt, va_list va)
    PRINTF_ATTRIBUTE(3, 0);

// If *s starts with (bstr_eatstart) or ends with (bstr_eatend) the string
// passed as 'prefix', return true and return the rest of the string in *s.
bool bstr_eatstart(struct bstr *s, struct bstr prefix);
bool bstr_eatend(struct bstr *s, struct bstr prefix);

// NOTE(review): judging by the names these are case-insensitive prefix/suffix
// tests and file-extension helpers; the implementations are not visible in
// this header -- verify before relying on details.
bool bstr_case_startswith(struct bstr s, struct bstr prefix);
bool bstr_case_endswith(struct bstr s, struct bstr suffix);
struct bstr bstr_strip_ext(struct bstr str);
struct bstr bstr_get_ext(struct bstr s);
151 
bstr_cut(struct bstr str,int n)152 static inline struct bstr bstr_cut(struct bstr str, int n)
153 {
154     if (n < 0) {
155         n += str.len;
156         if (n < 0)
157             n = 0;
158     }
159     if (((size_t)n) > str.len)
160         n = str.len;
161     return (struct bstr){str.start + n, str.len - n};
162 }
163 
// Return true if the first prefix.len bytes of str equal prefix.
// An empty prefix matches every string.
static inline bool bstr_startswith(struct bstr str, struct bstr prefix)
{
    if (str.len < prefix.len)
        return false;
    // memcmp() needs valid pointers even for a length of 0, but an empty bstr
    // may have start == NULL (e.g. bstr0(NULL)), so answer without calling it.
    if (prefix.len == 0)
        return true;
    return memcmp(str.start, prefix.start, prefix.len) == 0;
}
170 
bstr_startswith0(struct bstr str,const char * prefix)171 static inline bool bstr_startswith0(struct bstr str, const char *prefix)
172 {
173     return bstr_startswith(str, bstr0(prefix));
174 }
175 
// Return true if the last suffix.len bytes of str equal suffix.
// An empty suffix matches every string.
static inline bool bstr_endswith(struct bstr str, struct bstr suffix)
{
    if (str.len < suffix.len)
        return false;
    // memcmp() needs valid pointers even for a length of 0, and offsetting a
    // NULL start is not allowed either; an empty bstr may have start == NULL
    // (e.g. bstr0(NULL)), so answer without touching the pointers.
    if (suffix.len == 0)
        return true;
    const unsigned char *tail = str.start + (str.len - suffix.len);
    return memcmp(tail, suffix.start, suffix.len) == 0;
}
182 
bstr_endswith0(struct bstr str,const char * suffix)183 static inline bool bstr_endswith0(struct bstr str, const char *suffix)
184 {
185     return bstr_endswith(str, bstr0(suffix));
186 }
187 
bstrcmp0(struct bstr str1,const char * str2)188 static inline int bstrcmp0(struct bstr str1, const char *str2)
189 {
190     return bstrcmp(str1, bstr0(str2));
191 }
192 
// Return true if both strings have the same length and bstrcmp() reports them
// as equal. Strings of equal length that share the same start pointer are
// accepted without comparing any bytes.
static inline bool bstr_equals(struct bstr str1, struct bstr str2)
{
    if (str1.len != str2.len)
        return false;
    if (str1.start == str2.start)
        return true;
    return bstrcmp(str1, str2) == 0;
}
200 
bstr_equals0(struct bstr str1,const char * str2)201 static inline bool bstr_equals0(struct bstr str1, const char *str2)
202 {
203     return bstr_equals(str1, bstr0(str2));
204 }
205 
bstrcasecmp0(struct bstr str1,const char * str2)206 static inline int bstrcasecmp0(struct bstr str1, const char *str2)
207 {
208     return bstrcasecmp(str1, bstr0(str2));
209 }
210 
bstr_find0(struct bstr haystack,const char * needle)211 static inline int bstr_find0(struct bstr haystack, const char *needle)
212 {
213     return bstr_find(haystack, bstr0(needle));
214 }
215 
bstr_eatstart0(struct bstr * s,const char * prefix)216 static inline bool bstr_eatstart0(struct bstr *s, const char *prefix)
217 {
218     return bstr_eatstart(s, bstr0(prefix));
219 }
220 
bstr_eatend0(struct bstr * s,const char * prefix)221 static inline bool bstr_eatend0(struct bstr *s, const char *prefix)
222 {
223     return bstr_eatend(s, bstr0(prefix));
224 }
225 
// Expand to the two arguments (precision, pointer) that a "%.*s" printf
// conversion consumes -- a pair, not a single value! A NULL start is replaced
// by "" so printf never receives a NULL string. The argument is evaluated
// more than once.
#define BSTR_P(s) (int)((s).len), ((s).start ? (char *)(s).start : "")

// Characters treated as whitespace.
#define WHITESPACE " \f\n\r\t\v"
230 
231 #endif /* MPLAYER_BSTR_H */
232