1 /*
2 * This file is part of mpv.
3 *
4 * mpv is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * mpv is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #ifndef MPLAYER_BSTR_H
19 #define MPLAYER_BSTR_H
20
21 #include <stdint.h>
22 #include <stddef.h>
23 #include <string.h>
24 #include <stdbool.h>
25 #include <stdarg.h>
26
27 #include "mpv_talloc.h"
28 #include "osdep/compiler.h"
29
/* A string given as a pointer plus an explicit byte count.
 *
 * NOTE: 'len' is a size_t, but most of the string-handling functions below
 * assume the input size was sanity checked and that len fits in an int.
 */
typedef struct bstr bstr;

struct bstr {
    unsigned char *start;   // first byte of the string; may be NULL
    size_t len;             // number of bytes at start
};
37
38 // If str.start is NULL, return NULL.
bstrdup0(void * talloc_ctx,struct bstr str)39 static inline char *bstrdup0(void *talloc_ctx, struct bstr str)
40 {
41 return talloc_strndup(talloc_ctx, (char *)str.start, str.len);
42 }
43
44 // Like bstrdup0(), but always return a valid C-string.
bstrto0(void * talloc_ctx,struct bstr str)45 static inline char *bstrto0(void *talloc_ctx, struct bstr str)
46 {
47 return str.start ? bstrdup0(talloc_ctx, str) : talloc_strdup(talloc_ctx, "");
48 }
49
50 // Return start = NULL iff that is true for the original.
bstrdup(void * talloc_ctx,struct bstr str)51 static inline struct bstr bstrdup(void *talloc_ctx, struct bstr str)
52 {
53 struct bstr r = { NULL, str.len };
54 if (str.start)
55 r.start = (unsigned char *)talloc_memdup(talloc_ctx, str.start, str.len);
56 return r;
57 }
58
bstr0(const char * s)59 static inline struct bstr bstr0(const char *s)
60 {
61 return (struct bstr){(unsigned char *)s, s ? strlen(s) : 0};
62 }
63
// Comparison, search and parsing primitives. They are only declared here.
// NOTE(review): unless stated otherwise, the conventions described below are
// inferred from names and signatures alone - confirm against the definitions.

// Three-way comparison of str1 and str2; a result of 0 is what bstr_equals()
// in this header treats as "equal".
int bstrcmp(struct bstr str1, struct bstr str2);
// Presumably bstrcmp() ignoring case - TODO confirm.
int bstrcasecmp(struct bstr str1, struct bstr str2);
// Search str for the character c (the "r" variant presumably from the end).
// NOTE(review): presumably returns an index, negative if not found - confirm.
int bstrchr(struct bstr str, int c);
int bstrrchr(struct bstr str, int c);
// Presumably strspn()/strcspn() counterparts operating on a bstr - TODO confirm.
int bstrspn(struct bstr str, const char *accept);
int bstrcspn(struct bstr str, const char *reject);

// Search haystack for needle.
// NOTE(review): presumably returns an index, negative if not found - confirm.
int bstr_find(struct bstr haystack, struct bstr needle);
// NOTE(review): presumably strip whitespace from the left side / from both
// sides of str - confirm which characters count as whitespace.
struct bstr bstr_lstrip(struct bstr str);
struct bstr bstr_strip(struct bstr str);
// NOTE(review): splitting helpers; how sep/tok are interpreted (set of
// characters vs. literal token) and what *rest, *out_left and *out_right
// receive must be checked in the definitions.
struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest);
bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right);
// NOTE(review): presumably the sub-string between the offsets start and end;
// confirm how negative or out-of-range offsets are treated.
struct bstr bstr_splice(struct bstr str, int start, int end);
// NOTE(review): presumably strtoll()/strtod() counterparts, with *rest set to
// the unparsed remainder - confirm.
long long bstrtoll(struct bstr str, struct bstr *rest, int base);
double bstrtod(struct bstr str, struct bstr *rest);
// NOTE(review): returns nothing and takes str by value, so this presumably
// lowercases the bytes at str.start in place - confirm.
void bstr_lower(struct bstr str);
// NOTE(review): presumably sscanf() applied to the contents of str - confirm.
int bstr_sscanf(struct bstr str, const char *format, ...);
81
// Decode a string containing hexadecimal data. All whitespace will be silently
// ignored. When successful, this allocates a new array to store the output.
// NOTE(review): presumably the array is returned through *out with talloc_ctx
// as its talloc parent, and the return value signals success - confirm.
bool bstr_decode_hex(void *talloc_ctx, struct bstr hex, struct bstr *out);

// Decode the UTF-8 code point at the start of the string, and return the
// character.
// After calling this function, *out_next will point to the next character.
// out_next can be NULL.
// On error, -1 is returned, and *out_next is not modified.
int bstr_decode_utf8(struct bstr str, struct bstr *out_next);

// Return the UTF-8 code point at the start of the string, as a bstr.
// After calling this function, *out_next will point to the next character.
// out_next can be NULL.
// On error, an empty string is returned, and *out_next is not modified.
struct bstr bstr_split_utf8(struct bstr str, struct bstr *out_next);

// Return the length of the UTF-8 sequence that starts with the given byte.
// Given a string char *s, the next UTF-8 code point is to be expected at
// s + bstr_parse_utf8_code_length(s[0])
// On error, -1 is returned. On success, it returns a value in the range [1, 4].
int bstr_parse_utf8_code_length(unsigned char b);

// Return >= 0 if the string is valid UTF-8, otherwise a negative error code.
// Embedded \0 bytes are considered valid.
// This returns -N if the UTF-8 string was likely just cut off in the middle of
// a UTF-8 sequence, where N is the number of missing bytes: -1 means 1 byte
// was missing, -5 means 5 bytes were missing.
// If the string was likely not cut off, -8 is returned.
// Use (return_value > -8) to check whether the string is valid UTF-8 or valid
// but cut-off UTF-8.
int bstr_validate_utf8(struct bstr s);

// Force the input string to valid UTF-8. If invalid UTF-8 encoding is
// encountered, the invalid bytes are interpreted as Latin-1.
// Embedded \0 bytes are considered valid.
// If replacement happens, a newly allocated string is returned (with a \0
// byte added past its end for convenience). The string is allocated via
// talloc, with talloc_ctx as parent.
// NOTE(review): what is returned when no replacement happens is not stated
// here (presumably s itself) - confirm.
struct bstr bstr_sanitize_utf8_latin1(void *talloc_ctx, struct bstr s);

// Return the text before the occurrence of the character c. Change *rest to
// point to the text following this character. (rest can be NULL.)
struct bstr bstr_splitchar(struct bstr str, struct bstr *rest, const char c);
125
126 // Like bstr_splitchar. Trailing newlines are not stripped.
bstr_getline(struct bstr str,struct bstr * rest)127 static inline struct bstr bstr_getline(struct bstr str, struct bstr *rest)
128 {
129 return bstr_splitchar(str, rest, '\n');
130 }
131
// Strip one trailing line break. This is intended for use with bstr_getline,
// and will remove the trailing \n or \r\n sequence.
struct bstr bstr_strip_linebreaks(struct bstr str);

// Append to the string *s: either the bstr "append", or text produced from a
// printf-style format string.
// NOTE(review): presumably *s is grown via talloc with talloc_ctx as parent;
// PRINTF_ATTRIBUTE (from osdep/compiler.h) presumably enables compiler format
// checking with fmt as parameter 3 - confirm both.
void bstr_xappend(void *talloc_ctx, bstr *s, bstr append);
void bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...)
    PRINTF_ATTRIBUTE(3, 4);
void bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt, va_list va)
    PRINTF_ATTRIBUTE(3, 0);

// If s starts/ends with prefix, return true and return the rest of the string
// in s. (For bstr_eatend(), the parameter named "prefix" is the text that is
// matched against the end of s.)
bool bstr_eatstart(struct bstr *s, struct bstr prefix);
bool bstr_eatend(struct bstr *s, struct bstr prefix);

// NOTE(review): presumably case-insensitive counterparts of bstr_startswith()
// and bstr_endswith() - confirm.
bool bstr_case_startswith(struct bstr s, struct bstr prefix);
bool bstr_case_endswith(struct bstr s, struct bstr suffix);
// NOTE(review): presumably return str without its filename extension, and the
// extension of s, respectively; whether the '.' is included and what happens
// when there is no extension must be checked in the definitions.
struct bstr bstr_strip_ext(struct bstr str);
struct bstr bstr_get_ext(struct bstr s);
151
bstr_cut(struct bstr str,int n)152 static inline struct bstr bstr_cut(struct bstr str, int n)
153 {
154 if (n < 0) {
155 n += str.len;
156 if (n < 0)
157 n = 0;
158 }
159 if (((size_t)n) > str.len)
160 n = str.len;
161 return (struct bstr){str.start + n, str.len - n};
162 }
163
// Return true if the first prefix.len bytes of str are byte-wise equal to
// prefix. An empty prefix matches every string, including an empty one.
static inline bool bstr_startswith(struct bstr str, struct bstr prefix)
{
    if (prefix.len > str.len)
        return false;
    // Handle the empty prefix without calling memcmp(): either .start may be
    // NULL here (bstr0(NULL) yields { NULL, 0 }), and passing a null pointer
    // to memcmp() is undefined behavior even when the length is 0.
    if (prefix.len == 0)
        return true;
    return memcmp(str.start, prefix.start, prefix.len) == 0;
}
170
bstr_startswith0(struct bstr str,const char * prefix)171 static inline bool bstr_startswith0(struct bstr str, const char *prefix)
172 {
173 return bstr_startswith(str, bstr0(prefix));
174 }
175
// Return true if the last suffix.len bytes of str are byte-wise equal to
// suffix. An empty suffix matches every string, including an empty one.
static inline bool bstr_endswith(struct bstr str, struct bstr suffix)
{
    if (suffix.len > str.len)
        return false;
    // Handle the empty suffix up front: either .start may be NULL here
    // (bstr0(NULL) yields { NULL, 0 }), and neither pointer arithmetic on a
    // null pointer nor passing one to memcmp() is defined, even for length 0.
    if (suffix.len == 0)
        return true;
    const unsigned char *tail = str.start + (str.len - suffix.len);
    return memcmp(tail, suffix.start, suffix.len) == 0;
}
182
bstr_endswith0(struct bstr str,const char * suffix)183 static inline bool bstr_endswith0(struct bstr str, const char *suffix)
184 {
185 return bstr_endswith(str, bstr0(suffix));
186 }
187
bstrcmp0(struct bstr str1,const char * str2)188 static inline int bstrcmp0(struct bstr str1, const char *str2)
189 {
190 return bstrcmp(str1, bstr0(str2));
191 }
192
// Return true if str1 and str2 have the same length and the same contents.
static inline bool bstr_equals(struct bstr str1, struct bstr str2)
{
    // Strings of different length can never be equal.
    if (str1.len != str2.len)
        return false;
    // Same length and same start pointer: no need to look at the contents.
    if (str1.start == str2.start)
        return true;
    return bstrcmp(str1, str2) == 0;
}
200
bstr_equals0(struct bstr str1,const char * str2)201 static inline bool bstr_equals0(struct bstr str1, const char *str2)
202 {
203 return bstr_equals(str1, bstr0(str2));
204 }
205
bstrcasecmp0(struct bstr str1,const char * str2)206 static inline int bstrcasecmp0(struct bstr str1, const char *str2)
207 {
208 return bstrcasecmp(str1, bstr0(str2));
209 }
210
bstr_find0(struct bstr haystack,const char * needle)211 static inline int bstr_find0(struct bstr haystack, const char *needle)
212 {
213 return bstr_find(haystack, bstr0(needle));
214 }
215
bstr_eatstart0(struct bstr * s,const char * prefix)216 static inline bool bstr_eatstart0(struct bstr *s, const char *prefix)
217 {
218 return bstr_eatstart(s, bstr0(prefix));
219 }
220
bstr_eatend0(struct bstr * s,const char * prefix)221 static inline bool bstr_eatend0(struct bstr *s, const char *prefix)
222 {
223 return bstr_eatend(s, bstr0(prefix));
224 }
225
// Expand to a pair of arguments (not a single value!) matching the "%.*s"
// printf syntax: the length cast to int, followed by the data pointer. A NULL
// start is replaced by "" so that printf never receives a null pointer.
// The argument is evaluated more than once.
#define BSTR_P(s) (int)((s).len), ((s).start ? (char*)(s).start : "")

// Space, form feed, newline, carriage return, tab and vertical tab.
#define WHITESPACE " \f\n\r\t\v"
230
231 #endif /* MPLAYER_BSTR_H */
232