1 /* The MIT License
2
3 Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
4 Copyright (C) 2013-2014, 2016, 2018-2020 Genome Research Ltd.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice shall be
15 included in all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 SOFTWARE.
25 */
26
27 #ifndef KSTRING_H
28 #define KSTRING_H
29
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <limits.h>
36 #include <errno.h>
37 #include <sys/types.h>
38
39 #include "hts_defs.h"
40 #include "kroundup.h"
41
42 #if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4))
43 #ifdef __MINGW_PRINTF_FORMAT
44 #define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__MINGW_PRINTF_FORMAT, fmt, arg)))
45 #else
46 #define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__printf__, fmt, arg)))
47 #endif // __MINGW_PRINTF_FORMAT
48 #else
49 #define KS_ATTR_PRINTF(fmt, arg)
50 #endif
51
52 #ifndef HAVE___BUILTIN_CLZ
53 #if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
54 #define HAVE___BUILTIN_CLZ 1
55 #endif
56 #endif
57
58 /* kstring_t is a simple non-opaque type whose fields are likely to be
59 * used directly by user code (but see also ks_str() and ks_len() below).
60 * A kstring_t object is initialised by either of
61 * kstring_t str = KS_INITIALIZE;
62 * kstring_t str; ...; ks_initialize(&str);
63 * and either ownership of the underlying buffer should be given away before
64 * the object disappears (see ks_release() below) or the kstring_t should be
65 * destroyed with ks_free(&str) or free(str.s) */
66 #ifndef KSTRING_T
67 #define KSTRING_T kstring_t
68 typedef struct kstring_t {
69 size_t l, m;
70 char *s;
71 } kstring_t;
72 #endif
73
74 typedef struct ks_tokaux_t {
75 uint64_t tab[4];
76 int sep, finished;
77 const char *p; // end of the current token
78 } ks_tokaux_t;
79
80 #ifdef __cplusplus
81 extern "C" {
82 #endif
83
84 HTSLIB_EXPORT
85 int kvsprintf(kstring_t *s, const char *fmt, va_list ap) KS_ATTR_PRINTF(2,0);
86
87 HTSLIB_EXPORT
88 int ksprintf(kstring_t *s, const char *fmt, ...) KS_ATTR_PRINTF(2,3);
89
90 HTSLIB_EXPORT
91 int kputd(double d, kstring_t *s); // custom %g only handler
92
93 HTSLIB_EXPORT
94 int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
95
96 HTSLIB_EXPORT
97 char *kstrstr(const char *str, const char *pat, int **_prep);
98
99 HTSLIB_EXPORT
100 char *kstrnstr(const char *str, const char *pat, int n, int **_prep);
101
102 HTSLIB_EXPORT
103 void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep);
104
105 /* kstrtok() is similar to strtok_r() except that str is not
106 * modified and both str and sep can be NULL. For efficiency, it is
107 * actually recommended to set both to NULL in the subsequent calls
108 * if sep is not changed. */
109 HTSLIB_EXPORT
110 char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux);
111
112 /* kgetline() uses the supplied fgets()-like function to read a "\n"-
113 * or "\r\n"-terminated line from fp. The line read is appended to the
114 * kstring without its terminator and 0 is returned; EOF is returned at
115 * EOF or on error (determined by querying fp, as per fgets()). */
116 typedef char *kgets_func(char *, int, void *);
117 HTSLIB_EXPORT
118 int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp);
119
120 /* kgetline2() uses the supplied hgetln()-like function to read a "\n"-
121 * or "\r\n"-terminated line from fp. The line read is appended to the
122 * ksring without its terminator and 0 is returned; EOF is returned at
123 * EOF or on error (determined by querying fp, as per fgets()). */
124 typedef ssize_t kgets_func2(char *, size_t, void *);
125 HTSLIB_EXPORT
126 int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp);
127
128 #ifdef __cplusplus
129 }
130 #endif
131
132 /// kstring initializer for structure assignment
133 #define KS_INITIALIZE { 0, 0, NULL }
134
135 /// kstring initializer for pointers
136 /**
137 @note Not to be used if the buffer has been allocated. Use ks_release()
138 or ks_clear() instead.
139 */
140
ks_initialize(kstring_t * s)141 static inline void ks_initialize(kstring_t *s)
142 {
143 s->l = s->m = 0;
144 s->s = NULL;
145 }
146
147 /// Resize a kstring to a given capacity
ks_resize(kstring_t * s,size_t size)148 static inline int ks_resize(kstring_t *s, size_t size)
149 {
150 if (s->m < size) {
151 char *tmp;
152 size = (size > (SIZE_MAX>>2)) ? size : size + (size >> 1);
153 tmp = (char*)realloc(s->s, size);
154 if (!tmp)
155 return -1;
156 s->s = tmp;
157 s->m = size;
158 }
159 return 0;
160 }
161
162 /// Increase kstring capacity by a given number of bytes
ks_expand(kstring_t * s,size_t expansion)163 static inline int ks_expand(kstring_t *s, size_t expansion)
164 {
165 size_t new_size = s->l + expansion;
166
167 if (new_size < s->l) // Overflow check
168 return -1;
169 return ks_resize(s, new_size);
170 }
171
172 /// Returns the kstring buffer
ks_str(kstring_t * s)173 static inline char *ks_str(kstring_t *s)
174 {
175 return s->s;
176 }
177
178 /// Returns the kstring buffer, or an empty string if l == 0
179 /**
180 * Unlike ks_str(), this function will never return NULL. If the kstring is
181 * empty it will return a read-only empty string. As the returned value
182 * may be read-only, the caller should not attempt to modify it.
183 */
ks_c_str(kstring_t * s)184 static inline const char *ks_c_str(kstring_t *s)
185 {
186 return s->l && s->s ? s->s : "";
187 }
188
ks_len(kstring_t * s)189 static inline size_t ks_len(kstring_t *s)
190 {
191 return s->l;
192 }
193
194 /// Reset kstring length to zero
195 /**
196 @return The kstring itself
197
198 Example use: kputsn(string, len, ks_clear(s))
199 */
ks_clear(kstring_t * s)200 static inline kstring_t *ks_clear(kstring_t *s)
201 {
202 s->l = 0;
203 return s;
204 }
205
206 // Give ownership of the underlying buffer away to something else (making
207 // that something else responsible for freeing it), leaving the kstring_t
208 // empty and ready to be used again, or ready to go out of scope without
209 // needing free(str.s) to prevent a memory leak.
ks_release(kstring_t * s)210 static inline char *ks_release(kstring_t *s)
211 {
212 char *ss = s->s;
213 s->l = s->m = 0;
214 s->s = NULL;
215 return ss;
216 }
217
218 /// Safely free the underlying buffer in a kstring.
ks_free(kstring_t * s)219 static inline void ks_free(kstring_t *s)
220 {
221 if (s) {
222 free(s->s);
223 ks_initialize(s);
224 }
225 }
226
kputsn(const char * p,size_t l,kstring_t * s)227 static inline int kputsn(const char *p, size_t l, kstring_t *s)
228 {
229 size_t new_sz = s->l + l + 2;
230 if (new_sz <= s->l || ks_resize(s, new_sz) < 0)
231 return EOF;
232 memcpy(s->s + s->l, p, l);
233 s->l += l;
234 s->s[s->l] = 0;
235 return l;
236 }
237
kputs(const char * p,kstring_t * s)238 static inline int kputs(const char *p, kstring_t *s)
239 {
240 if (!p) { errno = EFAULT; return -1; }
241 return kputsn(p, strlen(p), s);
242 }
243
kputc(int c,kstring_t * s)244 static inline int kputc(int c, kstring_t *s)
245 {
246 if (ks_resize(s, s->l + 2) < 0)
247 return EOF;
248 s->s[s->l++] = c;
249 s->s[s->l] = 0;
250 return (unsigned char)c;
251 }
252
kputc_(int c,kstring_t * s)253 static inline int kputc_(int c, kstring_t *s)
254 {
255 if (ks_resize(s, s->l + 1) < 0)
256 return EOF;
257 s->s[s->l++] = c;
258 return 1;
259 }
260
kputsn_(const void * p,size_t l,kstring_t * s)261 static inline int kputsn_(const void *p, size_t l, kstring_t *s)
262 {
263 size_t new_sz = s->l + l;
264 if (new_sz < s->l || ks_resize(s, new_sz ? new_sz : 1) < 0)
265 return EOF;
266 memcpy(s->s + s->l, p, l);
267 s->l += l;
268 return l;
269 }
270
kputuw(unsigned x,kstring_t * s)271 static inline int kputuw(unsigned x, kstring_t *s)
272 {
273 #if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U
274 static const unsigned int kputuw_num_digits[32] = {
275 10, 10, 10, 9, 9, 9, 8, 8,
276 8, 7, 7, 7, 7, 6, 6, 6,
277 5, 5, 5, 4, 4, 4, 4, 3,
278 3, 3, 2, 2, 2, 1, 1, 1
279 };
280 static const unsigned int kputuw_thresholds[32] = {
281 0, 0, 1000000000U, 0, 0, 100000000U, 0, 0,
282 10000000, 0, 0, 0, 1000000, 0, 0, 100000,
283 0, 0, 10000, 0, 0, 0, 1000, 0,
284 0, 100, 0, 0, 10, 0, 0, 0
285 };
286 #else
287 uint64_t m;
288 #endif
289 static const char kputuw_dig2r[] =
290 "00010203040506070809"
291 "10111213141516171819"
292 "20212223242526272829"
293 "30313233343536373839"
294 "40414243444546474849"
295 "50515253545556575859"
296 "60616263646566676869"
297 "70717273747576777879"
298 "80818283848586878889"
299 "90919293949596979899";
300 unsigned int l, j;
301 char *cp;
302
303 // Trivial case - also prevents __builtin_clz(0), which is undefined
304 if (x < 10) {
305 if (ks_resize(s, s->l + 2) < 0)
306 return EOF;
307 s->s[s->l++] = '0'+x;
308 s->s[s->l] = 0;
309 return 0;
310 }
311
312 // Find out how many digits are to be printed.
313 #if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U
314 /*
315 * Table method - should be quick if clz can be done in hardware.
316 * Find the most significant bit of the value to print and look
317 * up in a table to find out how many decimal digits are needed.
318 * This number needs to be adjusted by 1 for cases where the decimal
319 * length could vary for a given number of bits (for example,
320 * a four bit number could be between 8 and 15).
321 */
322
323 l = __builtin_clz(x);
324 l = kputuw_num_digits[l] - (x < kputuw_thresholds[l]);
325 #else
326 // Fallback for when clz is not available
327 m = 1;
328 l = 0;
329 do {
330 l++;
331 m *= 10;
332 } while (x >= m);
333 #endif
334
335 if (ks_resize(s, s->l + l + 2) < 0)
336 return EOF;
337
338 // Add digits two at a time
339 j = l;
340 cp = s->s + s->l;
341 while (x >= 10) {
342 const char *d = &kputuw_dig2r[2*(x%100)];
343 x /= 100;
344 memcpy(&cp[j-=2], d, 2);
345 }
346
347 // Last one (if necessary). We know that x < 10 by now.
348 if (j == 1)
349 cp[0] = x + '0';
350
351 s->l += l;
352 s->s[s->l] = 0;
353 return 0;
354 }
355
kputw(int c,kstring_t * s)356 static inline int kputw(int c, kstring_t *s)
357 {
358 unsigned int x = c;
359 if (c < 0) {
360 x = -x;
361 if (ks_resize(s, s->l + 3) < 0)
362 return EOF;
363 s->s[s->l++] = '-';
364 }
365
366 return kputuw(x, s);
367 }
368
kputll(long long c,kstring_t * s)369 static inline int kputll(long long c, kstring_t *s)
370 {
371 char buf[32];
372 int i, l = 0;
373 unsigned long long x = c;
374 if (c < 0) x = -x;
375 do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
376 if (c < 0) buf[l++] = '-';
377 if (ks_resize(s, s->l + l + 2) < 0)
378 return EOF;
379 for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
380 s->s[s->l] = 0;
381 return 0;
382 }
383
kputl(long c,kstring_t * s)384 static inline int kputl(long c, kstring_t *s) {
385 return kputll(c, s);
386 }
387
388 /*
389 * Returns 's' split by delimiter, with *n being the number of components;
390 * NULL on failure.
391 */
ksplit(kstring_t * s,int delimiter,int * n)392 static inline int *ksplit(kstring_t *s, int delimiter, int *n)
393 {
394 int max = 0, *offsets = 0;
395 *n = ksplit_core(s->s, delimiter, &max, &offsets);
396 return offsets;
397 }
398
399 #endif
400