1 /* The MIT License
2 
3    Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
4    Copyright (C) 2013-2014, 2016, 2018-2020 Genome Research Ltd.
5 
6    Permission is hereby granted, free of charge, to any person obtaining
7    a copy of this software and associated documentation files (the
8    "Software"), to deal in the Software without restriction, including
9    without limitation the rights to use, copy, modify, merge, publish,
10    distribute, sublicense, and/or sell copies of the Software, and to
11    permit persons to whom the Software is furnished to do so, subject to
12    the following conditions:
13 
14    The above copyright notice and this permission notice shall be
15    included in all copies or substantial portions of the Software.
16 
17    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24    SOFTWARE.
25 */
26 
27 #ifndef KSTRING_H
28 #define KSTRING_H
29 
30 #include <stdlib.h>
31 #include <string.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <limits.h>
36 #include <errno.h>
37 #include <sys/types.h>
38 
39 #include "hts_defs.h"
40 #include "kroundup.h"
41 
42 #if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4))
43 #ifdef __MINGW_PRINTF_FORMAT
44 #define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__MINGW_PRINTF_FORMAT, fmt, arg)))
45 #else
46 #define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__printf__, fmt, arg)))
47 #endif // __MINGW_PRINTF_FORMAT
48 #else
49 #define KS_ATTR_PRINTF(fmt, arg)
50 #endif
51 
52 #ifndef HAVE___BUILTIN_CLZ
53 #if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
54 #define HAVE___BUILTIN_CLZ 1
55 #endif
56 #endif
57 
58 /* kstring_t is a simple non-opaque type whose fields are likely to be
59  * used directly by user code (but see also ks_str() and ks_len() below).
60  * A kstring_t object is initialised by either of
61  *       kstring_t str = KS_INITIALIZE;
62  *       kstring_t str; ...; ks_initialize(&str);
63  * and either ownership of the underlying buffer should be given away before
64  * the object disappears (see ks_release() below) or the kstring_t should be
65  * destroyed with  ks_free(&str) or free(str.s) */
66 #ifndef KSTRING_T
67 #define KSTRING_T kstring_t
68 typedef struct kstring_t {
69 	size_t l, m;
70 	char *s;
71 } kstring_t;
72 #endif
73 
74 typedef struct ks_tokaux_t {
75 	uint64_t tab[4];
76 	int sep, finished;
77 	const char *p; // end of the current token
78 } ks_tokaux_t;
79 
80 #ifdef __cplusplus
81 extern "C" {
82 #endif
83 
84     HTSLIB_EXPORT
85 	int kvsprintf(kstring_t *s, const char *fmt, va_list ap) KS_ATTR_PRINTF(2,0);
86 
87     HTSLIB_EXPORT
88 	int ksprintf(kstring_t *s, const char *fmt, ...) KS_ATTR_PRINTF(2,3);
89 
90     HTSLIB_EXPORT
91     int kputd(double d, kstring_t *s); // custom %g only handler
92 
93     HTSLIB_EXPORT
94 	int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
95 
96     HTSLIB_EXPORT
97 	char *kstrstr(const char *str, const char *pat, int **_prep);
98 
99     HTSLIB_EXPORT
100 	char *kstrnstr(const char *str, const char *pat, int n, int **_prep);
101 
102     HTSLIB_EXPORT
103 	void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep);
104 
105 	/* kstrtok() is similar to strtok_r() except that str is not
106 	 * modified and both str and sep can be NULL. For efficiency, it is
107 	 * actually recommended to set both to NULL in the subsequent calls
108 	 * if sep is not changed. */
109     HTSLIB_EXPORT
110 	char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux);
111 
112     /* kgetline() uses the supplied fgets()-like function to read a "\n"-
113      * or "\r\n"-terminated line from fp.  The line read is appended to the
114      * kstring without its terminator and 0 is returned; EOF is returned at
115      * EOF or on error (determined by querying fp, as per fgets()). */
116     typedef char *kgets_func(char *, int, void *);
117     HTSLIB_EXPORT
118     int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp);
119 
120     /* kgetline2() uses the supplied hgetln()-like function to read a "\n"-
121      * or "\r\n"-terminated line from fp.  The line read is appended to the
122      * ksring without its terminator and 0 is returned; EOF is returned at
123      * EOF or on error (determined by querying fp, as per fgets()). */
124     typedef ssize_t kgets_func2(char *, size_t, void *);
125     HTSLIB_EXPORT
126     int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp);
127 
128 #ifdef __cplusplus
129 }
130 #endif
131 
132 /// kstring initializer for structure assignment
133 #define KS_INITIALIZE { 0, 0, NULL }
134 
135 /// kstring initializer for pointers
136 /**
137    @note Not to be used if the buffer has been allocated.  Use ks_release()
138    or ks_clear() instead.
139 */
140 
ks_initialize(kstring_t * s)141 static inline void ks_initialize(kstring_t *s)
142 {
143     s->l = s->m = 0;
144     s->s = NULL;
145 }
146 
147 /// Resize a kstring to a given capacity
ks_resize(kstring_t * s,size_t size)148 static inline int ks_resize(kstring_t *s, size_t size)
149 {
150 	if (s->m < size) {
151 	    char *tmp;
152 	    size = (size > (SIZE_MAX>>2)) ? size : size + (size >> 1);
153 	    tmp = (char*)realloc(s->s, size);
154 	    if (!tmp)
155 	        return -1;
156 	    s->s = tmp;
157 	    s->m = size;
158 	}
159 	return 0;
160 }
161 
162 /// Increase kstring capacity by a given number of bytes
ks_expand(kstring_t * s,size_t expansion)163 static inline int ks_expand(kstring_t *s, size_t expansion)
164 {
165     size_t new_size = s->l + expansion;
166 
167     if (new_size < s->l) // Overflow check
168         return -1;
169     return ks_resize(s, new_size);
170 }
171 
172 /// Returns the kstring buffer
ks_str(kstring_t * s)173 static inline char *ks_str(kstring_t *s)
174 {
175 	return s->s;
176 }
177 
178 /// Returns the kstring buffer, or an empty string if l == 0
179 /**
180  * Unlike ks_str(), this function will never return NULL.  If the kstring is
181  * empty it will return a read-only empty string.  As the returned value
182  * may be read-only, the caller should not attempt to modify it.
183  */
ks_c_str(kstring_t * s)184 static inline const char *ks_c_str(kstring_t *s)
185 {
186     return s->l && s->s ? s->s : "";
187 }
188 
ks_len(kstring_t * s)189 static inline size_t ks_len(kstring_t *s)
190 {
191 	return s->l;
192 }
193 
194 /// Reset kstring length to zero
195 /**
196    @return The kstring itself
197 
198    Example use: kputsn(string, len, ks_clear(s))
199 */
ks_clear(kstring_t * s)200 static inline kstring_t *ks_clear(kstring_t *s)
201 {
202     s->l = 0;
203     return s;
204 }
205 
206 // Give ownership of the underlying buffer away to something else (making
207 // that something else responsible for freeing it), leaving the kstring_t
208 // empty and ready to be used again, or ready to go out of scope without
209 // needing  free(str.s)  to prevent a memory leak.
ks_release(kstring_t * s)210 static inline char *ks_release(kstring_t *s)
211 {
212 	char *ss = s->s;
213 	s->l = s->m = 0;
214 	s->s = NULL;
215 	return ss;
216 }
217 
218 /// Safely free the underlying buffer in a kstring.
ks_free(kstring_t * s)219 static inline void ks_free(kstring_t *s)
220 {
221     if (s) {
222         free(s->s);
223         ks_initialize(s);
224     }
225 }
226 
kputsn(const char * p,size_t l,kstring_t * s)227 static inline int kputsn(const char *p, size_t l, kstring_t *s)
228 {
229 	size_t new_sz = s->l + l + 2;
230 	if (new_sz <= s->l || ks_resize(s, new_sz) < 0)
231 		return EOF;
232 	memcpy(s->s + s->l, p, l);
233 	s->l += l;
234 	s->s[s->l] = 0;
235 	return l;
236 }
237 
kputs(const char * p,kstring_t * s)238 static inline int kputs(const char *p, kstring_t *s)
239 {
240 	if (!p) { errno = EFAULT; return -1; }
241 	return kputsn(p, strlen(p), s);
242 }
243 
kputc(int c,kstring_t * s)244 static inline int kputc(int c, kstring_t *s)
245 {
246 	if (ks_resize(s, s->l + 2) < 0)
247 		return EOF;
248 	s->s[s->l++] = c;
249 	s->s[s->l] = 0;
250 	return (unsigned char)c;
251 }
252 
kputc_(int c,kstring_t * s)253 static inline int kputc_(int c, kstring_t *s)
254 {
255 	if (ks_resize(s, s->l + 1) < 0)
256 		return EOF;
257 	s->s[s->l++] = c;
258 	return 1;
259 }
260 
kputsn_(const void * p,size_t l,kstring_t * s)261 static inline int kputsn_(const void *p, size_t l, kstring_t *s)
262 {
263 	size_t new_sz = s->l + l;
264 	if (new_sz < s->l || ks_resize(s, new_sz ? new_sz : 1) < 0)
265 		return EOF;
266 	memcpy(s->s + s->l, p, l);
267 	s->l += l;
268 	return l;
269 }
270 
kputuw(unsigned x,kstring_t * s)271 static inline int kputuw(unsigned x, kstring_t *s)
272 {
273 #if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U
274     static const unsigned int kputuw_num_digits[32] = {
275         10, 10, 10,  9,  9,  9,  8,  8,
276         8,   7,  7,  7,  7,  6,  6,  6,
277         5,   5,  5,  4,  4,  4,  4,  3,
278         3,   3,  2,  2,  2,  1,  1,  1
279     };
280     static const unsigned int kputuw_thresholds[32] = {
281         0,        0, 1000000000U, 0,       0, 100000000U,   0,      0,
282         10000000, 0,          0,  0, 1000000,         0,    0, 100000,
283         0,        0,      10000,  0,       0,         0, 1000,      0,
284         0,      100,          0,  0,      10,         0,    0,      0
285     };
286 #else
287     uint64_t m;
288 #endif
289     static const char kputuw_dig2r[] =
290         "00010203040506070809"
291         "10111213141516171819"
292         "20212223242526272829"
293         "30313233343536373839"
294         "40414243444546474849"
295         "50515253545556575859"
296         "60616263646566676869"
297         "70717273747576777879"
298         "80818283848586878889"
299         "90919293949596979899";
300     unsigned int l, j;
301     char *cp;
302 
303     // Trivial case - also prevents __builtin_clz(0), which is undefined
304     if (x < 10) {
305         if (ks_resize(s, s->l + 2) < 0)
306             return EOF;
307         s->s[s->l++] = '0'+x;
308         s->s[s->l] = 0;
309         return 0;
310     }
311 
312     // Find out how many digits are to be printed.
313 #if HAVE___BUILTIN_CLZ && UINT_MAX == 4294967295U
314     /*
315      * Table method - should be quick if clz can be done in hardware.
316      * Find the most significant bit of the value to print and look
317      * up in a table to find out how many decimal digits are needed.
318      * This number needs to be adjusted by 1 for cases where the decimal
319      * length could vary for a given number of bits (for example,
320      * a four bit number could be between 8 and 15).
321      */
322 
323     l = __builtin_clz(x);
324     l = kputuw_num_digits[l] - (x < kputuw_thresholds[l]);
325 #else
326     // Fallback for when clz is not available
327     m = 1;
328     l = 0;
329     do {
330         l++;
331         m *= 10;
332     } while (x >= m);
333 #endif
334 
335     if (ks_resize(s, s->l + l + 2) < 0)
336         return EOF;
337 
338     // Add digits two at a time
339     j = l;
340     cp = s->s + s->l;
341     while (x >= 10) {
342         const char *d = &kputuw_dig2r[2*(x%100)];
343         x /= 100;
344         memcpy(&cp[j-=2], d, 2);
345     }
346 
347     // Last one (if necessary).  We know that x < 10 by now.
348     if (j == 1)
349         cp[0] = x + '0';
350 
351     s->l += l;
352     s->s[s->l] = 0;
353     return 0;
354 }
355 
kputw(int c,kstring_t * s)356 static inline int kputw(int c, kstring_t *s)
357 {
358     unsigned int x = c;
359     if (c < 0) {
360         x = -x;
361         if (ks_resize(s, s->l + 3) < 0)
362             return EOF;
363         s->s[s->l++] = '-';
364     }
365 
366     return kputuw(x, s);
367 }
368 
kputll(long long c,kstring_t * s)369 static inline int kputll(long long c, kstring_t *s)
370 {
371 	char buf[32];
372 	int i, l = 0;
373 	unsigned long long x = c;
374 	if (c < 0) x = -x;
375 	do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
376 	if (c < 0) buf[l++] = '-';
377 	if (ks_resize(s, s->l + l + 2) < 0)
378 		return EOF;
379 	for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
380 	s->s[s->l] = 0;
381 	return 0;
382 }
383 
kputl(long c,kstring_t * s)384 static inline int kputl(long c, kstring_t *s) {
385     return kputll(c, s);
386 }
387 
388 /*
389  * Returns 's' split by delimiter, with *n being the number of components;
390  *         NULL on failure.
391  */
ksplit(kstring_t * s,int delimiter,int * n)392 static inline int *ksplit(kstring_t *s, int delimiter, int *n)
393 {
394 	int max = 0, *offsets = 0;
395 	*n = ksplit_core(s->s, delimiter, &max, &offsets);
396 	return offsets;
397 }
398 
399 #endif
400