1 /* The MIT License
2
3 Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24 */
25
26 #include <config.h>
27
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <string.h>
32 #include <stdint.h>
33 #include <math.h>
34 #include "htslib/kstring.h"
35
kputd(double d,kstring_t * s)36 int kputd(double d, kstring_t *s) {
37 int len = 0;
38 char buf[21], *cp = buf+20, *ep;
39 if (d == 0) {
40 if (signbit(d)) {
41 kputsn("-0",2,s);
42 return 2;
43 } else {
44 kputsn("0",1,s);
45 return 1;
46 }
47 }
48
49 if (d < 0) {
50 kputc('-',s);
51 len = 1;
52 d=-d;
53 }
54 if (!(d >= 0.0001 && d <= 999999)) {
55 if (ks_resize(s, s->l + 50) < 0)
56 return EOF;
57 // We let stdio handle the exponent cases
58 int s2 = sprintf(s->s + s->l, "%g", d);
59 len += s2;
60 s->l += s2;
61 return len;
62 }
63
64 uint64_t i = d*10000000000LL;
65 // Correction for rounding - rather ugly
66
67 // Optimised for small numbers.
68 // Better still would be __builtin_clz on hi/lo 32 and get the
69 // starting point very rapidly.
70 if (d<.0001)
71 i+=0;
72 else if (d<0.001)
73 i+=5;
74 else if (d < 0.01)
75 i+=50;
76 else if (d < 0.1)
77 i+=500;
78 else if (d < 1)
79 i+=5000;
80 else if (d < 10)
81 i+=50000;
82 else if (d < 100)
83 i+=500000;
84 else if (d < 1000)
85 i+=5000000;
86 else if (d < 10000)
87 i+=50000000;
88 else if (d < 100000)
89 i+=500000000;
90 else
91 i+=5000000000LL;
92
93 do {
94 *--cp = '0' + i%10;
95 i /= 10;
96 } while (i >= 1);
97 buf[20] = 0;
98 int p = buf+20-cp;
99 if (p <= 10) { // d < 1
100 //assert(d/1);
101 cp[6] = 0; ep = cp+5;// 6 precision
102 while (p < 10) {
103 *--cp = '0';
104 p++;
105 }
106 *--cp = '.';
107 *--cp = '0';
108 } else {
109 char *xp = --cp;
110 while (p > 10) {
111 xp[0] = xp[1];
112 p--;
113 xp++;
114 }
115 xp[0] = '.';
116 cp[7] = 0; ep=cp+6;
117 if (cp[6] == '.') cp[6] = 0;
118 }
119
120 // Cull trailing zeros
121 while (*ep == '0' && ep > cp)
122 ep--;
123 char *z = ep+1;
124 while (ep > cp) {
125 if (*ep == '.') {
126 if (z[-1] == '.')
127 z[-1] = 0;
128 else
129 z[0] = 0;
130 break;
131 }
132 ep--;
133 }
134
135 int sl = strlen(cp);
136 len += sl;
137 kputsn(cp, sl, s);
138 return len;
139 }
140
kvsprintf(kstring_t * s,const char * fmt,va_list ap)141 int kvsprintf(kstring_t *s, const char *fmt, va_list ap)
142 {
143 va_list args;
144 int l;
145 va_copy(args, ap);
146
147 if (fmt[0] == '%' && fmt[1] == 'g' && fmt[2] == 0) {
148 double d = va_arg(args, double);
149 l = kputd(d, s);
150 va_end(args);
151 return l;
152 }
153
154 l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'.
155 va_end(args);
156 if (l + 1 > s->m - s->l) {
157 if (ks_resize(s, s->l + l + 2) < 0)
158 return -1;
159 va_copy(args, ap);
160 l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args);
161 va_end(args);
162 }
163 s->l += l;
164 return l;
165 }
166
ksprintf(kstring_t * s,const char * fmt,...)167 int ksprintf(kstring_t *s, const char *fmt, ...)
168 {
169 va_list ap;
170 int l;
171 va_start(ap, fmt);
172 l = kvsprintf(s, fmt, ap);
173 va_end(ap);
174 return l;
175 }
176
kstrtok(const char * str,const char * sep_in,ks_tokaux_t * aux)177 char *kstrtok(const char *str, const char *sep_in, ks_tokaux_t *aux)
178 {
179 const unsigned char *p, *start, *sep = (unsigned char *) sep_in;
180 if (sep) { // set up the table
181 if (str == 0 && aux->finished) return 0; // no need to set up if we have finished
182 aux->finished = 0;
183 if (sep[0] && sep[1]) {
184 aux->sep = -1;
185 aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0;
186 for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f);
187 } else aux->sep = sep[0];
188 }
189 if (aux->finished) return 0;
190 else if (str) start = (unsigned char *) str, aux->finished = 0;
191 else start = (unsigned char *) aux->p + 1;
192 if (aux->sep < 0) {
193 for (p = start; *p; ++p)
194 if (aux->tab[*p>>6]>>(*p&0x3f)&1) break;
195 } else {
196 for (p = start; *p; ++p)
197 if (*p == aux->sep) break;
198 }
199 aux->p = (const char *) p; // end of token
200 if (*p == 0) aux->finished = 1; // no more tokens
201 return (char*)start;
202 }
203
204 // s MUST BE a null terminated string; l = strlen(s)
ksplit_core(char * s,int delimiter,int * _max,int ** _offsets)205 int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
206 {
207 int i, n, max, last_char, last_start, *offsets, l;
208 n = 0; max = *_max; offsets = *_offsets;
209 l = strlen(s);
210
211 #define __ksplit_aux do { \
212 if (_offsets) { \
213 s[i] = 0; \
214 if (n == max) { \
215 int *tmp; \
216 max = max? max<<1 : 2; \
217 if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) { \
218 offsets = tmp; \
219 } else { \
220 free(offsets); \
221 *_offsets = NULL; \
222 return 0; \
223 } \
224 } \
225 offsets[n++] = last_start; \
226 } else ++n; \
227 } while (0)
228
229 for (i = 0, last_char = last_start = 0; i <= l; ++i) {
230 if (delimiter == 0) {
231 if (isspace((int)((unsigned char) s[i])) || s[i] == 0) {
232 if (isgraph(last_char))
233 __ksplit_aux; // the end of a field
234 } else {
235 if (isspace(last_char) || last_char == 0)
236 last_start = i;
237 }
238 } else {
239 if (s[i] == delimiter || s[i] == 0) {
240 if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field
241 } else {
242 if (last_char == delimiter || last_char == 0) last_start = i;
243 }
244 }
245 last_char = (int)((unsigned char)s[i]);
246 }
247 *_max = max; *_offsets = offsets;
248 return n;
249 }
250
kgetline(kstring_t * s,kgets_func * fgets_fn,void * fp)251 int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp)
252 {
253 size_t l0 = s->l;
254
255 while (s->l == l0 || s->s[s->l-1] != '\n') {
256 if (s->m - s->l < 200) {
257 if (ks_resize(s, s->m + 200) < 0)
258 return EOF;
259 }
260 if (fgets_fn(s->s + s->l, s->m - s->l, fp) == NULL) break;
261 s->l += strlen(s->s + s->l);
262 }
263
264 if (s->l == l0) return EOF;
265
266 if (s->l > l0 && s->s[s->l-1] == '\n') {
267 s->l--;
268 if (s->l > l0 && s->s[s->l-1] == '\r') s->l--;
269 }
270 s->s[s->l] = '\0';
271 return 0;
272 }
273
274 /**********************
275 * Boyer-Moore search *
276 **********************/
277
278 typedef unsigned char ubyte_t;
279
280 // reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
ksBM_prep(const ubyte_t * pat,int m)281 static int *ksBM_prep(const ubyte_t *pat, int m)
282 {
283 int i, *suff, *prep, *bmGs, *bmBc;
284 prep = (int*)calloc(m + 256, sizeof(int));
285 bmGs = prep; bmBc = prep + m;
286 { // preBmBc()
287 for (i = 0; i < 256; ++i) bmBc[i] = m;
288 for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
289 }
290 suff = (int*)calloc(m, sizeof(int));
291 { // suffixes()
292 int f = 0, g;
293 suff[m - 1] = m;
294 g = m - 1;
295 for (i = m - 2; i >= 0; --i) {
296 if (i > g && suff[i + m - 1 - f] < i - g)
297 suff[i] = suff[i + m - 1 - f];
298 else {
299 if (i < g) g = i;
300 f = i;
301 while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
302 suff[i] = f - g;
303 }
304 }
305 }
306 { // preBmGs()
307 int j = 0;
308 for (i = 0; i < m; ++i) bmGs[i] = m;
309 for (i = m - 1; i >= 0; --i)
310 if (suff[i] == i + 1)
311 for (; j < m - 1 - i; ++j)
312 if (bmGs[j] == m)
313 bmGs[j] = m - 1 - i;
314 for (i = 0; i <= m - 2; ++i)
315 bmGs[m - 1 - suff[i]] = m - 1 - i;
316 }
317 free(suff);
318 return prep;
319 }
320
kmemmem(const void * _str,int n,const void * _pat,int m,int ** _prep)321 void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep)
322 {
323 int i, j, *prep = 0, *bmGs, *bmBc;
324 const ubyte_t *str, *pat;
325 str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat;
326 prep = (_prep == 0 || *_prep == 0)? ksBM_prep(pat, m) : *_prep;
327 if (_prep && *_prep == 0) *_prep = prep;
328 bmGs = prep; bmBc = prep + m;
329 j = 0;
330 while (j <= n - m) {
331 for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
332 if (i >= 0) {
333 int max = bmBc[str[i+j]] - m + 1 + i;
334 if (max < bmGs[i]) max = bmGs[i];
335 j += max;
336 } else return (void*)(str + j);
337 }
338 if (_prep == 0) free(prep);
339 return 0;
340 }
341
kstrstr(const char * str,const char * pat,int ** _prep)342 char *kstrstr(const char *str, const char *pat, int **_prep)
343 {
344 return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep);
345 }
346
kstrnstr(const char * str,const char * pat,int n,int ** _prep)347 char *kstrnstr(const char *str, const char *pat, int n, int **_prep)
348 {
349 return (char*)kmemmem(str, n, pat, strlen(pat), _prep);
350 }
351
352 /***********************
353 * The main() function *
354 ***********************/
355
356 #ifdef KSTRING_MAIN
357 #include <stdio.h>
main()358 int main()
359 {
360 kstring_t *s;
361 int *fields, n, i;
362 ks_tokaux_t aux;
363 char *p;
364 s = (kstring_t*)calloc(1, sizeof(kstring_t));
365 // test ksprintf()
366 ksprintf(s, " abcdefg: %d ", 100);
367 printf("'%s'\n", s->s);
368 // test ksplit()
369 fields = ksplit(s, 0, &n);
370 for (i = 0; i < n; ++i)
371 printf("field[%d] = '%s'\n", i, s->s + fields[i]);
372 // test kstrtok()
373 s->l = 0;
374 for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) {
375 kputsn(p, aux.p - p, s);
376 kputc('\n', s);
377 }
378 printf("%s", s->s);
379 // free
380 free(s->s); free(s); free(fields);
381
382 {
383 static char *str = "abcdefgcdgcagtcakcdcd";
384 static char *pat = "cd";
385 char *ret, *s = str;
386 int *prep = 0;
387 while ((ret = kstrstr(s, pat, &prep)) != 0) {
388 printf("match: %s\n", ret);
389 s = ret + prep[0];
390 }
391 free(prep);
392 }
393 return 0;
394 }
395 #endif
396