1 /* The MIT License
2
3 Copyright (c) 2008 Genome Research Ltd (GRL).
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24 */
25
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include <limits.h>
34 #include "utils.h"
35 #include "bwt.h"
36 #include "kvec.h"
37
38 #ifdef USE_MALLOC_WRAPPERS
39 # include "malloc_wrap.h"
40 #endif
41
bwt_gen_cnt_table(bwt_t * bwt)42 void bwt_gen_cnt_table(bwt_t *bwt)
43 {
44 int i, j;
45 for (i = 0; i != 256; ++i) {
46 uint32_t x = 0;
47 for (j = 0; j != 4; ++j)
48 x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3);
49 bwt->cnt_table[i] = x;
50 }
51 }
52
bwt_invPsi(const bwt_t * bwt,bwtint_t k)53 static inline bwtint_t bwt_invPsi(const bwt_t *bwt, bwtint_t k) // compute inverse CSA
54 {
55 bwtint_t x = k - (k > bwt->primary);
56 x = bwt_B0(bwt, x);
57 x = bwt->L2[x] + bwt_occ(bwt, k, x);
58 return k == bwt->primary? 0 : x;
59 }
60
61 // bwt->bwt and bwt->occ must be precalculated
bwt_cal_sa(bwt_t * bwt,int intv)62 void bwt_cal_sa(bwt_t *bwt, int intv)
63 {
64 bwtint_t isa, sa, i; // S(isa) = sa
65 int intv_round = intv;
66
67 kv_roundup32(intv_round);
68 xassert(intv_round == intv, "SA sample interval is not a power of 2.");
69 xassert(bwt->bwt, "bwt_t::bwt is not initialized.");
70
71 if (bwt->sa) free(bwt->sa);
72 bwt->sa_intv = intv;
73 bwt->n_sa = (bwt->seq_len + intv) / intv;
74 bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
75 // calculate SA value
76 isa = 0; sa = bwt->seq_len;
77 for (i = 0; i < bwt->seq_len; ++i) {
78 if (isa % intv == 0) bwt->sa[isa/intv] = sa;
79 --sa;
80 isa = bwt_invPsi(bwt, isa);
81 }
82 if (isa % intv == 0) bwt->sa[isa/intv] = sa;
83 bwt->sa[0] = (bwtint_t)-1; // before this line, bwt->sa[0] = bwt->seq_len
84 }
85
bwt_sa(const bwt_t * bwt,bwtint_t k)86 bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k)
87 {
88 bwtint_t sa = 0, mask = bwt->sa_intv - 1;
89 while (k & mask) {
90 ++sa;
91 k = bwt_invPsi(bwt, k);
92 }
93 /* without setting bwt->sa[0] = -1, the following line should be
94 changed to (sa + bwt->sa[k/bwt->sa_intv]) % (bwt->seq_len + 1) */
95 return sa + bwt->sa[k/bwt->sa_intv];
96 }
97
__occ_aux(uint64_t y,int c)98 static inline int __occ_aux(uint64_t y, int c)
99 {
100 // reduce nucleotide counting to bits counting
101 y = ((c&2)? y : ~y) >> 1 & ((c&1)? y : ~y) & 0x5555555555555555ull;
102 // count the number of 1s in y
103 y = (y & 0x3333333333333333ull) + (y >> 2 & 0x3333333333333333ull);
104 return ((y + (y >> 4)) & 0xf0f0f0f0f0f0f0full) * 0x101010101010101ull >> 56;
105 }
106
bwt_occ(const bwt_t * bwt,bwtint_t k,ubyte_t c)107 bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c)
108 {
109 bwtint_t n;
110 uint32_t *p, *end;
111
112 if (k == bwt->seq_len) return bwt->L2[c+1] - bwt->L2[c];
113 if (k == (bwtint_t)(-1)) return 0;
114 k -= (k >= bwt->primary); // because $ is not in bwt
115
116 // retrieve Occ at k/OCC_INTERVAL
117 n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
118 p += sizeof(bwtint_t); // jump to the start of the first BWT cell
119
120 // calculate Occ up to the last k/32
121 end = p + (((k>>5) - ((k&~OCC_INTV_MASK)>>5))<<1);
122 for (; p < end; p += 2) n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
123
124 // calculate Occ
125 n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
126 if (c == 0) n -= ~k&31; // corrected for the masked bits
127
128 return n;
129 }
130
131 // an analogy to bwt_occ() but more efficient, requiring k <= l
bwt_2occ(const bwt_t * bwt,bwtint_t k,bwtint_t l,ubyte_t c,bwtint_t * ok,bwtint_t * ol)132 void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol)
133 {
134 bwtint_t _k, _l;
135 _k = (k >= bwt->primary)? k-1 : k;
136 _l = (l >= bwt->primary)? l-1 : l;
137 if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
138 *ok = bwt_occ(bwt, k, c);
139 *ol = bwt_occ(bwt, l, c);
140 } else {
141 bwtint_t m, n, i, j;
142 uint32_t *p;
143 if (k >= bwt->primary) --k;
144 if (l >= bwt->primary) --l;
145 n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
146 p += sizeof(bwtint_t);
147 // calculate *ok
148 j = k >> 5 << 5;
149 for (i = k/OCC_INTERVAL*OCC_INTERVAL; i < j; i += 32, p += 2)
150 n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
151 m = n;
152 n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
153 if (c == 0) n -= ~k&31; // corrected for the masked bits
154 *ok = n;
155 // calculate *ol
156 j = l >> 5 << 5;
157 for (; i < j; i += 32, p += 2)
158 m += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
159 m += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~l&31)<<1)) - 1), c);
160 if (c == 0) m -= ~l&31; // corrected for the masked bits
161 *ol = m;
162 }
163 }
164
165 #define __occ_aux4(bwt, b) \
166 ((bwt)->cnt_table[(b)&0xff] + (bwt)->cnt_table[(b)>>8&0xff] \
167 + (bwt)->cnt_table[(b)>>16&0xff] + (bwt)->cnt_table[(b)>>24])
168
bwt_occ4(const bwt_t * bwt,bwtint_t k,bwtint_t cnt[4])169 void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4])
170 {
171 bwtint_t x;
172 uint32_t *p, tmp, *end;
173 if (k == (bwtint_t)(-1)) {
174 memset(cnt, 0, 4 * sizeof(bwtint_t));
175 return;
176 }
177 k -= (k >= bwt->primary); // because $ is not in bwt
178 p = bwt_occ_intv(bwt, k);
179 memcpy(cnt, p, 4 * sizeof(bwtint_t));
180 p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t))
181 end = p + ((k>>4) - ((k&~OCC_INTV_MASK)>>4)); // this is the end point of the following loop
182 for (x = 0; p < end; ++p) x += __occ_aux4(bwt, *p);
183 tmp = *p & ~((1U<<((~k&15)<<1)) - 1);
184 x += __occ_aux4(bwt, tmp) - (~k&15);
185 cnt[0] += x&0xff; cnt[1] += x>>8&0xff; cnt[2] += x>>16&0xff; cnt[3] += x>>24;
186 }
187
188 // an analogy to bwt_occ4() but more efficient, requiring k <= l
bwt_2occ4(const bwt_t * bwt,bwtint_t k,bwtint_t l,bwtint_t cntk[4],bwtint_t cntl[4])189 void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4])
190 {
191 bwtint_t _k, _l;
192 _k = k - (k >= bwt->primary);
193 _l = l - (l >= bwt->primary);
194 if (_l>>OCC_INTV_SHIFT != _k>>OCC_INTV_SHIFT || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
195 bwt_occ4(bwt, k, cntk);
196 bwt_occ4(bwt, l, cntl);
197 } else {
198 bwtint_t x, y;
199 uint32_t *p, tmp, *endk, *endl;
200 k -= (k >= bwt->primary); // because $ is not in bwt
201 l -= (l >= bwt->primary);
202 p = bwt_occ_intv(bwt, k);
203 memcpy(cntk, p, 4 * sizeof(bwtint_t));
204 p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t))
205 // prepare cntk[]
206 endk = p + ((k>>4) - ((k&~OCC_INTV_MASK)>>4));
207 endl = p + ((l>>4) - ((l&~OCC_INTV_MASK)>>4));
208 for (x = 0; p < endk; ++p) x += __occ_aux4(bwt, *p);
209 y = x;
210 tmp = *p & ~((1U<<((~k&15)<<1)) - 1);
211 x += __occ_aux4(bwt, tmp) - (~k&15);
212 // calculate cntl[] and finalize cntk[]
213 for (; p < endl; ++p) y += __occ_aux4(bwt, *p);
214 tmp = *p & ~((1U<<((~l&15)<<1)) - 1);
215 y += __occ_aux4(bwt, tmp) - (~l&15);
216 memcpy(cntl, cntk, 4 * sizeof(bwtint_t));
217 cntk[0] += x&0xff; cntk[1] += x>>8&0xff; cntk[2] += x>>16&0xff; cntk[3] += x>>24;
218 cntl[0] += y&0xff; cntl[1] += y>>8&0xff; cntl[2] += y>>16&0xff; cntl[3] += y>>24;
219 }
220 }
221
bwt_match_exact(const bwt_t * bwt,int len,const ubyte_t * str,bwtint_t * sa_begin,bwtint_t * sa_end)222 int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end)
223 {
224 bwtint_t k, l, ok, ol;
225 int i;
226 k = 0; l = bwt->seq_len;
227 for (i = len - 1; i >= 0; --i) {
228 ubyte_t c = str[i];
229 if (c > 3) return 0; // no match
230 bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
231 k = bwt->L2[c] + ok + 1;
232 l = bwt->L2[c] + ol;
233 if (k > l) break; // no match
234 }
235 if (k > l) return 0; // no match
236 if (sa_begin) *sa_begin = k;
237 if (sa_end) *sa_end = l;
238 return l - k + 1;
239 }
240
bwt_match_exact_alt(const bwt_t * bwt,int len,const ubyte_t * str,bwtint_t * k0,bwtint_t * l0)241 int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0)
242 {
243 int i;
244 bwtint_t k, l, ok, ol;
245 k = *k0; l = *l0;
246 for (i = len - 1; i >= 0; --i) {
247 ubyte_t c = str[i];
248 if (c > 3) return 0; // there is an N here. no match
249 bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
250 k = bwt->L2[c] + ok + 1;
251 l = bwt->L2[c] + ol;
252 if (k > l) return 0; // no match
253 }
254 *k0 = k; *l0 = l;
255 return l - k + 1;
256 }
257
258 /*********************
259 * Bidirectional BWT *
260 *********************/
261
bwt_extend(const bwt_t * bwt,const bwtintv_t * ik,bwtintv_t ok[4],int is_back)262 void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back)
263 {
264 bwtint_t tk[4], tl[4];
265 int i;
266 bwt_2occ4(bwt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], tk, tl);
267 for (i = 0; i != 4; ++i) {
268 ok[i].x[!is_back] = bwt->L2[i] + 1 + tk[i];
269 ok[i].x[2] = tl[i] - tk[i];
270 }
271 ok[3].x[is_back] = ik->x[is_back] + (ik->x[!is_back] <= bwt->primary && ik->x[!is_back] + ik->x[2] - 1 >= bwt->primary);
272 ok[2].x[is_back] = ok[3].x[is_back] + ok[3].x[2];
273 ok[1].x[is_back] = ok[2].x[is_back] + ok[2].x[2];
274 ok[0].x[is_back] = ok[1].x[is_back] + ok[1].x[2];
275 }
276
bwt_reverse_intvs(bwtintv_v * p)277 static void bwt_reverse_intvs(bwtintv_v *p)
278 {
279 if (p->n > 1) {
280 int j;
281 for (j = 0; j < p->n>>1; ++j) {
282 bwtintv_t tmp = p->a[p->n - 1 - j];
283 p->a[p->n - 1 - j] = p->a[j];
284 p->a[j] = tmp;
285 }
286 }
287 }
288 // NOTE: $max_intv is not currently used in BWA-MEM
bwt_smem1a(const bwt_t * bwt,int len,const uint8_t * q,int x,int min_intv,uint64_t max_intv,bwtintv_v * mem,bwtintv_v * tmpvec[2])289 int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2])
290 {
291 int i, j, c, ret;
292 bwtintv_t ik, ok[4];
293 bwtintv_v a[2], *prev, *curr, *swap;
294
295 mem->n = 0;
296 if (q[x] > 3) return x + 1;
297 if (min_intv < 1) min_intv = 1; // the interval size should be at least 1
298 kv_init(a[0]); kv_init(a[1]);
299 prev = tmpvec && tmpvec[0]? tmpvec[0] : &a[0]; // use the temporary vector if provided
300 curr = tmpvec && tmpvec[1]? tmpvec[1] : &a[1];
301 bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
302 ik.info = x + 1;
303
304 for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
305 if (ik.x[2] < max_intv) { // an interval small enough
306 kv_push(bwtintv_t, *curr, ik);
307 break;
308 } else if (q[i] < 4) { // an A/C/G/T base
309 c = 3 - q[i]; // complement of q[i]
310 bwt_extend(bwt, &ik, ok, 0);
311 if (ok[c].x[2] != ik.x[2]) { // change of the interval size
312 kv_push(bwtintv_t, *curr, ik);
313 if (ok[c].x[2] < min_intv) break; // the interval size is too small to be extended further
314 }
315 ik = ok[c]; ik.info = i + 1;
316 } else { // an ambiguous base
317 kv_push(bwtintv_t, *curr, ik);
318 break; // always terminate extension at an ambiguous base; in this case, i<len always stands
319 }
320 }
321 if (i == len) kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
322 bwt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first
323 ret = curr->a[0].info; // this will be the returned value
324 swap = curr; curr = prev; prev = swap;
325
326 for (i = x - 1; i >= -1; --i) { // backward search for MEMs
327 c = i < 0? -1 : q[i] < 4? q[i] : -1; // c==-1 if i<0 or q[i] is an ambiguous base
328 for (j = 0, curr->n = 0; j < prev->n; ++j) {
329 bwtintv_t *p = &prev->a[j];
330 if (c >= 0 && ik.x[2] >= max_intv) bwt_extend(bwt, p, ok, 1);
331 if (c < 0 || ik.x[2] < max_intv || ok[c].x[2] < min_intv) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough
332 if (curr->n == 0) { // test curr->n>0 to make sure there are no longer matches
333 if (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches
334 ik = *p; ik.info |= (uint64_t)(i + 1)<<32;
335 kv_push(bwtintv_t, *mem, ik);
336 }
337 } // otherwise the match is contained in another longer match
338 } else if (curr->n == 0 || ok[c].x[2] != curr->a[curr->n-1].x[2]) {
339 ok[c].info = p->info;
340 kv_push(bwtintv_t, *curr, ok[c]);
341 }
342 }
343 if (curr->n == 0) break;
344 swap = curr; curr = prev; prev = swap;
345 }
346 bwt_reverse_intvs(mem); // s.t. sorted by the start coordinate
347
348 if (tmpvec == 0 || tmpvec[0] == 0) free(a[0].a);
349 if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);
350 return ret;
351 }
352
bwt_smem1(const bwt_t * bwt,int len,const uint8_t * q,int x,int min_intv,bwtintv_v * mem,bwtintv_v * tmpvec[2])353 int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2])
354 {
355 return bwt_smem1a(bwt, len, q, x, min_intv, 0, mem, tmpvec);
356 }
357
bwt_seed_strategy1(const bwt_t * bwt,int len,const uint8_t * q,int x,int min_len,int max_intv,bwtintv_t * mem)358 int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem)
359 {
360 int i, c;
361 bwtintv_t ik, ok[4];
362
363 memset(mem, 0, sizeof(bwtintv_t));
364 if (q[x] > 3) return x + 1;
365 bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
366 for (i = x + 1; i < len; ++i) { // forward search
367 if (q[i] < 4) { // an A/C/G/T base
368 c = 3 - q[i]; // complement of q[i]
369 bwt_extend(bwt, &ik, ok, 0);
370 if (ok[c].x[2] < max_intv && i - x >= min_len) {
371 *mem = ok[c];
372 mem->info = (uint64_t)x<<32 | (i + 1);
373 return i + 1;
374 }
375 ik = ok[c];
376 } else return i + 1;
377 }
378 return len;
379 }
380
381 /*************************
382 * Read/write BWT and SA *
383 *************************/
384
bwt_dump_bwt(const char * fn,const bwt_t * bwt)385 void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
386 {
387 FILE *fp;
388 fp = xopen(fn, "wb");
389 err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
390 err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
391 err_fwrite(bwt->bwt, 4, bwt->bwt_size, fp);
392 err_fflush(fp);
393 err_fclose(fp);
394 }
395
bwt_dump_sa(const char * fn,const bwt_t * bwt)396 void bwt_dump_sa(const char *fn, const bwt_t *bwt)
397 {
398 FILE *fp;
399 fp = xopen(fn, "wb");
400 err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
401 err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
402 err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
403 err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp);
404 err_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp);
405 err_fflush(fp);
406 err_fclose(fp);
407 }
408
fread_fix(FILE * fp,bwtint_t size,void * a)409 static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
410 { // Mac/Darwin has a bug when reading data longer than 2GB. This function fixes this issue by reading data in small chunks
411 const int bufsize = 0x1000000; // 16M block
412 bwtint_t offset = 0;
413 while (size) {
414 int x = bufsize < size? bufsize : size;
415 if ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0) break;
416 size -= x; offset += x;
417 }
418 return offset;
419 }
420
bwt_restore_sa(const char * fn,bwt_t * bwt)421 void bwt_restore_sa(const char *fn, bwt_t *bwt)
422 {
423 char skipped[256];
424 FILE *fp;
425 bwtint_t primary;
426
427 fp = xopen(fn, "rb");
428 err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);
429 xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same.");
430 err_fread_noeof(skipped, sizeof(bwtint_t), 4, fp); // skip
431 err_fread_noeof(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
432 err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);
433 xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same.");
434
435 bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv;
436 bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
437 bwt->sa[0] = -1;
438
439 fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1);
440 err_fclose(fp);
441 }
442
bwt_restore_bwt(const char * fn)443 bwt_t *bwt_restore_bwt(const char *fn)
444 {
445 bwt_t *bwt;
446 FILE *fp;
447
448 bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
449 fp = xopen(fn, "rb");
450 err_fseek(fp, 0, SEEK_END);
451 bwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2;
452 bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
453 err_fseek(fp, 0, SEEK_SET);
454 err_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp);
455 err_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp);
456 fread_fix(fp, bwt->bwt_size<<2, bwt->bwt);
457 bwt->seq_len = bwt->L2[4];
458 err_fclose(fp);
459 bwt_gen_cnt_table(bwt);
460
461 return bwt;
462 }
463
bwt_destroy(bwt_t * bwt)464 void bwt_destroy(bwt_t *bwt)
465 {
466 if (bwt == 0) return;
467 free(bwt->sa); free(bwt->bwt);
468 free(bwt);
469 }
470