1 /* The MIT License
2 
3    Copyright (c) 2008 Genome Research Ltd (GRL).
4 
5    Permission is hereby granted, free of charge, to any person obtaining
6    a copy of this software and associated documentation files (the
7    "Software"), to deal in the Software without restriction, including
8    without limitation the rights to use, copy, modify, merge, publish,
9    distribute, sublicense, and/or sell copies of the Software, and to
10    permit persons to whom the Software is furnished to do so, subject to
11    the following conditions:
12 
13    The above copyright notice and this permission notice shall be
14    included in all copies or substantial portions of the Software.
15 
16    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23    SOFTWARE.
24 */
25 
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include <limits.h>
34 #include "utils.h"
35 #include "bwt.h"
36 #include "kvec.h"
37 
38 #ifdef USE_MALLOC_WRAPPERS
39 #  include "malloc_wrap.h"
40 #endif
41 
bwt_gen_cnt_table(bwt_t * bwt)42 void bwt_gen_cnt_table(bwt_t *bwt)
43 {
44 	int i, j;
45 	for (i = 0; i != 256; ++i) {
46 		uint32_t x = 0;
47 		for (j = 0; j != 4; ++j)
48 			x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3);
49 		bwt->cnt_table[i] = x;
50 	}
51 }
52 
bwt_invPsi(const bwt_t * bwt,bwtint_t k)53 static inline bwtint_t bwt_invPsi(const bwt_t *bwt, bwtint_t k) // compute inverse CSA
54 {
55 	bwtint_t x = k - (k > bwt->primary);
56 	x = bwt_B0(bwt, x);
57 	x = bwt->L2[x] + bwt_occ(bwt, k, x);
58 	return k == bwt->primary? 0 : x;
59 }
60 
61 // bwt->bwt and bwt->occ must be precalculated
bwt_cal_sa(bwt_t * bwt,int intv)62 void bwt_cal_sa(bwt_t *bwt, int intv)
63 {
64 	bwtint_t isa, sa, i; // S(isa) = sa
65 	int intv_round = intv;
66 
67 	kv_roundup32(intv_round);
68 	xassert(intv_round == intv, "SA sample interval is not a power of 2.");
69 	xassert(bwt->bwt, "bwt_t::bwt is not initialized.");
70 
71 	if (bwt->sa) free(bwt->sa);
72 	bwt->sa_intv = intv;
73 	bwt->n_sa = (bwt->seq_len + intv) / intv;
74 	bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
75 	// calculate SA value
76 	isa = 0; sa = bwt->seq_len;
77 	for (i = 0; i < bwt->seq_len; ++i) {
78 		if (isa % intv == 0) bwt->sa[isa/intv] = sa;
79 		--sa;
80 		isa = bwt_invPsi(bwt, isa);
81 	}
82 	if (isa % intv == 0) bwt->sa[isa/intv] = sa;
83 	bwt->sa[0] = (bwtint_t)-1; // before this line, bwt->sa[0] = bwt->seq_len
84 }
85 
bwt_sa(const bwt_t * bwt,bwtint_t k)86 bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k)
87 {
88 	bwtint_t sa = 0, mask = bwt->sa_intv - 1;
89 	while (k & mask) {
90 		++sa;
91 		k = bwt_invPsi(bwt, k);
92 	}
93 	/* without setting bwt->sa[0] = -1, the following line should be
94 	   changed to (sa + bwt->sa[k/bwt->sa_intv]) % (bwt->seq_len + 1) */
95 	return sa + bwt->sa[k/bwt->sa_intv];
96 }
97 
__occ_aux(uint64_t y,int c)98 static inline int __occ_aux(uint64_t y, int c)
99 {
100 	// reduce nucleotide counting to bits counting
101 	y = ((c&2)? y : ~y) >> 1 & ((c&1)? y : ~y) & 0x5555555555555555ull;
102 	// count the number of 1s in y
103 	y = (y & 0x3333333333333333ull) + (y >> 2 & 0x3333333333333333ull);
104 	return ((y + (y >> 4)) & 0xf0f0f0f0f0f0f0full) * 0x101010101010101ull >> 56;
105 }
106 
bwt_occ(const bwt_t * bwt,bwtint_t k,ubyte_t c)107 bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c)
108 {
109 	bwtint_t n;
110 	uint32_t *p, *end;
111 
112 	if (k == bwt->seq_len) return bwt->L2[c+1] - bwt->L2[c];
113 	if (k == (bwtint_t)(-1)) return 0;
114 	k -= (k >= bwt->primary); // because $ is not in bwt
115 
116 	// retrieve Occ at k/OCC_INTERVAL
117 	n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
118 	p += sizeof(bwtint_t); // jump to the start of the first BWT cell
119 
120 	// calculate Occ up to the last k/32
121 	end = p + (((k>>5) - ((k&~OCC_INTV_MASK)>>5))<<1);
122 	for (; p < end; p += 2) n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
123 
124 	// calculate Occ
125 	n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
126 	if (c == 0) n -= ~k&31; // corrected for the masked bits
127 
128 	return n;
129 }
130 
131 // an analogy to bwt_occ() but more efficient, requiring k <= l
bwt_2occ(const bwt_t * bwt,bwtint_t k,bwtint_t l,ubyte_t c,bwtint_t * ok,bwtint_t * ol)132 void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol)
133 {
134 	bwtint_t _k, _l;
135 	_k = (k >= bwt->primary)? k-1 : k;
136 	_l = (l >= bwt->primary)? l-1 : l;
137 	if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
138 		*ok = bwt_occ(bwt, k, c);
139 		*ol = bwt_occ(bwt, l, c);
140 	} else {
141 		bwtint_t m, n, i, j;
142 		uint32_t *p;
143 		if (k >= bwt->primary) --k;
144 		if (l >= bwt->primary) --l;
145 		n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
146 		p += sizeof(bwtint_t);
147 		// calculate *ok
148 		j = k >> 5 << 5;
149 		for (i = k/OCC_INTERVAL*OCC_INTERVAL; i < j; i += 32, p += 2)
150 			n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
151 		m = n;
152 		n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
153 		if (c == 0) n -= ~k&31; // corrected for the masked bits
154 		*ok = n;
155 		// calculate *ol
156 		j = l >> 5 << 5;
157 		for (; i < j; i += 32, p += 2)
158 			m += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
159 		m += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~l&31)<<1)) - 1), c);
160 		if (c == 0) m -= ~l&31; // corrected for the masked bits
161 		*ol = m;
162 	}
163 }
164 
165 #define __occ_aux4(bwt, b)											\
166 	((bwt)->cnt_table[(b)&0xff] + (bwt)->cnt_table[(b)>>8&0xff]		\
167 	 + (bwt)->cnt_table[(b)>>16&0xff] + (bwt)->cnt_table[(b)>>24])
168 
bwt_occ4(const bwt_t * bwt,bwtint_t k,bwtint_t cnt[4])169 void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4])
170 {
171 	bwtint_t x;
172 	uint32_t *p, tmp, *end;
173 	if (k == (bwtint_t)(-1)) {
174 		memset(cnt, 0, 4 * sizeof(bwtint_t));
175 		return;
176 	}
177 	k -= (k >= bwt->primary); // because $ is not in bwt
178 	p = bwt_occ_intv(bwt, k);
179 	memcpy(cnt, p, 4 * sizeof(bwtint_t));
180 	p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t))
181 	end = p + ((k>>4) - ((k&~OCC_INTV_MASK)>>4)); // this is the end point of the following loop
182 	for (x = 0; p < end; ++p) x += __occ_aux4(bwt, *p);
183 	tmp = *p & ~((1U<<((~k&15)<<1)) - 1);
184 	x += __occ_aux4(bwt, tmp) - (~k&15);
185 	cnt[0] += x&0xff; cnt[1] += x>>8&0xff; cnt[2] += x>>16&0xff; cnt[3] += x>>24;
186 }
187 
188 // an analogy to bwt_occ4() but more efficient, requiring k <= l
bwt_2occ4(const bwt_t * bwt,bwtint_t k,bwtint_t l,bwtint_t cntk[4],bwtint_t cntl[4])189 void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4])
190 {
191 	bwtint_t _k, _l;
192 	_k = k - (k >= bwt->primary);
193 	_l = l - (l >= bwt->primary);
194 	if (_l>>OCC_INTV_SHIFT != _k>>OCC_INTV_SHIFT || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
195 		bwt_occ4(bwt, k, cntk);
196 		bwt_occ4(bwt, l, cntl);
197 	} else {
198 		bwtint_t x, y;
199 		uint32_t *p, tmp, *endk, *endl;
200 		k -= (k >= bwt->primary); // because $ is not in bwt
201 		l -= (l >= bwt->primary);
202 		p = bwt_occ_intv(bwt, k);
203 		memcpy(cntk, p, 4 * sizeof(bwtint_t));
204 		p += sizeof(bwtint_t); // sizeof(bwtint_t) = 4*(sizeof(bwtint_t)/sizeof(uint32_t))
205 		// prepare cntk[]
206 		endk = p + ((k>>4) - ((k&~OCC_INTV_MASK)>>4));
207 		endl = p + ((l>>4) - ((l&~OCC_INTV_MASK)>>4));
208 		for (x = 0; p < endk; ++p) x += __occ_aux4(bwt, *p);
209 		y = x;
210 		tmp = *p & ~((1U<<((~k&15)<<1)) - 1);
211 		x += __occ_aux4(bwt, tmp) - (~k&15);
212 		// calculate cntl[] and finalize cntk[]
213 		for (; p < endl; ++p) y += __occ_aux4(bwt, *p);
214 		tmp = *p & ~((1U<<((~l&15)<<1)) - 1);
215 		y += __occ_aux4(bwt, tmp) - (~l&15);
216 		memcpy(cntl, cntk, 4 * sizeof(bwtint_t));
217 		cntk[0] += x&0xff; cntk[1] += x>>8&0xff; cntk[2] += x>>16&0xff; cntk[3] += x>>24;
218 		cntl[0] += y&0xff; cntl[1] += y>>8&0xff; cntl[2] += y>>16&0xff; cntl[3] += y>>24;
219 	}
220 }
221 
bwt_match_exact(const bwt_t * bwt,int len,const ubyte_t * str,bwtint_t * sa_begin,bwtint_t * sa_end)222 int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end)
223 {
224 	bwtint_t k, l, ok, ol;
225 	int i;
226 	k = 0; l = bwt->seq_len;
227 	for (i = len - 1; i >= 0; --i) {
228 		ubyte_t c = str[i];
229 		if (c > 3) return 0; // no match
230 		bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
231 		k = bwt->L2[c] + ok + 1;
232 		l = bwt->L2[c] + ol;
233 		if (k > l) break; // no match
234 	}
235 	if (k > l) return 0; // no match
236 	if (sa_begin) *sa_begin = k;
237 	if (sa_end)   *sa_end = l;
238 	return l - k + 1;
239 }
240 
bwt_match_exact_alt(const bwt_t * bwt,int len,const ubyte_t * str,bwtint_t * k0,bwtint_t * l0)241 int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0)
242 {
243 	int i;
244 	bwtint_t k, l, ok, ol;
245 	k = *k0; l = *l0;
246 	for (i = len - 1; i >= 0; --i) {
247 		ubyte_t c = str[i];
248 		if (c > 3) return 0; // there is an N here. no match
249 		bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
250 		k = bwt->L2[c] + ok + 1;
251 		l = bwt->L2[c] + ol;
252 		if (k > l) return 0; // no match
253 	}
254 	*k0 = k; *l0 = l;
255 	return l - k + 1;
256 }
257 
258 /*********************
259  * Bidirectional BWT *
260  *********************/
261 
bwt_extend(const bwt_t * bwt,const bwtintv_t * ik,bwtintv_t ok[4],int is_back)262 void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back)
263 {
264 	bwtint_t tk[4], tl[4];
265 	int i;
266 	bwt_2occ4(bwt, ik->x[!is_back] - 1, ik->x[!is_back] - 1 + ik->x[2], tk, tl);
267 	for (i = 0; i != 4; ++i) {
268 		ok[i].x[!is_back] = bwt->L2[i] + 1 + tk[i];
269 		ok[i].x[2] = tl[i] - tk[i];
270 	}
271 	ok[3].x[is_back] = ik->x[is_back] + (ik->x[!is_back] <= bwt->primary && ik->x[!is_back] + ik->x[2] - 1 >= bwt->primary);
272 	ok[2].x[is_back] = ok[3].x[is_back] + ok[3].x[2];
273 	ok[1].x[is_back] = ok[2].x[is_back] + ok[2].x[2];
274 	ok[0].x[is_back] = ok[1].x[is_back] + ok[1].x[2];
275 }
276 
bwt_reverse_intvs(bwtintv_v * p)277 static void bwt_reverse_intvs(bwtintv_v *p)
278 {
279 	if (p->n > 1) {
280 		int j;
281 		for (j = 0; j < p->n>>1; ++j) {
282 			bwtintv_t tmp = p->a[p->n - 1 - j];
283 			p->a[p->n - 1 - j] = p->a[j];
284 			p->a[j] = tmp;
285 		}
286 	}
287 }
288 // NOTE: $max_intv is not currently used in BWA-MEM
bwt_smem1a(const bwt_t * bwt,int len,const uint8_t * q,int x,int min_intv,uint64_t max_intv,bwtintv_v * mem,bwtintv_v * tmpvec[2])289 int bwt_smem1a(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, uint64_t max_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2])
290 {
291 	int i, j, c, ret;
292 	bwtintv_t ik, ok[4];
293 	bwtintv_v a[2], *prev, *curr, *swap;
294 
295 	mem->n = 0;
296 	if (q[x] > 3) return x + 1;
297 	if (min_intv < 1) min_intv = 1; // the interval size should be at least 1
298 	kv_init(a[0]); kv_init(a[1]);
299 	prev = tmpvec && tmpvec[0]? tmpvec[0] : &a[0]; // use the temporary vector if provided
300 	curr = tmpvec && tmpvec[1]? tmpvec[1] : &a[1];
301 	bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
302 	ik.info = x + 1;
303 
304 	for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
305 		if (ik.x[2] < max_intv) { // an interval small enough
306 			kv_push(bwtintv_t, *curr, ik);
307 			break;
308 		} else if (q[i] < 4) { // an A/C/G/T base
309 			c = 3 - q[i]; // complement of q[i]
310 			bwt_extend(bwt, &ik, ok, 0);
311 			if (ok[c].x[2] != ik.x[2]) { // change of the interval size
312 				kv_push(bwtintv_t, *curr, ik);
313 				if (ok[c].x[2] < min_intv) break; // the interval size is too small to be extended further
314 			}
315 			ik = ok[c]; ik.info = i + 1;
316 		} else { // an ambiguous base
317 			kv_push(bwtintv_t, *curr, ik);
318 			break; // always terminate extension at an ambiguous base; in this case, i<len always stands
319 		}
320 	}
321 	if (i == len) kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
322 	bwt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first
323 	ret = curr->a[0].info; // this will be the returned value
324 	swap = curr; curr = prev; prev = swap;
325 
326 	for (i = x - 1; i >= -1; --i) { // backward search for MEMs
327 		c = i < 0? -1 : q[i] < 4? q[i] : -1; // c==-1 if i<0 or q[i] is an ambiguous base
328 		for (j = 0, curr->n = 0; j < prev->n; ++j) {
329 			bwtintv_t *p = &prev->a[j];
330 			if (c >= 0 && ik.x[2] >= max_intv) bwt_extend(bwt, p, ok, 1);
331 			if (c < 0 || ik.x[2] < max_intv || ok[c].x[2] < min_intv) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough
332 				if (curr->n == 0) { // test curr->n>0 to make sure there are no longer matches
333 					if (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches
334 						ik = *p; ik.info |= (uint64_t)(i + 1)<<32;
335 						kv_push(bwtintv_t, *mem, ik);
336 					}
337 				} // otherwise the match is contained in another longer match
338 			} else if (curr->n == 0 || ok[c].x[2] != curr->a[curr->n-1].x[2]) {
339 				ok[c].info = p->info;
340 				kv_push(bwtintv_t, *curr, ok[c]);
341 			}
342 		}
343 		if (curr->n == 0) break;
344 		swap = curr; curr = prev; prev = swap;
345 	}
346 	bwt_reverse_intvs(mem); // s.t. sorted by the start coordinate
347 
348 	if (tmpvec == 0 || tmpvec[0] == 0) free(a[0].a);
349 	if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);
350 	return ret;
351 }
352 
bwt_smem1(const bwt_t * bwt,int len,const uint8_t * q,int x,int min_intv,bwtintv_v * mem,bwtintv_v * tmpvec[2])353 int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2])
354 {
355 	return bwt_smem1a(bwt, len, q, x, min_intv, 0, mem, tmpvec);
356 }
357 
bwt_seed_strategy1(const bwt_t * bwt,int len,const uint8_t * q,int x,int min_len,int max_intv,bwtintv_t * mem)358 int bwt_seed_strategy1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem)
359 {
360 	int i, c;
361 	bwtintv_t ik, ok[4];
362 
363 	memset(mem, 0, sizeof(bwtintv_t));
364 	if (q[x] > 3) return x + 1;
365 	bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
366 	for (i = x + 1; i < len; ++i) { // forward search
367 		if (q[i] < 4) { // an A/C/G/T base
368 			c = 3 - q[i]; // complement of q[i]
369 			bwt_extend(bwt, &ik, ok, 0);
370 			if (ok[c].x[2] < max_intv && i - x >= min_len) {
371 				*mem = ok[c];
372 				mem->info = (uint64_t)x<<32 | (i + 1);
373 				return i + 1;
374 			}
375 			ik = ok[c];
376 		} else return i + 1;
377 	}
378 	return len;
379 }
380 
381 /*************************
382  * Read/write BWT and SA *
383  *************************/
384 
bwt_dump_bwt(const char * fn,const bwt_t * bwt)385 void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
386 {
387 	FILE *fp;
388 	fp = xopen(fn, "wb");
389 	err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
390 	err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
391 	err_fwrite(bwt->bwt, 4, bwt->bwt_size, fp);
392 	err_fflush(fp);
393 	err_fclose(fp);
394 }
395 
bwt_dump_sa(const char * fn,const bwt_t * bwt)396 void bwt_dump_sa(const char *fn, const bwt_t *bwt)
397 {
398 	FILE *fp;
399 	fp = xopen(fn, "wb");
400 	err_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
401 	err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
402 	err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
403 	err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp);
404 	err_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp);
405 	err_fflush(fp);
406 	err_fclose(fp);
407 }
408 
fread_fix(FILE * fp,bwtint_t size,void * a)409 static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)
410 { // Mac/Darwin has a bug when reading data longer than 2GB. This function fixes this issue by reading data in small chunks
411 	const int bufsize = 0x1000000; // 16M block
412 	bwtint_t offset = 0;
413 	while (size) {
414 		int x = bufsize < size? bufsize : size;
415 		if ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0) break;
416 		size -= x; offset += x;
417 	}
418 	return offset;
419 }
420 
bwt_restore_sa(const char * fn,bwt_t * bwt)421 void bwt_restore_sa(const char *fn, bwt_t *bwt)
422 {
423 	char skipped[256];
424 	FILE *fp;
425 	bwtint_t primary;
426 
427 	fp = xopen(fn, "rb");
428 	err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);
429 	xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same.");
430 	err_fread_noeof(skipped, sizeof(bwtint_t), 4, fp); // skip
431 	err_fread_noeof(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);
432 	err_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);
433 	xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same.");
434 
435 	bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv;
436 	bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
437 	bwt->sa[0] = -1;
438 
439 	fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1);
440 	err_fclose(fp);
441 }
442 
bwt_restore_bwt(const char * fn)443 bwt_t *bwt_restore_bwt(const char *fn)
444 {
445 	bwt_t *bwt;
446 	FILE *fp;
447 
448 	bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
449 	fp = xopen(fn, "rb");
450 	err_fseek(fp, 0, SEEK_END);
451 	bwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2;
452 	bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
453 	err_fseek(fp, 0, SEEK_SET);
454 	err_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp);
455 	err_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp);
456 	fread_fix(fp, bwt->bwt_size<<2, bwt->bwt);
457 	bwt->seq_len = bwt->L2[4];
458 	err_fclose(fp);
459 	bwt_gen_cnt_table(bwt);
460 
461 	return bwt;
462 }
463 
bwt_destroy(bwt_t * bwt)464 void bwt_destroy(bwt_t *bwt)
465 {
466 	if (bwt == 0) return;
467 	free(bwt->sa); free(bwt->bwt);
468 	free(bwt);
469 }
470