1 #include "bigWig.h"
2 #include "bwCommon.h"
3 #include <stdlib.h>
4 #include <math.h>
5 #include <string.h>
6 #include <zlib.h>
7 #include <errno.h>
8
roundup(uint32_t v)9 static uint32_t roundup(uint32_t v) {
10 v--;
11 v |= v >> 1;
12 v |= v >> 2;
13 v |= v >> 4;
14 v |= v >> 8;
15 v |= v >> 16;
16 v++;
17 return v;
18 }
19
20 //Returns the root node on success and NULL on error
readRTreeIdx(bigWigFile_t * fp,uint64_t offset)21 static bwRTree_t *readRTreeIdx(bigWigFile_t *fp, uint64_t offset) {
22 uint32_t magic;
23 bwRTree_t *node;
24
25 if(!offset) {
26 if(bwSetPos(fp, fp->hdr->indexOffset)) return NULL;
27 } else {
28 if(bwSetPos(fp, offset)) return NULL;
29 }
30
31 if(bwRead(&magic, sizeof(uint32_t), 1, fp) != 1) return NULL;
32 if(magic != IDX_MAGIC) {
33 fprintf(stderr, "[readRTreeIdx] Mismatch in the magic number!\n");
34 return NULL;
35 }
36
37 node = calloc(1, sizeof(bwRTree_t));
38 if(!node) return NULL;
39
40 if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
41 if(bwRead(&(node->nItems), sizeof(uint64_t), 1, fp) != 1) goto error;
42 if(bwRead(&(node->chrIdxStart), sizeof(uint32_t), 1, fp) != 1) goto error;
43 if(bwRead(&(node->baseStart), sizeof(uint32_t), 1, fp) != 1) goto error;
44 if(bwRead(&(node->chrIdxEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
45 if(bwRead(&(node->baseEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
46 if(bwRead(&(node->idxSize), sizeof(uint64_t), 1, fp) != 1) goto error;
47 if(bwRead(&(node->nItemsPerSlot), sizeof(uint32_t), 1, fp) != 1) goto error;
48 //Padding
49 if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
50 node->rootOffset = bwTell(fp);
51
52 //For remote files, libCurl sometimes sets errno to 115 and doesn't clear it
53 errno = 0;
54
55 return node;
56
57 error:
58 free(node);
59 return NULL;
60 }
61
62 //Returns a bwRTreeNode_t on success and NULL on an error
63 //For the root node, set offset to 0
bwGetRTreeNode(bigWigFile_t * fp,uint64_t offset)64 static bwRTreeNode_t *bwGetRTreeNode(bigWigFile_t *fp, uint64_t offset) {
65 bwRTreeNode_t *node = NULL;
66 uint8_t padding;
67 uint16_t i;
68 if(offset) {
69 if(bwSetPos(fp, offset)) return NULL;
70 } else {
71 //seek
72 if(bwSetPos(fp, fp->idx->rootOffset)) return NULL;
73 }
74
75 node = calloc(1, sizeof(bwRTreeNode_t));
76 if(!node) return NULL;
77
78 if(bwRead(&(node->isLeaf), sizeof(uint8_t), 1, fp) != 1) goto error;
79 if(bwRead(&padding, sizeof(uint8_t), 1, fp) != 1) goto error;
80 if(bwRead(&(node->nChildren), sizeof(uint16_t), 1, fp) != 1) goto error;
81
82 node->chrIdxStart = malloc(sizeof(uint32_t)*(node->nChildren));
83 if(!node->chrIdxStart) goto error;
84 node->baseStart = malloc(sizeof(uint32_t)*(node->nChildren));
85 if(!node->baseStart) goto error;
86 node->chrIdxEnd = malloc(sizeof(uint32_t)*(node->nChildren));
87 if(!node->chrIdxEnd) goto error;
88 node->baseEnd = malloc(sizeof(uint32_t)*(node->nChildren));
89 if(!node->baseEnd) goto error;
90 node->dataOffset = malloc(sizeof(uint64_t)*(node->nChildren));
91 if(!node->dataOffset) goto error;
92 if(node->isLeaf) {
93 node->x.size = malloc(node->nChildren * sizeof(uint64_t));
94 if(!node->x.size) goto error;
95 } else {
96 node->x.child = calloc(node->nChildren, sizeof(struct bwRTreeNode_t *));
97 if(!node->x.child) goto error;
98 }
99 for(i=0; i<node->nChildren; i++) {
100 if(bwRead(&(node->chrIdxStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
101 if(bwRead(&(node->baseStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
102 if(bwRead(&(node->chrIdxEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
103 if(bwRead(&(node->baseEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
104 if(bwRead(&(node->dataOffset[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
105 if(node->isLeaf) {
106 if(bwRead(&(node->x.size[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
107 }
108 }
109
110 return node;
111
112 error:
113 if(node->chrIdxStart) free(node->chrIdxStart);
114 if(node->baseStart) free(node->baseStart);
115 if(node->chrIdxEnd) free(node->chrIdxEnd);
116 if(node->baseEnd) free(node->baseEnd);
117 if(node->dataOffset) free(node->dataOffset);
118 if(node->isLeaf && node->x.size) free(node->x.size);
119 else if((!node->isLeaf) && node->x.child) free(node->x.child);
120 free(node);
121 return NULL;
122 }
123
destroyBWOverlapBlock(bwOverlapBlock_t * b)124 void destroyBWOverlapBlock(bwOverlapBlock_t *b) {
125 if(!b) return;
126 if(b->size) free(b->size);
127 if(b->offset) free(b->offset);
128 free(b);
129 }
130
131 //Returns a bwOverlapBlock_t * object or NULL on error.
overlapsLeaf(bwRTreeNode_t * node,uint32_t tid,uint32_t start,uint32_t end)132 static bwOverlapBlock_t *overlapsLeaf(bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
133 uint16_t i, idx = 0;
134 bwOverlapBlock_t *o = calloc(1, sizeof(bwOverlapBlock_t));
135 if(!o) return NULL;
136
137 for(i=0; i<node->nChildren; i++) {
138 if(tid < node->chrIdxStart[i]) break;
139 if(tid > node->chrIdxEnd[i]) continue;
140
141 /*
142 The individual blocks can theoretically span multiple contigs.
143 So if we treat the first/last contig in the range as special
144 but anything in the middle is a guaranteed match
145 */
146 if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
147 if(tid == node->chrIdxStart[i]) {
148 if(node->baseStart[i] >= end) break;
149 } else if(tid == node->chrIdxEnd[i]) {
150 if(node->baseEnd[i] <= start) continue;
151 }
152 } else {
153 if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
154 }
155 o->n++;
156 }
157
158 if(o->n) {
159 o->offset = malloc(sizeof(uint64_t) * (o->n));
160 if(!o->offset) goto error;
161 o->size = malloc(sizeof(uint64_t) * (o->n));
162 if(!o->size) goto error;
163
164 for(i=0; i<node->nChildren; i++) {
165 if(tid < node->chrIdxStart[i]) break;
166 if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
167 if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
168 if(tid == node->chrIdxStart[i]) {
169 if(node->baseStart[i] >= end) continue;
170 } else if(tid == node->chrIdxEnd[i]) {
171 if(node->baseEnd[i] <= start) continue;
172 }
173 } else {
174 if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
175 }
176 o->offset[idx] = node->dataOffset[i];
177 o->size[idx++] = node->x.size[i];
178 if(idx >= o->n) break;
179 }
180 }
181
182 if(idx != o->n) { //This should never happen
183 fprintf(stderr, "[overlapsLeaf] Mismatch between number of overlaps calculated and found!\n");
184 goto error;
185 }
186
187 return o;
188
189 error:
190 if(o) destroyBWOverlapBlock(o);
191 return NULL;
192 }
193
194 //This will free l2 unless there's an error!
195 //Returns NULL on error, otherwise the merged lists
mergeOverlapBlocks(bwOverlapBlock_t * b1,bwOverlapBlock_t * b2)196 static bwOverlapBlock_t *mergeOverlapBlocks(bwOverlapBlock_t *b1, bwOverlapBlock_t *b2) {
197 uint64_t i,j;
198 if(!b2) return b1;
199 if(!b2->n) {
200 destroyBWOverlapBlock(b2);
201 return b1;
202 }
203 if(!b1->n) {
204 destroyBWOverlapBlock(b1);
205 return b2;
206 }
207 j = b1->n;
208 b1->n += b2->n;
209 b1->offset = realloc(b1->offset, sizeof(uint64_t) * (b1->n+b2->n));
210 if(!b1->offset) goto error;
211 b1->size = realloc(b1->size, sizeof(uint64_t) * (b1->n+b2->n));
212 if(!b1->size) goto error;
213
214 for(i=0; i<b2->n; i++) {
215 b1->offset[j+i] = b2->offset[i];
216 b1->size[j+i] = b2->size[i];
217 }
218 destroyBWOverlapBlock(b2);
219 return b1;
220
221 error:
222 destroyBWOverlapBlock(b1);
223 return NULL;
224 }
225
226 //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
227 //The output needs to be free()d if not NULL (likewise with *sizes)
overlapsNonLeaf(bigWigFile_t * fp,bwRTreeNode_t * node,uint32_t tid,uint32_t start,uint32_t end)228 static bwOverlapBlock_t *overlapsNonLeaf(bigWigFile_t *fp, bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
229 uint16_t i;
230 bwOverlapBlock_t *nodeBlocks, *output = calloc(1, sizeof(bwOverlapBlock_t));
231 if(!output) return NULL;
232
233 for(i=0; i<node->nChildren; i++) {
234 if(tid < node->chrIdxStart[i]) break;
235 if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
236 if(node->chrIdxStart[i] != node->chrIdxEnd[i]) { //child spans contigs
237 if(tid == node->chrIdxStart[i]) {
238 if(node->baseStart[i] >= end) continue;
239 } else if(tid == node->chrIdxEnd[i]) {
240 if(node->baseEnd[i] <= start) continue;
241 }
242 } else {
243 if(end <= node->baseStart[i] || start >= node->baseEnd[i]) continue;
244 }
245
246 //We have an overlap!
247 if(!node->x.child[i])
248 node->x.child[i] = bwGetRTreeNode(fp, node->dataOffset[i]);
249 if(!node->x.child[i]) goto error;
250
251 if(node->x.child[i]->isLeaf) { //leaf
252 nodeBlocks = overlapsLeaf(node->x.child[i], tid, start, end);
253 } else { //non-leaf
254 nodeBlocks = overlapsNonLeaf(fp, node->x.child[i], tid, start, end);
255 }
256
257 //The output is processed the same regardless of leaf/non-leaf
258 if(!nodeBlocks) goto error;
259 else {
260 output = mergeOverlapBlocks(output, nodeBlocks);
261 if(!output) {
262 destroyBWOverlapBlock(nodeBlocks);
263 goto error;
264 }
265 }
266 }
267
268 return output;
269
270 error:
271 destroyBWOverlapBlock(output);
272 return NULL;
273 }
274
275 //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
276 //The output must be free()d
walkRTreeNodes(bigWigFile_t * bw,bwRTreeNode_t * root,uint32_t tid,uint32_t start,uint32_t end)277 bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end) {
278 if(root->isLeaf) return overlapsLeaf(root, tid, start, end);
279 return overlapsNonLeaf(bw, root, tid, start, end);
280 }
281
282 //In reality, a hash or some sort of tree structure is probably faster...
283 //Return -1 (AKA 0xFFFFFFFF...) on "not there", so we can hold (2^32)-1 items.
bwGetTid(bigWigFile_t * fp,char * chrom)284 uint32_t bwGetTid(bigWigFile_t *fp, char *chrom) {
285 uint32_t i;
286 if(!chrom) return -1;
287 for(i=0; i<fp->cl->nKeys; i++) {
288 if(strcmp(chrom, fp->cl->chrom[i]) == 0) return i;
289 }
290 return -1;
291 }
292
bwGetOverlappingBlocks(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end)293 static bwOverlapBlock_t *bwGetOverlappingBlocks(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
294 uint32_t tid = bwGetTid(fp, chrom);
295
296 if(tid == (uint32_t) -1) {
297 fprintf(stderr, "[bwGetOverlappingBlocks] Non-existent contig: %s\n", chrom);
298 return NULL;
299 }
300
301 //Get the info if needed
302 if(!fp->idx) {
303 fp->idx = readRTreeIdx(fp, fp->hdr->indexOffset);
304 if(!fp->idx) {
305 return NULL;
306 }
307 }
308
309 if(!fp->idx->root) fp->idx->root = bwGetRTreeNode(fp, 0);
310 if(!fp->idx->root) return NULL;
311
312 return walkRTreeNodes(fp, fp->idx->root, tid, start, end);
313 }
314
bwFillDataHdr(bwDataHeader_t * hdr,void * b)315 void bwFillDataHdr(bwDataHeader_t *hdr, void *b) {
316 hdr->tid = ((uint32_t*)b)[0];
317 hdr->start = ((uint32_t*)b)[1];
318 hdr->end = ((uint32_t*)b)[2];
319 hdr->step = ((uint32_t*)b)[3];
320 hdr->span = ((uint32_t*)b)[4];
321 hdr->type = ((uint8_t*)b)[20];
322 hdr->nItems = ((uint16_t*)b)[11];
323 }
324
bwDestroyOverlappingIntervals(bwOverlappingIntervals_t * o)325 void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o) {
326 if(!o) return;
327 if(o->start) free(o->start);
328 if(o->end) free(o->end);
329 if(o->value) free(o->value);
330 free(o);
331 }
332
bbDestroyOverlappingEntries(bbOverlappingEntries_t * o)333 void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o) {
334 uint32_t i;
335 if(!o) return;
336 if(o->start) free(o->start);
337 if(o->end) free(o->end);
338 if(o->str) {
339 for(i=0; i<o->l; i++) {
340 if(o->str[i]) free(o->str[i]);
341 }
342 free(o->str);
343 }
344 free(o);
345 }
346
347 //Returns NULL on error, in which case o has been free()d
pushIntervals(bwOverlappingIntervals_t * o,uint32_t start,uint32_t end,float value)348 static bwOverlappingIntervals_t *pushIntervals(bwOverlappingIntervals_t *o, uint32_t start, uint32_t end, float value) {
349 if(o->l+1 >= o->m) {
350 o->m = roundup(o->l+1);
351 o->start = realloc(o->start, o->m * sizeof(uint32_t));
352 if(!o->start) goto error;
353 o->end = realloc(o->end, o->m * sizeof(uint32_t));
354 if(!o->end) goto error;
355 o->value = realloc(o->value, o->m * sizeof(float));
356 if(!o->value) goto error;
357 }
358 o->start[o->l] = start;
359 o->end[o->l] = end;
360 o->value[o->l++] = value;
361 return o;
362
363 error:
364 bwDestroyOverlappingIntervals(o);
365 return NULL;
366 }
367
pushBBIntervals(bbOverlappingEntries_t * o,uint32_t start,uint32_t end,char * str,int withString)368 static bbOverlappingEntries_t *pushBBIntervals(bbOverlappingEntries_t *o, uint32_t start, uint32_t end, char *str, int withString) {
369 if(o->l+1 >= o->m) {
370 o->m = roundup(o->l+1);
371 o->start = realloc(o->start, o->m * sizeof(uint32_t));
372 if(!o->start) goto error;
373 o->end = realloc(o->end, o->m * sizeof(uint32_t));
374 if(!o->end) goto error;
375 if(withString) {
376 o->str = realloc(o->str, o->m * sizeof(char**));
377 if(!o->str) goto error;
378 }
379 }
380 o->start[o->l] = start;
381 o->end[o->l] = end;
382 if(withString) o->str[o->l] = strdup(str);
383 o->l++;
384 return o;
385
386 error:
387 bbDestroyOverlappingEntries(o);
388 return NULL;
389 }
390
391 //Returns NULL on error
bwGetOverlappingIntervalsCore(bigWigFile_t * fp,bwOverlapBlock_t * o,uint32_t tid,uint32_t ostart,uint32_t oend)392 bwOverlappingIntervals_t *bwGetOverlappingIntervalsCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend) {
393 uint64_t i;
394 uint16_t j;
395 int compressed = 0, rv;
396 uLongf sz = fp->hdr->bufSize, tmp;
397 void *buf = NULL, *compBuf = NULL;
398 uint32_t start = 0, end , *p;
399 float value;
400 bwDataHeader_t hdr;
401 bwOverlappingIntervals_t *output = calloc(1, sizeof(bwOverlappingIntervals_t));
402
403 if(!output) goto error;
404
405 if(!o) return output;
406 if(!o->n) return output;
407
408 if(sz) {
409 compressed = 1;
410 buf = malloc(sz);
411 }
412 sz = 0; //This is now the size of the compressed buffer
413
414 for(i=0; i<o->n; i++) {
415 if(bwSetPos(fp, o->offset[i])) goto error;
416
417 if(sz < o->size[i]) {
418 compBuf = realloc(compBuf, o->size[i]);
419 sz = o->size[i];
420 }
421 if(!compBuf) goto error;
422
423 if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
424 if(compressed) {
425 tmp = fp->hdr->bufSize; //This gets over-written by uncompress
426 rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
427 if(rv != Z_OK) goto error;
428 } else {
429 buf = compBuf;
430 }
431
432 //TODO: ensure that tmp is large enough!
433 bwFillDataHdr(&hdr, buf);
434
435 p = ((uint32_t*) buf);
436 p += 6;
437 if(hdr.tid != tid) continue;
438
439 if(hdr.type == 3) start = hdr.start - hdr.step;
440
441 //FIXME: We should ensure that sz is large enough to hold nItems of the given type
442 for(j=0; j<hdr.nItems; j++) {
443 switch(hdr.type) {
444 case 1:
445 start = *p;
446 p++;
447 end = *p;
448 p++;
449 value = *((float *)p);
450 p++;
451 break;
452 case 2:
453 start = *p;
454 p++;
455 end = start + hdr.span;
456 value = *((float *)p);
457 p++;
458 break;
459 case 3:
460 start += hdr.step;
461 end = start+hdr.span;
462 value = *((float *)p);
463 p++;
464 break;
465 default :
466 goto error;
467 break;
468 }
469
470 if(end <= ostart || start >= oend) continue;
471 //Push the overlap
472 if(!pushIntervals(output, start, end, value)) goto error;
473 }
474 }
475
476 if(compressed && buf) free(buf);
477 if(compBuf) free(compBuf);
478 return output;
479
480 error:
481 fprintf(stderr, "[bwGetOverlappingIntervalsCore] Got an error\n");
482 if(output) bwDestroyOverlappingIntervals(output);
483 if(compressed && buf) free(buf);
484 if(compBuf) free(compBuf);
485 return NULL;
486 }
487
bbGetOverlappingEntriesCore(bigWigFile_t * fp,bwOverlapBlock_t * o,uint32_t tid,uint32_t ostart,uint32_t oend,int withString)488 bbOverlappingEntries_t *bbGetOverlappingEntriesCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend, int withString) {
489 uint64_t i;
490 int compressed = 0, rv, slen;
491 uLongf sz = fp->hdr->bufSize, tmp = 0;
492 void *buf = NULL, *bufEnd = NULL, *compBuf = NULL;
493 uint32_t entryTid = 0, start = 0, end;
494 char *str;
495 bbOverlappingEntries_t *output = calloc(1, sizeof(bbOverlappingEntries_t));
496
497 if(!output) goto error;
498
499 if(!o) return output;
500 if(!o->n) return output;
501
502 if(sz) {
503 compressed = 1;
504 buf = malloc(sz);
505 }
506 sz = 0; //This is now the size of the compressed buffer
507
508 for(i=0; i<o->n; i++) {
509 if(bwSetPos(fp, o->offset[i])) goto error;
510
511 if(sz < o->size[i]) {
512 compBuf = realloc(compBuf, o->size[i]);
513 sz = o->size[i];
514 }
515 if(!compBuf) goto error;
516
517 if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
518 if(compressed) {
519 tmp = fp->hdr->bufSize; //This gets over-written by uncompress
520 rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
521 if(rv != Z_OK) goto error;
522 } else {
523 buf = compBuf;
524 tmp = o->size[i]; //TODO: Is this correct? Do non-gzipped bigBeds exist?
525 }
526
527 bufEnd = buf + tmp;
528 while(buf < bufEnd) {
529 entryTid = ((uint32_t*)buf)[0];
530 start = ((uint32_t*)buf)[1];
531 end = ((uint32_t*)buf)[2];
532 buf += 12;
533 str = (char*)buf;
534 slen = strlen(str) + 1;
535 buf += slen;
536
537 if(entryTid < tid) continue;
538 if(entryTid > tid) break;
539 if(end <= ostart) continue;
540 if(start >= oend) break;
541
542 //Push the overlap
543 if(!pushBBIntervals(output, start, end, str, withString)) goto error;
544 }
545
546 buf = bufEnd - tmp; //reset the buffer pointer
547 }
548
549 if(compressed && buf) free(buf);
550 if(compBuf) free(compBuf);
551 return output;
552
553 error:
554 fprintf(stderr, "[bbGetOverlappingEntriesCore] Got an error\n");
555 buf = bufEnd - tmp;
556 if(output) bbDestroyOverlappingEntries(output);
557 if(compressed && buf) free(buf);
558 if(compBuf) free(compBuf);
559 return NULL;
560 }
561
562 //Returns NULL on error OR no intervals, which is a bad design...
bwGetOverlappingIntervals(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end)563 bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
564 bwOverlappingIntervals_t *output;
565 uint32_t tid = bwGetTid(fp, chrom);
566 if(tid == (uint32_t) -1) return NULL;
567 bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
568 if(!blocks) return NULL;
569 output = bwGetOverlappingIntervalsCore(fp, blocks, tid, start, end);
570 destroyBWOverlapBlock(blocks);
571 return output;
572 }
573
574 //Like above, but for bigBed files
bbGetOverlappingEntries(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end,int withString)575 bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString) {
576 bbOverlappingEntries_t *output;
577 uint32_t tid = bwGetTid(fp, chrom);
578 if(tid == (uint32_t) -1) return NULL;
579 bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
580 if(!blocks) return NULL;
581 output = bbGetOverlappingEntriesCore(fp, blocks, tid, start, end, withString);
582 destroyBWOverlapBlock(blocks);
583 return output;
584 }
585
586 //Returns NULL on error
bwOverlappingIntervalsIterator(bigWigFile_t * bw,char * chrom,uint32_t start,uint32_t end,uint32_t blocksPerIteration)587 bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration) {
588 bwOverlapIterator_t *output = NULL;
589 uint64_t n;
590 uint32_t tid = bwGetTid(bw, chrom);
591 if(tid == (uint32_t) -1) return output;
592 output = calloc(1, sizeof(bwOverlapIterator_t));
593 if(!output) return output;
594 bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
595
596 output->bw = bw;
597 output->tid = tid;
598 output->start = start;
599 output->end = end;
600 output->blocks = blocks;
601 output->blocksPerIteration = blocksPerIteration;
602
603 if(blocks) {
604 n = blocks->n;
605 if(n>blocksPerIteration) blocks->n = blocksPerIteration;
606 output->intervals = bwGetOverlappingIntervalsCore(bw, blocks,tid, start, end);
607 blocks->n = n;
608 output->offset = blocksPerIteration;
609 }
610 output->data = output->intervals;
611 return output;
612 }
613
614 //Returns NULL on error
bbOverlappingEntriesIterator(bigWigFile_t * bw,char * chrom,uint32_t start,uint32_t end,int withString,uint32_t blocksPerIteration)615 bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration) {
616 bwOverlapIterator_t *output = NULL;
617 uint64_t n;
618 uint32_t tid = bwGetTid(bw, chrom);
619 if(tid == (uint32_t) -1) return output;
620 output = calloc(1, sizeof(bwOverlapIterator_t));
621 if(!output) return output;
622 bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
623
624 output->bw = bw;
625 output->tid = tid;
626 output->start = start;
627 output->end = end;
628 output->blocks = blocks;
629 output->blocksPerIteration = blocksPerIteration;
630 output->withString = withString;
631
632 if(blocks) {
633 n = blocks->n;
634 if(n>blocksPerIteration) blocks->n = blocksPerIteration;
635 output->entries = bbGetOverlappingEntriesCore(bw, blocks,tid, start, end, withString);
636 blocks->n = n;
637 output->offset = blocksPerIteration;
638 }
639 output->data = output->entries;
640 return output;
641 }
642
bwIteratorDestroy(bwOverlapIterator_t * iter)643 void bwIteratorDestroy(bwOverlapIterator_t *iter) {
644 if(!iter) return;
645 if(iter->blocks) destroyBWOverlapBlock((bwOverlapBlock_t*) iter->blocks);
646 if(iter->intervals) bwDestroyOverlappingIntervals(iter->intervals);
647 if(iter->entries) bbDestroyOverlappingEntries(iter->entries);
648 free(iter);
649 }
650
651 //On error, points to NULL and destroys the input
bwIteratorNext(bwOverlapIterator_t * iter)652 bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter) {
653 uint64_t n, *offset, *size;
654 bwOverlapBlock_t *blocks = iter->blocks;
655
656 if(iter->intervals) {
657 bwDestroyOverlappingIntervals(iter->intervals);
658 iter->intervals = NULL;
659 }
660 if(iter->entries) {
661 bbDestroyOverlappingEntries(iter->entries);
662 iter->entries = NULL;
663 }
664 iter->data = NULL;
665
666 if(iter->offset < blocks->n) {
667 //store the previous values
668 n = blocks->n;
669 offset = blocks->offset;
670 size = blocks->size;
671
672 //Move the start of the blocks
673 blocks->offset += iter->offset;
674 blocks->size += iter->offset;
675 if(iter->offset + iter->blocksPerIteration > n) {
676 blocks->n = blocks->n - iter->offset;
677 } else {
678 blocks->n = iter->blocksPerIteration;
679 }
680
681 //Get the intervals or entries, as appropriate
682 if(iter->bw->type == 0) {
683 //bigWig
684 iter->intervals = bwGetOverlappingIntervalsCore(iter->bw, blocks, iter->tid, iter->start, iter->end);
685 iter->data = iter->intervals;
686 } else {
687 //bigBed
688 iter->entries = bbGetOverlappingEntriesCore(iter->bw, blocks, iter->tid, iter->start, iter->end, iter->withString);
689 iter->data = iter->entries;
690 }
691 iter->offset += iter->blocksPerIteration;
692
693 //reset the values in iter->blocks
694 blocks->n = n;
695 blocks->offset = offset;
696 blocks->size = size;
697
698 //Check for error
699 if(!iter->intervals && !iter->entries) goto error;
700 }
701
702 return iter;
703
704 error:
705 bwIteratorDestroy(iter);
706 return NULL;
707 }
708
709 //This is like bwGetOverlappingIntervals, except it returns 1 base windows. If includeNA is not 0, then a value will be returned for every position in the range (defaulting to NAN).
710 //The ->end member is NULL
711 //If includeNA is not 0 then ->start is also NULL, since it's implied
712 //Note that bwDestroyOverlappingIntervals() will work in either case
bwGetValues(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end,int includeNA)713 bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA) {
714 uint32_t i, j, n;
715 bwOverlappingIntervals_t *output = NULL;
716 bwOverlappingIntervals_t *intermediate = bwGetOverlappingIntervals(fp, chrom, start, end);
717 if(!intermediate) return NULL;
718
719 output = calloc(1, sizeof(bwOverlappingIntervals_t));
720 if(!output) goto error;
721 if(includeNA) {
722 output->l = end-start;
723 output->value = malloc(output->l*sizeof(float));
724 if(!output->value) goto error;
725 for(i=0; i<output->l; i++) output->value[i] = NAN;
726 for(i=0; i<intermediate->l; i++) {
727 for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
728 if(j < start || j >= end) continue;
729 output->value[j-start] = intermediate->value[i];
730 }
731 }
732 } else {
733 n = 0;
734 for(i=0; i<intermediate->l; i++) {
735 if(intermediate->start[i] < start) intermediate->start[i] = start;
736 if(intermediate->end[i] > end) intermediate->end[i] = end;
737 n += intermediate->end[i]-intermediate->start[i];
738 }
739 output->l = n;
740 output->start = malloc(sizeof(uint32_t)*n);
741 if(!output->start) goto error;
742 output->value = malloc(sizeof(float)*n);
743 if(!output->value) goto error;
744 n = 0; //this is now the index
745 for(i=0; i<intermediate->l; i++) {
746 for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
747 if(j < start || j >= end) continue;
748 output->start[n] = j;
749 output->value[n++] = intermediate->value[i];
750 }
751 }
752 }
753
754 bwDestroyOverlappingIntervals(intermediate);
755 return output;
756
757 error:
758 if(intermediate) bwDestroyOverlappingIntervals(intermediate);
759 if(output) bwDestroyOverlappingIntervals(output);
760 return NULL;
761 }
762
bwDestroyIndexNode(bwRTreeNode_t * node)763 void bwDestroyIndexNode(bwRTreeNode_t *node) {
764 uint16_t i;
765
766 if(!node) return;
767
768 free(node->chrIdxStart);
769 free(node->baseStart);
770 free(node->chrIdxEnd);
771 free(node->baseEnd);
772 free(node->dataOffset);
773 if(!node->isLeaf) {
774 for(i=0; i<node->nChildren; i++) {
775 bwDestroyIndexNode(node->x.child[i]);
776 }
777 free(node->x.child);
778 } else {
779 free(node->x.size);
780 }
781 free(node);
782 }
783
bwDestroyIndex(bwRTree_t * idx)784 void bwDestroyIndex(bwRTree_t *idx) {
785 bwDestroyIndexNode(idx->root);
786 free(idx);
787 }
788
789 //Returns a pointer to the requested index (@offset, unless it's 0, in which case the index for the values is returned
790 //Returns NULL on error
bwReadIndex(bigWigFile_t * fp,uint64_t offset)791 bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset) {
792 bwRTree_t *idx = readRTreeIdx(fp, offset);
793 if(!idx) return NULL;
794
795 //Read in the root node
796 idx->root = bwGetRTreeNode(fp, idx->rootOffset);
797
798 if(!idx->root) {
799 bwDestroyIndex(idx);
800 return NULL;
801 }
802 return idx;
803 }
804