1 #include "bigWig.h"
2 #include "bwCommon.h"
3 #include <stdlib.h>
4 #include <string.h>
5 #include <zlib.h>
6 #include <errno.h>
7 
roundup(uint32_t v)8 static uint32_t roundup(uint32_t v) {
9     v--;
10     v |= v >> 1;
11     v |= v >> 2;
12     v |= v >> 4;
13     v |= v >> 8;
14     v |= v >> 16;
15     v++;
16     return v;
17 }
18 
19 //Returns the root node on success and NULL on error
readRTreeIdx(bigWigFile_t * fp,uint64_t offset)20 static bwRTree_t *readRTreeIdx(bigWigFile_t *fp, uint64_t offset) {
21     uint32_t magic;
22     bwRTree_t *node;
23 
24     if(!offset) {
25         if(bwSetPos(fp, fp->hdr->indexOffset)) return NULL;
26     } else {
27         if(bwSetPos(fp, offset)) return NULL;
28     }
29 
30     if(bwRead(&magic, sizeof(uint32_t), 1, fp) != 1) return NULL;
31     if(magic != IDX_MAGIC) {
32         fprintf(stderr, "[readRTreeIdx] Mismatch in the magic number!\n");
33         return NULL;
34     }
35 
36     node = calloc(1, sizeof(bwRTree_t));
37     if(!node) return NULL;
38 
39     if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
40     if(bwRead(&(node->nItems), sizeof(uint64_t), 1, fp) != 1) goto error;
41     if(bwRead(&(node->chrIdxStart), sizeof(uint32_t), 1, fp) != 1) goto error;
42     if(bwRead(&(node->baseStart), sizeof(uint32_t), 1, fp) != 1) goto error;
43     if(bwRead(&(node->chrIdxEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
44     if(bwRead(&(node->baseEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
45     if(bwRead(&(node->idxSize), sizeof(uint64_t), 1, fp) != 1) goto error;
46     if(bwRead(&(node->nItemsPerSlot), sizeof(uint32_t), 1, fp) != 1) goto error;
47     //Padding
48     if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
49     node->rootOffset = bwTell(fp);
50 
51     //For remote files, libCurl sometimes sets errno to 115 and doesn't clear it
52     errno = 0;
53 
54     return node;
55 
56 error:
57     free(node);
58     return NULL;
59 }
60 
61 //Returns a bwRTreeNode_t on success and NULL on an error
62 //For the root node, set offset to 0
bwGetRTreeNode(bigWigFile_t * fp,uint64_t offset)63 static bwRTreeNode_t *bwGetRTreeNode(bigWigFile_t *fp, uint64_t offset) {
64     bwRTreeNode_t *node = NULL;
65     uint8_t padding;
66     uint16_t i;
67     if(offset) {
68         if(bwSetPos(fp, offset)) return NULL;
69     } else {
70         //seek
71         if(bwSetPos(fp, fp->idx->rootOffset)) return NULL;
72     }
73 
74     node = calloc(1, sizeof(bwRTreeNode_t));
75     if(!node) return NULL;
76 
77     if(bwRead(&(node->isLeaf), sizeof(uint8_t), 1, fp) != 1) goto error;
78     if(bwRead(&padding, sizeof(uint8_t), 1, fp) != 1) goto error;
79     if(bwRead(&(node->nChildren), sizeof(uint16_t), 1, fp) != 1) goto error;
80 
81     node->chrIdxStart = malloc(sizeof(uint32_t)*(node->nChildren));
82     if(!node->chrIdxStart) goto error;
83     node->baseStart = malloc(sizeof(uint32_t)*(node->nChildren));
84     if(!node->baseStart) goto error;
85     node->chrIdxEnd = malloc(sizeof(uint32_t)*(node->nChildren));
86     if(!node->chrIdxEnd) goto error;
87     node->baseEnd = malloc(sizeof(uint32_t)*(node->nChildren));
88     if(!node->baseEnd) goto error;
89     node->dataOffset = malloc(sizeof(uint64_t)*(node->nChildren));
90     if(!node->dataOffset) goto error;
91     if(node->isLeaf) {
92         node->x.size = malloc(node->nChildren * sizeof(uint64_t));
93         if(!node->x.size) goto error;
94     } else {
95         node->x.child = calloc(node->nChildren, sizeof(struct bwRTreeNode_t *));
96         if(!node->x.child) goto error;
97     }
98     for(i=0; i<node->nChildren; i++) {
99         if(bwRead(&(node->chrIdxStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
100         if(bwRead(&(node->baseStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
101         if(bwRead(&(node->chrIdxEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
102         if(bwRead(&(node->baseEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
103         if(bwRead(&(node->dataOffset[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
104         if(node->isLeaf) {
105             if(bwRead(&(node->x.size[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
106         }
107     }
108 
109     return node;
110 
111 error:
112     if(node->chrIdxStart) free(node->chrIdxStart);
113     if(node->baseStart) free(node->baseStart);
114     if(node->chrIdxEnd) free(node->chrIdxEnd);
115     if(node->baseEnd) free(node->baseEnd);
116     if(node->dataOffset) free(node->dataOffset);
117     if(node->isLeaf && node->x.size) free(node->x.size);
118     else if((!node->isLeaf) && node->x.child) free(node->x.child);
119     free(node);
120     return NULL;
121 }
122 
destroyBWOverlapBlock(bwOverlapBlock_t * b)123 void destroyBWOverlapBlock(bwOverlapBlock_t *b) {
124     if(!b) return;
125     if(b->size) free(b->size);
126     if(b->offset) free(b->offset);
127     free(b);
128 }
129 
130 //Returns a bwOverlapBlock_t * object or NULL on error.
overlapsLeaf(bwRTreeNode_t * node,uint32_t tid,uint32_t start,uint32_t end)131 static bwOverlapBlock_t *overlapsLeaf(bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
132     uint16_t i, idx = 0;
133     bwOverlapBlock_t *o = calloc(1, sizeof(bwOverlapBlock_t));
134     if(!o) return NULL;
135 
136     for(i=0; i<node->nChildren; i++) {
137         if(tid < node->chrIdxStart[i]) break;
138         if(tid > node->chrIdxEnd[i]) continue;
139 
140         /*
141           The individual blocks can theoretically span multiple contigs.
142           So if we treat the first/last contig in the range as special
143           but anything in the middle is a guaranteed match
144         */
145         if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
146             if(tid == node->chrIdxStart[i]) {
147                 if(node->baseStart[i] >= end) break;
148             } else if(tid == node->chrIdxEnd[i]) {
149                 if(node->baseEnd[i] <= start) continue;
150             }
151         } else {
152             if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
153         }
154         o->n++;
155     }
156 
157     if(o->n) {
158         o->offset = malloc(sizeof(uint64_t) * (o->n));
159         if(!o->offset) goto error;
160         o->size = malloc(sizeof(uint64_t) * (o->n));
161         if(!o->size) goto error;
162 
163         for(i=0; i<node->nChildren; i++) {
164             if(tid < node->chrIdxStart[i]) break;
165             if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
166             if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
167                 if(tid == node->chrIdxStart[i]) {
168                     if(node->baseStart[i] >= end) continue;
169                 } else if(tid == node->chrIdxEnd[i]) {
170                     if(node->baseEnd[i] <= start) continue;
171                 }
172             } else {
173                 if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
174             }
175             o->offset[idx] = node->dataOffset[i];
176             o->size[idx++] = node->x.size[i];
177             if(idx >= o->n) break;
178         }
179     }
180 
181     if(idx != o->n) { //This should never happen
182         fprintf(stderr, "[overlapsLeaf] Mismatch between number of overlaps calculated and found!\n");
183         goto error;
184     }
185 
186     return o;
187 
188 error:
189     if(o) destroyBWOverlapBlock(o);
190     return NULL;
191 }
192 
193 //This will free l2 unless there's an error!
194 //Returns NULL on error, otherwise the merged lists
mergeOverlapBlocks(bwOverlapBlock_t * b1,bwOverlapBlock_t * b2)195 static bwOverlapBlock_t *mergeOverlapBlocks(bwOverlapBlock_t *b1, bwOverlapBlock_t *b2) {
196     uint64_t i,j;
197     if(!b2) return b1;
198     if(!b2->n) {
199         destroyBWOverlapBlock(b2);
200         return b1;
201     }
202     if(!b1->n) {
203         destroyBWOverlapBlock(b1);
204         return b2;
205     }
206     j = b1->n;
207     b1->n += b2->n;
208     b1->offset = realloc(b1->offset, sizeof(uint64_t) * (b1->n+b2->n));
209     if(!b1->offset) goto error;
210     b1->size = realloc(b1->size, sizeof(uint64_t) * (b1->n+b2->n));
211     if(!b1->size) goto error;
212 
213     for(i=0; i<b2->n; i++) {
214         b1->offset[j+i] = b2->offset[i];
215         b1->size[j+i] = b2->size[i];
216     }
217     destroyBWOverlapBlock(b2);
218     return b1;
219 
220 error:
221     destroyBWOverlapBlock(b1);
222     return NULL;
223 }
224 
225 //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
226 //The output needs to be free()d if not NULL (likewise with *sizes)
overlapsNonLeaf(bigWigFile_t * fp,bwRTreeNode_t * node,uint32_t tid,uint32_t start,uint32_t end)227 static bwOverlapBlock_t *overlapsNonLeaf(bigWigFile_t *fp, bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
228     uint16_t i;
229     bwOverlapBlock_t *nodeBlocks, *output = calloc(1, sizeof(bwOverlapBlock_t));
230     if(!output) return NULL;
231 
232     for(i=0; i<node->nChildren; i++) {
233         if(tid < node->chrIdxStart[i]) break;
234         if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
235         if(node->chrIdxStart[i] != node->chrIdxEnd[i]) { //child spans contigs
236             if(tid == node->chrIdxStart[i]) {
237                 if(node->baseStart[i] >= end) continue;
238             } else if(tid == node->chrIdxEnd[i]) {
239                 if(node->baseEnd[i] <= start) continue;
240             }
241         } else {
242             if(end <= node->baseStart[i] || start >= node->baseEnd[i]) continue;
243         }
244 
245         //We have an overlap!
246         if(!node->x.child[i])
247           node->x.child[i] = bwGetRTreeNode(fp, node->dataOffset[i]);
248         if(!node->x.child[i]) goto error;
249 
250         if(node->x.child[i]->isLeaf) { //leaf
251             nodeBlocks = overlapsLeaf(node->x.child[i], tid, start, end);
252         } else { //non-leaf
253             nodeBlocks = overlapsNonLeaf(fp, node->x.child[i], tid, start, end);
254         }
255 
256         //The output is processed the same regardless of leaf/non-leaf
257         if(!nodeBlocks) goto error;
258         else {
259             output = mergeOverlapBlocks(output, nodeBlocks);
260             if(!output) {
261                 destroyBWOverlapBlock(nodeBlocks);
262                 goto error;
263             }
264         }
265     }
266 
267     return output;
268 
269 error:
270     destroyBWOverlapBlock(output);
271     return NULL;
272 }
273 
274 //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
275 //The output must be free()d
walkRTreeNodes(bigWigFile_t * bw,bwRTreeNode_t * root,uint32_t tid,uint32_t start,uint32_t end)276 bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end) {
277     if(root->isLeaf) return overlapsLeaf(root, tid, start, end);
278     return overlapsNonLeaf(bw, root, tid, start, end);
279 }
280 
281 //In reality, a hash or some sort of tree structure is probably faster...
282 //Return -1 (AKA 0xFFFFFFFF...) on "not there", so we can hold (2^32)-1 items.
bwGetTid(bigWigFile_t * fp,char * chrom)283 uint32_t bwGetTid(bigWigFile_t *fp, char *chrom) {
284     uint32_t i;
285     if(!chrom) return -1;
286     for(i=0; i<fp->cl->nKeys; i++) {
287         if(strcmp(chrom, fp->cl->chrom[i]) == 0) return i;
288     }
289     return -1;
290 }
291 
bwGetOverlappingBlocks(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end)292 static bwOverlapBlock_t *bwGetOverlappingBlocks(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
293     uint32_t tid = bwGetTid(fp, chrom);
294 
295     if(tid == (uint32_t) -1) {
296         fprintf(stderr, "[bwGetOverlappingBlocks] Non-existent contig: %s\n", chrom);
297         return NULL;
298     }
299 
300     //Get the info if needed
301     if(!fp->idx) {
302         fp->idx = readRTreeIdx(fp, fp->hdr->indexOffset);
303         if(!fp->idx) {
304             return NULL;
305         }
306     }
307 
308     if(!fp->idx->root) fp->idx->root = bwGetRTreeNode(fp, 0);
309     if(!fp->idx->root) return NULL;
310 
311     return walkRTreeNodes(fp, fp->idx->root, tid, start, end);
312 }
313 
bwFillDataHdr(bwDataHeader_t * hdr,void * b)314 void bwFillDataHdr(bwDataHeader_t *hdr, void *b) {
315     hdr->tid = ((uint32_t*)b)[0];
316     hdr->start = ((uint32_t*)b)[1];
317     hdr->end = ((uint32_t*)b)[2];
318     hdr->step = ((uint32_t*)b)[3];
319     hdr->span = ((uint32_t*)b)[4];
320     hdr->type = ((uint8_t*)b)[20];
321     hdr->nItems = ((uint16_t*)b)[11];
322 }
323 
bwDestroyOverlappingIntervals(bwOverlappingIntervals_t * o)324 void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o) {
325     if(!o) return;
326     if(o->start) free(o->start);
327     if(o->end) free(o->end);
328     if(o->value) free(o->value);
329     free(o);
330 }
331 
bbDestroyOverlappingEntries(bbOverlappingEntries_t * o)332 void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o) {
333     uint32_t i;
334     if(!o) return;
335     if(o->start) free(o->start);
336     if(o->end) free(o->end);
337     if(o->str) {
338         for(i=0; i<o->l; i++) {
339             if(o->str[i]) free(o->str[i]);
340         }
341         free(o->str);
342     }
343     free(o);
344 }
345 
346 //Returns NULL on error, in which case o has been free()d
pushIntervals(bwOverlappingIntervals_t * o,uint32_t start,uint32_t end,float value)347 static bwOverlappingIntervals_t *pushIntervals(bwOverlappingIntervals_t *o, uint32_t start, uint32_t end, float value) {
348     if(o->l+1 >= o->m) {
349         o->m = roundup(o->l+1);
350         o->start = realloc(o->start, o->m * sizeof(uint32_t));
351         if(!o->start) goto error;
352         o->end = realloc(o->end, o->m * sizeof(uint32_t));
353         if(!o->end) goto error;
354         o->value = realloc(o->value, o->m * sizeof(float));
355         if(!o->value) goto error;
356     }
357     o->start[o->l] = start;
358     o->end[o->l] = end;
359     o->value[o->l++] = value;
360     return o;
361 
362 error:
363     bwDestroyOverlappingIntervals(o);
364     return NULL;
365 }
366 
pushBBIntervals(bbOverlappingEntries_t * o,uint32_t start,uint32_t end,char * str,int withString)367 static bbOverlappingEntries_t *pushBBIntervals(bbOverlappingEntries_t *o, uint32_t start, uint32_t end, char *str, int withString) {
368     if(o->l+1 >= o->m) {
369         o->m = roundup(o->l+1);
370         o->start = realloc(o->start, o->m * sizeof(uint32_t));
371         if(!o->start) goto error;
372         o->end = realloc(o->end, o->m * sizeof(uint32_t));
373         if(!o->end) goto error;
374         if(withString) {
375             o->str = realloc(o->str, o->m * sizeof(char**));
376             if(!o->str) goto error;
377         }
378     }
379     o->start[o->l] = start;
380     o->end[o->l] = end;
381     if(withString) o->str[o->l] = strdup(str);
382     o->l++;
383     return o;
384 
385 error:
386     bbDestroyOverlappingEntries(o);
387     return NULL;
388 }
389 
390 //Returns NULL on error
bwGetOverlappingIntervalsCore(bigWigFile_t * fp,bwOverlapBlock_t * o,uint32_t tid,uint32_t ostart,uint32_t oend)391 bwOverlappingIntervals_t *bwGetOverlappingIntervalsCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend) {
392     uint64_t i;
393     uint16_t j;
394     int compressed = 0, rv;
395     uLongf sz = fp->hdr->bufSize, tmp;
396     void *buf = NULL, *compBuf = NULL;
397     uint32_t start = 0, end , *p;
398     float value;
399     bwDataHeader_t hdr;
400     bwOverlappingIntervals_t *output = calloc(1, sizeof(bwOverlappingIntervals_t));
401 
402     if(!output) goto error;
403 
404     if(!o) return output;
405     if(!o->n) return output;
406 
407     if(sz) {
408         compressed = 1;
409         buf = malloc(sz);
410     }
411     sz = 0; //This is now the size of the compressed buffer
412 
413     for(i=0; i<o->n; i++) {
414         if(bwSetPos(fp, o->offset[i])) goto error;
415 
416         if(sz < o->size[i]) {
417             compBuf = realloc(compBuf, o->size[i]);
418             sz = o->size[i];
419         }
420         if(!compBuf) goto error;
421 
422         if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
423         if(compressed) {
424             tmp = fp->hdr->bufSize; //This gets over-written by uncompress
425             rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
426             if(rv != Z_OK) goto error;
427         } else {
428             buf = compBuf;
429         }
430 
431         //TODO: ensure that tmp is large enough!
432         bwFillDataHdr(&hdr, buf);
433 
434         p = ((uint32_t*) buf);
435         p += 6;
436         if(hdr.tid != tid) continue;
437 
438         if(hdr.type == 3) start = hdr.start - hdr.step;
439 
440         //FIXME: We should ensure that sz is large enough to hold nItems of the given type
441         for(j=0; j<hdr.nItems; j++) {
442             switch(hdr.type) {
443             case 1:
444                 start = *p;
445                 p++;
446                 end = *p;
447                 p++;
448                 value = *((float *)p);
449                 p++;
450                 break;
451             case 2:
452                 start = *p;
453                 p++;
454                 end = start + hdr.span;
455                 value = *((float *)p);
456                 p++;
457                 break;
458             case 3:
459                 start += hdr.step;
460                 end = start+hdr.span;
461                 value = *((float *)p);
462                 p++;
463                 break;
464             default :
465                 goto error;
466                 break;
467             }
468 
469             if(end <= ostart || start >= oend) continue;
470             //Push the overlap
471             if(!pushIntervals(output, start, end, value)) goto error;
472         }
473     }
474 
475     if(compressed && buf) free(buf);
476     if(compBuf) free(compBuf);
477     return output;
478 
479 error:
480     fprintf(stderr, "[bwGetOverlappingIntervalsCore] Got an error\n");
481     if(output) bwDestroyOverlappingIntervals(output);
482     if(compressed && buf) free(buf);
483     if(compBuf) free(compBuf);
484     return NULL;
485 }
486 
bbGetOverlappingEntriesCore(bigWigFile_t * fp,bwOverlapBlock_t * o,uint32_t tid,uint32_t ostart,uint32_t oend,int withString)487 bbOverlappingEntries_t *bbGetOverlappingEntriesCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend, int withString) {
488     uint64_t i;
489     int compressed = 0, rv, slen;
490     uLongf sz = fp->hdr->bufSize, tmp = 0;
491     void *buf = NULL, *bufEnd = NULL, *compBuf = NULL;
492     uint32_t entryTid = 0, start = 0, end;
493     char *str;
494     bbOverlappingEntries_t *output = calloc(1, sizeof(bbOverlappingEntries_t));
495 
496     if(!output) goto error;
497 
498     if(!o) return output;
499     if(!o->n) return output;
500 
501     if(sz) {
502         compressed = 1;
503         buf = malloc(sz);
504     }
505     sz = 0; //This is now the size of the compressed buffer
506 
507     for(i=0; i<o->n; i++) {
508         if(bwSetPos(fp, o->offset[i])) goto error;
509 
510         if(sz < o->size[i]) {
511             compBuf = realloc(compBuf, o->size[i]);
512             sz = o->size[i];
513         }
514         if(!compBuf) goto error;
515 
516         if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
517         if(compressed) {
518             tmp = fp->hdr->bufSize; //This gets over-written by uncompress
519             rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
520             if(rv != Z_OK) goto error;
521         } else {
522             buf = compBuf;
523             tmp = o->size[i]; //TODO: Is this correct? Do non-gzipped bigBeds exist?
524         }
525 
526         bufEnd = buf + tmp;
527         while(buf < bufEnd) {
528             entryTid = ((uint32_t*)buf)[0];
529             start = ((uint32_t*)buf)[1];
530             end = ((uint32_t*)buf)[2];
531             buf += 12;
532             str = (char*)buf;
533             slen = strlen(str) + 1;
534             buf += slen;
535 
536             if(entryTid < tid) continue;
537             if(entryTid > tid) break;
538             if(end <= ostart) continue;
539             if(start >= oend) break;
540 
541             //Push the overlap
542             if(!pushBBIntervals(output, start, end, str, withString)) goto error;
543         }
544 
545         buf = bufEnd - tmp; //reset the buffer pointer
546     }
547 
548     if(compressed && buf) free(buf);
549     if(compBuf) free(compBuf);
550     return output;
551 
552 error:
553     fprintf(stderr, "[bbGetOverlappingEntriesCore] Got an error\n");
554     buf = bufEnd - tmp;
555     if(output) bbDestroyOverlappingEntries(output);
556     if(compressed && buf) free(buf);
557     if(compBuf) free(compBuf);
558     return NULL;
559 }
560 
561 //Returns NULL on error OR no intervals, which is a bad design...
bwGetOverlappingIntervals(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end)562 bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
563     bwOverlappingIntervals_t *output;
564     uint32_t tid = bwGetTid(fp, chrom);
565     if(tid == (uint32_t) -1) return NULL;
566     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
567     if(!blocks) return NULL;
568     output = bwGetOverlappingIntervalsCore(fp, blocks, tid, start, end);
569     destroyBWOverlapBlock(blocks);
570     return output;
571 }
572 
573 //Like above, but for bigBed files
bbGetOverlappingEntries(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end,int withString)574 bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString) {
575     bbOverlappingEntries_t *output;
576     uint32_t tid = bwGetTid(fp, chrom);
577     if(tid == (uint32_t) -1) return NULL;
578     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
579     if(!blocks) return NULL;
580     output = bbGetOverlappingEntriesCore(fp, blocks, tid, start, end, withString);
581     destroyBWOverlapBlock(blocks);
582     return output;
583 }
584 
585 //Returns NULL on error
bwOverlappingIntervalsIterator(bigWigFile_t * bw,char * chrom,uint32_t start,uint32_t end,uint32_t blocksPerIteration)586 bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration) {
587     bwOverlapIterator_t *output = NULL;
588     uint64_t n;
589     uint32_t tid = bwGetTid(bw, chrom);
590     if(tid == (uint32_t) -1) return output;
591     output = calloc(1, sizeof(bwOverlapIterator_t));
592     if(!output) return output;
593     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
594 
595     output->bw = bw;
596     output->tid = tid;
597     output->start = start;
598     output->end = end;
599     output->blocks = blocks;
600     output->blocksPerIteration = blocksPerIteration;
601 
602     if(blocks) {
603         n = blocks->n;
604         if(n>blocksPerIteration) blocks->n = blocksPerIteration;
605         output->intervals = bwGetOverlappingIntervalsCore(bw, blocks,tid, start, end);
606         blocks->n = n;
607         output->offset = blocksPerIteration;
608     }
609     output->data = output->intervals;
610     return output;
611 }
612 
613 //Returns NULL on error
bbOverlappingEntriesIterator(bigWigFile_t * bw,char * chrom,uint32_t start,uint32_t end,int withString,uint32_t blocksPerIteration)614 bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration) {
615     bwOverlapIterator_t *output = NULL;
616     uint64_t n;
617     uint32_t tid = bwGetTid(bw, chrom);
618     if(tid == (uint32_t) -1) return output;
619     output = calloc(1, sizeof(bwOverlapIterator_t));
620     if(!output) return output;
621     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
622 
623     output->bw = bw;
624     output->tid = tid;
625     output->start = start;
626     output->end = end;
627     output->blocks = blocks;
628     output->blocksPerIteration = blocksPerIteration;
629     output->withString = withString;
630 
631     if(blocks) {
632         n = blocks->n;
633         if(n>blocksPerIteration) blocks->n = blocksPerIteration;
634         output->entries = bbGetOverlappingEntriesCore(bw, blocks,tid, start, end, withString);
635         blocks->n = n;
636         output->offset = blocksPerIteration;
637     }
638     output->data = output->entries;
639     return output;
640 }
641 
bwIteratorDestroy(bwOverlapIterator_t * iter)642 void bwIteratorDestroy(bwOverlapIterator_t *iter) {
643     if(!iter) return;
644     if(iter->blocks) destroyBWOverlapBlock((bwOverlapBlock_t*) iter->blocks);
645     if(iter->intervals) bwDestroyOverlappingIntervals(iter->intervals);
646     if(iter->entries) bbDestroyOverlappingEntries(iter->entries);
647     free(iter);
648 }
649 
650 //On error, points to NULL and destroys the input
bwIteratorNext(bwOverlapIterator_t * iter)651 bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter) {
652     uint64_t n, *offset, *size;
653     bwOverlapBlock_t *blocks = iter->blocks;
654 
655     if(iter->intervals) {
656         bwDestroyOverlappingIntervals(iter->intervals);
657         iter->intervals = NULL;
658     }
659     if(iter->entries) {
660         bbDestroyOverlappingEntries(iter->entries);
661         iter->entries = NULL;
662     }
663     iter->data = NULL;
664 
665     if(iter->offset < blocks->n) {
666         //store the previous values
667         n = blocks->n;
668         offset = blocks->offset;
669         size = blocks->size;
670 
671         //Move the start of the blocks
672         blocks->offset += iter->offset;
673         blocks->size += iter->offset;
674         if(iter->offset + iter->blocksPerIteration > n) {
675             blocks->n = blocks->n - iter->offset;
676         } else {
677             blocks->n = iter->blocksPerIteration;
678         }
679 
680         //Get the intervals or entries, as appropriate
681         if(iter->bw->type == 0) {
682             //bigWig
683             iter->intervals = bwGetOverlappingIntervalsCore(iter->bw, blocks, iter->tid, iter->start, iter->end);
684             iter->data = iter->intervals;
685         } else {
686             //bigBed
687             iter->entries = bbGetOverlappingEntriesCore(iter->bw, blocks, iter->tid, iter->start, iter->end, iter->withString);
688             iter->data = iter->entries;
689         }
690         iter->offset += iter->blocksPerIteration;
691 
692         //reset the values in iter->blocks
693         blocks->n = n;
694         blocks->offset = offset;
695         blocks->size = size;
696 
697         //Check for error
698         if(!iter->intervals && !iter->entries) goto error;
699     }
700 
701     return iter;
702 
703 error:
704     bwIteratorDestroy(iter);
705     return NULL;
706 }
707 
708 //This is like bwGetOverlappingIntervals, except it returns 1 base windows. If includeNA is not 0, then a value will be returned for every position in the range (defaulting to NAN).
709 //The ->end member is NULL
710 //If includeNA is not 0 then ->start is also NULL, since it's implied
711 //Note that bwDestroyOverlappingIntervals() will work in either case
bwGetValues(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end,int includeNA)712 bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA) {
713     uint32_t i, j, n;
714     bwOverlappingIntervals_t *output = NULL;
715     bwOverlappingIntervals_t *intermediate = bwGetOverlappingIntervals(fp, chrom, start, end);
716     if(!intermediate) return NULL;
717 
718     output = calloc(1, sizeof(bwOverlappingIntervals_t));
719     if(!output) goto error;
720     if(includeNA) {
721         output->l = end-start;
722         output->value = malloc((end-start)*sizeof(float));
723         if(!output->value) goto error;
724         for(i=0; i<end-start; i++) output->value[i] = strtod("NaN", NULL);
725         for(i=0; i<intermediate->l; i++) {
726             for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
727                 if(j < start || j >= end) continue;
728                 output->value[j-start] = intermediate->value[i];
729             }
730         }
731     } else {
732         n = 0;
733         for(i=0; i<intermediate->l; i++) {
734             if(intermediate->start[i] < start) intermediate->start[i] = start;
735             if(intermediate->end[i] > end) intermediate->end[i] = end;
736             n += intermediate->end[i]-intermediate->start[i];
737         }
738         output->l = n;
739         output->start = malloc(sizeof(uint32_t)*n);
740         if(!output->start) goto error;
741         output->value = malloc(sizeof(float)*n);
742         if(!output->value) goto error;
743         n = 0; //this is now the index
744         for(i=0; i<intermediate->l; i++) {
745             for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
746                 if(j < start || j >= end) continue;
747                 output->start[n] = j;
748                 output->value[n++] = intermediate->value[i];
749             }
750         }
751     }
752 
753     bwDestroyOverlappingIntervals(intermediate);
754     return output;
755 
756 error:
757     if(intermediate) bwDestroyOverlappingIntervals(intermediate);
758     if(output) bwDestroyOverlappingIntervals(output);
759     return NULL;
760 }
761 
bwDestroyIndexNode(bwRTreeNode_t * node)762 void bwDestroyIndexNode(bwRTreeNode_t *node) {
763     uint16_t i;
764 
765     if(!node) return;
766 
767     free(node->chrIdxStart);
768     free(node->baseStart);
769     free(node->chrIdxEnd);
770     free(node->baseEnd);
771     free(node->dataOffset);
772     if(!node->isLeaf) {
773         for(i=0; i<node->nChildren; i++) {
774             bwDestroyIndexNode(node->x.child[i]);
775         }
776         free(node->x.child);
777     } else {
778         free(node->x.size);
779     }
780     free(node);
781 }
782 
bwDestroyIndex(bwRTree_t * idx)783 void bwDestroyIndex(bwRTree_t *idx) {
784     bwDestroyIndexNode(idx->root);
785     free(idx);
786 }
787 
788 //Returns a pointer to the requested index (@offset, unless it's 0, in which case the index for the values is returned
789 //Returns NULL on error
bwReadIndex(bigWigFile_t * fp,uint64_t offset)790 bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset) {
791     bwRTree_t *idx = readRTreeIdx(fp, offset);
792     if(!idx) return NULL;
793 
794     //Read in the root node
795     idx->root = bwGetRTreeNode(fp, idx->rootOffset);
796 
797     if(!idx->root) {
798         bwDestroyIndex(idx);
799         return NULL;
800     }
801     return idx;
802 }
803