1 #include "bigWig.h"
2 #include "bwCommon.h"
3 #include <stdlib.h>
4 #include <math.h>
5 #include <string.h>
6 #include <zlib.h>
7 #include <errno.h>
8 
roundup(uint32_t v)9 static uint32_t roundup(uint32_t v) {
10     v--;
11     v |= v >> 1;
12     v |= v >> 2;
13     v |= v >> 4;
14     v |= v >> 8;
15     v |= v >> 16;
16     v++;
17     return v;
18 }
19 
20 //Returns the root node on success and NULL on error
readRTreeIdx(bigWigFile_t * fp,uint64_t offset)21 static bwRTree_t *readRTreeIdx(bigWigFile_t *fp, uint64_t offset) {
22     uint32_t magic;
23     bwRTree_t *node;
24 
25     if(!offset) {
26         if(bwSetPos(fp, fp->hdr->indexOffset)) return NULL;
27     } else {
28         if(bwSetPos(fp, offset)) return NULL;
29     }
30 
31     if(bwRead(&magic, sizeof(uint32_t), 1, fp) != 1) return NULL;
32     if(magic != IDX_MAGIC) {
33         fprintf(stderr, "[readRTreeIdx] Mismatch in the magic number!\n");
34         return NULL;
35     }
36 
37     node = calloc(1, sizeof(bwRTree_t));
38     if(!node) return NULL;
39 
40     if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
41     if(bwRead(&(node->nItems), sizeof(uint64_t), 1, fp) != 1) goto error;
42     if(bwRead(&(node->chrIdxStart), sizeof(uint32_t), 1, fp) != 1) goto error;
43     if(bwRead(&(node->baseStart), sizeof(uint32_t), 1, fp) != 1) goto error;
44     if(bwRead(&(node->chrIdxEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
45     if(bwRead(&(node->baseEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
46     if(bwRead(&(node->idxSize), sizeof(uint64_t), 1, fp) != 1) goto error;
47     if(bwRead(&(node->nItemsPerSlot), sizeof(uint32_t), 1, fp) != 1) goto error;
48     //Padding
49     if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
50     node->rootOffset = bwTell(fp);
51 
52     //For remote files, libCurl sometimes sets errno to 115 and doesn't clear it
53     errno = 0;
54 
55     return node;
56 
57 error:
58     free(node);
59     return NULL;
60 }
61 
62 //Returns a bwRTreeNode_t on success and NULL on an error
63 //For the root node, set offset to 0
bwGetRTreeNode(bigWigFile_t * fp,uint64_t offset)64 static bwRTreeNode_t *bwGetRTreeNode(bigWigFile_t *fp, uint64_t offset) {
65     bwRTreeNode_t *node = NULL;
66     uint8_t padding;
67     uint16_t i;
68     if(offset) {
69         if(bwSetPos(fp, offset)) return NULL;
70     } else {
71         //seek
72         if(bwSetPos(fp, fp->idx->rootOffset)) return NULL;
73     }
74 
75     node = calloc(1, sizeof(bwRTreeNode_t));
76     if(!node) return NULL;
77 
78     if(bwRead(&(node->isLeaf), sizeof(uint8_t), 1, fp) != 1) goto error;
79     if(bwRead(&padding, sizeof(uint8_t), 1, fp) != 1) goto error;
80     if(bwRead(&(node->nChildren), sizeof(uint16_t), 1, fp) != 1) goto error;
81 
82     node->chrIdxStart = malloc(sizeof(uint32_t)*(node->nChildren));
83     if(!node->chrIdxStart) goto error;
84     node->baseStart = malloc(sizeof(uint32_t)*(node->nChildren));
85     if(!node->baseStart) goto error;
86     node->chrIdxEnd = malloc(sizeof(uint32_t)*(node->nChildren));
87     if(!node->chrIdxEnd) goto error;
88     node->baseEnd = malloc(sizeof(uint32_t)*(node->nChildren));
89     if(!node->baseEnd) goto error;
90     node->dataOffset = malloc(sizeof(uint64_t)*(node->nChildren));
91     if(!node->dataOffset) goto error;
92     if(node->isLeaf) {
93         node->x.size = malloc(node->nChildren * sizeof(uint64_t));
94         if(!node->x.size) goto error;
95     } else {
96         node->x.child = calloc(node->nChildren, sizeof(struct bwRTreeNode_t *));
97         if(!node->x.child) goto error;
98     }
99     for(i=0; i<node->nChildren; i++) {
100         if(bwRead(&(node->chrIdxStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
101         if(bwRead(&(node->baseStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
102         if(bwRead(&(node->chrIdxEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
103         if(bwRead(&(node->baseEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
104         if(bwRead(&(node->dataOffset[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
105         if(node->isLeaf) {
106             if(bwRead(&(node->x.size[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
107         }
108     }
109 
110     return node;
111 
112 error:
113     if(node->chrIdxStart) free(node->chrIdxStart);
114     if(node->baseStart) free(node->baseStart);
115     if(node->chrIdxEnd) free(node->chrIdxEnd);
116     if(node->baseEnd) free(node->baseEnd);
117     if(node->dataOffset) free(node->dataOffset);
118     if(node->isLeaf && node->x.size) free(node->x.size);
119     else if((!node->isLeaf) && node->x.child) free(node->x.child);
120     free(node);
121     return NULL;
122 }
123 
destroyBWOverlapBlock(bwOverlapBlock_t * b)124 void destroyBWOverlapBlock(bwOverlapBlock_t *b) {
125     if(!b) return;
126     if(b->size) free(b->size);
127     if(b->offset) free(b->offset);
128     free(b);
129 }
130 
131 //Returns a bwOverlapBlock_t * object or NULL on error.
overlapsLeaf(bwRTreeNode_t * node,uint32_t tid,uint32_t start,uint32_t end)132 static bwOverlapBlock_t *overlapsLeaf(bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
133     uint16_t i, idx = 0;
134     bwOverlapBlock_t *o = calloc(1, sizeof(bwOverlapBlock_t));
135     if(!o) return NULL;
136 
137     for(i=0; i<node->nChildren; i++) {
138         if(tid < node->chrIdxStart[i]) break;
139         if(tid > node->chrIdxEnd[i]) continue;
140 
141         /*
142           The individual blocks can theoretically span multiple contigs.
143           So if we treat the first/last contig in the range as special
144           but anything in the middle is a guaranteed match
145         */
146         if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
147             if(tid == node->chrIdxStart[i]) {
148                 if(node->baseStart[i] >= end) break;
149             } else if(tid == node->chrIdxEnd[i]) {
150                 if(node->baseEnd[i] <= start) continue;
151             }
152         } else {
153             if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
154         }
155         o->n++;
156     }
157 
158     if(o->n) {
159         o->offset = malloc(sizeof(uint64_t) * (o->n));
160         if(!o->offset) goto error;
161         o->size = malloc(sizeof(uint64_t) * (o->n));
162         if(!o->size) goto error;
163 
164         for(i=0; i<node->nChildren; i++) {
165             if(tid < node->chrIdxStart[i]) break;
166             if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
167             if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
168                 if(tid == node->chrIdxStart[i]) {
169                     if(node->baseStart[i] >= end) continue;
170                 } else if(tid == node->chrIdxEnd[i]) {
171                     if(node->baseEnd[i] <= start) continue;
172                 }
173             } else {
174                 if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
175             }
176             o->offset[idx] = node->dataOffset[i];
177             o->size[idx++] = node->x.size[i];
178             if(idx >= o->n) break;
179         }
180     }
181 
182     if(idx != o->n) { //This should never happen
183         fprintf(stderr, "[overlapsLeaf] Mismatch between number of overlaps calculated and found!\n");
184         goto error;
185     }
186 
187     return o;
188 
189 error:
190     if(o) destroyBWOverlapBlock(o);
191     return NULL;
192 }
193 
194 //This will free l2 unless there's an error!
195 //Returns NULL on error, otherwise the merged lists
mergeOverlapBlocks(bwOverlapBlock_t * b1,bwOverlapBlock_t * b2)196 static bwOverlapBlock_t *mergeOverlapBlocks(bwOverlapBlock_t *b1, bwOverlapBlock_t *b2) {
197     uint64_t i,j;
198     if(!b2) return b1;
199     if(!b2->n) {
200         destroyBWOverlapBlock(b2);
201         return b1;
202     }
203     if(!b1->n) {
204         destroyBWOverlapBlock(b1);
205         return b2;
206     }
207     j = b1->n;
208     b1->n += b2->n;
209     b1->offset = realloc(b1->offset, sizeof(uint64_t) * (b1->n+b2->n));
210     if(!b1->offset) goto error;
211     b1->size = realloc(b1->size, sizeof(uint64_t) * (b1->n+b2->n));
212     if(!b1->size) goto error;
213 
214     for(i=0; i<b2->n; i++) {
215         b1->offset[j+i] = b2->offset[i];
216         b1->size[j+i] = b2->size[i];
217     }
218     destroyBWOverlapBlock(b2);
219     return b1;
220 
221 error:
222     destroyBWOverlapBlock(b1);
223     return NULL;
224 }
225 
226 //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
227 //The output needs to be free()d if not NULL (likewise with *sizes)
overlapsNonLeaf(bigWigFile_t * fp,bwRTreeNode_t * node,uint32_t tid,uint32_t start,uint32_t end)228 static bwOverlapBlock_t *overlapsNonLeaf(bigWigFile_t *fp, bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
229     uint16_t i;
230     bwOverlapBlock_t *nodeBlocks, *output = calloc(1, sizeof(bwOverlapBlock_t));
231     if(!output) return NULL;
232 
233     for(i=0; i<node->nChildren; i++) {
234         if(tid < node->chrIdxStart[i]) break;
235         if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
236         if(node->chrIdxStart[i] != node->chrIdxEnd[i]) { //child spans contigs
237             if(tid == node->chrIdxStart[i]) {
238                 if(node->baseStart[i] >= end) continue;
239             } else if(tid == node->chrIdxEnd[i]) {
240                 if(node->baseEnd[i] <= start) continue;
241             }
242         } else {
243             if(end <= node->baseStart[i] || start >= node->baseEnd[i]) continue;
244         }
245 
246         //We have an overlap!
247         if(!node->x.child[i])
248           node->x.child[i] = bwGetRTreeNode(fp, node->dataOffset[i]);
249         if(!node->x.child[i]) goto error;
250 
251         if(node->x.child[i]->isLeaf) { //leaf
252             nodeBlocks = overlapsLeaf(node->x.child[i], tid, start, end);
253         } else { //non-leaf
254             nodeBlocks = overlapsNonLeaf(fp, node->x.child[i], tid, start, end);
255         }
256 
257         //The output is processed the same regardless of leaf/non-leaf
258         if(!nodeBlocks) goto error;
259         else {
260             output = mergeOverlapBlocks(output, nodeBlocks);
261             if(!output) {
262                 destroyBWOverlapBlock(nodeBlocks);
263                 goto error;
264             }
265         }
266     }
267 
268     return output;
269 
270 error:
271     destroyBWOverlapBlock(output);
272     return NULL;
273 }
274 
275 //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
276 //The output must be free()d
walkRTreeNodes(bigWigFile_t * bw,bwRTreeNode_t * root,uint32_t tid,uint32_t start,uint32_t end)277 bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end) {
278     if(root->isLeaf) return overlapsLeaf(root, tid, start, end);
279     return overlapsNonLeaf(bw, root, tid, start, end);
280 }
281 
282 //In reality, a hash or some sort of tree structure is probably faster...
283 //Return -1 (AKA 0xFFFFFFFF...) on "not there", so we can hold (2^32)-1 items.
bwGetTid(bigWigFile_t * fp,char * chrom)284 uint32_t bwGetTid(bigWigFile_t *fp, char *chrom) {
285     uint32_t i;
286     if(!chrom) return -1;
287     for(i=0; i<fp->cl->nKeys; i++) {
288         if(strcmp(chrom, fp->cl->chrom[i]) == 0) return i;
289     }
290     return -1;
291 }
292 
bwGetOverlappingBlocks(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end)293 static bwOverlapBlock_t *bwGetOverlappingBlocks(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
294     uint32_t tid = bwGetTid(fp, chrom);
295 
296     if(tid == (uint32_t) -1) {
297         fprintf(stderr, "[bwGetOverlappingBlocks] Non-existent contig: %s\n", chrom);
298         return NULL;
299     }
300 
301     //Get the info if needed
302     if(!fp->idx) {
303         fp->idx = readRTreeIdx(fp, fp->hdr->indexOffset);
304         if(!fp->idx) {
305             return NULL;
306         }
307     }
308 
309     if(!fp->idx->root) fp->idx->root = bwGetRTreeNode(fp, 0);
310     if(!fp->idx->root) return NULL;
311 
312     return walkRTreeNodes(fp, fp->idx->root, tid, start, end);
313 }
314 
bwFillDataHdr(bwDataHeader_t * hdr,void * b)315 void bwFillDataHdr(bwDataHeader_t *hdr, void *b) {
316     hdr->tid = ((uint32_t*)b)[0];
317     hdr->start = ((uint32_t*)b)[1];
318     hdr->end = ((uint32_t*)b)[2];
319     hdr->step = ((uint32_t*)b)[3];
320     hdr->span = ((uint32_t*)b)[4];
321     hdr->type = ((uint8_t*)b)[20];
322     hdr->nItems = ((uint16_t*)b)[11];
323 }
324 
bwDestroyOverlappingIntervals(bwOverlappingIntervals_t * o)325 void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o) {
326     if(!o) return;
327     if(o->start) free(o->start);
328     if(o->end) free(o->end);
329     if(o->value) free(o->value);
330     free(o);
331 }
332 
bbDestroyOverlappingEntries(bbOverlappingEntries_t * o)333 void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o) {
334     uint32_t i;
335     if(!o) return;
336     if(o->start) free(o->start);
337     if(o->end) free(o->end);
338     if(o->str) {
339         for(i=0; i<o->l; i++) {
340             if(o->str[i]) free(o->str[i]);
341         }
342         free(o->str);
343     }
344     free(o);
345 }
346 
347 //Returns NULL on error, in which case o has been free()d
pushIntervals(bwOverlappingIntervals_t * o,uint32_t start,uint32_t end,float value)348 static bwOverlappingIntervals_t *pushIntervals(bwOverlappingIntervals_t *o, uint32_t start, uint32_t end, float value) {
349     if(o->l+1 >= o->m) {
350         o->m = roundup(o->l+1);
351         o->start = realloc(o->start, o->m * sizeof(uint32_t));
352         if(!o->start) goto error;
353         o->end = realloc(o->end, o->m * sizeof(uint32_t));
354         if(!o->end) goto error;
355         o->value = realloc(o->value, o->m * sizeof(float));
356         if(!o->value) goto error;
357     }
358     o->start[o->l] = start;
359     o->end[o->l] = end;
360     o->value[o->l++] = value;
361     return o;
362 
363 error:
364     bwDestroyOverlappingIntervals(o);
365     return NULL;
366 }
367 
pushBBIntervals(bbOverlappingEntries_t * o,uint32_t start,uint32_t end,char * str,int withString)368 static bbOverlappingEntries_t *pushBBIntervals(bbOverlappingEntries_t *o, uint32_t start, uint32_t end, char *str, int withString) {
369     if(o->l+1 >= o->m) {
370         o->m = roundup(o->l+1);
371         o->start = realloc(o->start, o->m * sizeof(uint32_t));
372         if(!o->start) goto error;
373         o->end = realloc(o->end, o->m * sizeof(uint32_t));
374         if(!o->end) goto error;
375         if(withString) {
376             o->str = realloc(o->str, o->m * sizeof(char**));
377             if(!o->str) goto error;
378         }
379     }
380     o->start[o->l] = start;
381     o->end[o->l] = end;
382     if(withString) o->str[o->l] = strdup(str);
383     o->l++;
384     return o;
385 
386 error:
387     bbDestroyOverlappingEntries(o);
388     return NULL;
389 }
390 
391 //Returns NULL on error
bwGetOverlappingIntervalsCore(bigWigFile_t * fp,bwOverlapBlock_t * o,uint32_t tid,uint32_t ostart,uint32_t oend)392 bwOverlappingIntervals_t *bwGetOverlappingIntervalsCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend) {
393     uint64_t i;
394     uint16_t j;
395     int compressed = 0, rv;
396     uLongf sz = fp->hdr->bufSize, tmp;
397     void *buf = NULL, *compBuf = NULL;
398     uint32_t start = 0, end , *p;
399     float value;
400     bwDataHeader_t hdr;
401     bwOverlappingIntervals_t *output = calloc(1, sizeof(bwOverlappingIntervals_t));
402 
403     if(!output) goto error;
404 
405     if(!o) return output;
406     if(!o->n) return output;
407 
408     if(sz) {
409         compressed = 1;
410         buf = malloc(sz);
411     }
412     sz = 0; //This is now the size of the compressed buffer
413 
414     for(i=0; i<o->n; i++) {
415         if(bwSetPos(fp, o->offset[i])) goto error;
416 
417         if(sz < o->size[i]) {
418             compBuf = realloc(compBuf, o->size[i]);
419             sz = o->size[i];
420         }
421         if(!compBuf) goto error;
422 
423         if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
424         if(compressed) {
425             tmp = fp->hdr->bufSize; //This gets over-written by uncompress
426             rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
427             if(rv != Z_OK) goto error;
428         } else {
429             buf = compBuf;
430         }
431 
432         //TODO: ensure that tmp is large enough!
433         bwFillDataHdr(&hdr, buf);
434 
435         p = ((uint32_t*) buf);
436         p += 6;
437         if(hdr.tid != tid) continue;
438 
439         if(hdr.type == 3) start = hdr.start - hdr.step;
440 
441         //FIXME: We should ensure that sz is large enough to hold nItems of the given type
442         for(j=0; j<hdr.nItems; j++) {
443             switch(hdr.type) {
444             case 1:
445                 start = *p;
446                 p++;
447                 end = *p;
448                 p++;
449                 value = *((float *)p);
450                 p++;
451                 break;
452             case 2:
453                 start = *p;
454                 p++;
455                 end = start + hdr.span;
456                 value = *((float *)p);
457                 p++;
458                 break;
459             case 3:
460                 start += hdr.step;
461                 end = start+hdr.span;
462                 value = *((float *)p);
463                 p++;
464                 break;
465             default :
466                 goto error;
467                 break;
468             }
469 
470             if(end <= ostart || start >= oend) continue;
471             //Push the overlap
472             if(!pushIntervals(output, start, end, value)) goto error;
473         }
474     }
475 
476     if(compressed && buf) free(buf);
477     if(compBuf) free(compBuf);
478     return output;
479 
480 error:
481     fprintf(stderr, "[bwGetOverlappingIntervalsCore] Got an error\n");
482     if(output) bwDestroyOverlappingIntervals(output);
483     if(compressed && buf) free(buf);
484     if(compBuf) free(compBuf);
485     return NULL;
486 }
487 
bbGetOverlappingEntriesCore(bigWigFile_t * fp,bwOverlapBlock_t * o,uint32_t tid,uint32_t ostart,uint32_t oend,int withString)488 bbOverlappingEntries_t *bbGetOverlappingEntriesCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend, int withString) {
489     uint64_t i;
490     int compressed = 0, rv, slen;
491     uLongf sz = fp->hdr->bufSize, tmp = 0;
492     void *buf = NULL, *bufEnd = NULL, *compBuf = NULL;
493     uint32_t entryTid = 0, start = 0, end;
494     char *str;
495     bbOverlappingEntries_t *output = calloc(1, sizeof(bbOverlappingEntries_t));
496 
497     if(!output) goto error;
498 
499     if(!o) return output;
500     if(!o->n) return output;
501 
502     if(sz) {
503         compressed = 1;
504         buf = malloc(sz);
505     }
506     sz = 0; //This is now the size of the compressed buffer
507 
508     for(i=0; i<o->n; i++) {
509         if(bwSetPos(fp, o->offset[i])) goto error;
510 
511         if(sz < o->size[i]) {
512             compBuf = realloc(compBuf, o->size[i]);
513             sz = o->size[i];
514         }
515         if(!compBuf) goto error;
516 
517         if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
518         if(compressed) {
519             tmp = fp->hdr->bufSize; //This gets over-written by uncompress
520             rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
521             if(rv != Z_OK) goto error;
522         } else {
523             buf = compBuf;
524             tmp = o->size[i]; //TODO: Is this correct? Do non-gzipped bigBeds exist?
525         }
526 
527         bufEnd = buf + tmp;
528         while(buf < bufEnd) {
529             entryTid = ((uint32_t*)buf)[0];
530             start = ((uint32_t*)buf)[1];
531             end = ((uint32_t*)buf)[2];
532             buf += 12;
533             str = (char*)buf;
534             slen = strlen(str) + 1;
535             buf += slen;
536 
537             if(entryTid < tid) continue;
538             if(entryTid > tid) break;
539             if(end <= ostart) continue;
540             if(start >= oend) break;
541 
542             //Push the overlap
543             if(!pushBBIntervals(output, start, end, str, withString)) goto error;
544         }
545 
546         buf = bufEnd - tmp; //reset the buffer pointer
547     }
548 
549     if(compressed && buf) free(buf);
550     if(compBuf) free(compBuf);
551     return output;
552 
553 error:
554     fprintf(stderr, "[bbGetOverlappingEntriesCore] Got an error\n");
555     buf = bufEnd - tmp;
556     if(output) bbDestroyOverlappingEntries(output);
557     if(compressed && buf) free(buf);
558     if(compBuf) free(compBuf);
559     return NULL;
560 }
561 
562 //Returns NULL on error OR no intervals, which is a bad design...
bwGetOverlappingIntervals(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end)563 bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
564     bwOverlappingIntervals_t *output;
565     uint32_t tid = bwGetTid(fp, chrom);
566     if(tid == (uint32_t) -1) return NULL;
567     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
568     if(!blocks) return NULL;
569     output = bwGetOverlappingIntervalsCore(fp, blocks, tid, start, end);
570     destroyBWOverlapBlock(blocks);
571     return output;
572 }
573 
574 //Like above, but for bigBed files
bbGetOverlappingEntries(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end,int withString)575 bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString) {
576     bbOverlappingEntries_t *output;
577     uint32_t tid = bwGetTid(fp, chrom);
578     if(tid == (uint32_t) -1) return NULL;
579     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
580     if(!blocks) return NULL;
581     output = bbGetOverlappingEntriesCore(fp, blocks, tid, start, end, withString);
582     destroyBWOverlapBlock(blocks);
583     return output;
584 }
585 
586 //Returns NULL on error
bwOverlappingIntervalsIterator(bigWigFile_t * bw,char * chrom,uint32_t start,uint32_t end,uint32_t blocksPerIteration)587 bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration) {
588     bwOverlapIterator_t *output = NULL;
589     uint64_t n;
590     uint32_t tid = bwGetTid(bw, chrom);
591     if(tid == (uint32_t) -1) return output;
592     output = calloc(1, sizeof(bwOverlapIterator_t));
593     if(!output) return output;
594     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
595 
596     output->bw = bw;
597     output->tid = tid;
598     output->start = start;
599     output->end = end;
600     output->blocks = blocks;
601     output->blocksPerIteration = blocksPerIteration;
602 
603     if(blocks) {
604         n = blocks->n;
605         if(n>blocksPerIteration) blocks->n = blocksPerIteration;
606         output->intervals = bwGetOverlappingIntervalsCore(bw, blocks,tid, start, end);
607         blocks->n = n;
608         output->offset = blocksPerIteration;
609     }
610     output->data = output->intervals;
611     return output;
612 }
613 
614 //Returns NULL on error
bbOverlappingEntriesIterator(bigWigFile_t * bw,char * chrom,uint32_t start,uint32_t end,int withString,uint32_t blocksPerIteration)615 bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration) {
616     bwOverlapIterator_t *output = NULL;
617     uint64_t n;
618     uint32_t tid = bwGetTid(bw, chrom);
619     if(tid == (uint32_t) -1) return output;
620     output = calloc(1, sizeof(bwOverlapIterator_t));
621     if(!output) return output;
622     bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
623 
624     output->bw = bw;
625     output->tid = tid;
626     output->start = start;
627     output->end = end;
628     output->blocks = blocks;
629     output->blocksPerIteration = blocksPerIteration;
630     output->withString = withString;
631 
632     if(blocks) {
633         n = blocks->n;
634         if(n>blocksPerIteration) blocks->n = blocksPerIteration;
635         output->entries = bbGetOverlappingEntriesCore(bw, blocks,tid, start, end, withString);
636         blocks->n = n;
637         output->offset = blocksPerIteration;
638     }
639     output->data = output->entries;
640     return output;
641 }
642 
bwIteratorDestroy(bwOverlapIterator_t * iter)643 void bwIteratorDestroy(bwOverlapIterator_t *iter) {
644     if(!iter) return;
645     if(iter->blocks) destroyBWOverlapBlock((bwOverlapBlock_t*) iter->blocks);
646     if(iter->intervals) bwDestroyOverlappingIntervals(iter->intervals);
647     if(iter->entries) bbDestroyOverlappingEntries(iter->entries);
648     free(iter);
649 }
650 
651 //On error, points to NULL and destroys the input
bwIteratorNext(bwOverlapIterator_t * iter)652 bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter) {
653     uint64_t n, *offset, *size;
654     bwOverlapBlock_t *blocks = iter->blocks;
655 
656     if(iter->intervals) {
657         bwDestroyOverlappingIntervals(iter->intervals);
658         iter->intervals = NULL;
659     }
660     if(iter->entries) {
661         bbDestroyOverlappingEntries(iter->entries);
662         iter->entries = NULL;
663     }
664     iter->data = NULL;
665 
666     if(iter->offset < blocks->n) {
667         //store the previous values
668         n = blocks->n;
669         offset = blocks->offset;
670         size = blocks->size;
671 
672         //Move the start of the blocks
673         blocks->offset += iter->offset;
674         blocks->size += iter->offset;
675         if(iter->offset + iter->blocksPerIteration > n) {
676             blocks->n = blocks->n - iter->offset;
677         } else {
678             blocks->n = iter->blocksPerIteration;
679         }
680 
681         //Get the intervals or entries, as appropriate
682         if(iter->bw->type == 0) {
683             //bigWig
684             iter->intervals = bwGetOverlappingIntervalsCore(iter->bw, blocks, iter->tid, iter->start, iter->end);
685             iter->data = iter->intervals;
686         } else {
687             //bigBed
688             iter->entries = bbGetOverlappingEntriesCore(iter->bw, blocks, iter->tid, iter->start, iter->end, iter->withString);
689             iter->data = iter->entries;
690         }
691         iter->offset += iter->blocksPerIteration;
692 
693         //reset the values in iter->blocks
694         blocks->n = n;
695         blocks->offset = offset;
696         blocks->size = size;
697 
698         //Check for error
699         if(!iter->intervals && !iter->entries) goto error;
700     }
701 
702     return iter;
703 
704 error:
705     bwIteratorDestroy(iter);
706     return NULL;
707 }
708 
709 //This is like bwGetOverlappingIntervals, except it returns 1 base windows. If includeNA is not 0, then a value will be returned for every position in the range (defaulting to NAN).
710 //The ->end member is NULL
711 //If includeNA is not 0 then ->start is also NULL, since it's implied
712 //Note that bwDestroyOverlappingIntervals() will work in either case
bwGetValues(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end,int includeNA)713 bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA) {
714     uint32_t i, j, n;
715     bwOverlappingIntervals_t *output = NULL;
716     bwOverlappingIntervals_t *intermediate = bwGetOverlappingIntervals(fp, chrom, start, end);
717     if(!intermediate) return NULL;
718 
719     output = calloc(1, sizeof(bwOverlappingIntervals_t));
720     if(!output) goto error;
721     if(includeNA) {
722         output->l = end-start;
723         output->value = malloc(output->l*sizeof(float));
724         if(!output->value) goto error;
725         for(i=0; i<output->l; i++) output->value[i] = NAN;
726         for(i=0; i<intermediate->l; i++) {
727             for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
728                 if(j < start || j >= end) continue;
729                 output->value[j-start] = intermediate->value[i];
730             }
731         }
732     } else {
733         n = 0;
734         for(i=0; i<intermediate->l; i++) {
735             if(intermediate->start[i] < start) intermediate->start[i] = start;
736             if(intermediate->end[i] > end) intermediate->end[i] = end;
737             n += intermediate->end[i]-intermediate->start[i];
738         }
739         output->l = n;
740         output->start = malloc(sizeof(uint32_t)*n);
741         if(!output->start) goto error;
742         output->value = malloc(sizeof(float)*n);
743         if(!output->value) goto error;
744         n = 0; //this is now the index
745         for(i=0; i<intermediate->l; i++) {
746             for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
747                 if(j < start || j >= end) continue;
748                 output->start[n] = j;
749                 output->value[n++] = intermediate->value[i];
750             }
751         }
752     }
753 
754     bwDestroyOverlappingIntervals(intermediate);
755     return output;
756 
757 error:
758     if(intermediate) bwDestroyOverlappingIntervals(intermediate);
759     if(output) bwDestroyOverlappingIntervals(output);
760     return NULL;
761 }
762 
bwDestroyIndexNode(bwRTreeNode_t * node)763 void bwDestroyIndexNode(bwRTreeNode_t *node) {
764     uint16_t i;
765 
766     if(!node) return;
767 
768     free(node->chrIdxStart);
769     free(node->baseStart);
770     free(node->chrIdxEnd);
771     free(node->baseEnd);
772     free(node->dataOffset);
773     if(!node->isLeaf) {
774         for(i=0; i<node->nChildren; i++) {
775             bwDestroyIndexNode(node->x.child[i]);
776         }
777         free(node->x.child);
778     } else {
779         free(node->x.size);
780     }
781     free(node);
782 }
783 
bwDestroyIndex(bwRTree_t * idx)784 void bwDestroyIndex(bwRTree_t *idx) {
785     bwDestroyIndexNode(idx->root);
786     free(idx);
787 }
788 
789 //Returns a pointer to the requested index (@offset, unless it's 0, in which case the index for the values is returned
790 //Returns NULL on error
bwReadIndex(bigWigFile_t * fp,uint64_t offset)791 bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset) {
792     bwRTree_t *idx = readRTreeIdx(fp, offset);
793     if(!idx) return NULL;
794 
795     //Read in the root node
796     idx->root = bwGetRTreeNode(fp, idx->rootOffset);
797 
798     if(!idx->root) {
799         bwDestroyIndex(idx);
800         return NULL;
801     }
802     return idx;
803 }
804