1 #include "bigWig.h"
2 #include "bwCommon.h"
3 #include <stdlib.h>
4 #include <string.h>
5 #include <zlib.h>
6 #include <errno.h>
7
roundup(uint32_t v)8 static uint32_t roundup(uint32_t v) {
9 v--;
10 v |= v >> 1;
11 v |= v >> 2;
12 v |= v >> 4;
13 v |= v >> 8;
14 v |= v >> 16;
15 v++;
16 return v;
17 }
18
19 //Returns the root node on success and NULL on error
readRTreeIdx(bigWigFile_t * fp,uint64_t offset)20 static bwRTree_t *readRTreeIdx(bigWigFile_t *fp, uint64_t offset) {
21 uint32_t magic;
22 bwRTree_t *node;
23
24 if(!offset) {
25 if(bwSetPos(fp, fp->hdr->indexOffset)) return NULL;
26 } else {
27 if(bwSetPos(fp, offset)) return NULL;
28 }
29
30 if(bwRead(&magic, sizeof(uint32_t), 1, fp) != 1) return NULL;
31 if(magic != IDX_MAGIC) {
32 fprintf(stderr, "[readRTreeIdx] Mismatch in the magic number!\n");
33 return NULL;
34 }
35
36 node = calloc(1, sizeof(bwRTree_t));
37 if(!node) return NULL;
38
39 if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
40 if(bwRead(&(node->nItems), sizeof(uint64_t), 1, fp) != 1) goto error;
41 if(bwRead(&(node->chrIdxStart), sizeof(uint32_t), 1, fp) != 1) goto error;
42 if(bwRead(&(node->baseStart), sizeof(uint32_t), 1, fp) != 1) goto error;
43 if(bwRead(&(node->chrIdxEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
44 if(bwRead(&(node->baseEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
45 if(bwRead(&(node->idxSize), sizeof(uint64_t), 1, fp) != 1) goto error;
46 if(bwRead(&(node->nItemsPerSlot), sizeof(uint32_t), 1, fp) != 1) goto error;
47 //Padding
48 if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
49 node->rootOffset = bwTell(fp);
50
51 //For remote files, libCurl sometimes sets errno to 115 and doesn't clear it
52 errno = 0;
53
54 return node;
55
56 error:
57 free(node);
58 return NULL;
59 }
60
61 //Returns a bwRTreeNode_t on success and NULL on an error
62 //For the root node, set offset to 0
bwGetRTreeNode(bigWigFile_t * fp,uint64_t offset)63 static bwRTreeNode_t *bwGetRTreeNode(bigWigFile_t *fp, uint64_t offset) {
64 bwRTreeNode_t *node = NULL;
65 uint8_t padding;
66 uint16_t i;
67 if(offset) {
68 if(bwSetPos(fp, offset)) return NULL;
69 } else {
70 //seek
71 if(bwSetPos(fp, fp->idx->rootOffset)) return NULL;
72 }
73
74 node = calloc(1, sizeof(bwRTreeNode_t));
75 if(!node) return NULL;
76
77 if(bwRead(&(node->isLeaf), sizeof(uint8_t), 1, fp) != 1) goto error;
78 if(bwRead(&padding, sizeof(uint8_t), 1, fp) != 1) goto error;
79 if(bwRead(&(node->nChildren), sizeof(uint16_t), 1, fp) != 1) goto error;
80
81 node->chrIdxStart = malloc(sizeof(uint32_t)*(node->nChildren));
82 if(!node->chrIdxStart) goto error;
83 node->baseStart = malloc(sizeof(uint32_t)*(node->nChildren));
84 if(!node->baseStart) goto error;
85 node->chrIdxEnd = malloc(sizeof(uint32_t)*(node->nChildren));
86 if(!node->chrIdxEnd) goto error;
87 node->baseEnd = malloc(sizeof(uint32_t)*(node->nChildren));
88 if(!node->baseEnd) goto error;
89 node->dataOffset = malloc(sizeof(uint64_t)*(node->nChildren));
90 if(!node->dataOffset) goto error;
91 if(node->isLeaf) {
92 node->x.size = malloc(node->nChildren * sizeof(uint64_t));
93 if(!node->x.size) goto error;
94 } else {
95 node->x.child = calloc(node->nChildren, sizeof(struct bwRTreeNode_t *));
96 if(!node->x.child) goto error;
97 }
98 for(i=0; i<node->nChildren; i++) {
99 if(bwRead(&(node->chrIdxStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
100 if(bwRead(&(node->baseStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
101 if(bwRead(&(node->chrIdxEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
102 if(bwRead(&(node->baseEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
103 if(bwRead(&(node->dataOffset[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
104 if(node->isLeaf) {
105 if(bwRead(&(node->x.size[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
106 }
107 }
108
109 return node;
110
111 error:
112 if(node->chrIdxStart) free(node->chrIdxStart);
113 if(node->baseStart) free(node->baseStart);
114 if(node->chrIdxEnd) free(node->chrIdxEnd);
115 if(node->baseEnd) free(node->baseEnd);
116 if(node->dataOffset) free(node->dataOffset);
117 if(node->isLeaf && node->x.size) free(node->x.size);
118 else if((!node->isLeaf) && node->x.child) free(node->x.child);
119 free(node);
120 return NULL;
121 }
122
destroyBWOverlapBlock(bwOverlapBlock_t * b)123 void destroyBWOverlapBlock(bwOverlapBlock_t *b) {
124 if(!b) return;
125 if(b->size) free(b->size);
126 if(b->offset) free(b->offset);
127 free(b);
128 }
129
130 //Returns a bwOverlapBlock_t * object or NULL on error.
overlapsLeaf(bwRTreeNode_t * node,uint32_t tid,uint32_t start,uint32_t end)131 static bwOverlapBlock_t *overlapsLeaf(bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
132 uint16_t i, idx = 0;
133 bwOverlapBlock_t *o = calloc(1, sizeof(bwOverlapBlock_t));
134 if(!o) return NULL;
135
136 for(i=0; i<node->nChildren; i++) {
137 if(tid < node->chrIdxStart[i]) break;
138 if(tid > node->chrIdxEnd[i]) continue;
139
140 /*
141 The individual blocks can theoretically span multiple contigs.
142 So if we treat the first/last contig in the range as special
143 but anything in the middle is a guaranteed match
144 */
145 if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
146 if(tid == node->chrIdxStart[i]) {
147 if(node->baseStart[i] >= end) break;
148 } else if(tid == node->chrIdxEnd[i]) {
149 if(node->baseEnd[i] <= start) continue;
150 }
151 } else {
152 if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
153 }
154 o->n++;
155 }
156
157 if(o->n) {
158 o->offset = malloc(sizeof(uint64_t) * (o->n));
159 if(!o->offset) goto error;
160 o->size = malloc(sizeof(uint64_t) * (o->n));
161 if(!o->size) goto error;
162
163 for(i=0; i<node->nChildren; i++) {
164 if(tid < node->chrIdxStart[i]) break;
165 if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
166 if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
167 if(tid == node->chrIdxStart[i]) {
168 if(node->baseStart[i] >= end) continue;
169 } else if(tid == node->chrIdxEnd[i]) {
170 if(node->baseEnd[i] <= start) continue;
171 }
172 } else {
173 if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
174 }
175 o->offset[idx] = node->dataOffset[i];
176 o->size[idx++] = node->x.size[i];
177 if(idx >= o->n) break;
178 }
179 }
180
181 if(idx != o->n) { //This should never happen
182 fprintf(stderr, "[overlapsLeaf] Mismatch between number of overlaps calculated and found!\n");
183 goto error;
184 }
185
186 return o;
187
188 error:
189 if(o) destroyBWOverlapBlock(o);
190 return NULL;
191 }
192
193 //This will free l2 unless there's an error!
194 //Returns NULL on error, otherwise the merged lists
mergeOverlapBlocks(bwOverlapBlock_t * b1,bwOverlapBlock_t * b2)195 static bwOverlapBlock_t *mergeOverlapBlocks(bwOverlapBlock_t *b1, bwOverlapBlock_t *b2) {
196 uint64_t i,j;
197 if(!b2) return b1;
198 if(!b2->n) {
199 destroyBWOverlapBlock(b2);
200 return b1;
201 }
202 if(!b1->n) {
203 destroyBWOverlapBlock(b1);
204 return b2;
205 }
206 j = b1->n;
207 b1->n += b2->n;
208 b1->offset = realloc(b1->offset, sizeof(uint64_t) * (b1->n+b2->n));
209 if(!b1->offset) goto error;
210 b1->size = realloc(b1->size, sizeof(uint64_t) * (b1->n+b2->n));
211 if(!b1->size) goto error;
212
213 for(i=0; i<b2->n; i++) {
214 b1->offset[j+i] = b2->offset[i];
215 b1->size[j+i] = b2->size[i];
216 }
217 destroyBWOverlapBlock(b2);
218 return b1;
219
220 error:
221 destroyBWOverlapBlock(b1);
222 return NULL;
223 }
224
225 //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
226 //The output needs to be free()d if not NULL (likewise with *sizes)
overlapsNonLeaf(bigWigFile_t * fp,bwRTreeNode_t * node,uint32_t tid,uint32_t start,uint32_t end)227 static bwOverlapBlock_t *overlapsNonLeaf(bigWigFile_t *fp, bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
228 uint16_t i;
229 bwOverlapBlock_t *nodeBlocks, *output = calloc(1, sizeof(bwOverlapBlock_t));
230 if(!output) return NULL;
231
232 for(i=0; i<node->nChildren; i++) {
233 if(tid < node->chrIdxStart[i]) break;
234 if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
235 if(node->chrIdxStart[i] != node->chrIdxEnd[i]) { //child spans contigs
236 if(tid == node->chrIdxStart[i]) {
237 if(node->baseStart[i] >= end) continue;
238 } else if(tid == node->chrIdxEnd[i]) {
239 if(node->baseEnd[i] <= start) continue;
240 }
241 } else {
242 if(end <= node->baseStart[i] || start >= node->baseEnd[i]) continue;
243 }
244
245 //We have an overlap!
246 if(!node->x.child[i])
247 node->x.child[i] = bwGetRTreeNode(fp, node->dataOffset[i]);
248 if(!node->x.child[i]) goto error;
249
250 if(node->x.child[i]->isLeaf) { //leaf
251 nodeBlocks = overlapsLeaf(node->x.child[i], tid, start, end);
252 } else { //non-leaf
253 nodeBlocks = overlapsNonLeaf(fp, node->x.child[i], tid, start, end);
254 }
255
256 //The output is processed the same regardless of leaf/non-leaf
257 if(!nodeBlocks) goto error;
258 else {
259 output = mergeOverlapBlocks(output, nodeBlocks);
260 if(!output) {
261 destroyBWOverlapBlock(nodeBlocks);
262 goto error;
263 }
264 }
265 }
266
267 return output;
268
269 error:
270 destroyBWOverlapBlock(output);
271 return NULL;
272 }
273
274 //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
275 //The output must be free()d
walkRTreeNodes(bigWigFile_t * bw,bwRTreeNode_t * root,uint32_t tid,uint32_t start,uint32_t end)276 bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end) {
277 if(root->isLeaf) return overlapsLeaf(root, tid, start, end);
278 return overlapsNonLeaf(bw, root, tid, start, end);
279 }
280
281 //In reality, a hash or some sort of tree structure is probably faster...
282 //Return -1 (AKA 0xFFFFFFFF...) on "not there", so we can hold (2^32)-1 items.
bwGetTid(bigWigFile_t * fp,char * chrom)283 uint32_t bwGetTid(bigWigFile_t *fp, char *chrom) {
284 uint32_t i;
285 if(!chrom) return -1;
286 for(i=0; i<fp->cl->nKeys; i++) {
287 if(strcmp(chrom, fp->cl->chrom[i]) == 0) return i;
288 }
289 return -1;
290 }
291
bwGetOverlappingBlocks(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end)292 static bwOverlapBlock_t *bwGetOverlappingBlocks(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
293 uint32_t tid = bwGetTid(fp, chrom);
294
295 if(tid == (uint32_t) -1) {
296 fprintf(stderr, "[bwGetOverlappingBlocks] Non-existent contig: %s\n", chrom);
297 return NULL;
298 }
299
300 //Get the info if needed
301 if(!fp->idx) {
302 fp->idx = readRTreeIdx(fp, fp->hdr->indexOffset);
303 if(!fp->idx) {
304 return NULL;
305 }
306 }
307
308 if(!fp->idx->root) fp->idx->root = bwGetRTreeNode(fp, 0);
309 if(!fp->idx->root) return NULL;
310
311 return walkRTreeNodes(fp, fp->idx->root, tid, start, end);
312 }
313
bwFillDataHdr(bwDataHeader_t * hdr,void * b)314 void bwFillDataHdr(bwDataHeader_t *hdr, void *b) {
315 hdr->tid = ((uint32_t*)b)[0];
316 hdr->start = ((uint32_t*)b)[1];
317 hdr->end = ((uint32_t*)b)[2];
318 hdr->step = ((uint32_t*)b)[3];
319 hdr->span = ((uint32_t*)b)[4];
320 hdr->type = ((uint8_t*)b)[20];
321 hdr->nItems = ((uint16_t*)b)[11];
322 }
323
bwDestroyOverlappingIntervals(bwOverlappingIntervals_t * o)324 void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o) {
325 if(!o) return;
326 if(o->start) free(o->start);
327 if(o->end) free(o->end);
328 if(o->value) free(o->value);
329 free(o);
330 }
331
bbDestroyOverlappingEntries(bbOverlappingEntries_t * o)332 void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o) {
333 uint32_t i;
334 if(!o) return;
335 if(o->start) free(o->start);
336 if(o->end) free(o->end);
337 if(o->str) {
338 for(i=0; i<o->l; i++) {
339 if(o->str[i]) free(o->str[i]);
340 }
341 free(o->str);
342 }
343 free(o);
344 }
345
346 //Returns NULL on error, in which case o has been free()d
pushIntervals(bwOverlappingIntervals_t * o,uint32_t start,uint32_t end,float value)347 static bwOverlappingIntervals_t *pushIntervals(bwOverlappingIntervals_t *o, uint32_t start, uint32_t end, float value) {
348 if(o->l+1 >= o->m) {
349 o->m = roundup(o->l+1);
350 o->start = realloc(o->start, o->m * sizeof(uint32_t));
351 if(!o->start) goto error;
352 o->end = realloc(o->end, o->m * sizeof(uint32_t));
353 if(!o->end) goto error;
354 o->value = realloc(o->value, o->m * sizeof(float));
355 if(!o->value) goto error;
356 }
357 o->start[o->l] = start;
358 o->end[o->l] = end;
359 o->value[o->l++] = value;
360 return o;
361
362 error:
363 bwDestroyOverlappingIntervals(o);
364 return NULL;
365 }
366
pushBBIntervals(bbOverlappingEntries_t * o,uint32_t start,uint32_t end,char * str,int withString)367 static bbOverlappingEntries_t *pushBBIntervals(bbOverlappingEntries_t *o, uint32_t start, uint32_t end, char *str, int withString) {
368 if(o->l+1 >= o->m) {
369 o->m = roundup(o->l+1);
370 o->start = realloc(o->start, o->m * sizeof(uint32_t));
371 if(!o->start) goto error;
372 o->end = realloc(o->end, o->m * sizeof(uint32_t));
373 if(!o->end) goto error;
374 if(withString) {
375 o->str = realloc(o->str, o->m * sizeof(char**));
376 if(!o->str) goto error;
377 }
378 }
379 o->start[o->l] = start;
380 o->end[o->l] = end;
381 if(withString) o->str[o->l] = strdup(str);
382 o->l++;
383 return o;
384
385 error:
386 bbDestroyOverlappingEntries(o);
387 return NULL;
388 }
389
390 //Returns NULL on error
bwGetOverlappingIntervalsCore(bigWigFile_t * fp,bwOverlapBlock_t * o,uint32_t tid,uint32_t ostart,uint32_t oend)391 bwOverlappingIntervals_t *bwGetOverlappingIntervalsCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend) {
392 uint64_t i;
393 uint16_t j;
394 int compressed = 0, rv;
395 uLongf sz = fp->hdr->bufSize, tmp;
396 void *buf = NULL, *compBuf = NULL;
397 uint32_t start = 0, end , *p;
398 float value;
399 bwDataHeader_t hdr;
400 bwOverlappingIntervals_t *output = calloc(1, sizeof(bwOverlappingIntervals_t));
401
402 if(!output) goto error;
403
404 if(!o) return output;
405 if(!o->n) return output;
406
407 if(sz) {
408 compressed = 1;
409 buf = malloc(sz);
410 }
411 sz = 0; //This is now the size of the compressed buffer
412
413 for(i=0; i<o->n; i++) {
414 if(bwSetPos(fp, o->offset[i])) goto error;
415
416 if(sz < o->size[i]) {
417 compBuf = realloc(compBuf, o->size[i]);
418 sz = o->size[i];
419 }
420 if(!compBuf) goto error;
421
422 if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
423 if(compressed) {
424 tmp = fp->hdr->bufSize; //This gets over-written by uncompress
425 rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
426 if(rv != Z_OK) goto error;
427 } else {
428 buf = compBuf;
429 }
430
431 //TODO: ensure that tmp is large enough!
432 bwFillDataHdr(&hdr, buf);
433
434 p = ((uint32_t*) buf);
435 p += 6;
436 if(hdr.tid != tid) continue;
437
438 if(hdr.type == 3) start = hdr.start - hdr.step;
439
440 //FIXME: We should ensure that sz is large enough to hold nItems of the given type
441 for(j=0; j<hdr.nItems; j++) {
442 switch(hdr.type) {
443 case 1:
444 start = *p;
445 p++;
446 end = *p;
447 p++;
448 value = *((float *)p);
449 p++;
450 break;
451 case 2:
452 start = *p;
453 p++;
454 end = start + hdr.span;
455 value = *((float *)p);
456 p++;
457 break;
458 case 3:
459 start += hdr.step;
460 end = start+hdr.span;
461 value = *((float *)p);
462 p++;
463 break;
464 default :
465 goto error;
466 break;
467 }
468
469 if(end <= ostart || start >= oend) continue;
470 //Push the overlap
471 if(!pushIntervals(output, start, end, value)) goto error;
472 }
473 }
474
475 if(compressed && buf) free(buf);
476 if(compBuf) free(compBuf);
477 return output;
478
479 error:
480 fprintf(stderr, "[bwGetOverlappingIntervalsCore] Got an error\n");
481 if(output) bwDestroyOverlappingIntervals(output);
482 if(compressed && buf) free(buf);
483 if(compBuf) free(compBuf);
484 return NULL;
485 }
486
bbGetOverlappingEntriesCore(bigWigFile_t * fp,bwOverlapBlock_t * o,uint32_t tid,uint32_t ostart,uint32_t oend,int withString)487 bbOverlappingEntries_t *bbGetOverlappingEntriesCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend, int withString) {
488 uint64_t i;
489 int compressed = 0, rv, slen;
490 uLongf sz = fp->hdr->bufSize, tmp = 0;
491 void *buf = NULL, *bufEnd = NULL, *compBuf = NULL;
492 uint32_t entryTid = 0, start = 0, end;
493 char *str;
494 bbOverlappingEntries_t *output = calloc(1, sizeof(bbOverlappingEntries_t));
495
496 if(!output) goto error;
497
498 if(!o) return output;
499 if(!o->n) return output;
500
501 if(sz) {
502 compressed = 1;
503 buf = malloc(sz);
504 }
505 sz = 0; //This is now the size of the compressed buffer
506
507 for(i=0; i<o->n; i++) {
508 if(bwSetPos(fp, o->offset[i])) goto error;
509
510 if(sz < o->size[i]) {
511 compBuf = realloc(compBuf, o->size[i]);
512 sz = o->size[i];
513 }
514 if(!compBuf) goto error;
515
516 if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
517 if(compressed) {
518 tmp = fp->hdr->bufSize; //This gets over-written by uncompress
519 rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
520 if(rv != Z_OK) goto error;
521 } else {
522 buf = compBuf;
523 tmp = o->size[i]; //TODO: Is this correct? Do non-gzipped bigBeds exist?
524 }
525
526 bufEnd = buf + tmp;
527 while(buf < bufEnd) {
528 entryTid = ((uint32_t*)buf)[0];
529 start = ((uint32_t*)buf)[1];
530 end = ((uint32_t*)buf)[2];
531 buf += 12;
532 str = (char*)buf;
533 slen = strlen(str) + 1;
534 buf += slen;
535
536 if(entryTid < tid) continue;
537 if(entryTid > tid) break;
538 if(end <= ostart) continue;
539 if(start >= oend) break;
540
541 //Push the overlap
542 if(!pushBBIntervals(output, start, end, str, withString)) goto error;
543 }
544
545 buf = bufEnd - tmp; //reset the buffer pointer
546 }
547
548 if(compressed && buf) free(buf);
549 if(compBuf) free(compBuf);
550 return output;
551
552 error:
553 fprintf(stderr, "[bbGetOverlappingEntriesCore] Got an error\n");
554 buf = bufEnd - tmp;
555 if(output) bbDestroyOverlappingEntries(output);
556 if(compressed && buf) free(buf);
557 if(compBuf) free(compBuf);
558 return NULL;
559 }
560
561 //Returns NULL on error OR no intervals, which is a bad design...
bwGetOverlappingIntervals(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end)562 bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
563 bwOverlappingIntervals_t *output;
564 uint32_t tid = bwGetTid(fp, chrom);
565 if(tid == (uint32_t) -1) return NULL;
566 bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
567 if(!blocks) return NULL;
568 output = bwGetOverlappingIntervalsCore(fp, blocks, tid, start, end);
569 destroyBWOverlapBlock(blocks);
570 return output;
571 }
572
573 //Like above, but for bigBed files
bbGetOverlappingEntries(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end,int withString)574 bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString) {
575 bbOverlappingEntries_t *output;
576 uint32_t tid = bwGetTid(fp, chrom);
577 if(tid == (uint32_t) -1) return NULL;
578 bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
579 if(!blocks) return NULL;
580 output = bbGetOverlappingEntriesCore(fp, blocks, tid, start, end, withString);
581 destroyBWOverlapBlock(blocks);
582 return output;
583 }
584
585 //Returns NULL on error
bwOverlappingIntervalsIterator(bigWigFile_t * bw,char * chrom,uint32_t start,uint32_t end,uint32_t blocksPerIteration)586 bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration) {
587 bwOverlapIterator_t *output = NULL;
588 uint64_t n;
589 uint32_t tid = bwGetTid(bw, chrom);
590 if(tid == (uint32_t) -1) return output;
591 output = calloc(1, sizeof(bwOverlapIterator_t));
592 if(!output) return output;
593 bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
594
595 output->bw = bw;
596 output->tid = tid;
597 output->start = start;
598 output->end = end;
599 output->blocks = blocks;
600 output->blocksPerIteration = blocksPerIteration;
601
602 if(blocks) {
603 n = blocks->n;
604 if(n>blocksPerIteration) blocks->n = blocksPerIteration;
605 output->intervals = bwGetOverlappingIntervalsCore(bw, blocks,tid, start, end);
606 blocks->n = n;
607 output->offset = blocksPerIteration;
608 }
609 output->data = output->intervals;
610 return output;
611 }
612
613 //Returns NULL on error
bbOverlappingEntriesIterator(bigWigFile_t * bw,char * chrom,uint32_t start,uint32_t end,int withString,uint32_t blocksPerIteration)614 bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration) {
615 bwOverlapIterator_t *output = NULL;
616 uint64_t n;
617 uint32_t tid = bwGetTid(bw, chrom);
618 if(tid == (uint32_t) -1) return output;
619 output = calloc(1, sizeof(bwOverlapIterator_t));
620 if(!output) return output;
621 bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
622
623 output->bw = bw;
624 output->tid = tid;
625 output->start = start;
626 output->end = end;
627 output->blocks = blocks;
628 output->blocksPerIteration = blocksPerIteration;
629 output->withString = withString;
630
631 if(blocks) {
632 n = blocks->n;
633 if(n>blocksPerIteration) blocks->n = blocksPerIteration;
634 output->entries = bbGetOverlappingEntriesCore(bw, blocks,tid, start, end, withString);
635 blocks->n = n;
636 output->offset = blocksPerIteration;
637 }
638 output->data = output->entries;
639 return output;
640 }
641
bwIteratorDestroy(bwOverlapIterator_t * iter)642 void bwIteratorDestroy(bwOverlapIterator_t *iter) {
643 if(!iter) return;
644 if(iter->blocks) destroyBWOverlapBlock((bwOverlapBlock_t*) iter->blocks);
645 if(iter->intervals) bwDestroyOverlappingIntervals(iter->intervals);
646 if(iter->entries) bbDestroyOverlappingEntries(iter->entries);
647 free(iter);
648 }
649
650 //On error, points to NULL and destroys the input
bwIteratorNext(bwOverlapIterator_t * iter)651 bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter) {
652 uint64_t n, *offset, *size;
653 bwOverlapBlock_t *blocks = iter->blocks;
654
655 if(iter->intervals) {
656 bwDestroyOverlappingIntervals(iter->intervals);
657 iter->intervals = NULL;
658 }
659 if(iter->entries) {
660 bbDestroyOverlappingEntries(iter->entries);
661 iter->entries = NULL;
662 }
663 iter->data = NULL;
664
665 if(iter->offset < blocks->n) {
666 //store the previous values
667 n = blocks->n;
668 offset = blocks->offset;
669 size = blocks->size;
670
671 //Move the start of the blocks
672 blocks->offset += iter->offset;
673 blocks->size += iter->offset;
674 if(iter->offset + iter->blocksPerIteration > n) {
675 blocks->n = blocks->n - iter->offset;
676 } else {
677 blocks->n = iter->blocksPerIteration;
678 }
679
680 //Get the intervals or entries, as appropriate
681 if(iter->bw->type == 0) {
682 //bigWig
683 iter->intervals = bwGetOverlappingIntervalsCore(iter->bw, blocks, iter->tid, iter->start, iter->end);
684 iter->data = iter->intervals;
685 } else {
686 //bigBed
687 iter->entries = bbGetOverlappingEntriesCore(iter->bw, blocks, iter->tid, iter->start, iter->end, iter->withString);
688 iter->data = iter->entries;
689 }
690 iter->offset += iter->blocksPerIteration;
691
692 //reset the values in iter->blocks
693 blocks->n = n;
694 blocks->offset = offset;
695 blocks->size = size;
696
697 //Check for error
698 if(!iter->intervals && !iter->entries) goto error;
699 }
700
701 return iter;
702
703 error:
704 bwIteratorDestroy(iter);
705 return NULL;
706 }
707
708 //This is like bwGetOverlappingIntervals, except it returns 1 base windows. If includeNA is not 0, then a value will be returned for every position in the range (defaulting to NAN).
709 //The ->end member is NULL
710 //If includeNA is not 0 then ->start is also NULL, since it's implied
711 //Note that bwDestroyOverlappingIntervals() will work in either case
bwGetValues(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t end,int includeNA)712 bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA) {
713 uint32_t i, j, n;
714 bwOverlappingIntervals_t *output = NULL;
715 bwOverlappingIntervals_t *intermediate = bwGetOverlappingIntervals(fp, chrom, start, end);
716 if(!intermediate) return NULL;
717
718 output = calloc(1, sizeof(bwOverlappingIntervals_t));
719 if(!output) goto error;
720 if(includeNA) {
721 output->l = end-start;
722 output->value = malloc((end-start)*sizeof(float));
723 if(!output->value) goto error;
724 for(i=0; i<end-start; i++) output->value[i] = strtod("NaN", NULL);
725 for(i=0; i<intermediate->l; i++) {
726 for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
727 if(j < start || j >= end) continue;
728 output->value[j-start] = intermediate->value[i];
729 }
730 }
731 } else {
732 n = 0;
733 for(i=0; i<intermediate->l; i++) {
734 if(intermediate->start[i] < start) intermediate->start[i] = start;
735 if(intermediate->end[i] > end) intermediate->end[i] = end;
736 n += intermediate->end[i]-intermediate->start[i];
737 }
738 output->l = n;
739 output->start = malloc(sizeof(uint32_t)*n);
740 if(!output->start) goto error;
741 output->value = malloc(sizeof(float)*n);
742 if(!output->value) goto error;
743 n = 0; //this is now the index
744 for(i=0; i<intermediate->l; i++) {
745 for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
746 if(j < start || j >= end) continue;
747 output->start[n] = j;
748 output->value[n++] = intermediate->value[i];
749 }
750 }
751 }
752
753 bwDestroyOverlappingIntervals(intermediate);
754 return output;
755
756 error:
757 if(intermediate) bwDestroyOverlappingIntervals(intermediate);
758 if(output) bwDestroyOverlappingIntervals(output);
759 return NULL;
760 }
761
bwDestroyIndexNode(bwRTreeNode_t * node)762 void bwDestroyIndexNode(bwRTreeNode_t *node) {
763 uint16_t i;
764
765 if(!node) return;
766
767 free(node->chrIdxStart);
768 free(node->baseStart);
769 free(node->chrIdxEnd);
770 free(node->baseEnd);
771 free(node->dataOffset);
772 if(!node->isLeaf) {
773 for(i=0; i<node->nChildren; i++) {
774 bwDestroyIndexNode(node->x.child[i]);
775 }
776 free(node->x.child);
777 } else {
778 free(node->x.size);
779 }
780 free(node);
781 }
782
bwDestroyIndex(bwRTree_t * idx)783 void bwDestroyIndex(bwRTree_t *idx) {
784 bwDestroyIndexNode(idx->root);
785 free(idx);
786 }
787
788 //Returns a pointer to the requested index (@offset, unless it's 0, in which case the index for the values is returned
789 //Returns NULL on error
bwReadIndex(bigWigFile_t * fp,uint64_t offset)790 bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset) {
791 bwRTree_t *idx = readRTreeIdx(fp, offset);
792 if(!idx) return NULL;
793
794 //Read in the root node
795 idx->root = bwGetRTreeNode(fp, idx->rootOffset);
796
797 if(!idx->root) {
798 bwDestroyIndex(idx);
799 return NULL;
800 }
801 return idx;
802 }
803