1 #include <limits.h>
2 #include <float.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <math.h>
6 #include "bigWig.h"
7 #include "bwCommon.h"
8 
9 /// @cond SKIP
10 struct val_t {
11     uint32_t tid;
12     uint32_t start;
13     uint32_t nBases;
14     float min, max, sum, sumsq;
15     double scalar;
16     struct val_t *next;
17 };
18 /// @endcond
19 
20 //Create a chromList_t and attach it to a bigWigFile_t *. Returns NULL on error
21 //Note that chroms and lengths are duplicated, so you MUST free the input
bwCreateChromList(char ** chroms,uint32_t * lengths,int64_t n)22 chromList_t *bwCreateChromList(char **chroms, uint32_t *lengths, int64_t n) {
23     int64_t i = 0;
24     chromList_t *cl = calloc(1, sizeof(chromList_t));
25     if(!cl) return NULL;
26 
27     cl->nKeys = n;
28     cl->chrom = malloc(sizeof(char*)*n);
29     cl->len = malloc(sizeof(uint32_t)*n);
30     if(!cl->chrom) goto error;
31     if(!cl->len) goto error;
32 
33     for(i=0; i<n; i++) {
34         cl->len[i] = lengths[i];
35         cl->chrom[i] = strdup(chroms[i]);
36         if(!cl->chrom[i]) goto error;
37     }
38 
39     return cl;
40 
41 error:
42     if(i) {
43         int64_t j;
44         for(j=0; j<i; j++) free(cl->chrom[j]);
45     }
46     if(cl) {
47         if(cl->chrom) free(cl->chrom);
48         if(cl->len) free(cl->len);
49         free(cl);
50     }
51     return NULL;
52 }
53 
54 //If maxZooms == 0, then 0 is used (i.e., there are no zoom levels). If maxZooms < 0 or > 65535 then 10 is used.
55 //TODO allow changing bufSize and blockSize
bwCreateHdr(bigWigFile_t * fp,int32_t maxZooms)56 int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms) {
57     if(!fp->isWrite) return 1;
58     bigWigHdr_t *hdr = calloc(1, sizeof(bigWigHdr_t));
59     if(!hdr) return 2;
60 
61     hdr->version = 4;
62     if(maxZooms < 0 || maxZooms > 65535) {
63         hdr->nLevels = 10;
64     } else {
65         hdr->nLevels = maxZooms;
66     }
67 
68     hdr->bufSize = 32768; //When the file is finalized this is reset if fp->writeBuffer->compressPsz is 0!
69     hdr->minVal = DBL_MAX;
70     hdr->maxVal = DBL_MIN;
71     fp->hdr = hdr;
72     fp->writeBuffer->blockSize = 64;
73 
74     //Allocate the writeBuffer buffers
75     fp->writeBuffer->compressPsz = compressBound(hdr->bufSize);
76     fp->writeBuffer->compressP = malloc(fp->writeBuffer->compressPsz);
77     if(!fp->writeBuffer->compressP) return 3;
78     fp->writeBuffer->p = calloc(1,hdr->bufSize);
79     if(!fp->writeBuffer->p) return 4;
80 
81     return 0;
82 }
83 
84 //return 0 on success
writeAtPos(void * ptr,size_t sz,size_t nmemb,size_t pos,FILE * fp)85 static int writeAtPos(void *ptr, size_t sz, size_t nmemb, size_t pos, FILE *fp) {
86     size_t curpos = ftell(fp);
87     if(fseek(fp, pos, SEEK_SET)) return 1;
88     if(fwrite(ptr, sz, nmemb, fp) != nmemb) return 2;
89     if(fseek(fp, curpos, SEEK_SET)) return 3;
90     return 0;
91 }
92 
93 //We lose keySize bytes on error
writeChromList(FILE * fp,chromList_t * cl)94 static int writeChromList(FILE *fp, chromList_t *cl) {
95     uint16_t k;
96     uint32_t j, magic = CIRTREE_MAGIC;
97     uint32_t nperblock = (cl->nKeys > 0x7FFF) ? 0x7FFF : cl->nKeys; //Items per leaf/non-leaf, there are no unsigned ints in java :(
98     uint32_t nblocks, keySize = 0, valSize = 8; //In theory valSize could be optimized, in practice that'd be annoying
99     uint64_t i, nonLeafEnd, leafSize, nextLeaf;
100     uint8_t eight;
101     int64_t i64;
102     char *chrom;
103     size_t l;
104 
105     if(cl->nKeys > 1073676289) {
106         fprintf(stderr, "[writeChromList] Error: Currently only 1,073,676,289 contigs are supported. If you really need more then please post a request on github.\n");
107         return 1;
108     }
109     nblocks = cl->nKeys/nperblock;
110     nblocks += ((cl->nKeys % nperblock) > 0)?1:0;
111 
112     for(i64=0; i64<cl->nKeys; i64++) {
113         l = strlen(cl->chrom[i64]);
114         if(l>keySize) keySize = l;
115     }
116     l--; //We don't null terminate strings, because schiess mich tot
117     chrom = calloc(keySize, sizeof(char));
118 
119     //Write the root node of a largely pointless tree
120     if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 1;
121     if(fwrite(&nperblock, sizeof(uint32_t), 1, fp) != 1) return 2;
122     if(fwrite(&keySize, sizeof(uint32_t), 1, fp) != 1) return 3;
123     if(fwrite(&valSize, sizeof(uint32_t), 1, fp) != 1) return 4;
124     if(fwrite(&(cl->nKeys), sizeof(uint64_t), 1, fp) != 1) return 5;
125 
126     //Padding?
127     i=0;
128     if(fwrite(&i, sizeof(uint64_t), 1, fp) != 1) return 6;
129 
130     //Do we need a non-leaf node?
131     if(nblocks > 1) {
132         eight = 0;
133         if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 7;
134         if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 8; //padding
135         if(fwrite(&nblocks, sizeof(uint16_t), 1, fp) != 1) return 8;
136         nonLeafEnd = ftell(fp) + nperblock * (keySize + 8);
137         leafSize = nperblock * (keySize + 8) + 4;
138         for(i=0; i<nblocks; i++) { //Why yes, this is pointless
139             chrom = strncpy(chrom, cl->chrom[i * nperblock], keySize);
140             nextLeaf = nonLeafEnd + i * leafSize;
141             if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
142             if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
143         }
144         for(i=0; i<keySize; i++) chrom[i] = '\0';
145         nextLeaf = 0;
146         for(i=nblocks; i<nperblock; i++) {
147             if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
148             if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
149         }
150     }
151 
152     //Write the leaves
153     nextLeaf = 0;
154     for(i=0, j=0; i<nblocks; i++) {
155         eight = 1;
156         if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 11;
157         eight = 0;
158         if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 12;
159         if(cl->nKeys - j < nperblock) {
160             k = cl->nKeys - j;
161             if(fwrite(&k, sizeof(uint16_t), 1, fp) != 1) return 13;
162         } else {
163             if(fwrite(&nperblock, sizeof(uint16_t), 1, fp) != 1) return 13;
164         }
165         for(k=0; k<nperblock; k++) {
166             if(j>=cl->nKeys) {
167                 if(chrom[0]) {
168                     for(l=0; l<keySize; l++) chrom[l] = '\0';
169                 }
170                 if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
171                 if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 16;
172             } else {
173                 chrom = strncpy(chrom, cl->chrom[j], keySize);
174                 if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
175                 if(fwrite(&j, sizeof(uint32_t), 1, fp) != 1) return 16;
176                 if(fwrite(&(cl->len[j++]), sizeof(uint32_t), 1, fp) != 1) return 17;
177             }
178         }
179     }
180 
181     free(chrom);
182     return 0;
183 }
184 
185 //returns 0 on success
186 //Still need to fill in indexOffset
bwWriteHdr(bigWigFile_t * bw)187 int bwWriteHdr(bigWigFile_t *bw) {
188     uint32_t magic = BIGWIG_MAGIC;
189     uint16_t two = 4;
190     FILE *fp;
191     void *p = calloc(58, sizeof(uint8_t)); //58 bytes of nothing
192     if(!bw->isWrite) return 1;
193 
194     //The header itself, largely just reserving space...
195     fp = bw->URL->x.fp;
196     if(!fp) return 2;
197     if(fseek(fp, 0, SEEK_SET)) return 3;
198     if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 4;
199     if(fwrite(&two, sizeof(uint16_t), 1, fp) != 1) return 5;
200     if(fwrite(p, sizeof(uint8_t), 58, fp) != 58) return 6;
201 
202     //Empty zoom headers
203     if(bw->hdr->nLevels) {
204         for(two=0; two<bw->hdr->nLevels; two++) {
205             if(fwrite(p, sizeof(uint8_t), 24, fp) != 24) return 9;
206         }
207     }
208 
209     //Update summaryOffset and write an empty summary block
210     bw->hdr->summaryOffset = ftell(fp);
211     if(fwrite(p, sizeof(uint8_t), 40, fp) != 40) return 10;
212     if(writeAtPos(&(bw->hdr->summaryOffset), sizeof(uint64_t), 1, 0x2c, fp)) return 11;
213 
214     //Write the chromosome list as a stupid freaking tree (because let's TREE ALL THE THINGS!!!)
215     bw->hdr->ctOffset = ftell(fp);
216     if(writeChromList(fp, bw->cl)) return 7;
217     if(writeAtPos(&(bw->hdr->ctOffset), sizeof(uint64_t), 1, 0x8, fp)) return 8;
218 
219     //Update the dataOffset
220     bw->hdr->dataOffset = ftell(fp);
221     if(writeAtPos(&bw->hdr->dataOffset, sizeof(uint64_t), 1, 0x10, fp)) return 12;
222 
223     //Save space for the number of blocks
224     if(fwrite(p, sizeof(uint8_t), 8, fp) != 8) return 13;
225 
226     free(p);
227     return 0;
228 }
229 
insertIndexNode(bigWigFile_t * fp,bwRTreeNode_t * leaf)230 static int insertIndexNode(bigWigFile_t *fp, bwRTreeNode_t *leaf) {
231     bwLL *l = malloc(sizeof(bwLL));
232     if(!l) return 1;
233     l->node = leaf;
234     l->next = NULL;
235 
236     if(!fp->writeBuffer->firstIndexNode) {
237         fp->writeBuffer->firstIndexNode = l;
238     } else {
239         fp->writeBuffer->currentIndexNode->next = l;
240     }
241     fp->writeBuffer->currentIndexNode = l;
242     return 0;
243 }
244 
245 //0 on success
appendIndexNodeEntry(bigWigFile_t * fp,uint32_t tid0,uint32_t tid1,uint32_t start,uint32_t end,uint64_t offset,uint64_t size)246 static int appendIndexNodeEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
247     bwLL *n = fp->writeBuffer->currentIndexNode;
248     if(!n) return 1;
249     if(n->node->nChildren >= fp->writeBuffer->blockSize) return 2;
250 
251     n->node->chrIdxStart[n->node->nChildren] = tid0;
252     n->node->baseStart[n->node->nChildren] = start;
253     n->node->chrIdxEnd[n->node->nChildren] = tid1;
254     n->node->baseEnd[n->node->nChildren] = end;
255     n->node->dataOffset[n->node->nChildren] = offset;
256     n->node->x.size[n->node->nChildren] = size;
257     n->node->nChildren++;
258     return 0;
259 }
260 
261 //Returns 0 on success
addIndexEntry(bigWigFile_t * fp,uint32_t tid0,uint32_t tid1,uint32_t start,uint32_t end,uint64_t offset,uint64_t size)262 static int addIndexEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
263     bwRTreeNode_t *node;
264 
265     if(appendIndexNodeEntry(fp, tid0, tid1, start, end, offset, size)) {
266         //The last index node is full, we need to add a new one
267         node = calloc(1, sizeof(bwRTreeNode_t));
268         if(!node) return 1;
269 
270         //Allocate and set the fields
271         node->isLeaf = 1;
272         node->nChildren = 1;
273         node->chrIdxStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
274         if(!node->chrIdxStart) goto error;
275         node->baseStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
276         if(!node->baseStart) goto error;
277         node->chrIdxEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
278         if(!node->chrIdxEnd) goto error;
279         node->baseEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
280         if(!node->baseEnd) goto error;
281         node->dataOffset = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
282         if(!node->dataOffset) goto error;
283         node->x.size = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
284         if(!node->x.size) goto error;
285 
286         node->chrIdxStart[0] = tid0;
287         node->baseStart[0] = start;
288         node->chrIdxEnd[0] = tid1;
289         node->baseEnd[0] = end;
290         node->dataOffset[0] = offset;
291         node->x.size[0] = size;
292 
293         if(insertIndexNode(fp, node)) goto error;
294     }
295 
296     return 0;
297 
298 error:
299     if(node->chrIdxStart) free(node->chrIdxStart);
300     if(node->baseStart) free(node->baseStart);
301     if(node->chrIdxEnd) free(node->chrIdxEnd);
302     if(node->baseEnd) free(node->baseEnd);
303     if(node->dataOffset) free(node->dataOffset);
304     if(node->x.size) free(node->x.size);
305     return 2;
306 }
307 
308 /*
309  * TODO:
310  *     The buffer size and compression sz need to be determined elsewhere (and p and compressP filled in!)
311  */
flushBuffer(bigWigFile_t * fp)312 static int flushBuffer(bigWigFile_t *fp) {
313     bwWriteBuffer_t *wb = fp->writeBuffer;
314     uLongf sz = wb->compressPsz;
315     uint16_t nItems;
316     if(!fp->writeBuffer->l) return 0;
317     if(!wb->ltype) return 0;
318 
319     //Fill in the header
320     if(!memcpy(wb->p, &(wb->tid), sizeof(uint32_t))) return 1;
321     if(!memcpy(wb->p+4, &(wb->start), sizeof(uint32_t))) return 2;
322     if(!memcpy(wb->p+8, &(wb->end), sizeof(uint32_t))) return 3;
323     if(!memcpy(wb->p+12, &(wb->step), sizeof(uint32_t))) return 4;
324     if(!memcpy(wb->p+16, &(wb->span), sizeof(uint32_t))) return 5;
325     if(!memcpy(wb->p+20, &(wb->ltype), sizeof(uint8_t))) return 6;
326     //1 byte padding
327     //Determine the number of items
328     switch(wb->ltype) {
329     case 1:
330         nItems = (wb->l-24)/12;
331         break;
332     case 2:
333         nItems = (wb->l-24)/8;
334         break;
335     case 3:
336         nItems = (wb->l-24)/4;
337         break;
338     default:
339         return 7;
340     }
341     if(!memcpy(wb->p+22, &nItems, sizeof(uint16_t))) return 8;
342 
343     if(sz) {
344         //compress
345         if(compress(wb->compressP, &sz, wb->p, wb->l) != Z_OK) return 9;
346 
347         //write the data to disk
348         if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 10;
349     } else {
350         sz = wb->l;
351         if(fwrite(wb->p, sizeof(uint8_t), wb->l, fp->URL->x.fp) != wb->l) return 10;
352     }
353 
354     //Add an entry into the index
355     if(addIndexEntry(fp, wb->tid, wb->tid, wb->start, wb->end, bwTell(fp)-sz, sz)) return 11;
356 
357     wb->nBlocks++;
358     wb->l = 24;
359     return 0;
360 }
361 
updateStats(bigWigFile_t * fp,uint32_t span,float val)362 static void updateStats(bigWigFile_t *fp, uint32_t span, float val) {
363     if(val < fp->hdr->minVal) fp->hdr->minVal = val;
364     else if(val > fp->hdr->maxVal) fp->hdr->maxVal = val;
365     fp->hdr->nBasesCovered += span;
366     fp->hdr->sumData += span*val;
367     fp->hdr->sumSquared += span*pow(val,2);
368 
369     fp->writeBuffer->nEntries++;
370     fp->writeBuffer->runningWidthSum += span;
371 }
372 
373 //12 bytes per entry
bwAddIntervals(bigWigFile_t * fp,char ** chrom,uint32_t * start,uint32_t * end,float * values,uint32_t n)374 int bwAddIntervals(bigWigFile_t *fp, char **chrom, uint32_t *start, uint32_t *end, float *values, uint32_t n) {
375     uint32_t tid = 0, i;
376     char *lastChrom = NULL;
377     bwWriteBuffer_t *wb = fp->writeBuffer;
378     if(!n) return 0; //Not an error per se
379     if(!fp->isWrite) return 1;
380     if(!wb) return 2;
381 
382     //Flush if needed
383     if(wb->ltype != 1) if(flushBuffer(fp)) return 3;
384     if(wb->l+36 > fp->hdr->bufSize) if(flushBuffer(fp)) return 4;
385     lastChrom = chrom[0];
386     tid = bwGetTid(fp, chrom[0]);
387     if(tid == (uint32_t) -1) return 5;
388     if(tid != wb->tid) {
389         if(flushBuffer(fp)) return 6;
390         wb->tid = tid;
391         wb->start = start[0];
392         wb->end = end[0];
393     }
394 
395     //Ensure that everything is set correctly
396     wb->ltype = 1;
397     if(wb->l <= 24) {
398         wb->start = start[0];
399         wb->span = 0;
400         wb->step = 0;
401     }
402     if(!memcpy(wb->p+wb->l, start, sizeof(uint32_t))) return 7;
403     if(!memcpy(wb->p+wb->l+4, end, sizeof(uint32_t))) return 8;
404     if(!memcpy(wb->p+wb->l+8, values, sizeof(float))) return 9;
405     updateStats(fp, end[0]-start[0], values[0]);
406     wb->l += 12;
407 
408     for(i=1; i<n; i++) {
409         if(strcmp(chrom[i],lastChrom) != 0) {
410             wb->end = end[i-1];
411             flushBuffer(fp);
412             lastChrom = chrom[i];
413             tid = bwGetTid(fp, chrom[i]);
414             if(tid == (uint32_t) -1) return 10;
415             wb->tid = tid;
416             wb->start = start[i];
417         }
418         if(wb->l+12 > fp->hdr->bufSize) { //12 bytes/entry
419             wb->end = end[i-1];
420             flushBuffer(fp);
421             wb->start = start[i];
422         }
423         if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 11;
424         if(!memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t))) return 12;
425         if(!memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float))) return 13;
426         updateStats(fp, end[i]-start[i], values[i]);
427         wb->l += 12;
428     }
429     wb->end = end[i-1];
430 
431     return 0;
432 }
433 
bwAppendIntervals(bigWigFile_t * fp,uint32_t * start,uint32_t * end,float * values,uint32_t n)434 int bwAppendIntervals(bigWigFile_t *fp, uint32_t *start, uint32_t *end, float *values, uint32_t n) {
435     uint32_t i;
436     bwWriteBuffer_t *wb = fp->writeBuffer;
437     if(!n) return 0;
438     if(!fp->isWrite) return 1;
439     if(!wb) return 2;
440     if(wb->ltype != 1) return 3;
441 
442     for(i=0; i<n; i++) {
443         if(wb->l+12 > fp->hdr->bufSize) {
444             if(i>0) { //otherwise it's already set
445                 wb->end = end[i-1];
446             }
447             flushBuffer(fp);
448             wb->start = start[i];
449         }
450         if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 4;
451         if(!memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t))) return 5;
452         if(!memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float))) return 6;
453         updateStats(fp, end[i]-start[i], values[i]);
454         wb->l += 12;
455     }
456     wb->end = end[i-1];
457 
458     return 0;
459 }
460 
461 //8 bytes per entry
bwAddIntervalSpans(bigWigFile_t * fp,char * chrom,uint32_t * start,uint32_t span,float * values,uint32_t n)462 int bwAddIntervalSpans(bigWigFile_t *fp, char *chrom, uint32_t *start, uint32_t span, float *values, uint32_t n) {
463     uint32_t i, tid;
464     bwWriteBuffer_t *wb = fp->writeBuffer;
465     if(!n) return 0;
466     if(!fp->isWrite) return 1;
467     if(!wb) return 2;
468     if(wb->ltype != 2) if(flushBuffer(fp)) return 3;
469     if(flushBuffer(fp)) return 4;
470 
471     tid = bwGetTid(fp, chrom);
472     if(tid == (uint32_t) -1) return 5;
473     wb->tid = tid;
474     wb->start = start[0];
475     wb->step = 0;
476     wb->span = span;
477     wb->ltype = 2;
478 
479     for(i=0; i<n; i++) {
480         if(wb->l + 8 >= fp->hdr->bufSize) { //8 bytes/entry
481             if(i) wb->end = start[i-1]+span;
482             flushBuffer(fp);
483             wb->start = start[i];
484         }
485         if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 5;
486         if(!memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float))) return 6;
487         updateStats(fp, span, values[i]);
488         wb->l += 8;
489     }
490     wb->end = start[n-1] + span;
491 
492     return 0;
493 }
494 
bwAppendIntervalSpans(bigWigFile_t * fp,uint32_t * start,float * values,uint32_t n)495 int bwAppendIntervalSpans(bigWigFile_t *fp, uint32_t *start, float *values, uint32_t n) {
496     uint32_t i;
497     bwWriteBuffer_t *wb = fp->writeBuffer;
498     if(!n) return 0;
499     if(!fp->isWrite) return 1;
500     if(!wb) return 2;
501     if(wb->ltype != 2) return 3;
502 
503     for(i=0; i<n; i++) {
504         if(wb->l + 8 >= fp->hdr->bufSize) {
505             if(i) wb->end = start[i-1]+wb->span;
506             flushBuffer(fp);
507             wb->start = start[i];
508         }
509         if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 4;
510         if(!memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float))) return 5;
511         updateStats(fp, wb->span, values[i]);
512         wb->l += 8;
513     }
514     wb->end = start[n-1] + wb->span;
515 
516     return 0;
517 }
518 
519 //4 bytes per entry
bwAddIntervalSpanSteps(bigWigFile_t * fp,char * chrom,uint32_t start,uint32_t span,uint32_t step,float * values,uint32_t n)520 int bwAddIntervalSpanSteps(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t span, uint32_t step, float *values, uint32_t n) {
521     uint32_t i, tid;
522     bwWriteBuffer_t *wb = fp->writeBuffer;
523     if(!n) return 0;
524     if(!fp->isWrite) return 1;
525     if(!wb) return 2;
526     if(wb->ltype != 3) flushBuffer(fp);
527     if(flushBuffer(fp)) return 3;
528 
529     tid = bwGetTid(fp, chrom);
530     if(tid == (uint32_t) -1) return 4;
531     wb->tid = tid;
532     wb->start = start;
533     wb->step = step;
534     wb->span = span;
535     wb->ltype = 3;
536 
537     for(i=0; i<n; i++) {
538         if(wb->l + 4 >= fp->hdr->bufSize) {
539             wb->end = wb->start + ((wb->l-24)>>2) * step;
540             flushBuffer(fp);
541             wb->start = wb->end;
542         }
543         if(!memcpy(wb->p+wb->l, &(values[i]), sizeof(float))) return 5;
544         updateStats(fp, wb->span, values[i]);
545         wb->l += 4;
546     }
547     wb->end = wb->start + (wb->l>>2) * step;
548 
549     return 0;
550 }
551 
bwAppendIntervalSpanSteps(bigWigFile_t * fp,float * values,uint32_t n)552 int bwAppendIntervalSpanSteps(bigWigFile_t *fp, float *values, uint32_t n) {
553     uint32_t i;
554     bwWriteBuffer_t *wb = fp->writeBuffer;
555     if(!n) return 0;
556     if(!fp->isWrite) return 1;
557     if(!wb) return 2;
558     if(wb->ltype != 3) return 3;
559 
560     for(i=0; i<n; i++) {
561         if(wb->l + 4 >= fp->hdr->bufSize) {
562             wb->end = wb->start + ((wb->l-24)>>2) * wb->step;
563             flushBuffer(fp);
564             wb->start = wb->end;
565         }
566         if(!memcpy(wb->p+wb->l, &(values[i]), sizeof(float))) return 4;
567         updateStats(fp, wb->span, values[i]);
568         wb->l += 4;
569     }
570     wb->end = wb->start + (wb->l>>2) * wb->step;
571 
572     return 0;
573 }
574 
575 //0 on success
writeSummary(bigWigFile_t * fp)576 int writeSummary(bigWigFile_t *fp) {
577     if(writeAtPos(&(fp->hdr->nBasesCovered), sizeof(uint64_t), 1, fp->hdr->summaryOffset, fp->URL->x.fp)) return 1;
578     if(writeAtPos(&(fp->hdr->minVal), sizeof(double), 1, fp->hdr->summaryOffset+8, fp->URL->x.fp)) return 2;
579     if(writeAtPos(&(fp->hdr->maxVal), sizeof(double), 1, fp->hdr->summaryOffset+16, fp->URL->x.fp)) return 3;
580     if(writeAtPos(&(fp->hdr->sumData), sizeof(double), 1, fp->hdr->summaryOffset+24, fp->URL->x.fp)) return 4;
581     if(writeAtPos(&(fp->hdr->sumSquared), sizeof(double), 1, fp->hdr->summaryOffset+32, fp->URL->x.fp)) return 5;
582     return 0;
583 }
584 
makeEmptyNode(uint32_t blockSize)585 static bwRTreeNode_t *makeEmptyNode(uint32_t blockSize) {
586     bwRTreeNode_t *n = calloc(1, sizeof(bwRTreeNode_t));
587     if(!n) return NULL;
588 
589     n->chrIdxStart = malloc(blockSize*sizeof(uint32_t));
590     if(!n->chrIdxStart) goto error;
591     n->baseStart = malloc(blockSize*sizeof(uint32_t));
592     if(!n->baseStart) goto error;
593     n->chrIdxEnd = malloc(blockSize*sizeof(uint32_t));
594     if(!n->chrIdxEnd) goto error;
595     n->baseEnd = malloc(blockSize*sizeof(uint32_t));
596     if(!n->baseEnd) goto error;
597     n->dataOffset = calloc(blockSize,sizeof(uint64_t)); //This MUST be 0 for node writing!
598     if(!n->dataOffset) goto error;
599     n->x.child = malloc(blockSize*sizeof(uint64_t));
600     if(!n->x.child) goto error;
601 
602     return n;
603 
604 error:
605     if(n->chrIdxStart) free(n->chrIdxStart);
606     if(n->baseStart) free(n->baseStart);
607     if(n->chrIdxEnd) free(n->chrIdxEnd);
608     if(n->baseEnd) free(n->baseEnd);
609     if(n->dataOffset) free(n->dataOffset);
610     if(n->x.child) free(n->x.child);
611     free(n);
612     return NULL;
613 }
614 
615 //Returns 0 on success. This doesn't attempt to clean up!
addLeaves(bwLL ** ll,uint64_t * sz,uint64_t toProcess,uint32_t blockSize)616 static bwRTreeNode_t *addLeaves(bwLL **ll, uint64_t *sz, uint64_t toProcess, uint32_t blockSize) {
617     uint32_t i;
618     uint64_t foo;
619     bwRTreeNode_t *n = makeEmptyNode(blockSize);
620     if(!n) return NULL;
621 
622     if(toProcess <= blockSize) {
623         for(i=0; i<toProcess; i++) {
624             n->chrIdxStart[i] = (*ll)->node->chrIdxStart[0];
625             n->baseStart[i] = (*ll)->node->baseStart[0];
626             n->chrIdxEnd[i] = (*ll)->node->chrIdxEnd[(*ll)->node->nChildren-1];
627             n->baseEnd[i] = (*ll)->node->baseEnd[(*ll)->node->nChildren-1];
628             n->x.child[i] = (*ll)->node;
629             *sz += 4 + 32*(*ll)->node->nChildren;
630             *ll = (*ll)->next;
631             n->nChildren++;
632         }
633     } else {
634         for(i=0; i<blockSize; i++) {
635             foo = ceil(((double) toProcess)/((double) blockSize-i));
636             if(!ll) break;
637             n->x.child[i] = addLeaves(ll, sz, foo, blockSize);
638             if(!n->x.child[i]) goto error;
639             n->chrIdxStart[i] = n->x.child[i]->chrIdxStart[0];
640             n->baseStart[i] = n->x.child[i]->baseStart[0];
641             n->chrIdxEnd[i] = n->x.child[i]->chrIdxEnd[n->x.child[i]->nChildren-1];
642             n->baseEnd[i] = n->x.child[i]->baseEnd[n->x.child[i]->nChildren-1];
643             n->nChildren++;
644             toProcess -= foo;
645         }
646     }
647 
648     *sz += 4 + 24*n->nChildren;
649     return n;
650 
651 error:
652     bwDestroyIndexNode(n);
653     return NULL;
654 }
655 
656 //Returns 1 on error
writeIndexTreeNode(FILE * fp,bwRTreeNode_t * n,uint8_t * wrote,int level)657 int writeIndexTreeNode(FILE *fp, bwRTreeNode_t *n, uint8_t *wrote, int level) {
658     uint8_t one = 0;
659     uint32_t i, j, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
660 
661     if(n->isLeaf) return 0;
662 
663     for(i=0; i<n->nChildren; i++) {
664         if(n->dataOffset[i]) { //traverse into child
665             if(n->isLeaf) return 0; //Only write leaves once!
666             if(writeIndexTreeNode(fp, n->x.child[i], wrote, level+1)) return 1;
667         } else {
668             n->dataOffset[i] = ftell(fp);
669             if(fwrite(&(n->x.child[i]->isLeaf), sizeof(uint8_t), 1, fp) != 1) return 1;
670             if(fwrite(&one, sizeof(uint8_t), 1, fp) != 1) return 1; //one byte of padding
671             if(fwrite(&(n->x.child[i]->nChildren), sizeof(uint16_t), 1, fp) != 1) return 1;
672             for(j=0; j<n->x.child[i]->nChildren; j++) {
673                 vector[0] = n->x.child[i]->chrIdxStart[j];
674                 vector[1] = n->x.child[i]->baseStart[j];
675                 vector[2] = n->x.child[i]->chrIdxEnd[j];
676                 vector[3] = n->x.child[i]->baseEnd[j];
677                 if(n->x.child[i]->isLeaf) {
678                     //Include the offset and size
679                     if(fwrite(vector, sizeof(uint32_t), 4, fp) != 4) return 1;
680                     if(fwrite(&(n->x.child[i]->dataOffset[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
681                     if(fwrite(&(n->x.child[i]->x.size[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
682                 } else {
683                     if(fwrite(vector, sizeof(uint32_t), 6, fp) != 6) return 1;
684                 }
685             }
686             *wrote = 1;
687         }
688     }
689 
690     return 0;
691 }
692 
693 //returns 1 on success
writeIndexOffsets(FILE * fp,bwRTreeNode_t * n,uint64_t offset)694 int writeIndexOffsets(FILE *fp, bwRTreeNode_t *n, uint64_t offset) {
695     uint32_t i;
696 
697     if(n->isLeaf) return 0;
698     for(i=0; i<n->nChildren; i++) {
699         if(writeIndexOffsets(fp, n->x.child[i], n->dataOffset[i])) return 1;
700         if(writeAtPos(&(n->dataOffset[i]), sizeof(uint64_t), 1, offset+20+24*i, fp)) return 2;
701     }
702     return 0;
703 }
704 
705 //Returns 0 on success
writeIndexTree(bigWigFile_t * fp)706 int writeIndexTree(bigWigFile_t *fp) {
707     uint64_t offset;
708     uint8_t wrote = 0;
709     int rv;
710 
711     while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->idx->root, &wrote, 0)) == 0) {
712         if(!wrote) break;
713         wrote = 0;
714     }
715 
716     if(rv || wrote) return 1;
717 
718     //Save the file position
719     offset = bwTell(fp);
720 
721     //Write the offsets
722     if(writeIndexOffsets(fp->URL->x.fp, fp->idx->root, fp->idx->rootOffset)) return 2;
723 
724     //Move the file pointer back to the end
725     bwSetPos(fp, offset);
726 
727     return 0;
728 }
729 
730 //Returns 0 on success. The original state SHOULD be preserved on error
writeIndex(bigWigFile_t * fp)731 int writeIndex(bigWigFile_t *fp) {
732     uint32_t four = IDX_MAGIC;
733     uint64_t idxSize = 0, foo;
734     uint8_t one = 0;
735     uint32_t i, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
736     bwLL *ll = fp->writeBuffer->firstIndexNode, *p;
737     bwRTreeNode_t *root = NULL;
738 
739     if(!fp->writeBuffer->nBlocks) return 0;
740     fp->idx = malloc(sizeof(bwRTree_t));
741     if(!fp->idx) return 2;
742     fp->idx->root = root;
743 
744     //Update the file header to indicate the proper index position
745     foo = bwTell(fp);
746     if(writeAtPos(&foo, sizeof(uint64_t), 1, 0x18, fp->URL->x.fp)) return 3;
747 
748     //Make the tree
749     if(ll == fp->writeBuffer->currentIndexNode) {
750         root = ll->node;
751         idxSize = 4 + 24*root->nChildren;
752     } else {
753         root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
754     }
755     if(!root) return 4;
756     fp->idx->root = root;
757 
758     ll = fp->writeBuffer->firstIndexNode;
759     while(ll) {
760         p = ll->next;
761         free(ll);
762         ll=p;
763     }
764 
765     //write the header
766     if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 5;
767     if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
768     if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
769     if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
770     if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
771     if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
772     if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
773     if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
774     four = 1;
775     if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
776     four = 0;
777     if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
778     fp->idx->rootOffset = bwTell(fp);
779 
780     //Write the root node, since writeIndexTree writes the children and fills in the offset
781     if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
782     if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
783     if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
784     for(i=0; i<root->nChildren; i++) {
785         vector[0] = root->chrIdxStart[i];
786         vector[1] = root->baseStart[i];
787         vector[2] = root->chrIdxEnd[i];
788         vector[3] = root->baseEnd[i];
789         if(root->isLeaf) {
790             //Include the offset and size
791             if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
792             if(fwrite(&(root->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
793             if(fwrite(&(root->x.size[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
794         } else {
795             root->dataOffset[i] = 0; //FIXME: Something upstream is setting this to impossible values (e.g., 0x21?!?!?)
796             if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
797         }
798     }
799 
800     //Write each level
801     if(writeIndexTree(fp)) return 23;
802 
803     return 0;
804 }
805 
806 //The first zoom level has a resolution of 4x mean entry size
807 //This may or may not produce the requested number of zoom levels
makeZoomLevels(bigWigFile_t * fp)808 int makeZoomLevels(bigWigFile_t *fp) {
809     uint32_t meanBinSize, i;
810     uint32_t multiplier = 4, zoom = 10, maxZoom = 0;
811     uint16_t nLevels = 0;
812 
813     meanBinSize = ((double) fp->writeBuffer->runningWidthSum)/(fp->writeBuffer->nEntries);
814     //In reality, one level is skipped
815     meanBinSize *= 4;
816     //N.B., we must ALWAYS check that the zoom doesn't overflow a uint32_t!
817     if(((uint32_t)-1)>>2 < meanBinSize) return 0; //No zoom levels!
818     if(meanBinSize*4 > zoom) zoom = multiplier*meanBinSize;
819 
820     fp->hdr->zoomHdrs = calloc(1, sizeof(bwZoomHdr_t));
821     if(!fp->hdr->zoomHdrs) return 1;
822     fp->hdr->zoomHdrs->level = malloc(fp->hdr->nLevels * sizeof(uint32_t));
823     fp->hdr->zoomHdrs->dataOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
824     fp->hdr->zoomHdrs->indexOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
825     fp->hdr->zoomHdrs->idx = calloc(fp->hdr->nLevels, sizeof(bwRTree_t*));
826     if(!fp->hdr->zoomHdrs->level) return 2;
827     if(!fp->hdr->zoomHdrs->dataOffset) return 3;
828     if(!fp->hdr->zoomHdrs->indexOffset) return 4;
829     if(!fp->hdr->zoomHdrs->idx) return 5;
830 
831     //There's no point in having a zoom level larger than the largest chromosome
832     //This will none the less allow at least one zoom level, which is generally needed for IGV et al.
833     for(i=0; i<fp->cl->nKeys; i++) {
834         if(fp->cl->len[i] > maxZoom) maxZoom = fp->cl->len[i];
835     }
836     if(zoom > maxZoom) zoom = maxZoom;
837 
838     for(i=0; i<fp->hdr->nLevels; i++) {
839         if(zoom > maxZoom) break; //prevent absurdly large zoom levels
840         fp->hdr->zoomHdrs->level[i] = zoom;
841         nLevels++;
842         if(((uint32_t)-1)/multiplier < zoom) break;
843         zoom *= multiplier;
844     }
845     fp->hdr->nLevels = nLevels;
846 
847     fp->writeBuffer->firstZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
848     if(!fp->writeBuffer->firstZoomBuffer) goto error;
849     fp->writeBuffer->lastZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
850     if(!fp->writeBuffer->lastZoomBuffer) goto error;
851     fp->writeBuffer->nNodes = calloc(nLevels, sizeof(uint64_t));
852 
853     for(i=0; i<fp->hdr->nLevels; i++) {
854         fp->writeBuffer->firstZoomBuffer[i] = calloc(1, sizeof(bwZoomBuffer_t));
855         if(!fp->writeBuffer->firstZoomBuffer[i]) goto error;
856         fp->writeBuffer->firstZoomBuffer[i]->p = calloc(fp->hdr->bufSize/32, 32);
857         if(!fp->writeBuffer->firstZoomBuffer[i]->p) goto error;
858         fp->writeBuffer->firstZoomBuffer[i]->m = fp->hdr->bufSize;
859         ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[0] = 0;
860         ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[1] = 0;
861         ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->hdr->zoomHdrs->level[i];
862         if(fp->hdr->zoomHdrs->level[i] > fp->cl->len[0]) ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->cl->len[0];
863         fp->writeBuffer->lastZoomBuffer[i] =  fp->writeBuffer->firstZoomBuffer[i];
864     }
865 
866     return 0;
867 
868 error:
869     if(fp->writeBuffer->firstZoomBuffer) {
870         for(i=0; i<fp->hdr->nLevels; i++) {
871             if(fp->writeBuffer->firstZoomBuffer[i]) {
872                 if(fp->writeBuffer->firstZoomBuffer[i]->p) free(fp->writeBuffer->firstZoomBuffer[i]->p);
873                 free(fp->writeBuffer->firstZoomBuffer[i]);
874             }
875         }
876         free(fp->writeBuffer->firstZoomBuffer);
877     }
878     if(fp->writeBuffer->lastZoomBuffer) free(fp->writeBuffer->lastZoomBuffer);
879     if(fp->writeBuffer->nNodes) free(fp->writeBuffer->lastZoomBuffer);
880     return 6;
881 }
882 
883 //Given an interval start, calculate the next one at a zoom level
nextPos(bigWigFile_t * fp,uint32_t size,uint32_t * pos,uint32_t desiredTid)884 void nextPos(bigWigFile_t *fp, uint32_t size, uint32_t *pos, uint32_t desiredTid) {
885     uint32_t *tid = pos;
886     uint32_t *start = pos+1;
887     uint32_t *end = pos+2;
888     *start += size;
889     if(*start >= fp->cl->len[*tid]) {
890         (*start) = 0;
891         (*tid)++;
892     }
893 
894     //prevent needless iteration when changing chromosomes
895     if(*tid < desiredTid) {
896         *tid = desiredTid;
897         *start = 0;
898     }
899 
900     (*end) = *start+size;
901     if(*end > fp->cl->len[*tid]) (*end) = fp->cl->len[*tid];
902 }
903 
904 //Return the number of bases two intervals overlap
overlapsInterval(uint32_t tid0,uint32_t start0,uint32_t end0,uint32_t tid1,uint32_t start1,uint32_t end1)905 uint32_t overlapsInterval(uint32_t tid0, uint32_t start0, uint32_t end0, uint32_t tid1, uint32_t start1, uint32_t end1) {
906     if(tid0 != tid1) return 0;
907     if(end0 <= start1) return 0;
908     if(end1 <= start0) return 0;
909     if(end0 <= end1) {
910         if(start1 > start0) return end0-start1;
911         return end0-start0;
912     } else {
913         if(start1 > start0) return end1-start1;
914         return end1-start0;
915     }
916 }
917 
918 //Returns the number of bases of the interval written
updateInterval(bigWigFile_t * fp,bwZoomBuffer_t * buffer,double * sum,double * sumsq,uint32_t size,uint32_t tid,uint32_t start,uint32_t end,float value)919 uint32_t updateInterval(bigWigFile_t *fp, bwZoomBuffer_t *buffer, double *sum, double *sumsq, uint32_t size, uint32_t tid, uint32_t start, uint32_t end, float value) {
920     uint32_t *p2 = (uint32_t*) buffer->p;
921     float *fp2 = (float*) p2;
922     uint32_t rv = 0, offset = 0;
923     if(!buffer) return 0;
924     if(buffer->l+32 >= buffer->m) return 0;
925 
926     //Make sure that we don't overflow a uint32_t by adding some huge value to start
927     if(start + size < start) size = ((uint32_t) -1) - start;
928 
929     if(buffer->l) {
930         offset = buffer->l/32;
931     } else {
932         p2[0] = tid;
933         p2[1] = start;
934         if(start+size < end) p2[2] = start+size;
935         else p2[2] = end;
936     }
937 
938     //Do we have any overlap with the previously added interval?
939     if(offset) {
940         rv = overlapsInterval(p2[8*(offset-1)], p2[8*(offset-1)+1], p2[8*(offset-1)+1] + size, tid, start, end);
941         if(rv) {
942             p2[8*(offset-1)+2] = start + rv;
943             p2[8*(offset-1)+3] += rv;
944             if(fp2[8*(offset-1)+4] > value) fp2[8*(offset-1)+4] = value;
945             if(fp2[8*(offset-1)+5] < value) fp2[8*(offset-1)+5] = value;
946             *sum += rv*value;
947             *sumsq += rv*pow(value, 2.0);
948             return rv;
949         } else {
950             fp2[8*(offset-1)+6] = *sum;
951             fp2[8*(offset-1)+7] = *sumsq;
952             *sum = 0.0;
953             *sumsq = 0.0;
954         }
955     }
956 
957     //If we move to a new interval then skip iterating over a bunch of obviously non-overlapping intervals
958     if(offset && p2[8*offset+2] == 0) {
959         p2[8*offset] = tid;
960         p2[8*offset+1] = start;
961         if(start+size < end) p2[8*offset+2] = start+size;
962         else p2[8*offset+2] = end;
963         //nextPos(fp, size, p2+8*offset, tid); //We can actually skip uncovered intervals
964     }
965 
966     //Add a new entry
967     while(!(rv = overlapsInterval(p2[8*offset], p2[8*offset+1], p2[8*offset+1] + size, tid, start, end))) {
968         p2[8*offset] = tid;
969         p2[8*offset+1] = start;
970         if(start+size < end) p2[8*offset+2] = start+size;
971         else p2[8*offset+2] = end;
972         //nextPos(fp, size, p2+8*offset, tid);
973     }
974     p2[8*offset+3] = rv;
975     fp2[8*offset+4] = value; //min
976     fp2[8*offset+5] = value; //max
977     *sum += rv * value;
978     *sumsq += rv * pow(value,2.0);
979     buffer->l += 32;
980     return rv;
981 }
982 
983 //Returns 0 on success
addIntervalValue(bigWigFile_t * fp,uint64_t * nEntries,double * sum,double * sumsq,bwZoomBuffer_t * buffer,uint32_t itemsPerSlot,uint32_t zoom,uint32_t tid,uint32_t start,uint32_t end,float value)984 int addIntervalValue(bigWigFile_t *fp, uint64_t *nEntries, double *sum, double *sumsq, bwZoomBuffer_t *buffer, uint32_t itemsPerSlot, uint32_t zoom, uint32_t tid, uint32_t start, uint32_t end, float value) {
985     bwZoomBuffer_t *newBuffer = NULL;
986     uint32_t rv;
987 
988     while(start < end) {
989         rv = updateInterval(fp, buffer, sum, sumsq, zoom, tid, start, end, value);
990         if(!rv) {
991             //Allocate a new buffer
992             newBuffer = calloc(1, sizeof(bwZoomBuffer_t));
993             if(!newBuffer) return 1;
994             newBuffer->p = calloc(itemsPerSlot, 32);
995             if(!newBuffer->p) goto error;
996             newBuffer->m = itemsPerSlot*32;
997             memcpy(newBuffer->p, buffer->p+buffer->l-32, 4);
998             memcpy(newBuffer->p+4, buffer->p+buffer->l-28, 4);
999             ((uint32_t*) newBuffer->p)[2] = ((uint32_t*) newBuffer->p)[1] + zoom;
1000             *sum = *sumsq = 0.0;
1001             rv = updateInterval(fp, newBuffer, sum, sumsq, zoom, tid, start, end, value);
1002             if(!rv) goto error;
1003             buffer->next = newBuffer;
1004             buffer = buffer->next;
1005             *nEntries += 1;
1006         }
1007         start += rv;
1008     }
1009 
1010     return 0;
1011 
1012 error:
1013     if(newBuffer) {
1014         if(newBuffer->m) free(newBuffer->p);
1015         free(newBuffer);
1016     }
1017     return 2;
1018 }
1019 
1020 //Get all of the intervals and add them to the appropriate zoomBuffer
constructZoomLevels(bigWigFile_t * fp)1021 int constructZoomLevels(bigWigFile_t *fp) {
1022     bwOverlapIterator_t *it = NULL;
1023     double *sum = NULL, *sumsq = NULL;
1024     uint32_t i, j, k;
1025 
1026     sum = calloc(fp->hdr->nLevels, sizeof(double));
1027     sumsq = calloc(fp->hdr->nLevels, sizeof(double));
1028     if(!sum || !sumsq) goto error;
1029 
1030     for(i=0; i<fp->cl->nKeys; i++) {
1031         it = bwOverlappingIntervalsIterator(fp, fp->cl->chrom[i], 0, fp->cl->len[i], 100000);
1032         if(!it) goto error;
1033 	while(it->data != NULL){
1034 	  for(j=0;j<it->intervals->l;j++){
1035 		for(k=0;k<fp->hdr->nLevels;k++){
1036 			if(addIntervalValue(fp, &(fp->writeBuffer->nNodes[k]), sum+k, sumsq+k, fp->writeBuffer->lastZoomBuffer[k], fp->hdr->bufSize/32, fp->hdr->zoomHdrs->level[k], i, it->intervals->start[j], it->intervals->end[j], it->intervals->value[j])) goto error;
1037 			while(fp->writeBuffer->lastZoomBuffer[k]->next) fp->writeBuffer->lastZoomBuffer[k] = fp->writeBuffer->lastZoomBuffer[k]->next;
1038 		}
1039 	  }
1040 	  it = bwIteratorNext(it);
1041 	}
1042 	bwIteratorDestroy(it);
1043 
1044     }
1045 
1046     //Make an index for each zoom level
1047     for(i=0; i<fp->hdr->nLevels; i++) {
1048         fp->hdr->zoomHdrs->idx[i] = calloc(1, sizeof(bwRTree_t));
1049         if(!fp->hdr->zoomHdrs->idx[i]) return 1;
1050         fp->hdr->zoomHdrs->idx[i]->blockSize = fp->writeBuffer->blockSize;
1051     }
1052 
1053 
1054     free(sum);
1055     free(sumsq);
1056 
1057     return 0;
1058 
1059 error:
1060     if(it) bwIteratorDestroy(it);
1061     if(sum) free(sum);
1062     if(sumsq) free(sumsq);
1063     return 1;
1064 }
1065 
writeZoomLevels(bigWigFile_t * fp)1066 int writeZoomLevels(bigWigFile_t *fp) {
1067     uint64_t offset1, offset2, idxSize = 0;
1068     uint32_t i, j, four = 0, last, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0;
1069     uint8_t wrote, one = 0;
1070     uint16_t actualNLevels = 0;
1071     int rv;
1072     bwLL *ll, *p;
1073     bwRTreeNode_t *root;
1074     bwZoomBuffer_t *zb, *zb2;
1075     bwWriteBuffer_t *wb = fp->writeBuffer;
1076     uLongf sz;
1077 
1078     for(i=0; i<fp->hdr->nLevels; i++) {
1079         if(i) {
1080             //Is this a duplicate level?
1081             if(fp->writeBuffer->nNodes[i] == fp->writeBuffer->nNodes[i-1]) break;
1082         }
1083         actualNLevels++;
1084 
1085         //reserve a uint32_t for the number of blocks
1086         fp->hdr->zoomHdrs->dataOffset[i] = bwTell(fp);
1087         fp->writeBuffer->nBlocks = 0;
1088         fp->writeBuffer->l = 24;
1089         if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
1090         zb = fp->writeBuffer->firstZoomBuffer[i];
1091         fp->writeBuffer->firstIndexNode = NULL;
1092         fp->writeBuffer->currentIndexNode = NULL;
1093         while(zb) {
1094             sz = fp->hdr->bufSize;
1095             if(compress(wb->compressP, &sz, zb->p, zb->l) != Z_OK) return 2;
1096 
1097             //write the data to disk
1098             if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 3;
1099 
1100             //Add an entry into the index
1101             last = (zb->l - 32)>>2;
1102             if(addIndexEntry(fp, ((uint32_t*)zb->p)[0], ((uint32_t*)zb->p)[last], ((uint32_t*)zb->p)[1], ((uint32_t*)zb->p)[last+2], bwTell(fp)-sz, sz)) return 4;
1103 
1104             wb->nBlocks++;
1105             wb->l = 24;
1106             zb = zb->next;
1107         }
1108         if(writeAtPos(&(wb->nBlocks), sizeof(uint32_t), 1, fp->hdr->zoomHdrs->dataOffset[i], fp->URL->x.fp)) return 5;
1109 
1110         //Make the tree
1111         ll = fp->writeBuffer->firstIndexNode;
1112         if(ll == fp->writeBuffer->currentIndexNode) {
1113             root = ll->node;
1114             idxSize = 4 + 24*root->nChildren;
1115         } else {
1116             root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
1117         }
1118         if(!root) return 4;
1119         fp->hdr->zoomHdrs->idx[i]->root = root;
1120 
1121         ll = fp->writeBuffer->firstIndexNode;
1122         while(ll) {
1123             p = ll->next;
1124             free(ll);
1125             ll=p;
1126         }
1127 
1128 
1129         //write the index
1130         wrote = 0;
1131         fp->hdr->zoomHdrs->indexOffset[i] = bwTell(fp);
1132         four = IDX_MAGIC;
1133         if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
1134         root = fp->hdr->zoomHdrs->idx[i]->root;
1135         if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
1136         if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
1137         if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
1138         if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
1139         if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
1140         if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
1141         if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
1142         four = fp->hdr->bufSize/32;
1143         if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
1144         four = 0;
1145         if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
1146         fp->hdr->zoomHdrs->idx[i]->rootOffset = bwTell(fp);
1147 
1148         //Write the root node, since writeIndexTree writes the children and fills in the offset
1149         offset1 = bwTell(fp);
1150         if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
1151         if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
1152         if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
1153         for(j=0; j<root->nChildren; j++) {
1154             vector[0] = root->chrIdxStart[j];
1155             vector[1] = root->baseStart[j];
1156             vector[2] = root->chrIdxEnd[j];
1157             vector[3] = root->baseEnd[j];
1158             if(root->isLeaf) {
1159                 //Include the offset and size
1160                 if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
1161                 if(fwrite(&(root->dataOffset[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
1162                 if(fwrite(&(root->x.size[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
1163             } else {
1164                 if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
1165             }
1166         }
1167 
1168         while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->hdr->zoomHdrs->idx[i]->root, &wrote, 0)) == 0) {
1169             if(!wrote) break;
1170             wrote = 0;
1171         }
1172 
1173         if(rv || wrote) return 6;
1174 
1175         //Save the file position
1176         offset2 = bwTell(fp);
1177 
1178         //Write the offsets
1179         if(writeIndexOffsets(fp->URL->x.fp, root, offset1)) return 2;
1180 
1181         //Move the file pointer back to the end
1182         bwSetPos(fp, offset2);
1183 
1184 
1185         //Free the linked list
1186         zb = fp->writeBuffer->firstZoomBuffer[i];
1187         while(zb) {
1188             if(zb->p) free(zb->p);
1189             zb2 = zb->next;
1190             free(zb);
1191             zb = zb2;
1192         }
1193         fp->writeBuffer->firstZoomBuffer[i] = NULL;
1194     }
1195 
1196     //Free unused zoom levels
1197     for(i=actualNLevels; i<fp->hdr->nLevels; i++) {
1198         zb = fp->writeBuffer->firstZoomBuffer[i];
1199         while(zb) {
1200             if(zb->p) free(zb->p);
1201             zb2 = zb->next;
1202             free(zb);
1203             zb = zb2;
1204         }
1205         fp->writeBuffer->firstZoomBuffer[i] = NULL;
1206     }
1207 
1208     //Write the zoom headers to disk
1209     offset1 = bwTell(fp);
1210     if(bwSetPos(fp, 0x40)) return 7;
1211     four = 0;
1212     for(i=0; i<actualNLevels; i++) {
1213         if(fwrite(&(fp->hdr->zoomHdrs->level[i]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
1214         if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
1215         if(fwrite(&(fp->hdr->zoomHdrs->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 10;
1216         if(fwrite(&(fp->hdr->zoomHdrs->indexOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 11;
1217     }
1218 
1219     //Write the number of levels if needed
1220     if(bwSetPos(fp, 0x6)) return 12;
1221     if(fwrite(&actualNLevels, sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 13;
1222 
1223     if(bwSetPos(fp, offset1)) return 14;
1224 
1225     return 0;
1226 }
1227 
1228 //0 on success
bwFinalize(bigWigFile_t * fp)1229 int bwFinalize(bigWigFile_t *fp) {
1230     uint32_t four;
1231     uint64_t offset;
1232     if(!fp->isWrite) return 0;
1233 
1234     //Flush the buffer
1235     if(flushBuffer(fp)) return 1; //Valgrind reports a problem here!
1236 
1237     //Update the data section with the number of blocks written
1238     if(fp->hdr) {
1239         if(writeAtPos(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->hdr->dataOffset, fp->URL->x.fp)) return 2;
1240     } else {
1241         //The header wasn't written!
1242         return 1;
1243     }
1244 
1245     //write the bufferSize
1246     if(fp->hdr->bufSize) {
1247         if(writeAtPos(&(fp->hdr->bufSize), sizeof(uint32_t), 1, 0x34, fp->URL->x.fp)) return 2;
1248     }
1249 
1250     //write the summary information
1251     if(writeSummary(fp)) return 3;
1252 
1253     //Convert the linked-list to a tree and write to disk
1254     if(writeIndex(fp)) return 4;
1255 
1256     //Zoom level stuff here?
1257     if(fp->hdr->nLevels && fp->writeBuffer->nBlocks) {
1258         offset = bwTell(fp);
1259         if(makeZoomLevels(fp)) return 5;
1260         if(constructZoomLevels(fp)) return 6;
1261         bwSetPos(fp, offset);
1262         if(writeZoomLevels(fp)) return 7; //This write nLevels as well
1263     }
1264 
1265     //write magic at the end of the file
1266     four = BIGWIG_MAGIC;
1267     if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
1268 
1269     return 0;
1270 }
1271 
1272 /*
1273 data chunk:
1274 uint64_t number of blocks (2 / 110851)
1275 some blocks
1276 
1277 an uncompressed data block (24 byte header)
1278 uint32_t Tid	0-4
1279 uint32_t start	4-8
1280 uint32_t end	8-12
1281 uint32_t step	12-16
1282 uint32_t span	16-20
1283 uint8_t type	20
1284 uint8_t padding
1285 uint16_t nItems	22
1286 nItems of:
1287     type 1: //12 bytes
1288         uint32_t start
1289         uint32_t end
1290         float value
1291     type 2: //8 bytes
1292         uint32_t start
1293         float value
1294     type 3: //4 bytes
1295         float value
1296 
1297 data block index header
1298 uint32_t magic
1299 uint32_t blockSize (256 in the example) maximum number of children
1300 uint64_t number of blocks (2 / 110851)
1301 uint32_t startTid
1302 uint32_t startPos
1303 uint32_t endTid
1304 uint32_t endPos
1305 uint64_t index size? (0x1E7 / 0x1AF0401F) index address?
1306 uint32_t itemsPerBlock (1 / 1) 1024 for zoom headers 1024 for zoom headers
1307 uint32_t padding
1308 
1309 data block index node non-leaf (4 bytes + 24*nChildren)
1310 uint8_t isLeaf
1311 uint8_t padding
1312 uint16_t nChildren (2, 256)
1313 uint32_t startTid
1314 uint32_t startPos
1315 uint32_t endTid
1316 uint32_t endPos
1317 uint64_t dataOffset (0x1AF05853, 0x1AF07057)
1318 
1319 data block index node leaf (4 bytes + 32*nChildren)
1320 uint8_t isLeaf
1321 uint8_t padding
1322 uint16_t nChildren (2)
1323 uint32_t startTid
1324 uint32_t startPos
1325 uint32_t endTid
1326 uint32_t endPos
1327 uint64_t dataOffset (0x198, 0x1CF)
1328 uint64_t dataSize (55, 24)
1329 
1330 zoom data block
1331 uint32_t number of blocks (10519766)
1332 some data blocks
1333 */
1334