1 #include "bigWig.h"
2 #include "bwCommon.h"
3 #include <stdlib.h>
4 #include <math.h>
5 #include <string.h>
6 #include <stdio.h>
7 
8 static uint64_t readChromBlock(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize);
9 
10 //Return the position in the file
bwTell(bigWigFile_t * fp)11 long bwTell(bigWigFile_t *fp) {
12     if(fp->URL->type == BWG_FILE) return ftell(fp->URL->x.fp);
13     return (long) (fp->URL->filePos + fp->URL->bufPos);
14 }
15 
16 //Seek to a given position, always from the beginning of the file
17 //Return 0 on success and -1 on error
18 //To do, use the return code of urlSeek() in a more useful way.
bwSetPos(bigWigFile_t * fp,size_t pos)19 int bwSetPos(bigWigFile_t *fp, size_t pos) {
20     CURLcode rv = urlSeek(fp->URL, pos);
21     if(rv == CURLE_OK) return 0;
22     return -1;
23 }
24 
25 //returns the number of full members read (nmemb on success, something less on error)
bwRead(void * data,size_t sz,size_t nmemb,bigWigFile_t * fp)26 size_t bwRead(void *data, size_t sz, size_t nmemb, bigWigFile_t *fp) {
27     size_t i, rv;
28     for(i=0; i<nmemb; i++) {
29         rv = urlRead(fp->URL, data+i*sz, sz);
30         if(rv != sz) return i;
31     }
32     return nmemb;
33 }
34 
35 //Initializes curl and sets global variables
36 //Returns 0 on success and 1 on error
37 //This should be called only once and bwCleanup() must be called when finished.
bwInit(size_t defaultBufSize)38 int bwInit(size_t defaultBufSize) {
39     //set the buffer size, number of iterations, sleep time between iterations, etc.
40     GLOBAL_DEFAULTBUFFERSIZE = defaultBufSize;
41 
42     //call curl_global_init()
43 #ifndef NOCURL
44     CURLcode rv;
45     rv = curl_global_init(CURL_GLOBAL_ALL);
46     if(rv != CURLE_OK) return 1;
47 #endif
48     return 0;
49 }
50 
51 //This should be called before quiting, to release memory acquired by curl
bwCleanup()52 void bwCleanup() {
53 #ifndef NOCURL
54     curl_global_cleanup();
55 #endif
56 }
57 
bwReadZoomHdrs(bigWigFile_t * bw)58 static bwZoomHdr_t *bwReadZoomHdrs(bigWigFile_t *bw) {
59     if(bw->isWrite) return NULL;
60     uint16_t i;
61     bwZoomHdr_t *zhdr = malloc(sizeof(bwZoomHdr_t));
62     if(!zhdr) return NULL;
63     uint32_t *level = malloc(bw->hdr->nLevels * sizeof(uint64_t));
64     if(!level) {
65         free(zhdr);
66         return NULL;
67     }
68     uint32_t padding = 0;
69     uint64_t *dataOffset = malloc(sizeof(uint64_t) * bw->hdr->nLevels);
70     if(!dataOffset) {
71         free(zhdr);
72         free(level);
73         return NULL;
74     }
75     uint64_t *indexOffset = malloc(sizeof(uint64_t) * bw->hdr->nLevels);
76     if(!indexOffset) {
77         free(zhdr);
78         free(level);
79         free(dataOffset);
80         return NULL;
81     }
82 
83     for(i=0; i<bw->hdr->nLevels; i++) {
84         if(bwRead((void*) &(level[i]), sizeof(uint32_t), 1, bw) != 1) goto error;
85         if(bwRead((void*) &padding, sizeof(uint32_t), 1, bw) != 1) goto error;
86         if(bwRead((void*) &(dataOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
87         if(bwRead((void*) &(indexOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
88     }
89 
90     zhdr->level = level;
91     zhdr->dataOffset = dataOffset;
92     zhdr->indexOffset = indexOffset;
93     zhdr->idx = calloc(bw->hdr->nLevels, sizeof(bwRTree_t*));
94     if(!zhdr->idx) goto error;
95 
96     return zhdr;
97 
98 error:
99     for(i=0; i<bw->hdr->nLevels; i++) {
100         if(zhdr->idx[i]) bwDestroyIndex(zhdr->idx[i]);
101     }
102     free(zhdr);
103     free(level);
104     free(dataOffset);
105     free(indexOffset);
106     return NULL;
107 }
108 
bwHdrDestroy(bigWigHdr_t * hdr)109 static void bwHdrDestroy(bigWigHdr_t *hdr) {
110     int i;
111     if(hdr->zoomHdrs) {
112         free(hdr->zoomHdrs->level);
113         free(hdr->zoomHdrs->dataOffset);
114         free(hdr->zoomHdrs->indexOffset);
115         for(i=0; i<hdr->nLevels; i++) {
116             if(hdr->zoomHdrs->idx[i]) bwDestroyIndex(hdr->zoomHdrs->idx[i]);
117         }
118         free(hdr->zoomHdrs->idx);
119         free(hdr->zoomHdrs);
120     }
121     free(hdr);
122 }
123 
bwHdrRead(bigWigFile_t * bw)124 static void bwHdrRead(bigWigFile_t *bw) {
125     uint32_t magic;
126     if(bw->isWrite) return;
127     bw->hdr = calloc(1, sizeof(bigWigHdr_t));
128     if(!bw->hdr) return;
129 
130     if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error; //0x0
131     if(magic != BIGWIG_MAGIC && magic != BIGBED_MAGIC) goto error;
132 
133     if(bwRead((void*) &(bw->hdr->version), sizeof(uint16_t), 1, bw) != 1) goto error; //0x4
134     if(bwRead((void*) &(bw->hdr->nLevels), sizeof(uint16_t), 1, bw) != 1) goto error; //0x6
135     if(bwRead((void*) &(bw->hdr->ctOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x8
136     if(bwRead((void*) &(bw->hdr->dataOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x10
137     if(bwRead((void*) &(bw->hdr->indexOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x18
138     if(bwRead((void*) &(bw->hdr->fieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x20
139     if(bwRead((void*) &(bw->hdr->definedFieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x22
140     if(bwRead((void*) &(bw->hdr->sqlOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x24
141     if(bwRead((void*) &(bw->hdr->summaryOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x2c
142     if(bwRead((void*) &(bw->hdr->bufSize), sizeof(uint32_t), 1, bw) != 1) goto error; //0x34
143     if(bwRead((void*) &(bw->hdr->extensionOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x38
144 
145     //zoom headers
146     if(bw->hdr->nLevels) {
147         if(!(bw->hdr->zoomHdrs = bwReadZoomHdrs(bw))) goto error;
148     }
149 
150     //File summary information
151     if(bw->hdr->summaryOffset) {
152         if(urlSeek(bw->URL, bw->hdr->summaryOffset) != CURLE_OK) goto error;
153         if(bwRead((void*) &(bw->hdr->nBasesCovered), sizeof(uint64_t), 1, bw) != 1) goto error;
154         if(bwRead((void*) &(bw->hdr->minVal), sizeof(uint64_t), 1, bw) != 1) goto error;
155         if(bwRead((void*) &(bw->hdr->maxVal), sizeof(uint64_t), 1, bw) != 1) goto error;
156         if(bwRead((void*) &(bw->hdr->sumData), sizeof(uint64_t), 1, bw) != 1) goto error;
157         if(bwRead((void*) &(bw->hdr->sumSquared), sizeof(uint64_t), 1, bw) != 1) goto error;
158     }
159 
160     //In case of uncompressed remote files, let the IO functions know to request larger chunks
161     bw->URL->isCompressed = (bw->hdr->bufSize > 0)?1:0;
162 
163     return;
164 
165 error:
166     bwHdrDestroy(bw->hdr);
167     fprintf(stderr, "[bwHdrRead] There was an error while reading in the header!\n");
168     bw->hdr = NULL;
169 }
170 
destroyChromList(chromList_t * cl)171 static void destroyChromList(chromList_t *cl) {
172     uint32_t i;
173     if(!cl) return;
174     if(cl->nKeys && cl->chrom) {
175         for(i=0; i<cl->nKeys; i++) {
176             if(cl->chrom[i]) free(cl->chrom[i]);
177         }
178     }
179     if(cl->chrom) free(cl->chrom);
180     if(cl->len) free(cl->len);
181     free(cl);
182 }
183 
readChromLeaf(bigWigFile_t * bw,chromList_t * cl,uint32_t valueSize)184 static uint64_t readChromLeaf(bigWigFile_t *bw, chromList_t *cl, uint32_t valueSize) {
185     uint16_t nVals, i;
186     uint32_t idx;
187     char *chrom = NULL;
188 
189     if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
190     chrom = calloc(valueSize+1, sizeof(char));
191     if(!chrom) return -1;
192 
193     for(i=0; i<nVals; i++) {
194         if(bwRead((void*) chrom, sizeof(char), valueSize, bw) != valueSize) goto error;
195         if(bwRead((void*) &idx, sizeof(uint32_t), 1, bw) != 1) goto error;
196         if(bwRead((void*) &(cl->len[idx]), sizeof(uint32_t), 1, bw) != 1) goto error;
197         cl->chrom[idx] = strdup(chrom);
198         if(!(cl->chrom[idx])) goto error;
199     }
200 
201     free(chrom);
202     return nVals;
203 
204 error:
205     free(chrom);
206     return -1;
207 }
208 
readChromNonLeaf(bigWigFile_t * bw,chromList_t * cl,uint32_t keySize)209 static uint64_t readChromNonLeaf(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize) {
210     uint64_t offset , rv = 0, previous;
211     uint16_t nVals, i;
212 
213     if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
214 
215     previous = bwTell(bw) + keySize;
216     for(i=0; i<nVals; i++) {
217         if(bwSetPos(bw, previous)) return -1;
218         if(bwRead((void*) &offset, sizeof(uint64_t), 1, bw) != 1) return -1;
219         if(bwSetPos(bw, offset)) return -1;
220         rv += readChromBlock(bw, cl, keySize);
221         previous += 8 + keySize;
222     }
223 
224     return rv;
225 }
226 
readChromBlock(bigWigFile_t * bw,chromList_t * cl,uint32_t keySize)227 static uint64_t readChromBlock(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize) {
228     uint8_t isLeaf, padding;
229 
230     if(bwRead((void*) &isLeaf, sizeof(uint8_t), 1, bw) != 1) return -1;
231     if(bwRead((void*) &padding, sizeof(uint8_t), 1, bw) != 1) return -1;
232 
233     if(isLeaf) {
234         return readChromLeaf(bw, cl, keySize);
235     } else { //I've never actually observed one of these, which is good since they're pointless
236         return readChromNonLeaf(bw, cl, keySize);
237     }
238 }
239 
bwReadChromList(bigWigFile_t * bw)240 static chromList_t *bwReadChromList(bigWigFile_t *bw) {
241     chromList_t *cl = NULL;
242     uint32_t magic, keySize, valueSize, itemsPerBlock;
243     uint64_t rv, itemCount;
244     if(bw->isWrite) return NULL;
245     if(bwSetPos(bw, bw->hdr->ctOffset)) return NULL;
246 
247     cl = calloc(1, sizeof(chromList_t));
248     if(!cl) return NULL;
249 
250     if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
251     if(magic != CIRTREE_MAGIC) goto error;
252 
253     if(bwRead((void*) &itemsPerBlock, sizeof(uint32_t), 1, bw) != 1) goto error;
254     if(bwRead((void*) &keySize, sizeof(uint32_t), 1, bw) != 1) goto error;
255     if(bwRead((void*) &valueSize, sizeof(uint32_t), 1, bw) != 1) goto error;
256     if(bwRead((void*) &itemCount, sizeof(uint64_t), 1, bw) != 1) goto error;
257 
258     cl->nKeys = itemCount;
259     cl->chrom = calloc(itemCount, sizeof(char*));
260     cl->len = calloc(itemCount, sizeof(uint32_t));
261     if(!cl->chrom) goto error;
262     if(!cl->len) goto error;
263 
264     if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
265     if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
266 
267     //Read in the blocks
268     rv = readChromBlock(bw, cl, keySize);
269     if(rv == (uint64_t) -1) goto error;
270     if(rv != itemCount) goto error;
271 
272     return cl;
273 
274 error:
275     destroyChromList(cl);
276     return NULL;
277 }
278 
279 //This is here mostly for convenience
bwDestroyWriteBuffer(bwWriteBuffer_t * wb)280 static void bwDestroyWriteBuffer(bwWriteBuffer_t *wb) {
281     if(wb->p) free(wb->p);
282     if(wb->compressP) free(wb->compressP);
283     if(wb->firstZoomBuffer) free(wb->firstZoomBuffer);
284     if(wb->lastZoomBuffer) free(wb->lastZoomBuffer);
285     if(wb->nNodes) free(wb->nNodes);
286     free(wb);
287 }
288 
bwClose(bigWigFile_t * fp)289 void bwClose(bigWigFile_t *fp) {
290     if(!fp) return;
291     if(bwFinalize(fp)) {
292         fprintf(stderr, "[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.\n");
293     }
294     if(fp->URL) urlClose(fp->URL);
295     if(fp->hdr) bwHdrDestroy(fp->hdr);
296     if(fp->cl) destroyChromList(fp->cl);
297     if(fp->idx) bwDestroyIndex(fp->idx);
298     if(fp->writeBuffer) bwDestroyWriteBuffer(fp->writeBuffer);
299     free(fp);
300 }
301 
bwIsBigWig(char * fname,CURLcode (* callBack)(CURL *))302 int bwIsBigWig(char *fname, CURLcode (*callBack) (CURL*)) {
303     uint32_t magic = 0;
304     URL_t *URL = NULL;
305 
306     URL = urlOpen(fname, *callBack, NULL);
307 
308     if(!URL) return 0;
309     if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
310     urlClose(URL);
311     if(magic == BIGWIG_MAGIC) return 1;
312     return 0;
313 }
314 
bbGetSQL(bigWigFile_t * bw)315 char *bbGetSQL(bigWigFile_t *bw) {
316     char *o = NULL;
317     uint64_t len;
318     if(!bw->hdr->sqlOffset) return NULL;
319     len = bw->hdr->summaryOffset - bw->hdr->sqlOffset; //This includes the NULL terminator
320     o = malloc(sizeof(char) * len);
321     if(!o) goto error;
322     if(bwSetPos(bw, bw->hdr->sqlOffset)) goto error;
323     if(bwRead((void*) o, len, 1, bw) != 1) goto error;
324     return o;
325 
326 error:
327     if(o) free(o);
328     printf("Got an error in bbGetSQL!\n");
329     return NULL;
330 }
331 
bbIsBigBed(char * fname,CURLcode (* callBack)(CURL *))332 int bbIsBigBed(char *fname, CURLcode (*callBack) (CURL*)) {
333     uint32_t magic = 0;
334     URL_t *URL = NULL;
335 
336     URL = urlOpen(fname, *callBack, NULL);
337 
338     if(!URL) return 0;
339     if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
340     urlClose(URL);
341     if(magic == BIGBED_MAGIC) return 1;
342     return 0;
343 }
344 
bwOpen(char * fname,CURLcode (* callBack)(CURL *),const char * mode)345 bigWigFile_t *bwOpen(char *fname, CURLcode (*callBack) (CURL*), const char *mode) {
346     bigWigFile_t *bwg = calloc(1, sizeof(bigWigFile_t));
347     if(!bwg) {
348         fprintf(stderr, "[bwOpen] Couldn't allocate space to create the output object!\n");
349         return NULL;
350     }
351     if((!mode) || (strchr(mode, 'w') == NULL)) {
352         bwg->isWrite = 0;
353         bwg->URL = urlOpen(fname, *callBack, NULL);
354         if(!bwg->URL) {
355             fprintf(stderr, "[bwOpen] urlOpen is NULL!\n");
356             goto error;
357         }
358 
359         //Attempt to read in the fixed header
360         bwHdrRead(bwg);
361         if(!bwg->hdr) {
362             fprintf(stderr, "[bwOpen] bwg->hdr is NULL!\n");
363             goto error;
364         }
365 
366         //Read in the chromosome list
367         bwg->cl = bwReadChromList(bwg);
368         if(!bwg->cl) {
369             fprintf(stderr, "[bwOpen] bwg->cl is NULL (%s)!\n", fname);
370             goto error;
371         }
372 
373         //Read in the index
374         if(bwg->hdr->indexOffset) {
375             bwg->idx = bwReadIndex(bwg, 0);
376             if(!bwg->idx) {
377                 fprintf(stderr, "[bwOpen] bwg->idx is NULL bwg->hdr->dataOffset 0x%"PRIx64"!\n", bwg->hdr->dataOffset);
378                 goto error;
379             }
380         }
381     } else {
382         bwg->isWrite = 1;
383         bwg->URL = urlOpen(fname, NULL, "w+");
384         if(!bwg->URL) goto error;
385         bwg->writeBuffer = calloc(1,sizeof(bwWriteBuffer_t));
386         if(!bwg->writeBuffer) goto error;
387         bwg->writeBuffer->l = 24;
388     }
389 
390     return bwg;
391 
392 error:
393     bwClose(bwg);
394     return NULL;
395 }
396 
bbOpen(char * fname,CURLcode (* callBack)(CURL *))397 bigWigFile_t *bbOpen(char *fname, CURLcode (*callBack) (CURL*)) {
398     bigWigFile_t *bb = calloc(1, sizeof(bigWigFile_t));
399     if(!bb) {
400         fprintf(stderr, "[bbOpen] Couldn't allocate space to create the output object!\n");
401         return NULL;
402     }
403 
404     //Set the type to 1 for bigBed
405     bb->type = 1;
406 
407     bb->URL = urlOpen(fname, *callBack, NULL);
408     if(!bb->URL) goto error;
409 
410     //Attempt to read in the fixed header
411     bwHdrRead(bb);
412     if(!bb->hdr) goto error;
413 
414     //Read in the chromosome list
415     bb->cl = bwReadChromList(bb);
416     if(!bb->cl) goto error;
417 
418     //Read in the index
419     bb->idx = bwReadIndex(bb, 0);
420     if(!bb->idx) goto error;
421 
422     return bb;
423 
424 error:
425     bwClose(bb);
426     return NULL;
427 }
428