1 #include "bigWig.h"
2 #include "bwCommon.h"
3 #include <stdlib.h>
4 #include <math.h>
5 #include <string.h>
6 #include <stdio.h>
7
8 static uint64_t readChromBlock(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize);
9
10 //Return the position in the file
bwTell(bigWigFile_t * fp)11 long bwTell(bigWigFile_t *fp) {
12 if(fp->URL->type == BWG_FILE) return ftell(fp->URL->x.fp);
13 return (long) (fp->URL->filePos + fp->URL->bufPos);
14 }
15
16 //Seek to a given position, always from the beginning of the file
17 //Return 0 on success and -1 on error
18 //To do, use the return code of urlSeek() in a more useful way.
bwSetPos(bigWigFile_t * fp,size_t pos)19 int bwSetPos(bigWigFile_t *fp, size_t pos) {
20 CURLcode rv = urlSeek(fp->URL, pos);
21 if(rv == CURLE_OK) return 0;
22 return -1;
23 }
24
25 //returns the number of full members read (nmemb on success, something less on error)
bwRead(void * data,size_t sz,size_t nmemb,bigWigFile_t * fp)26 size_t bwRead(void *data, size_t sz, size_t nmemb, bigWigFile_t *fp) {
27 size_t i, rv;
28 for(i=0; i<nmemb; i++) {
29 rv = urlRead(fp->URL, data+i*sz, sz);
30 if(rv != sz) return i;
31 }
32 return nmemb;
33 }
34
35 //Initializes curl and sets global variables
36 //Returns 0 on success and 1 on error
37 //This should be called only once and bwCleanup() must be called when finished.
bwInit(size_t defaultBufSize)38 int bwInit(size_t defaultBufSize) {
39 //set the buffer size, number of iterations, sleep time between iterations, etc.
40 GLOBAL_DEFAULTBUFFERSIZE = defaultBufSize;
41
42 //call curl_global_init()
43 #ifndef NOCURL
44 CURLcode rv;
45 rv = curl_global_init(CURL_GLOBAL_ALL);
46 if(rv != CURLE_OK) return 1;
47 #endif
48 return 0;
49 }
50
51 //This should be called before quiting, to release memory acquired by curl
bwCleanup()52 void bwCleanup() {
53 #ifndef NOCURL
54 curl_global_cleanup();
55 #endif
56 }
57
bwReadZoomHdrs(bigWigFile_t * bw)58 static bwZoomHdr_t *bwReadZoomHdrs(bigWigFile_t *bw) {
59 if(bw->isWrite) return NULL;
60 uint16_t i;
61 bwZoomHdr_t *zhdr = malloc(sizeof(bwZoomHdr_t));
62 if(!zhdr) return NULL;
63 uint32_t *level = malloc(bw->hdr->nLevels * sizeof(uint64_t));
64 if(!level) {
65 free(zhdr);
66 return NULL;
67 }
68 uint32_t padding = 0;
69 uint64_t *dataOffset = malloc(sizeof(uint64_t) * bw->hdr->nLevels);
70 if(!dataOffset) {
71 free(zhdr);
72 free(level);
73 return NULL;
74 }
75 uint64_t *indexOffset = malloc(sizeof(uint64_t) * bw->hdr->nLevels);
76 if(!indexOffset) {
77 free(zhdr);
78 free(level);
79 free(dataOffset);
80 return NULL;
81 }
82
83 for(i=0; i<bw->hdr->nLevels; i++) {
84 if(bwRead((void*) &(level[i]), sizeof(uint32_t), 1, bw) != 1) goto error;
85 if(bwRead((void*) &padding, sizeof(uint32_t), 1, bw) != 1) goto error;
86 if(bwRead((void*) &(dataOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
87 if(bwRead((void*) &(indexOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
88 }
89
90 zhdr->level = level;
91 zhdr->dataOffset = dataOffset;
92 zhdr->indexOffset = indexOffset;
93 zhdr->idx = calloc(bw->hdr->nLevels, sizeof(bwRTree_t*));
94 if(!zhdr->idx) goto error;
95
96 return zhdr;
97
98 error:
99 for(i=0; i<bw->hdr->nLevels; i++) {
100 if(zhdr->idx[i]) bwDestroyIndex(zhdr->idx[i]);
101 }
102 free(zhdr);
103 free(level);
104 free(dataOffset);
105 free(indexOffset);
106 return NULL;
107 }
108
bwHdrDestroy(bigWigHdr_t * hdr)109 static void bwHdrDestroy(bigWigHdr_t *hdr) {
110 int i;
111 if(hdr->zoomHdrs) {
112 free(hdr->zoomHdrs->level);
113 free(hdr->zoomHdrs->dataOffset);
114 free(hdr->zoomHdrs->indexOffset);
115 for(i=0; i<hdr->nLevels; i++) {
116 if(hdr->zoomHdrs->idx[i]) bwDestroyIndex(hdr->zoomHdrs->idx[i]);
117 }
118 free(hdr->zoomHdrs->idx);
119 free(hdr->zoomHdrs);
120 }
121 free(hdr);
122 }
123
bwHdrRead(bigWigFile_t * bw)124 static void bwHdrRead(bigWigFile_t *bw) {
125 uint32_t magic;
126 if(bw->isWrite) return;
127 bw->hdr = calloc(1, sizeof(bigWigHdr_t));
128 if(!bw->hdr) return;
129
130 if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error; //0x0
131 if(magic != BIGWIG_MAGIC && magic != BIGBED_MAGIC) goto error;
132
133 if(bwRead((void*) &(bw->hdr->version), sizeof(uint16_t), 1, bw) != 1) goto error; //0x4
134 if(bwRead((void*) &(bw->hdr->nLevels), sizeof(uint16_t), 1, bw) != 1) goto error; //0x6
135 if(bwRead((void*) &(bw->hdr->ctOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x8
136 if(bwRead((void*) &(bw->hdr->dataOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x10
137 if(bwRead((void*) &(bw->hdr->indexOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x18
138 if(bwRead((void*) &(bw->hdr->fieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x20
139 if(bwRead((void*) &(bw->hdr->definedFieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x22
140 if(bwRead((void*) &(bw->hdr->sqlOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x24
141 if(bwRead((void*) &(bw->hdr->summaryOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x2c
142 if(bwRead((void*) &(bw->hdr->bufSize), sizeof(uint32_t), 1, bw) != 1) goto error; //0x34
143 if(bwRead((void*) &(bw->hdr->extensionOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x38
144
145 //zoom headers
146 if(bw->hdr->nLevels) {
147 if(!(bw->hdr->zoomHdrs = bwReadZoomHdrs(bw))) goto error;
148 }
149
150 //File summary information
151 if(bw->hdr->summaryOffset) {
152 if(urlSeek(bw->URL, bw->hdr->summaryOffset) != CURLE_OK) goto error;
153 if(bwRead((void*) &(bw->hdr->nBasesCovered), sizeof(uint64_t), 1, bw) != 1) goto error;
154 if(bwRead((void*) &(bw->hdr->minVal), sizeof(uint64_t), 1, bw) != 1) goto error;
155 if(bwRead((void*) &(bw->hdr->maxVal), sizeof(uint64_t), 1, bw) != 1) goto error;
156 if(bwRead((void*) &(bw->hdr->sumData), sizeof(uint64_t), 1, bw) != 1) goto error;
157 if(bwRead((void*) &(bw->hdr->sumSquared), sizeof(uint64_t), 1, bw) != 1) goto error;
158 }
159
160 //In case of uncompressed remote files, let the IO functions know to request larger chunks
161 bw->URL->isCompressed = (bw->hdr->bufSize > 0)?1:0;
162
163 return;
164
165 error:
166 bwHdrDestroy(bw->hdr);
167 fprintf(stderr, "[bwHdrRead] There was an error while reading in the header!\n");
168 bw->hdr = NULL;
169 }
170
destroyChromList(chromList_t * cl)171 static void destroyChromList(chromList_t *cl) {
172 uint32_t i;
173 if(!cl) return;
174 if(cl->nKeys && cl->chrom) {
175 for(i=0; i<cl->nKeys; i++) {
176 if(cl->chrom[i]) free(cl->chrom[i]);
177 }
178 }
179 if(cl->chrom) free(cl->chrom);
180 if(cl->len) free(cl->len);
181 free(cl);
182 }
183
readChromLeaf(bigWigFile_t * bw,chromList_t * cl,uint32_t valueSize)184 static uint64_t readChromLeaf(bigWigFile_t *bw, chromList_t *cl, uint32_t valueSize) {
185 uint16_t nVals, i;
186 uint32_t idx;
187 char *chrom = NULL;
188
189 if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
190 chrom = calloc(valueSize+1, sizeof(char));
191 if(!chrom) return -1;
192
193 for(i=0; i<nVals; i++) {
194 if(bwRead((void*) chrom, sizeof(char), valueSize, bw) != valueSize) goto error;
195 if(bwRead((void*) &idx, sizeof(uint32_t), 1, bw) != 1) goto error;
196 if(bwRead((void*) &(cl->len[idx]), sizeof(uint32_t), 1, bw) != 1) goto error;
197 cl->chrom[idx] = strdup(chrom);
198 if(!(cl->chrom[idx])) goto error;
199 }
200
201 free(chrom);
202 return nVals;
203
204 error:
205 free(chrom);
206 return -1;
207 }
208
readChromNonLeaf(bigWigFile_t * bw,chromList_t * cl,uint32_t keySize)209 static uint64_t readChromNonLeaf(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize) {
210 uint64_t offset , rv = 0, previous;
211 uint16_t nVals, i;
212
213 if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
214
215 previous = bwTell(bw) + keySize;
216 for(i=0; i<nVals; i++) {
217 if(bwSetPos(bw, previous)) return -1;
218 if(bwRead((void*) &offset, sizeof(uint64_t), 1, bw) != 1) return -1;
219 if(bwSetPos(bw, offset)) return -1;
220 rv += readChromBlock(bw, cl, keySize);
221 previous += 8 + keySize;
222 }
223
224 return rv;
225 }
226
readChromBlock(bigWigFile_t * bw,chromList_t * cl,uint32_t keySize)227 static uint64_t readChromBlock(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize) {
228 uint8_t isLeaf, padding;
229
230 if(bwRead((void*) &isLeaf, sizeof(uint8_t), 1, bw) != 1) return -1;
231 if(bwRead((void*) &padding, sizeof(uint8_t), 1, bw) != 1) return -1;
232
233 if(isLeaf) {
234 return readChromLeaf(bw, cl, keySize);
235 } else { //I've never actually observed one of these, which is good since they're pointless
236 return readChromNonLeaf(bw, cl, keySize);
237 }
238 }
239
bwReadChromList(bigWigFile_t * bw)240 static chromList_t *bwReadChromList(bigWigFile_t *bw) {
241 chromList_t *cl = NULL;
242 uint32_t magic, keySize, valueSize, itemsPerBlock;
243 uint64_t rv, itemCount;
244 if(bw->isWrite) return NULL;
245 if(bwSetPos(bw, bw->hdr->ctOffset)) return NULL;
246
247 cl = calloc(1, sizeof(chromList_t));
248 if(!cl) return NULL;
249
250 if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
251 if(magic != CIRTREE_MAGIC) goto error;
252
253 if(bwRead((void*) &itemsPerBlock, sizeof(uint32_t), 1, bw) != 1) goto error;
254 if(bwRead((void*) &keySize, sizeof(uint32_t), 1, bw) != 1) goto error;
255 if(bwRead((void*) &valueSize, sizeof(uint32_t), 1, bw) != 1) goto error;
256 if(bwRead((void*) &itemCount, sizeof(uint64_t), 1, bw) != 1) goto error;
257
258 cl->nKeys = itemCount;
259 cl->chrom = calloc(itemCount, sizeof(char*));
260 cl->len = calloc(itemCount, sizeof(uint32_t));
261 if(!cl->chrom) goto error;
262 if(!cl->len) goto error;
263
264 if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
265 if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
266
267 //Read in the blocks
268 rv = readChromBlock(bw, cl, keySize);
269 if(rv == (uint64_t) -1) goto error;
270 if(rv != itemCount) goto error;
271
272 return cl;
273
274 error:
275 destroyChromList(cl);
276 return NULL;
277 }
278
279 //This is here mostly for convenience
bwDestroyWriteBuffer(bwWriteBuffer_t * wb)280 static void bwDestroyWriteBuffer(bwWriteBuffer_t *wb) {
281 if(wb->p) free(wb->p);
282 if(wb->compressP) free(wb->compressP);
283 if(wb->firstZoomBuffer) free(wb->firstZoomBuffer);
284 if(wb->lastZoomBuffer) free(wb->lastZoomBuffer);
285 if(wb->nNodes) free(wb->nNodes);
286 free(wb);
287 }
288
bwClose(bigWigFile_t * fp)289 void bwClose(bigWigFile_t *fp) {
290 if(!fp) return;
291 if(bwFinalize(fp)) {
292 fprintf(stderr, "[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.\n");
293 }
294 if(fp->URL) urlClose(fp->URL);
295 if(fp->hdr) bwHdrDestroy(fp->hdr);
296 if(fp->cl) destroyChromList(fp->cl);
297 if(fp->idx) bwDestroyIndex(fp->idx);
298 if(fp->writeBuffer) bwDestroyWriteBuffer(fp->writeBuffer);
299 free(fp);
300 }
301
bwIsBigWig(char * fname,CURLcode (* callBack)(CURL *))302 int bwIsBigWig(char *fname, CURLcode (*callBack) (CURL*)) {
303 uint32_t magic = 0;
304 URL_t *URL = NULL;
305
306 URL = urlOpen(fname, *callBack, NULL);
307
308 if(!URL) return 0;
309 if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
310 urlClose(URL);
311 if(magic == BIGWIG_MAGIC) return 1;
312 return 0;
313 }
314
bbGetSQL(bigWigFile_t * bw)315 char *bbGetSQL(bigWigFile_t *bw) {
316 char *o = NULL;
317 uint64_t len;
318 if(!bw->hdr->sqlOffset) return NULL;
319 len = bw->hdr->summaryOffset - bw->hdr->sqlOffset; //This includes the NULL terminator
320 o = malloc(sizeof(char) * len);
321 if(!o) goto error;
322 if(bwSetPos(bw, bw->hdr->sqlOffset)) goto error;
323 if(bwRead((void*) o, len, 1, bw) != 1) goto error;
324 return o;
325
326 error:
327 if(o) free(o);
328 printf("Got an error in bbGetSQL!\n");
329 return NULL;
330 }
331
bbIsBigBed(char * fname,CURLcode (* callBack)(CURL *))332 int bbIsBigBed(char *fname, CURLcode (*callBack) (CURL*)) {
333 uint32_t magic = 0;
334 URL_t *URL = NULL;
335
336 URL = urlOpen(fname, *callBack, NULL);
337
338 if(!URL) return 0;
339 if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
340 urlClose(URL);
341 if(magic == BIGBED_MAGIC) return 1;
342 return 0;
343 }
344
bwOpen(char * fname,CURLcode (* callBack)(CURL *),const char * mode)345 bigWigFile_t *bwOpen(char *fname, CURLcode (*callBack) (CURL*), const char *mode) {
346 bigWigFile_t *bwg = calloc(1, sizeof(bigWigFile_t));
347 if(!bwg) {
348 fprintf(stderr, "[bwOpen] Couldn't allocate space to create the output object!\n");
349 return NULL;
350 }
351 if((!mode) || (strchr(mode, 'w') == NULL)) {
352 bwg->isWrite = 0;
353 bwg->URL = urlOpen(fname, *callBack, NULL);
354 if(!bwg->URL) {
355 fprintf(stderr, "[bwOpen] urlOpen is NULL!\n");
356 goto error;
357 }
358
359 //Attempt to read in the fixed header
360 bwHdrRead(bwg);
361 if(!bwg->hdr) {
362 fprintf(stderr, "[bwOpen] bwg->hdr is NULL!\n");
363 goto error;
364 }
365
366 //Read in the chromosome list
367 bwg->cl = bwReadChromList(bwg);
368 if(!bwg->cl) {
369 fprintf(stderr, "[bwOpen] bwg->cl is NULL (%s)!\n", fname);
370 goto error;
371 }
372
373 //Read in the index
374 if(bwg->hdr->indexOffset) {
375 bwg->idx = bwReadIndex(bwg, 0);
376 if(!bwg->idx) {
377 fprintf(stderr, "[bwOpen] bwg->idx is NULL bwg->hdr->dataOffset 0x%"PRIx64"!\n", bwg->hdr->dataOffset);
378 goto error;
379 }
380 }
381 } else {
382 bwg->isWrite = 1;
383 bwg->URL = urlOpen(fname, NULL, "w+");
384 if(!bwg->URL) goto error;
385 bwg->writeBuffer = calloc(1,sizeof(bwWriteBuffer_t));
386 if(!bwg->writeBuffer) goto error;
387 bwg->writeBuffer->l = 24;
388 }
389
390 return bwg;
391
392 error:
393 bwClose(bwg);
394 return NULL;
395 }
396
bbOpen(char * fname,CURLcode (* callBack)(CURL *))397 bigWigFile_t *bbOpen(char *fname, CURLcode (*callBack) (CURL*)) {
398 bigWigFile_t *bb = calloc(1, sizeof(bigWigFile_t));
399 if(!bb) {
400 fprintf(stderr, "[bbOpen] Couldn't allocate space to create the output object!\n");
401 return NULL;
402 }
403
404 //Set the type to 1 for bigBed
405 bb->type = 1;
406
407 bb->URL = urlOpen(fname, *callBack, NULL);
408 if(!bb->URL) goto error;
409
410 //Attempt to read in the fixed header
411 bwHdrRead(bb);
412 if(!bb->hdr) goto error;
413
414 //Read in the chromosome list
415 bb->cl = bwReadChromList(bb);
416 if(!bb->cl) goto error;
417
418 //Read in the index
419 bb->idx = bwReadIndex(bb, 0);
420 if(!bb->idx) goto error;
421
422 return bb;
423
424 error:
425 bwClose(bb);
426 return NULL;
427 }
428