1 #ifndef LIBBIGWIG_H 2 #define LIBBIGWIG_H 3 4 #include "bigWigIO.h" 5 #include "bwValues.h" 6 #include <inttypes.h> 7 #include <zlib.h> 8 9 #ifdef __cplusplus 10 extern "C" { 11 #endif 12 13 /*! \mainpage libBigWig 14 * 15 * \section Introduction 16 * 17 * libBigWig is a C library for parsing local/remote bigWig and bigBed files. This is similar to Kent's library from UCSC, except 18 * * The license is much more liberal 19 * * This code doesn't call `exit()` on error, thereby killing the calling application. 20 * 21 * External files are accessed using [curl](http://curl.haxx.se/). 22 * 23 * Please submit issues and pull requests [here](https://github.com/dpryan79/libBigWig). 24 * 25 * \section Compilation 26 * 27 * Assuming you already have the curl libraries installed (not just the curl binary!): 28 * 29 * make install prefix=/some/path 30 * 31 * \section Writing bigWig files 32 * 33 * There are three methods for storing values in a bigWig file, further described in the [wiggle format](http://genome.ucsc.edu/goldenpath/help/wiggle.html). The entries within the file are grouped into "blocks" and each such block is limited to storing entries of a single type. So, it is unwise to use a single bedGraph-like endtry followed by a single fixed-step entry followed by a variable-step entry, as that would require three separate blocks, with additional space required for each. 34 * 35 * \section Testing file types 36 * 37 * As of version 0.3.0, libBigWig supports reading bigBed files. If an application needs to support both bigBed and bigWig input, then the `bwIsBigWig` and `bbIsBigBed` functions can be used to determine the file type. These both use the "magic" number at the beginning of the file to determine the file type. 38 * 39 * \section Interval and entry iterators 40 * 41 * As of version 0.3.0, libBigWig supports iterating over intervals in bigWig files and entries in bigBed files. The number of intervals/entries returned with each iteration can be controlled by setting the number of blocks processed in each iteration (intervals and entries are group inside of bigWig and bigBed files into blocks of entries). See `test/testIterator.c` for an example. 42 * 43 * \section Examples 44 * 45 * Please see [README.md](README.md) and the files under `test/` for examples. 46 */ 47 48 49 /*! \file bigWig.h 50 * 51 * These are the functions and structured that should be used by external users. While I don't particularly recommend dealing with some of the structures (e.g., a bigWigHdr_t), they're described here in case you need them. 52 * 53 * BTW, this library doesn't switch endianness as appropriate, since I kind of assume that there's only one type produced these days. 54 */ 55 56 /*! 57 * The library version number 58 */ 59 #define LIBBIGWIG_VERSION 0.4.6 60 61 /*! 62 * If 1, then this library was compiled with remote file support. 63 */ 64 #ifdef NOCURL 65 #define LIBBIGWIG_CURL 0 66 #ifndef CURLTYPE_DEFINED 67 #define CURLTYPE_DEFINED 68 typedef int CURLcode; 69 typedef void CURL; 70 #endif 71 #else 72 #define LIBBIGWIG_CURL 1 73 #endif 74 75 /*! 76 * The magic number of a bigWig file. 77 */ 78 #define BIGWIG_MAGIC 0x888FFC26 79 /*! 80 * The magic number of a bigBed file. 81 */ 82 #define BIGBED_MAGIC 0x8789F2EB 83 /*! 84 * The magic number of a "cirTree" block in a file. 85 */ 86 #define CIRTREE_MAGIC 0x78ca8c91 87 /*! 88 * The magic number of an index block in a file. 89 */ 90 #define IDX_MAGIC 0x2468ace0 91 /*! 92 * The default number of children per block. 93 */ 94 #define DEFAULT_nCHILDREN 64 95 /*! 96 * The default decompression buffer size in bytes. This is used to determin 97 */ 98 #define DEFAULT_BLOCKSIZE 32768 99 100 /*! 101 * An enum that dictates the type of statistic to fetch for a given interval 102 */ 103 enum bwStatsType { 104 doesNotExist = -1, /*!< This does nothing */ 105 mean = 0, /*!< The mean value */ 106 average = 0, /*!< The mean value */ 107 stdev = 1, /*!< The standard deviation of the values */ 108 dev = 1, /*!< The standard deviation of the values */ 109 max = 2, /*!< The maximum value */ 110 min = 3, /*!< The minimum value */ 111 cov = 4, /*!< The number of bases covered */ 112 coverage = 4, /*!<The number of bases covered */ 113 sum = 5 /*!< The sum of per-base values */ 114 }; 115 116 //Should hide this from end users 117 /*! 118 * @brief BigWig files have multiple "zoom" levels, each of which has its own header. This hold those headers 119 * 120 * N.B., there's 4 bytes of padding in the on disk representation of level and dataOffset. 121 */ 122 typedef struct { 123 uint32_t *level; /**<The zoom level, which is an integer starting with 0.*/ 124 //There's 4 bytes of padding between these 125 uint64_t *dataOffset; /**<The offset to the on-disk start of the data. This isn't used currently.*/ 126 uint64_t *indexOffset; /**<The offset to the on-disk start of the index. This *is* used.*/ 127 bwRTree_t **idx; /**<Index for each zoom level. Represented as a tree*/ 128 } bwZoomHdr_t; 129 130 /*! 131 * @brief The header section of a bigWig file. 132 * 133 * Some of the values aren't currently used for anything. Others may optionally not exist. 134 */ 135 typedef struct { 136 uint16_t version; /**<The version information of the file.*/ 137 uint16_t nLevels; /**<The number of "zoom" levels.*/ 138 uint64_t ctOffset; /**<The offset to the on-disk chromosome tree list.*/ 139 uint64_t dataOffset; /**<The on-disk offset to the first block of data.*/ 140 uint64_t indexOffset; /**<The on-disk offset to the data index.*/ 141 uint16_t fieldCount; /**<Total number of fields.*/ 142 uint16_t definedFieldCount; /**<Number of fixed-format BED fields.*/ 143 uint64_t sqlOffset; /**<The on-disk offset to an SQL string. This is unused.*/ 144 uint64_t summaryOffset; /**<If there's a summary, this is the offset to it on the disk.*/ 145 uint32_t bufSize; /**<The compression buffer size (if the data is compressed).*/ 146 uint64_t extensionOffset; /**<Unused*/ 147 bwZoomHdr_t *zoomHdrs; /**<Pointers to the header for each zoom level.*/ 148 //total Summary 149 uint64_t nBasesCovered; /**<The total bases covered in the file.*/ 150 double minVal; /**<The minimum value in the file.*/ 151 double maxVal; /**<The maximum value in the file.*/ 152 double sumData; /**<The sum of all values in the file.*/ 153 double sumSquared; /**<The sum of the squared values in the file.*/ 154 } bigWigHdr_t; 155 156 //Should probably replace this with a hash 157 /*! 158 * @brief Holds the chromosomes and their lengths 159 */ 160 typedef struct { 161 int64_t nKeys; /**<The number of chromosomes */ 162 char **chrom; /**<A list of null terminated chromosomes */ 163 uint32_t *len; /**<The lengths of each chromosome */ 164 } chromList_t; 165 166 //TODO remove from bigWig.h 167 /// @cond SKIP 168 typedef struct bwLL bwLL; 169 struct bwLL { 170 bwRTreeNode_t *node; 171 struct bwLL *next; 172 }; 173 typedef struct bwZoomBuffer_t bwZoomBuffer_t; 174 struct bwZoomBuffer_t { //each individual entry takes 32 bytes 175 void *p; 176 uint32_t l, m; 177 struct bwZoomBuffer_t *next; 178 }; 179 /// @endcond 180 181 /*! 182 * @brief This is only needed for writing bigWig files (and won't be created otherwise) 183 * This should be removed from bigWig.h 184 */ 185 typedef struct { 186 uint64_t nBlocks; /**<The number of blocks written*/ 187 uint32_t blockSize; /**<The maximum number of children*/ 188 uint64_t nEntries; /**<The number of entries processed. This is used for the first contig and determining how the zoom levels are computed*/ 189 uint64_t runningWidthSum; /**<The running sum of the entry widths for the first contig (again, used for the first contig and computing zoom levels)*/ 190 uint32_t tid; /**<The current TID that's being processed*/ 191 uint32_t start; /**<The start position of the block*/ 192 uint32_t end; /**<The end position of the block*/ 193 uint32_t span; /**<The span of each entry, if applicable*/ 194 uint32_t step; /**<The step size, if applicable*/ 195 uint8_t ltype; /**<The type of the last entry added*/ 196 uint32_t l; /**<The current size of p. This and the type determine the number of items held*/ 197 void *p; /**<A buffer of size hdr->bufSize*/ 198 bwLL *firstIndexNode; /**<The first index node in the linked list*/ 199 bwLL *currentIndexNode; /**<The last index node in a linked list*/ 200 bwZoomBuffer_t **firstZoomBuffer; /**<The first node in a linked list of leaf nodes*/ 201 bwZoomBuffer_t **lastZoomBuffer; /**<The last node in a linked list of leaf nodes*/ 202 uint64_t *nNodes; /**<The number of leaf nodes per zoom level, useful for determining duplicate levels*/ 203 uLongf compressPsz; /**<The size of the compression buffer*/ 204 void *compressP; /**<A compressed buffer of size compressPsz*/ 205 } bwWriteBuffer_t; 206 207 /*! 208 * @brief A structure that holds everything needed to access a bigWig file. 209 */ 210 typedef struct { 211 URL_t *URL; /**<A pointer that can handle both local and remote files (including a buffer if needed).*/ 212 bigWigHdr_t *hdr; /**<The file header.*/ 213 chromList_t *cl; /**<A list of chromosome names (the order is the ID).*/ 214 bwRTree_t *idx; /**<The index for the full dataset.*/ 215 bwWriteBuffer_t *writeBuffer; /**<The buffer used for writing.*/ 216 int isWrite; /**<0: Opened for reading, 1: Opened for writing.*/ 217 int type; /**<0: bigWig, 1: bigBed.*/ 218 } bigWigFile_t; 219 220 /*! 221 * @brief Holds interval:value associations 222 */ 223 typedef struct { 224 uint32_t l; /**<Number of intervals held*/ 225 uint32_t m; /**<Maximum number of values/intervals the struct can hold*/ 226 uint32_t *start; /**<The start positions (0-based half open)*/ 227 uint32_t *end; /**<The end positions (0-based half open)*/ 228 float *value; /**<The value associated with each position*/ 229 } bwOverlappingIntervals_t; 230 231 /*! 232 * @brief Holds interval:str associations 233 */ 234 typedef struct { 235 uint32_t l; /**<Number of intervals held*/ 236 uint32_t m; /**<Maximum number of values/intervals the struct can hold*/ 237 uint32_t *start; /**<The start positions (0-based half open)*/ 238 uint32_t *end; /**<The end positions (0-based half open)*/ 239 char **str; /**<The strings associated with a given entry.*/ 240 } bbOverlappingEntries_t; 241 242 /*! 243 * @brief A structure to hold iterations 244 * One of intervals and entries should be used to access records from bigWig or bigBed files, respectively. 245 */ 246 typedef struct { 247 bigWigFile_t *bw; /**<Pointer to the bigWig/bigBed file.*/ 248 uint32_t tid; /**<The contig/chromosome ID.*/ 249 uint32_t start; /**<Start position of the query interval.*/ 250 uint32_t end; /**<End position of the query interval.*/ 251 uint64_t offset; /**<Offset into the blocks.*/ 252 uint32_t blocksPerIteration; /**<Number of blocks to use per iteration.*/ 253 int withString; /**<For bigBed entries, whether to return the string with the entries.*/ 254 void *blocks; /**<Overlapping blocks.*/ 255 bwOverlappingIntervals_t *intervals; /**<Overlapping intervals (or NULL).*/ 256 bbOverlappingEntries_t *entries; /**<Overlapping entries (or NULL).*/ 257 void *data; /**<Points to either intervals or entries. If there are no further intervals/entries, then this is NULL. Use this to test for whether to continue iterating.*/ 258 } bwOverlapIterator_t; 259 260 /*! 261 * @brief Initializes curl and global variables. This *MUST* be called before other functions (at least if you want to connect to remote files). 262 * For remote file, curl must be initialized and regions of a file read into an internal buffer. If the buffer is too small then an excessive number of connections will be made. If the buffer is too large than more data than required is fetched. 128KiB is likely sufficient for most needs. 263 * @param bufSize The internal buffer size used for remote connection. 264 * @see bwCleanup 265 * @return 0 on success and 1 on error. 266 */ 267 int bwInit(size_t bufSize); 268 269 /*! 270 * @brief The counterpart to bwInit, this cleans up curl. 271 * @see bwInit 272 */ 273 void bwCleanup(void); 274 275 /*! 276 * @brief Determine if a file is a bigWig file. 277 * This function will quickly check either local or remote files to determine if they appear to be valid bigWig files. This can be determined by reading the first 4 bytes of the file. 278 * @param fname The file name or URL (http, https, and ftp are supported) 279 * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example. 280 * @return 1 if the file appears to be bigWig, otherwise 0. 281 */ 282 int bwIsBigWig(char *fname, CURLcode (*callBack)(CURL*)); 283 284 /*! 285 * @brief Determine is a file is a bigBed file. 286 * This function will quickly check either local or remote files to determine if they appear to be valid bigWig files. This can be determined by reading the first 4 bytes of the file. 287 * @param fname The file name or URL (http, https, and ftp are supported) 288 * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example. 289 * @return 1 if the file appears to be bigWig, otherwise 0. 290 */ 291 int bbIsBigBed(char *fname, CURLcode (*callBack)(CURL*)); 292 293 /*! 294 * @brief Opens a local or remote bigWig file. 295 * This will open a local or remote bigWig file. Writing of local bigWig files is also supported. 296 * @param fname The file name or URL (http, https, and ftp are supported) 297 * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example. 298 * @param mode The mode, by default "r". Both local and remote files can be read, but only local files can be written. For files being written the callback function is ignored. If and only if the mode contains "w" will the file be opened for writing (in all other cases the file will be opened for reading. 299 * @return A bigWigFile_t * on success and NULL on error. 300 */ 301 bigWigFile_t *bwOpen(char *fname, CURLcode (*callBack)(CURL*), const char* mode); 302 303 /*! 304 * @brief Opens a local or remote bigBed file. 305 * This will open a local or remote bigBed file. Note that this file format can only be read and NOT written! 306 * @param fname The file name or URL (http, https, and ftp are supported) 307 * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example. 308 * @return A bigWigFile_t * on success and NULL on error. 309 */ 310 bigWigFile_t *bbOpen(char *fname, CURLcode (*callBack)(CURL*)); 311 312 /*! 313 * @brief Returns a string containing the SQL entry (or NULL). 314 * The "auto SQL" field contains the names and value types of the entries in 315 * each bigBed entry. If you need to parse a particular value out of each entry, 316 * then you'll need to first parse this. 317 * @param fp The file pointer to a valid bigWigFile_t 318 * @return A char *, which you MUST free! 319 */ 320 char *bbGetSQL(bigWigFile_t *fp); 321 322 /*! 323 * @brief Closes a bigWigFile_t and frees up allocated memory 324 * This closes both bigWig and bigBed files. 325 * @param fp The file pointer. 326 */ 327 void bwClose(bigWigFile_t *fp); 328 329 /******************************************************************************* 330 * 331 * The following are in bwStats.c 332 * 333 *******************************************************************************/ 334 335 /*! 336 * @brief Converts between chromosome name and ID 337 * 338 * @param fp A valid bigWigFile_t pointer 339 * @param chrom A chromosome name 340 * @return An ID, -1 will be returned on error (note that this is an unsigned value, so that's ~4 billion. bigWig/bigBed files can't store that many chromosomes anyway. 341 */ 342 uint32_t bwGetTid(bigWigFile_t *fp, char *chrom); 343 344 /*! 345 * @brief Frees space allocated by `bwGetOverlappingIntervals` 346 * @param o A valid `bwOverlappingIntervals_t` pointer. 347 * @see bwGetOverlappingIntervals 348 */ 349 void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o); 350 351 /*! 352 * @brief Frees space allocated by `bbGetOverlappingEntries` 353 * @param o A valid `bbOverlappingEntries_t` pointer. 354 * @see bbGetOverlappingEntries 355 */ 356 void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o); 357 358 /*! 359 * @brief Return bigWig entries overlapping an interval. 360 * Find all bigWig entries overlapping a range and returns them, including their associated values. 361 * @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file! 362 * @param chrom A valid chromosome name. 363 * @param start The start position of the interval. This is 0-based half open, so 0 is the first base. 364 * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99. 365 * @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and intervals. 366 * @see bwOverlappingIntervals_t 367 * @see bwDestroyOverlappingIntervals 368 * @see bwGetValues 369 */ 370 bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end); 371 372 /*! 373 * @brief Return bigBed entries overlapping an interval. 374 * Find all bigBed entries overlapping a range and returns them. 375 * @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file! 376 * @param chrom A valid chromosome name. 377 * @param start The start position of the interval. This is 0-based half open, so 0 is the first base. 378 * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99. 379 * @param withString If not 0, return the string associated with each entry in the output. If 0, there are no associated strings returned. This is useful if the only information needed are the locations of the entries, which require significantly less memory. 380 * @return NULL on error or no overlapping values, otherwise a `bbOverlappingEntries_t *` holding the intervals and (optionally) the associated string. 381 * @see bbOverlappingEntries_t 382 * @see bbDestroyOverlappingEntries 383 */ 384 bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString); 385 386 /*! 387 * @brief Creates an iterator over intervals in a bigWig file 388 * Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`. 389 * Intervals are in the `intervals` member and `data` can be used to determine when to end iteration. 390 * @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file! 391 * @param chrom A valid chromosome name. 392 * @param start The start position of the interval. This is 0-based half open, so 0 is the first base. 393 * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99. 394 * @param blocksPerIteration The number of blocks (internal groupings of intervals in bigWig files) to return per iteration. 395 * @return NULL on error, otherwise a bwOverlapIterator_t pointer 396 * @see bwOverlapIterator_t 397 * @see bwIteratorNext 398 * @see bwIteratorDestroy 399 */ 400 bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration); 401 402 /*! 403 * @brief Creates an iterator over entries in a bigBed file 404 * Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`. 405 * Entries are in the `entries` member and `data` can be used to determine when to end iteration. 406 * @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file! 407 * @param chrom A valid chromosome name. 408 * @param start The start position of the interval. This is 0-based half open, so 0 is the first base. 409 * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99. 410 * @param withString Whether the returned entries should include their associated strings. 411 * @param blocksPerIteration The number of blocks (internal groupings of entries in bigBed files) to return per iteration. 412 * @return NULL on error, otherwise a bwOverlapIterator_t pointer 413 * @see bbGetOverlappingEntries 414 * @see bwOverlapIterator_t 415 * @see bwIteratorNext 416 * @see bwIteratorDestroy 417 */ 418 bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration); 419 420 /*! 421 * @brief Traverses to the entries/intervals in the next group of blocks. 422 * @param iter A bwOverlapIterator_t pointer that is updated (or destroyed on error) 423 * @return NULL on error, otherwise a bwOverlapIterator_t pointer with the intervals or entries from the next set of blocks. 424 * @see bwOverlapIterator_t 425 * @see bwIteratorDestroy 426 */ 427 bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter); 428 429 /*! 430 * @brief Destroys a bwOverlapIterator_t 431 * @param iter The bwOverlapIterator_t that should be destroyed 432 */ 433 void bwIteratorDestroy(bwOverlapIterator_t *iter); 434 435 /*! 436 * @brief Return all per-base bigWig values in a given interval. 437 * Given an interval (e.g., chr1:0-100), return the value at each position in a bigWig file. Positions without associated values are suppressed by default, but may be returned if `includeNA` is not 0. 438 * @param fp A valid bigWigFile_t pointer. 439 * @param chrom A valid chromosome name. 440 * @param start The start position of the interval. This is 0-based half open, so 0 is the first base. 441 * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99. 442 * @param includeNA If not 0, report NA values as well (as NA). 443 * @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and positions. 444 * @see bwOverlappingIntervals_t 445 * @see bwDestroyOverlappingIntervals 446 * @see bwGetOverlappingIntervals 447 */ 448 bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA); 449 450 /*! 451 * @brief Determines per-interval bigWig statistics 452 * Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals. 453 * @param fp The file from which to extract statistics. 454 * @param chrom A valid chromosome name. 455 * @param start The start position of the interval. This is 0-based half open, so 0 is the first base. 456 * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99. 457 * @param nBins The number of bins within the interval to calculate statistics for. 458 * @param type The type of statistic. 459 * @see bwStatsType 460 * @return A pointer to an array of double precission floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows. 461 */ 462 double *bwStats(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type); 463 464 /*! 465 * @brief Determines per-interval bigWig statistics 466 * Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals. The difference with bwStats is that zoom levels are never used. 467 * @param fp The file from which to extract statistics. 468 * @param chrom A valid chromosome name. 469 * @param start The start position of the interval. This is 0-based half open, so 0 is the first base. 470 * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99. 471 * @param nBins The number of bins within the interval to calculate statistics for. 472 * @param type The type of statistic. 473 * @see bwStatsType 474 * @return A pointer to an array of double precission floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows. 475 */ 476 double *bwStatsFromFull(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type); 477 478 //Writer functions 479 480 /*! 481 * @brief Create a largely empty bigWig header 482 * Every bigWig file has a header, this creates the template for one. It also takes care of space allocation in the output write buffer. 483 * @param fp The bigWigFile_t* that you want to write to. 484 * @param maxZooms The maximum number of zoom levels. If you specify 0 then there will be no zoom levels. A value <0 or > 65535 will result in a maximum of 10. 485 * @return 0 on success. 486 */ 487 int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms); 488 489 /*! 490 * @brief Take a list of chromosome names and lengths and return a pointer to a chromList_t 491 * This MUST be run before `bwWriteHdr()`. Note that the input is NOT free()d! 492 * @param chroms A list of chromosomes. 493 * @param lengths The length of each chromosome. 494 * @param n The number of chromosomes (thus, the length of `chroms` and `lengths`) 495 * @return A pointer to a chromList_t or NULL on error. 496 */ 497 chromList_t *bwCreateChromList(char **chroms, uint32_t *lengths, int64_t n); 498 499 /*! 500 * @brief Write a the header to a bigWig file. 501 * You must have already opened the output file, created a header and a chromosome list. 502 * @param bw The output bigWigFile_t pointer. 503 * @see bwCreateHdr 504 * @see bwCreateChromList 505 */ 506 int bwWriteHdr(bigWigFile_t *bw); 507 508 /*! 509 * @brief Write a new block of bedGraph-like intervals to a bigWig file 510 * Adds entries of the form: 511 * chromosome start end value 512 * to the file. These will always be added in a new block, so you may have previously used a different storage type. 513 * 514 * In general it's more efficient to use the bwAppend* functions, but then you MUST know that the previously written block is of the same type. In other words, you can only use bwAppendIntervals() after bwAddIntervals() or a previous bwAppendIntervals(). 515 * @param fp The output file pointer. 516 * @param chrom A list of chromosomes, of length `n`. 517 * @param start A list of start positions of length`n`. 518 * @param end A list of end positions of length`n`. 519 * @param values A list of values of length`n`. 520 * @param n The length of the aforementioned lists. 521 * @return 0 on success and another value on error. 522 * @see bwAppendIntervals 523 */ 524 int bwAddIntervals(bigWigFile_t *fp, char **chrom, uint32_t *start, uint32_t *end, float *values, uint32_t n); 525 526 /*! 527 * @brief Append bedGraph-like intervals to a previous block of bedGraph-like intervals in a bigWig file. 528 * If you have previously used bwAddIntervals() then this will append additional entries into the previous block (or start a new one if needed). 529 * @param fp The output file pointer. 530 * @param start A list of start positions of length`n`. 531 * @param end A list of end positions of length`n`. 532 * @param values A list of values of length`n`. 533 * @param n The length of the aforementioned lists. 534 * @return 0 on success and another value on error. 535 * @warning Do NOT use this after `bwAddIntervalSpanSteps()`, `bwAppendIntervalSpanSteps()`, `bwAddIntervalSpanSteps()`, or `bwAppendIntervalSpanSteps()`. 536 * @see bwAddIntervals 537 */ 538 int bwAppendIntervals(bigWigFile_t *fp, uint32_t *start, uint32_t *end, float *values, uint32_t n); 539 540 /*! 541 * @brief Add a new block of variable-step entries to a bigWig file 542 * Adds entries for the form 543 * chromosome start value 544 * to the file. Each block of such entries has an associated "span", so each value describes the region chromosome:start-(start+span) 545 * 546 * This will always start a new block of values. 547 * @param fp The output file pointer. 548 * @param chrom A list of chromosomes, of length `n`. 549 * @param start A list of start positions of length`n`. 550 * @param span The span of each entry (the must all be the same). 551 * @param values A list of values of length`n`. 552 * @param n The length of the aforementioned lists. 553 * @return 0 on success and another value on error. 554 * @see bwAppendIntervalSpans 555 */ 556 int bwAddIntervalSpans(bigWigFile_t *fp, char *chrom, uint32_t *start, uint32_t span, float *values, uint32_t n); 557 558 /*! 559 * @brief Append to a previous block of variable-step entries. 560 * If you previously used `bwAddIntervalSpans()`, this will continue appending more values to the block(s) it created. 561 * @param fp The output file pointer. 562 * @param start A list of start positions of length`n`. 563 * @param values A list of values of length`n`. 564 * @param n The length of the aforementioned lists. 565 * @return 0 on success and another value on error. 566 * @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpanSteps()` or `bwAppendIntervalSpanSteps()` 567 * @see bwAddIntervalSpans 568 */ 569 int bwAppendIntervalSpans(bigWigFile_t *fp, uint32_t *start, float *values, uint32_t n); 570 571 /*! 572 * @brief Add a new block of fixed-step entries to a bigWig file 573 * Adds entries for the form 574 * value 575 * to the file. Each block of such entries has an associated "span", "step", chromosome and start position. See the wiggle format for more details. 576 * 577 * This will always start a new block of values. 578 * @param fp The output file pointer. 579 * @param chrom The chromosome that the entries describe. 580 * @param start The starting position of the block of entries. 581 * @param span The span of each entry (i.e., the number of bases it describes). 582 * @param step The step between entry start positions. 583 * @param values A list of values of length`n`. 584 * @param n The length of the aforementioned lists. 585 * @return 0 on success and another value on error. 586 * @see bwAddIntervalSpanSteps 587 */ 588 int bwAddIntervalSpanSteps(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t span, uint32_t step, float *values, uint32_t n); 589 590 /*! 591 * @brief Append to a previous block of fixed-step entries. 592 * If you previously used `bwAddIntervalSpanSteps()`, this will continue appending more values to the block(s) it created. 593 * @param fp The output file pointer. 594 * @param values A list of values of length`n`. 595 * @param n The length of the aforementioned lists. 596 * @return 0 on success and another value on error. 597 * @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpans()` or `bwAppendIntervalSpans()` 598 * @see bwAddIntervalSpanSteps 599 */ 600 int bwAppendIntervalSpanSteps(bigWigFile_t *fp, float *values, uint32_t n); 601 602 #ifdef __cplusplus 603 } 604 #endif 605 606 #endif // LIBBIGWIG_H 607