1 #ifndef LIBBIGWIG_H
2 #define LIBBIGWIG_H
3 
4 #include "bigWigIO.h"
5 #include "bwValues.h"
6 #include <inttypes.h>
7 #include <zlib.h>
8 
9 #ifdef __cplusplus
10 extern "C" {
11 #endif
12 
13 /*! \mainpage libBigWig
14  *
15  * \section Introduction
16  *
17  * libBigWig is a C library for parsing local/remote bigWig and bigBed files. This is similar to Kent's library from UCSC, except
18  *  * The license is much more liberal
19  *  * This code doesn't call `exit()` on error, which would kill the calling application.
20  *
21  * External files are accessed using [curl](http://curl.haxx.se/).
22  *
23  * Please submit issues and pull requests [here](https://github.com/dpryan79/libBigWig).
24  *
25  * \section Compilation
26  *
27  * Assuming you already have the curl libraries installed (not just the curl binary!):
28  *
29  *     make install prefix=/some/path
30  *
31  * \section Writing bigWig files
32  *
33  * There are three methods for storing values in a bigWig file, further described in the [wiggle format](http://genome.ucsc.edu/goldenpath/help/wiggle.html). The entries within the file are grouped into "blocks" and each such block is limited to storing entries of a single type. So, it is unwise to use a single bedGraph-like entry followed by a single fixed-step entry followed by a variable-step entry, as that would require three separate blocks, with additional space required for each.
34  *
35  * \section Testing file types
36  *
37  * As of version 0.3.0, libBigWig supports reading bigBed files. If an application needs to support both bigBed and bigWig input, then the `bwIsBigWig` and `bbIsBigBed` functions can be used to determine the file type. These both use the "magic" number at the beginning of the file to determine the file type.
38  *
39  * \section Interval and entry iterators
40  *
41  * As of version 0.3.0, libBigWig supports iterating over intervals in bigWig files and entries in bigBed files. The number of intervals/entries returned with each iteration can be controlled by setting the number of blocks processed in each iteration (intervals and entries are grouped inside of bigWig and bigBed files into blocks of entries). See `test/testIterator.c` for an example.
42  *
43  * \section Examples
44  *
45  * Please see [README.md](README.md) and the files under `test/` for examples.
46  */
47 
48 
49 /*! \file bigWig.h
50  *
51  * These are the functions and structures that should be used by external users. While I don't particularly recommend dealing with some of the structures (e.g., a bigWigHdr_t), they're described here in case you need them.
52  *
53  * BTW, this library doesn't switch endianness as appropriate, since I kind of assume that there's only one type produced these days.
54  */
55 
56 /*!
57  * The library version number. NOTE: this expands to the bare token `0.4.6`, which is neither a string literal nor a valid numeric constant, so it is only usable after preprocessor stringification.
58  */
59 #define LIBBIGWIG_VERSION 0.4.6
60 
61 /*!
62  * If 1, then this library was compiled with remote file support. If `NOCURL` is defined this is 0, and placeholder `CURLcode`/`CURL` typedefs are declared (once, guarded by `CURLTYPE_DEFINED`) so that prototypes taking a curl callback still compile.
63  */
64 #ifdef NOCURL
65 #define LIBBIGWIG_CURL 0
66 #ifndef CURLTYPE_DEFINED
67 #define CURLTYPE_DEFINED
68 typedef int CURLcode;
69 typedef void CURL;
70 #endif /* CURLTYPE_DEFINED */
71 #else /* !NOCURL */
72 #define LIBBIGWIG_CURL 1
73 #endif /* NOCURL */
74 
75 /*!
76  * The magic number of a bigWig file (the value stored at the beginning of the file).
77  */
78 #define BIGWIG_MAGIC 0x888FFC26
79 /*!
80  * The magic number of a bigBed file (the value stored at the beginning of the file).
81  */
82 #define BIGBED_MAGIC 0x8789F2EB
83 /*!
84  * The magic number of a "cirTree" block in a file.
85  */
86 #define CIRTREE_MAGIC 0x78ca8c91
87 /*!
88  * The magic number of an index block in a file.
89  */
90 #define IDX_MAGIC 0x2468ace0
91 /*!
92  * The default number of children per block.
93  */
94 #define DEFAULT_nCHILDREN 64
95 /*!
96  * The default decompression buffer size in bytes. NOTE(review): the original comment ended mid-sentence ("This is used to determin"); what this size determines is not stated in this header.
97  */
98 #define DEFAULT_BLOCKSIZE 32768
99 
100 /*!
101  * An enum that dictates the type of statistic to fetch for a given interval. Several names are aliases that share one value (`mean`/`average`, `stdev`/`dev`, `cov`/`coverage`).
102  */
103 enum bwStatsType {
104     doesNotExist = -1, /*!< This does nothing */
105     mean = 0, /*!< The mean value */
106     average = 0, /*!< The mean value (alias of `mean`) */
107     stdev = 1, /*!< The standard deviation of the values */
108     dev = 1, /*!< The standard deviation of the values (alias of `stdev`) */
109     max = 2, /*!< The maximum value */
110     min = 3, /*!< The minimum value */
111     cov = 4, /*!< The number of bases covered */
112     coverage = 4, /*!< The number of bases covered (alias of `cov`) */
113     sum = 5 /*!< The sum of per-base values */
114 };
115 
116 //Should hide this from end users
117 /*!
118  * @brief BigWig files have multiple "zoom" levels, each of which has its own header. This holds those headers.
119  *
120  * N.B., in the on-disk representation there are 4 bytes of padding between level and dataOffset.
121  */
122 typedef struct {
123     uint32_t *level; /**<The zoom level, which is an integer starting with 0.*/
124     //There are 4 bytes of padding between these (on disk)
125     uint64_t *dataOffset; /**<The offset to the on-disk start of the data. This isn't used currently.*/
126     uint64_t *indexOffset; /**<The offset to the on-disk start of the index. This *is* used.*/
127     bwRTree_t **idx; /**<Index for each zoom level. Represented as a tree.*/
128 } bwZoomHdr_t;
129 
130 /*!
131  * @brief The header section of a bigWig file.
132  *
133  * Some of the values aren't currently used for anything. Others may optionally not exist.
134  */
135 typedef struct {
136     uint16_t version; /**<The version information of the file.*/
137     uint16_t nLevels; /**<The number of "zoom" levels.*/
138     uint64_t ctOffset; /**<The offset to the on-disk chromosome tree list.*/
139     uint64_t dataOffset; /**<The on-disk offset to the first block of data.*/
140     uint64_t indexOffset; /**<The on-disk offset to the data index.*/
141     uint16_t fieldCount; /**<Total number of fields.*/
142     uint16_t definedFieldCount; /**<Number of fixed-format BED fields.*/
143     uint64_t sqlOffset; /**<The on-disk offset to an SQL string. This is unused. NOTE(review): bbGetSQL() is declared in this header, so "unused" may be out of date -- confirm.*/
144     uint64_t summaryOffset; /**<If there's a summary, this is the offset to it on the disk.*/
145     uint32_t bufSize; /**<The compression buffer size (if the data is compressed).*/
146     uint64_t extensionOffset; /**<Unused*/
147     bwZoomHdr_t *zoomHdrs; /**<The headers of the zoom levels (see bwZoomHdr_t).*/
148     //Total summary: the remaining members are whole-file statistics
149     uint64_t nBasesCovered; /**<The total bases covered in the file.*/
150     double minVal; /**<The minimum value in the file.*/
151     double maxVal; /**<The maximum value in the file.*/
152     double sumData; /**<The sum of all values in the file.*/
153     double sumSquared; /**<The sum of the squared values in the file.*/
154 } bigWigHdr_t;
155 
156 //Should probably replace this with a hash
157 /*!
158  * @brief Holds the chromosome names and their lengths
159  */
160 typedef struct {
161     int64_t nKeys; /**<The number of chromosomes */
162     char **chrom; /**<A list of null-terminated chromosome names */
163     uint32_t *len; /**<The length of each chromosome */
164 } chromList_t;
165 
166 //TODO remove from bigWig.h
167 /// @cond SKIP
168 typedef struct bwLL bwLL; //Linked-list element; bwWriteBuffer_t uses it for its list of index nodes
169 struct bwLL {
170     bwRTreeNode_t *node; //The bwRTreeNode_t held by this list element
171     struct bwLL *next; //The following list element
172 };
173 typedef struct bwZoomBuffer_t bwZoomBuffer_t; //Linked-list element; bwWriteBuffer_t uses it for its lists of leaf nodes
174 struct bwZoomBuffer_t { //each individual entry takes 32 bytes
175     void *p; //Presumably the storage for the entries -- TODO confirm
176     uint32_t l, m; //Presumably the used and maximum sizes of p (cf. l/m in bwOverlappingIntervals_t) -- TODO confirm
177     struct bwZoomBuffer_t *next; //The following buffer in the linked list
178 };
179 /// @endcond
180 
181 /*!
182  * @brief This is only needed for writing bigWig files (and won't be created otherwise).
183  * TODO: this should be moved out of the public bigWig.h header.
184  */
185 typedef struct {
186     uint64_t nBlocks; /**<The number of blocks written*/
187     uint32_t blockSize; /**<The maximum number of children*/
188     uint64_t nEntries; /**<The number of entries processed. This is used for the first contig and determining how the zoom levels are computed*/
189     uint64_t runningWidthSum; /**<The running sum of the entry widths for the first contig (again, used for the first contig and computing zoom levels)*/
190     uint32_t tid; /**<The current TID that's being processed*/
191     uint32_t start; /**<The start position of the block*/
192     uint32_t end; /**<The end position of the block*/
193     uint32_t span; /**<The span of each entry, if applicable*/
194     uint32_t step; /**<The step size, if applicable*/
195     uint8_t ltype; /**<The type of the last entry added*/
196     uint32_t l; /**<The current size of p. This and the type determine the number of items held*/
197     void *p; /**<A buffer of size hdr->bufSize*/
198     bwLL *firstIndexNode; /**<The first index node in the linked list*/
199     bwLL *currentIndexNode; /**<The last index node in the linked list*/
200     bwZoomBuffer_t **firstZoomBuffer; /**<The first node in a linked list of leaf nodes (pointer-to-pointer: presumably one list per zoom level -- confirm)*/
201     bwZoomBuffer_t **lastZoomBuffer; /**<The last node in a linked list of leaf nodes (pointer-to-pointer: presumably one list per zoom level -- confirm)*/
202     uint64_t *nNodes; /**<The number of leaf nodes per zoom level, useful for determining duplicate levels*/
203     uLongf compressPsz; /**<The size of the compression buffer*/
204     void *compressP; /**<A compressed buffer of size compressPsz*/
205 } bwWriteBuffer_t;
206 
207 /*!
208  * @brief A structure that holds everything needed to access a bigWig file (bigBed files use the same structure, see `type`).
209  */
210 typedef struct {
211     URL_t *URL; /**<A pointer that can handle both local and remote files (including a buffer if needed).*/
212     bigWigHdr_t *hdr; /**<The file header.*/
213     chromList_t *cl; /**<A list of chromosome names (the order is the ID).*/
214     bwRTree_t *idx; /**<The index for the full dataset.*/
215     bwWriteBuffer_t *writeBuffer; /**<The buffer used for writing (only created when writing, see bwWriteBuffer_t).*/
216     int isWrite; /**<0: Opened for reading, 1: Opened for writing.*/
217     int type; /**<0: bigWig, 1: bigBed.*/
218 } bigWigFile_t;
219 
220 /*!
221  * @brief Holds interval:value associations. Free with bwDestroyOverlappingIntervals().
222  */
223 typedef struct {
224     uint32_t l; /**<Number of intervals held*/
225     uint32_t m; /**<Maximum number of values/intervals the struct can hold*/
226     uint32_t *start; /**<The start positions (0-based half open)*/
227     uint32_t *end; /**<The end positions (0-based half open)*/
228     float *value; /**<The value associated with each interval*/
229 } bwOverlappingIntervals_t;
230 
231 /*!
232  * @brief Holds interval:str associations. Free with bbDestroyOverlappingEntries().
233  */
234 typedef struct {
235     uint32_t l; /**<Number of intervals held*/
236     uint32_t m; /**<Maximum number of values/intervals the struct can hold*/
237     uint32_t *start; /**<The start positions (0-based half open)*/
238     uint32_t *end; /**<The end positions (0-based half open)*/
239     char **str; /**<The strings associated with a given entry (no strings are returned when `withString` was 0, see bbGetOverlappingEntries).*/
240 } bbOverlappingEntries_t;
241 
242 /*!
243  * @brief A structure that holds the state of an iteration
244  * Use `intervals` to access records from a bigWig file and `entries` to access records from a bigBed file.
245  */
246 typedef struct {
247     bigWigFile_t *bw; /**<Pointer to the bigWig/bigBed file.*/
248     uint32_t tid; /**<The contig/chromosome ID.*/
249     uint32_t start; /**<Start position of the query interval.*/
250     uint32_t end; /**<End position of the query interval.*/
251     uint64_t offset; /**<Offset into the blocks.*/
252     uint32_t blocksPerIteration; /**<Number of blocks to use per iteration.*/
253     int withString; /**<For bigBed entries, whether to return the string with the entries.*/
254     void *blocks; /**<Overlapping blocks.*/
255     bwOverlappingIntervals_t *intervals; /**<Overlapping intervals (or NULL).*/
256     bbOverlappingEntries_t *entries; /**<Overlapping entries (or NULL).*/
257     void *data; /**<Points to either intervals or entries. If there are no further intervals/entries, then this is NULL. Use this to test whether to continue iterating.*/
258 } bwOverlapIterator_t;
259 
260 /*!
261  * @brief Initializes curl and global variables. This *MUST* be called before other functions (at least if you want to connect to remote files).
262  * For remote files, curl must be initialized and regions of a file read into an internal buffer. If the buffer is too small then an excessive number of connections will be made. If the buffer is too large then more data than required is fetched. 128KiB is likely sufficient for most needs.
263  * @param bufSize The internal buffer size used for remote connections.
264  * @see bwCleanup
265  * @return 0 on success and 1 on error.
266  */
267 int bwInit(size_t bufSize);
268 
269 /*!
270  * @brief The counterpart to bwInit(); this cleans up curl.
271  * @see bwInit
272  */
273 void bwCleanup(void);
274 
275 /*!
276  * @brief Determine if a file is a bigWig file.
277  * This function will quickly check either local or remote files to determine if they appear to be valid bigWig files. This can be determined by reading the first 4 bytes of the file (the "magic" number).
278  * @param fname The file name or URL (http, https, and ftp are supported)
279  * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
280  * @return 1 if the file appears to be bigWig, otherwise 0.
281  */
282 int bwIsBigWig(char *fname, CURLcode (*callBack)(CURL*));
283 
284 /*!
285  * @brief Determine if a file is a bigBed file.
286  * This function will quickly check either local or remote files to determine if they appear to be valid bigBed files. This can be determined by reading the first 4 bytes of the file (the "magic" number).
287  * @param fname The file name or URL (http, https, and ftp are supported)
288  * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
289  * @return 1 if the file appears to be bigBed, otherwise 0.
290  */
291 int bbIsBigBed(char *fname, CURLcode (*callBack)(CURL*));
292 
293 /*!
294  * @brief Opens a local or remote bigWig file.
295  * This will open a local or remote bigWig file. Writing of local bigWig files is also supported.
296  * @param fname The file name or URL (http, https, and ftp are supported)
297  * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
298  * @param mode The mode, by default "r". Both local and remote files can be read, but only local files can be written. For files being written the callback function is ignored. If and only if the mode contains "w" will the file be opened for writing (in all other cases the file will be opened for reading).
299  * @return A bigWigFile_t * on success and NULL on error.
300  */
301 bigWigFile_t *bwOpen(char *fname, CURLcode (*callBack)(CURL*), const char* mode);
302 
303 /*!
304  * @brief Opens a local or remote bigBed file.
305  * This will open a local or remote bigBed file. Note that this file format can only be read and NOT written!
306  * @param fname The file name or URL (http, https, and ftp are supported)
307  * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
308  * @return A bigWigFile_t * on success and NULL on error. Close it with bwClose(), which handles both bigWig and bigBed files.
309  */
310 bigWigFile_t *bbOpen(char *fname, CURLcode (*callBack)(CURL*));
311 
312 /*!
313  * @brief Returns a string containing the SQL entry (or NULL).
314  * The "auto SQL" field contains the names and value types of the fields in
315  * each bigBed entry. If you need to parse a particular value out of each entry,
316  * then you'll need to first parse this.
317  * @param fp A pointer to a valid bigWigFile_t
318  * @return A char *, which you MUST free, or NULL.
319  */
320 char *bbGetSQL(bigWigFile_t *fp);
321 
322 /*!
323  * @brief Closes a bigWigFile_t and frees up allocated memory
324  * This closes both bigWig and bigBed files.
325  * @param fp The file pointer (a bigWigFile_t *, the type returned by bwOpen() and bbOpen()).
326  */
327 void bwClose(bigWigFile_t *fp);
328 
329 /*******************************************************************************
330 *
331 * The following are in bwStats.c
332 *
333 *******************************************************************************/
334 
335 /*!
336  * @brief Converts a chromosome name to its ID
337  *
338  * @param fp A valid bigWigFile_t pointer
339  * @param chrom A chromosome name
340  * @return An ID; -1 will be returned on error (note that this is an unsigned value, so that's ~4 billion). bigWig/bigBed files can't store that many chromosomes anyway.
341  */
342 uint32_t bwGetTid(bigWigFile_t *fp, char *chrom);
343 
344 /*!
345  * @brief Frees space allocated by `bwGetOverlappingIntervals` (or by `bwGetValues`, which returns the same type).
346  * @param o A valid `bwOverlappingIntervals_t` pointer.
347  * @see bwGetOverlappingIntervals
348  */
349 void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o);
350 
351 /*!
352  * @brief Frees space allocated by `bbGetOverlappingEntries`.
353  * @param o A valid `bbOverlappingEntries_t` pointer.
354  * @see bbGetOverlappingEntries
355  */
356 void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o);
357 
358 /*!
359  * @brief Return bigWig entries overlapping an interval.
360  * Finds all bigWig entries overlapping a range and returns them, including their associated values.
361  * @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file!
362  * @param chrom A valid chromosome name.
363  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
364  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
365  * @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and intervals (free it with `bwDestroyOverlappingIntervals`).
366  * @see bwOverlappingIntervals_t
367  * @see bwDestroyOverlappingIntervals
368  * @see bwGetValues
369  */
370 bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end);
371 
372 /*!
373  * @brief Return bigBed entries overlapping an interval.
374  * Finds all bigBed entries overlapping a range and returns them.
375  * @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file!
376  * @param chrom A valid chromosome name.
377  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
378  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
379  * @param withString If not 0, return the string associated with each entry in the output. If 0, there are no associated strings returned. This is useful if the only information needed is the locations of the entries, which requires significantly less memory.
380  * @return NULL on error or no overlapping values, otherwise a `bbOverlappingEntries_t *` holding the intervals and (optionally) the associated string (free it with `bbDestroyOverlappingEntries`).
381  * @see bbOverlappingEntries_t
382  * @see bbDestroyOverlappingEntries
383  */
384 bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString);
385 
386 /*!
387  * @brief Creates an iterator over intervals in a bigWig file
388  * Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`.
389  * Intervals are in the `intervals` member and `data` can be used to determine when to end iteration (it is NULL once there are no further intervals).
390  * @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file!
391  * @param chrom A valid chromosome name.
392  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
393  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
394  * @param blocksPerIteration The number of blocks (internal groupings of intervals in bigWig files) to return per iteration.
395  * @return NULL on error, otherwise a bwOverlapIterator_t pointer
396  * @see bwOverlapIterator_t
397  * @see bwIteratorNext
398  * @see bwIteratorDestroy
399  */
400 bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration);
401 
402 /*!
403  * @brief Creates an iterator over entries in a bigBed file
404  * Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`.
405  * Entries are in the `entries` member and `data` can be used to determine when to end iteration (it is NULL once there are no further entries).
406  * @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file!
407  * @param chrom A valid chromosome name.
408  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
409  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
410  * @param withString Whether the returned entries should include their associated strings.
411  * @param blocksPerIteration The number of blocks (internal groupings of entries in bigBed files) to return per iteration.
412  * @return NULL on error, otherwise a bwOverlapIterator_t pointer
413  * @see bbGetOverlappingEntries
414  * @see bwOverlapIterator_t
415  * @see bwIteratorNext
416  * @see bwIteratorDestroy
417  */
418 bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration);
419 
420 /*!
421  * @brief Advances to the entries/intervals in the next group of blocks.
422  * @param iter A bwOverlapIterator_t pointer that is updated (or destroyed on error)
423  * @return NULL on error, otherwise a bwOverlapIterator_t pointer with the intervals or entries from the next set of blocks. Its `data` member is NULL when there are no further intervals/entries.
424  * @see bwOverlapIterator_t
425  * @see bwIteratorDestroy
426  */
427 bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter);
428 
429 /*!
430  * @brief Destroys a bwOverlapIterator_t (as created by `bwOverlappingIntervalsIterator()` or `bbOverlappingEntriesIterator()`).
431  * @param iter The bwOverlapIterator_t that should be destroyed
432  */
433 void bwIteratorDestroy(bwOverlapIterator_t *iter);
434 
435 /*!
436  * @brief Return all per-base bigWig values in a given interval.
437  * Given an interval (e.g., chr1:0-100), return the value at each position in a bigWig file. Positions without associated values are suppressed when `includeNA` is 0, but are returned if `includeNA` is not 0.
438  * @param fp A valid bigWigFile_t pointer.
439  * @param chrom A valid chromosome name.
440  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
441  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
442  * @param includeNA If not 0, report NA values as well (as NA).
443  * @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and positions.
444  * @see bwOverlappingIntervals_t
445  * @see bwDestroyOverlappingIntervals
446  * @see bwGetOverlappingIntervals
447  */
448 bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA);
449 
450 /*!
451  * @brief Determines per-interval bigWig statistics
452  * Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals.
453  * @param fp The file from which to extract statistics.
454  * @param chrom A valid chromosome name.
455  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
456  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
457  * @param nBins The number of bins within the interval to calculate statistics for.
458  * @param type The type of statistic.
459  * @see bwStatsType
460  * @return A pointer to an array of double precision floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows. NOTE(review): presumably one value per bin, owned by the caller -- confirm against bwStats.c.
461  */
462 double *bwStats(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
463 
464 /*!
465  * @brief Determines per-interval bigWig statistics without using zoom levels
466  * Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals. The difference from bwStats is that zoom levels are never used.
467  * @param fp The file from which to extract statistics.
468  * @param chrom A valid chromosome name.
469  * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
470  * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
471  * @param nBins The number of bins within the interval to calculate statistics for.
472  * @param type The type of statistic.
473  * @see bwStatsType
474  * @return A pointer to an array of double precision floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows. NOTE(review): presumably one value per bin, owned by the caller -- confirm against bwStats.c.
475  */
476 double *bwStatsFromFull(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
477 
478 //Writer functions
479 
480 /*!
481  * @brief Create a largely empty bigWig header
482  * Every bigWig file has a header; this creates the template for one. It also takes care of space allocation in the output write buffer.
483  * @param fp The bigWigFile_t* that you want to write to.
484  * @param maxZooms The maximum number of zoom levels. If you specify 0 then there will be no zoom levels. A value <0 or > 65535 will result in a maximum of 10.
485  * @return 0 on success (presumably another value on error, as for the other writer functions -- TODO confirm).
486  */
487 int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms);
488 
489 /*!
490  * @brief Take a list of chromosome names and lengths and return a pointer to a chromList_t
491  * This MUST be run before `bwWriteHdr()`. Note that the input is NOT free()d!
492  * @param chroms A list of chromosome names.
493  * @param lengths The length of each chromosome.
494  * @param n The number of chromosomes (thus, the length of `chroms` and `lengths`)
495  * @return A pointer to a chromList_t or NULL on error.
496  */
497 chromList_t *bwCreateChromList(char **chroms, uint32_t *lengths, int64_t n);
498 
499 /*!
500  * @brief Write the header to a bigWig file.
501  * You must have already opened the output file, created a header and a chromosome list.
502  * @param bw The output bigWigFile_t pointer.
503  * @return NOTE(review): not documented in the original; presumably 0 on success and another value on error, like the other writer functions -- confirm.
504  * @see bwCreateHdr, bwCreateChromList
505  */
506 int bwWriteHdr(bigWigFile_t *bw);
507 
508 /*!
509  * @brief Write a new block of bedGraph-like intervals to a bigWig file
510  * Adds entries of the form:
511  * chromosome	start	end	value
512  * to the file. These will always be added in a new block, so you may have previously used a different storage type.
513  *
514  * In general it's more efficient to use the bwAppend* functions, but then you MUST know that the previously written block is of the same type. In other words, you can only use bwAppendIntervals() after bwAddIntervals() or a previous bwAppendIntervals().
515  * @param fp The output file pointer.
516  * @param chrom A list of chromosomes, of length `n`.
517  * @param start A list of start positions of length `n`.
518  * @param end A list of end positions of length `n`.
519  * @param values A list of values of length `n`.
520  * @param n The length of the aforementioned lists.
521  * @return 0 on success and another value on error.
522  * @see bwAppendIntervals
523  */
524 int bwAddIntervals(bigWigFile_t *fp, char **chrom, uint32_t *start, uint32_t *end, float *values, uint32_t n);
525 
526 /*!
527  * @brief Append bedGraph-like intervals to a previous block of bedGraph-like intervals in a bigWig file.
528  * If you have previously used bwAddIntervals() then this will append additional entries into the previous block (or start a new one if needed).
529  * @param fp The output file pointer.
530  * @param start A list of start positions of length `n`.
531  * @param end A list of end positions of length `n`.
532  * @param values A list of values of length `n`.
533  * @param n The length of the aforementioned lists.
534  * @return 0 on success and another value on error.
535  * @warning Do NOT use this after `bwAddIntervalSpans()`, `bwAppendIntervalSpans()`, `bwAddIntervalSpanSteps()`, or `bwAppendIntervalSpanSteps()`.
536  * @see bwAddIntervals
537  */
538 int bwAppendIntervals(bigWigFile_t *fp, uint32_t *start, uint32_t *end, float *values, uint32_t n);
539 
540 /*!
541  * @brief Add a new block of variable-step entries to a bigWig file
542  * Adds entries of the form
543  * chromosome	start	value
544  * to the file. Each block of such entries has an associated "span", so each value describes the region chromosome:start-(start+span)
545  *
546  * This will always start a new block of values.
547  * @param fp The output file pointer.
548  * @param chrom The chromosome that the entries describe (a single name, not a list: the parameter is a `char *`).
549  * @param start A list of start positions of length `n`.
550  * @param span The span of each entry (they must all be the same).
551  * @param values A list of values of length `n`.
552  * @param n The length of the aforementioned lists.
553  * @return 0 on success and another value on error.
554  * @see bwAppendIntervalSpans
555  */
556 int bwAddIntervalSpans(bigWigFile_t *fp, char *chrom, uint32_t *start, uint32_t span, float *values, uint32_t n);
557 
558 /*!
559  * @brief Append to a previous block of variable-step entries.
560  * If you previously used `bwAddIntervalSpans()`, this will continue appending more values to the block(s) it created.
561  * @param fp The output file pointer.
562  * @param start A list of start positions of length `n`.
563  * @param values A list of values of length `n`.
564  * @param n The length of the aforementioned lists.
565  * @return 0 on success and another value on error.
566  * @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpanSteps()` or `bwAppendIntervalSpanSteps()`.
567  * @see bwAddIntervalSpans
568  */
569 int bwAppendIntervalSpans(bigWigFile_t *fp, uint32_t *start, float *values, uint32_t n);
570 
571 /*!
572  * @brief Add a new block of fixed-step entries to a bigWig file
573  * Adds entries of the form
574  * value
575  * to the file. Each block of such entries has an associated "span", "step", chromosome and start position. See the wiggle format for more details.
576  *
577  * This will always start a new block of values.
578  * @param fp The output file pointer.
579  * @param chrom The chromosome that the entries describe.
580  * @param start The starting position of the block of entries.
581  * @param span The span of each entry (i.e., the number of bases it describes).
582  * @param step The step between entry start positions.
583  * @param values A list of values of length `n`.
584  * @param n The length of the aforementioned list.
585  * @return 0 on success and another value on error.
586  * @see bwAppendIntervalSpanSteps
587  */
588 int bwAddIntervalSpanSteps(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t span, uint32_t step, float *values, uint32_t n);
589 
590 /*!
591  * @brief Append to a previous block of fixed-step entries.
592  * If you previously used `bwAddIntervalSpanSteps()`, this will continue appending more values to the block(s) it created.
593  * @param fp The output file pointer.
594  * @param values A list of values of length `n`.
595  * @param n The length of the aforementioned list.
596  * @return 0 on success and another value on error.
597  * @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpans()` or `bwAppendIntervalSpans()`.
598  * @see bwAddIntervalSpanSteps
599  */
600 int bwAppendIntervalSpanSteps(bigWigFile_t *fp, float *values, uint32_t n);
601 
602 #ifdef __cplusplus
603 }
604 #endif
605 
606 #endif // LIBBIGWIG_H
607