1 #include <Python.h>
2 #include <structmember.h>
3 #include "bigWig.h"
4 
5 #define pyBigWigVersion "0.3.18"
6 
7 typedef struct {
8     PyObject_HEAD
9     bigWigFile_t *bw;
10     int32_t lastTid; //The TID of the last written entry (or -1)
11     uint32_t lastSpan; //The span of the last written entry (if applicable)
12     uint32_t lastStep; //The step of the last written entry (if applicable)
13     uint32_t lastStart; //The next start position (if applicable)
14     int lastType; //The type of the last written entry
15 } pyBigWigFile_t;
16 
17 static PyObject *pyBwOpen(PyObject *self, PyObject *pyFname);
18 static PyObject *pyBwEnter(pyBigWigFile_t *self, PyObject *args);
19 static PyObject *pyBwClose(pyBigWigFile_t *pybw, PyObject *args);
20 static PyObject *pyBwGetChroms(pyBigWigFile_t *pybw, PyObject *args);
21 static PyObject *pyIsBigWig(pyBigWigFile_t *pybw, PyObject *args);
22 static PyObject *pyIsBigBed(pyBigWigFile_t *pybw, PyObject *args);
23 static PyObject *pyBwGetStats(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
24 #ifdef WITHNUMPY
25 static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
26 #else
27 static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args);
28 #endif
29 static PyObject *pyBwGetIntervals(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
30 	static PyObject *pyBBGetEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
31 static PyObject *pyBBGetSQL(pyBigWigFile_t *pybw, PyObject *args);
32 static PyObject *pyBwGetHeader(pyBigWigFile_t *pybw, PyObject *args);
33 static PyObject *pyBwAddHeader(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
34 static PyObject *pyBwAddEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds);
35 static void pyBwDealloc(pyBigWigFile_t *pybw);
36 
37 //The function types aren't actually correct...
38 static PyMethodDef bwMethods[] = {
39     {"open", (PyCFunction)pyBwOpen, METH_VARARGS,
40 "Open a bigWig or bigBed file. For remote files, give a URL starting with HTTP,\n\
41 FTP, or HTTPS.\n\
42 \n\
43 Optional arguments:\n\
44     mode: An optional mode. The default is 'r', which opens a file for reading.\n\
45           If you specify a mode containing 'w' then you'll instead open a file\n\
46           for writing. Note that you then need to add an appropriate header\n\
47           before use. For bigBed files, only reading is supported.\n\
48 \n\
49 Returns:\n\
50    A bigWigFile object on success, otherwise None.\n\
51 \n\
52 Arguments:\n\
53     file: The name of a bigWig file.\n\
54 \n\
55 >>> import pyBigWig\n\
56 >>> bw = pyBigWig.open(\"some_file.bw\")\n"},
57     {NULL, NULL, 0, NULL}
58 };
59 
60 static PyMethodDef bwObjMethods[] = {
61     {"header", (PyCFunction)pyBwGetHeader, METH_VARARGS,
62 "Returns the header of a bigWig file. This contains information such as: \n\
63   * The version number of the file ('version').\n\
64   * The number of zoom levels ('nLevels').\n\
65   * The number of bases covered ('nBasesCovered').\n\
66   * The minimum value ('minVal').\n\
67   * The maximum value ('maxVal').\n\
68   * The sum of all values ('sumData').\n\
69   * The sum of the square of all values ('sumSquared').\n\
70 These are returned as a dictionary.\n\
71 \n\
72 >>> import pyBigWig\n\
73 >>> bw = pyBigWig.open(\"some_file.bw\")\n\
74 >>> bw.header()\n\
75 {'maxVal': 2L, 'sumData': 272L, 'minVal': 0L, 'version': 4L,\n\
76 'sumSquared': 500L, 'nLevels': 1L, 'nBasesCovered': 154L}\n\
77 >>> bw.close()\n"},
78     {"close", (PyCFunction)pyBwClose, METH_VARARGS,
79 "Close a bigWig file.\n\
80 \n\
81 >>> import pyBigWig\n\
82 >>> bw = pyBigWig.open(\"some_file.bw\")\n\
83 >>> bw.close()\n"},
84     {"isBigWig", (PyCFunction)pyIsBigWig, METH_VARARGS,
85 "Returns True if the object is a bigWig file (otherwise False).\n\
86 >>> import pyBigWig\n\
87 >>> bw = pyBigWig.open(\"some_file.bigWig\")\n\
88 >>> bw.isBigWig()\n\
89 True\n\
90 >>> bw.isBigBed()\n\
91 False\n"},
92     {"isBigBed", (PyCFunction)pyIsBigBed, METH_VARARGS,
93 "Returns true if the object is a bigBed file (otherwise False).\n\
94 >>> import pyBigWig\n\
95 >>> bw = pyBigWig.open(\"some_file.bigBed\")\n\
96 >>> bw.isBigWig()\n\
97 False\n\
98 >>> bw.isBigBed()\n\
99 True\n"},
100     {"chroms", (PyCFunction)pyBwGetChroms, METH_VARARGS,
101 "Return a chromosome: length dictionary. The order is typically not\n\
102 alphabetical and the lengths are long (thus the 'L' suffix).\n\
103 \n\
104 Optional arguments:\n\
105     chrom: An optional chromosome name\n\
106 \n\
107 Returns:\n\
108     A list of chromosome lengths or a dictionary of them.\n\
109 \n\
110 >>> import pyBigWig\n\
111 >>> bw = pyBigWig.open(\"test/test.bw\")\n\
112 >>> bw.chroms()\n\
113 {'1': 195471971L, '10': 130694993L}\n\
114 \n\
115 Note that you may optionally supply a specific chromosome:\n\
116 \n\
117 >>> bw.chroms(\"chr1\")\n\
118 195471971L\n\
119 \n\
120 If you specify a non-existant chromosome then no output is produced:\n\
121 \n\
122 >>> bw.chroms(\"foo\")\n\
123 >>>\n"},
124     {"stats", (PyCFunction)pyBwGetStats, METH_VARARGS|METH_KEYWORDS,
125 "Return summary statistics for a given range. On error, this function throws a\n\
126 runtime exception.\n\
127 \n\
128 Positional arguments:\n\
129     chr:   Chromosome name\n\
130 \n\
131 Keyword arguments:\n\
132     start: Starting position\n\
133     end:   Ending position\n\
134     type:  Summary type (mean, min, max, coverage, std), default 'mean'.\n\
135     nBins: Number of bins into which the range should be divided before\n\
136            computing summary statistics. The default is 1.\n\
137     exact: By default, pyBigWig uses the same method as Kent's tools from UCSC\n\
138            for computing statistics. This means that 'zoom levels' may be\n\
139            used, rather than actual values (please see the pyBigWig repository\n\
140            on github for further information on this). To avoid this behaviour,\n\
141            simply specify 'exact=True'. Note that values returned will then\n\
142            differ from what UCSC, IGV, and similar other tools will report.\n\
143 \n\
144 >>> import pyBigWig\n\
145 >>> bw = pyBigWig.open(\"test/test.bw\")\n\
146 >>> bw.stats(\"1\", 0, 3)\n\
147 [0.2000000054637591]\n\
148 \n\
149 This is the mean value over the range 1:1-3 (in 1-based coordinates). If\n\
150 the start and end positions aren't given the entire chromosome is used.\n\
151 There are additional optional parameters 'type' and 'nBins'. 'type'\n\
152 specifies the type of summary information to calculate, which is 'mean'\n\
153 by default. Other possibilites for 'type' are: 'min' (minimum value),\n\
154 'max' (maximum value), 'coverage' (number of covered bases), and 'std'\n\
155  (standard deviation). 'nBins' defines how many bins the region will be\n\
156  divided into and defaults to 1.\n\
157 \n\
158 >>> bw.stats(\"1\", 0, 3, type=\"min\")\n\
159 [0.10000000149011612]\n\
160 >>> bw.stats(\"1\", 0, 3, type=\"max\")\n\
161 [0.30000001192092896]\n\
162 >>> bw.stats(\"1\", 0, 10, type=\"coverage\")\n\
163 [0.30000000000000004]\n\
164 >>> bw.stats(\"1\", 0, 3, type=\"std\")\n\
165 [0.10000000521540645]\n\
166 >>> bw.stats(\"1\",99,200, type=\"max\", nBins=2)\n\
167 [1.399999976158142, 1.5]\n"},
168 #ifdef WITHNUMPY
169     {"values", (PyCFunction)pyBwGetValues, METH_VARARGS|METH_KEYWORDS,
170 "Retrieve the value stored for each position (or None). On error, a runtime\n\
171 exception is thrown.\n\
172 \n\
173 Positional arguments:\n\
174     chr:   Chromosome name\n\
175     start: Starting position\n\
176     end:   Ending position\n\
177 \n\
178 Optional arguments:\n\
179     numpy: If True, return a numpy array rather than a list of values. This\n\
180            is generally more memory efficient. Note that this option is only\n\
181            available if pyBigWig was installed with numpy support (check the\n\
182            pyBigWig.numpy() function).\n\
183 \n\
184 >>> import pyBigWig\n\
185 >>> bw = pyBigWig.open(\"test/test.bw\")\n\
186 >>> bw.values(\"1\", 0, 3)\n\
187 [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\
188 \n\
189 The length of the returned list will always match the length of the\n\
190 range. Any uncovered bases will have a value of None.\n\
191 \n\
192 >>> bw.values(\"1\", 0, 4)\n\
193 [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\
194 \n"},
195 #else
196     {"values", (PyCFunction)pyBwGetValues, METH_VARARGS,
197 "Retrieve the value stored for each position (or None). On error, a runtime\n\
198 exception is thrown.\n\
199 \n\
200 Positional arguments:\n\
201     chr:   Chromosome name\n\
202     start: Starting position\n\
203     end:   Ending position\n\
204 \n\
205 >>> import pyBigWig\n\
206 >>> bw = pyBigWig.open(\"test/test.bw\")\n\
207 >>> bw.values(\"1\", 0, 3)\n\
208 [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\
209 \n\
210 The length of the returned list will always match the length of the\n\
211 range. Any uncovered bases will have a value of None.\n\
212 \n\
213 >>> bw.values(\"1\", 0, 4)\n\
214 [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\
215 \n"},
216 #endif
217     {"intervals", (PyCFunction)pyBwGetIntervals, METH_VARARGS|METH_KEYWORDS,
218 "Retrieve each interval covering a part of a chromosome/region. On error, a\n\
219 runtime exception is thrown.\n\
220 \n\
221 Positional arguments:\n\
222     chr:   Chromosome name\n\
223 \n\
224 Keyword arguments:\n\
225     start: Starting position\n\
226     end:   Ending position\n\
227 \n\
228 If start and end aren't specified, the entire chromosome is returned.\n\
229 The returned object is a tuple containing the starting position, end\n\
230 position, and value of each interval in the file. As with all bigWig\n\
231 positions, those returned are 0-based half-open (e.g., a start of 0 and\n\
232 end of 10 specifies the first 10 positions).\n\
233 \n\
234 >>> import pyBigWig\n\
235 >>> bw = pyBigWig.open(\"test/test.bw\")\n\
236 >>> bw.intervals(\"1\", 0, 3)\n\
237 ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224),\n\
238  (2, 3, 0.30000001192092896))\n\
239 >>> bw.close()"},
240     {"entries", (PyCFunction) pyBBGetEntries, METH_VARARGS|METH_KEYWORDS,
241 "Retrieves entries from a bigBed file. These can optionally contain the string\n\
242 associated with each entry.\n\
243 \n\
244 Positional arguments:\n\
245     chr:   Chromosome name\n\
246 \n\
247 Keyword arguments:\n\
248     start: Starting position\n\
249     end:   Ending position\n\
250     withString: If True, return the string associated with each entry.\n\
251            Default True.\n\
252 \n\
253 The output is a list of tuples, with members \"start\", \"end\", and \"string\"\n\
254 (assuming \"withString=True\"). If there are no overlapping entries, then None\n\
255 is returned.\n\
256 \n\
257 >>> import pyBigWig\n\
258 >>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\
259 >>> print(bw.entries('chr1',10000000,10020000))\n\
260 [(10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'),\n\
261 (10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'),\n\
262 (10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399')]\n\
263 >>> print(bb.entries(\"chr1\", 10000000, 10000500, withString=False))\n\
264 [(10009333, 10009640), (10014007, 10014289), (10014373, 10024307)]\n\
265 \n"},
266     {"SQL", (PyCFunction) pyBBGetSQL, METH_VARARGS,
267 "Returns the SQL string associated with the file. This is typically useful for\n\
268 bigBed files, where this determines what is held in each column of the text\n\
269 string associated with entries.\n\
270 \n\
271 If there is no SQL string, then None is returned.\n\
272 \n\
273 >>> import pyBigWig\n\
274 >>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\
275 >>> print(bb.SQL())\n\
276 table RnaElements\n\
277 \"BED6 + 3 scores for RNA Elements data \"\n\
278     (\n\
279     string chrom;      \"Reference sequence chromosome or scaffold\"\n\
280     uint   chromStart; \"Start position in chromosome\"\n\
281     uint   chromEnd;   \"End position in chromosome\"\n\
282     string name;       \"Name of item\"\n\
283     uint   score;      \"Normalized score from 0-1000\"\n\
284     char[1] strand;    \"+ or - or . for unknown\"\n\
285     float level;       \"Expression level such as RPKM or FPKM. Set to -1 for no data.\"\n\
286     float signif;      \"Statistical significance such as IDR. Set to -1 for no data.\"\n\
287     uint score2;       \"Additional measurement/count e.g. number of reads. Set to 0 for no data.\"\n\
288     )\n\
289 \n\
290 \n"},
291     {"addHeader", (PyCFunction)pyBwAddHeader, METH_VARARGS|METH_KEYWORDS,
292 "Adds a header to a file opened for writing. This MUST be called before adding\n\
293 any entries. On error, a runtime exception is thrown.\n\
294 \n\
295 Positional arguments:\n\
296     cl:    A chromosome list, of the form (('chr1', 1000), ('chr2', 2000), ...).\n\
297            In other words, each element of the list is a tuple containing a\n\
298            chromosome name and its associated length.\n\
299 \n\
300 Keyword arguments:\n\
301     maxZooms:  The maximum number of zoom levels. The value must be >=0. The\n\
302                default is 10.\n\
303 \n\
304 >>> import pyBigWig\n\
305 >>> import tempfile\n\
306 >>> import os\n\
307 >>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\
308 >>> oname = ofile.name\n\
309 >>> ofile.close()\n\
310 >>> bw = pyBigWig.open(oname, 'w')\n\
311 >>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)], maxZooms=0)\n\
312 >>> bw.close()\n\
313 >>> os.remove(oname)"},
314     {"addEntries", (PyCFunction)pyBwAddEntries, METH_VARARGS|METH_KEYWORDS,
315 "Adds one or more entries to a bigWig file. This returns nothing, but throws a\n\
316 runtime exception on error.\n\
317 \n\
318 This function always accepts an optional 'validate' option. If set to 'True',\n\
319 which is the default, the input entries are checked to ensure that they come\n\
320 after previously entered entries. This comes with significant overhead, so if\n\
321 this is instead 'False' then this validation is not performed.\n\
322 \n\
323 There are three manners in which entries can be stored in bigWig files.\n\
324 \n\
325 \n\
326 bedGraph-like entries (12 bytes each):\n\
327 \n\
328 Positional arguments:\n\
329     chrom:  A list of chromosome. These MUST match those added with addHeader().\n\
330     starts: A list of start positions. These are 0-based.\n\
331 \n\
332 Keyword arguments:\n\
333     ends:   A list of end positions. These are 0-based half open, so a start of\n\
334             0 and end of 10 specifies the first 10 bases.\n\
335     values: A list of values.\n\
336 \n\
337 \n\
338 Variable-step entries (8 bytes each):\n\
339 \n\
340 Positional arguments:\n\
341     chrom:  A chromosome name. This MUST match one added with addHeader().\n\
342     starts: A list of start positions. These are 0-based.\n\
343 \n\
344 Keyword arguments:\n\
345     values: A list of values.\n\
346     span:   A span width. This is an integer value and specifies how many bases\n\
347             each entry describes. An entry with a start position of 0 and a span\n\
348             of 10 describes the first 10 bases.\n\
349 \n\
350 \n\
351 Fixed-step entries (4 bytes each):\n\
352 \n\
353 Positional arguments:\n\
354     chrom:  A chromosome name. This MUST match one added with addHeader().\n\
355     starts: A start position. These are 0-based. The start position of each\n\
356             entry starts 'step' after the previous and describes 'span' bases.\n\
357 \n\
358 Keyword arguments:\n\
359     values: A list of values.\n\
360     span:   A span width. This is an integer value and specifies how many bases\n\
361             each entry describes. An entry with a start position of 0 and a span\n\
362             of 10 describes the first 10 bases.\n\
363     step:   A step width. Each subsequent entry begins this number of bases\n\
364             after the previous. So if the first entry has a start of 0 and step\n\
365             or 30, the second entry will start at 30.\n\
366 \n\
367 >>> import pyBigWig\n\
368 >>> import tempfile\n\
369 >>> import os\n\
370 >>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\
371 >>> oname = ofile.name\n\
372 >>> ofile.close()\n\
373 >>> bw = pyBigWig.open(oname, 'w')\n\
374 >>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)])\n\
375 >>> #Add some bedGraph-like entries\n\
376 >>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0])\n\
377 >>> #Variable-step entries, the span 500-520, 600-620, and 635-655\n\
378 >>> bw.addEntries(\"1\", [500, 600, 635], values=[-2.0, 150.0, 25.0], span=20)\n\
379 >>> #Fixed-step entries, the bases described are 900-920, 930-950, and 960-980\n\
380 >>> bw.addEntries(\"1\", 900, values=[-5.0, -20.0, 25.0], span=20, step=30)\n\
381 >>> #This only works due to using validate=False. Obviously the file is then corrupt.\n\
382 >>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0], validate=False)\n\
383 >>> bw.close()\n\
384 >>> os.remove(oname)"},
385     {"__enter__", (PyCFunction)pyBwEnter, METH_NOARGS, NULL},
386     {"__exit__", (PyCFunction)pyBwClose, METH_VARARGS, NULL},
387     {NULL, NULL, 0, NULL}
388 };
389 
390 #if PY_MAJOR_VERSION >= 3
391 struct pyBigWigmodule_state {
392     PyObject *error;
393 };
394 
395 #define GETSTATE(m) ((struct pyBigWigmodule_state*)PyModule_GetState(m))
396 
397 static PyModuleDef pyBigWigmodule = {
398     PyModuleDef_HEAD_INIT,
399     "pyBigWig",
400     "A python module for bigWig file access",
401     -1,
402     bwMethods,
403     NULL, NULL, NULL, NULL
404 };
405 #endif
406 
407 //Should set tp_dealloc, tp_print, tp_repr, tp_str, tp_members
408 static PyTypeObject bigWigFile = {
409 #if PY_MAJOR_VERSION >= 3
410     PyVarObject_HEAD_INIT(NULL, 0)
411 #else
412     PyObject_HEAD_INIT(NULL)
413     0,              /*ob_size*/
414 #endif
415     "pyBigWig.bigWigFile",     /*tp_name*/
416     sizeof(pyBigWigFile_t),      /*tp_basicsize*/
417     0,                         /*tp_itemsize*/
418     (destructor)pyBwDealloc,     /*tp_dealloc*/
419     0,                         /*tp_print*/
420     0,                         /*tp_getattr*/
421     0,                         /*tp_setattr*/
422     0,                         /*tp_compare*/
423     0,                         /*tp_repr*/
424     0,                         /*tp_as_number*/
425     0,                         /*tp_as_sequence*/
426     0,                         /*tp_as_mapping*/
427     0,                         /*tp_hash*/
428     0,                         /*tp_call*/
429     0,                         /*tp_str*/
430     PyObject_GenericGetAttr, /*tp_getattro*/
431     PyObject_GenericSetAttr, /*tp_setattro*/
432     0,                         /*tp_as_buffer*/
433 #if PY_MAJOR_VERSION >= 3
434     Py_TPFLAGS_DEFAULT,        /*tp_flags*/
435 #else
436     Py_TPFLAGS_HAVE_CLASS,     /*tp_flags*/
437 #endif
438     "bigWig File",             /*tp_doc*/
439     0,                         /*tp_traverse*/
440     0,                         /*tp_clear*/
441     0,                         /*tp_richcompare*/
442     0,                         /*tp_weaklistoffset*/
443     0,                         /*tp_iter*/
444     0,                         /*tp_iternext*/
445     bwObjMethods,                 /*tp_methods*/
446     0,                         /*tp_members*/
447     0,                         /*tp_getset*/
448     0,                         /*tp_base*/
449     0,                         /*tp_dict*/
450     0,                         /*tp_descr_get*/
451     0,                         /*tp_descr_set*/
452     0,                         /*tp_dictoffset*/
453     0,                         /*tp_init*/
454     0,                         /*tp_alloc*/
455     0,                         /*tp_new*/
456     0,0,0,0,0,0
457 };
458