1 #include <Python.h> 2 #include <structmember.h> 3 #include "bigWig.h" 4 5 #define pyBigWigVersion "0.3.18" 6 7 typedef struct { 8 PyObject_HEAD 9 bigWigFile_t *bw; 10 int32_t lastTid; //The TID of the last written entry (or -1) 11 uint32_t lastSpan; //The span of the last written entry (if applicable) 12 uint32_t lastStep; //The step of the last written entry (if applicable) 13 uint32_t lastStart; //The next start position (if applicable) 14 int lastType; //The type of the last written entry 15 } pyBigWigFile_t; 16 17 static PyObject *pyBwOpen(PyObject *self, PyObject *pyFname); 18 static PyObject *pyBwEnter(pyBigWigFile_t *self, PyObject *args); 19 static PyObject *pyBwClose(pyBigWigFile_t *pybw, PyObject *args); 20 static PyObject *pyBwGetChroms(pyBigWigFile_t *pybw, PyObject *args); 21 static PyObject *pyIsBigWig(pyBigWigFile_t *pybw, PyObject *args); 22 static PyObject *pyIsBigBed(pyBigWigFile_t *pybw, PyObject *args); 23 static PyObject *pyBwGetStats(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); 24 #ifdef WITHNUMPY 25 static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); 26 #else 27 static PyObject *pyBwGetValues(pyBigWigFile_t *pybw, PyObject *args); 28 #endif 29 static PyObject *pyBwGetIntervals(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); 30 static PyObject *pyBBGetEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); 31 static PyObject *pyBBGetSQL(pyBigWigFile_t *pybw, PyObject *args); 32 static PyObject *pyBwGetHeader(pyBigWigFile_t *pybw, PyObject *args); 33 static PyObject *pyBwAddHeader(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); 34 static PyObject *pyBwAddEntries(pyBigWigFile_t *pybw, PyObject *args, PyObject *kwds); 35 static void pyBwDealloc(pyBigWigFile_t *pybw); 36 37 //The function types aren't actually correct... 38 static PyMethodDef bwMethods[] = { 39 {"open", (PyCFunction)pyBwOpen, METH_VARARGS, 40 "Open a bigWig or bigBed file. For remote files, give a URL starting with HTTP,\n\ 41 FTP, or HTTPS.\n\ 42 \n\ 43 Optional arguments:\n\ 44 mode: An optional mode. The default is 'r', which opens a file for reading.\n\ 45 If you specify a mode containing 'w' then you'll instead open a file\n\ 46 for writing. Note that you then need to add an appropriate header\n\ 47 before use. For bigBed files, only reading is supported.\n\ 48 \n\ 49 Returns:\n\ 50 A bigWigFile object on success, otherwise None.\n\ 51 \n\ 52 Arguments:\n\ 53 file: The name of a bigWig file.\n\ 54 \n\ 55 >>> import pyBigWig\n\ 56 >>> bw = pyBigWig.open(\"some_file.bw\")\n"}, 57 {NULL, NULL, 0, NULL} 58 }; 59 60 static PyMethodDef bwObjMethods[] = { 61 {"header", (PyCFunction)pyBwGetHeader, METH_VARARGS, 62 "Returns the header of a bigWig file. This contains information such as: \n\ 63 * The version number of the file ('version').\n\ 64 * The number of zoom levels ('nLevels').\n\ 65 * The number of bases covered ('nBasesCovered').\n\ 66 * The minimum value ('minVal').\n\ 67 * The maximum value ('maxVal').\n\ 68 * The sum of all values ('sumData').\n\ 69 * The sum of the square of all values ('sumSquared').\n\ 70 These are returned as a dictionary.\n\ 71 \n\ 72 >>> import pyBigWig\n\ 73 >>> bw = pyBigWig.open(\"some_file.bw\")\n\ 74 >>> bw.header()\n\ 75 {'maxVal': 2L, 'sumData': 272L, 'minVal': 0L, 'version': 4L,\n\ 76 'sumSquared': 500L, 'nLevels': 1L, 'nBasesCovered': 154L}\n\ 77 >>> bw.close()\n"}, 78 {"close", (PyCFunction)pyBwClose, METH_VARARGS, 79 "Close a bigWig file.\n\ 80 \n\ 81 >>> import pyBigWig\n\ 82 >>> bw = pyBigWig.open(\"some_file.bw\")\n\ 83 >>> bw.close()\n"}, 84 {"isBigWig", (PyCFunction)pyIsBigWig, METH_VARARGS, 85 "Returns True if the object is a bigWig file (otherwise False).\n\ 86 >>> import pyBigWig\n\ 87 >>> bw = pyBigWig.open(\"some_file.bigWig\")\n\ 88 >>> bw.isBigWig()\n\ 89 True\n\ 90 >>> bw.isBigBed()\n\ 91 False\n"}, 92 {"isBigBed", (PyCFunction)pyIsBigBed, METH_VARARGS, 93 "Returns true if the object is a bigBed file (otherwise False).\n\ 94 >>> import pyBigWig\n\ 95 >>> bw = pyBigWig.open(\"some_file.bigBed\")\n\ 96 >>> bw.isBigWig()\n\ 97 False\n\ 98 >>> bw.isBigBed()\n\ 99 True\n"}, 100 {"chroms", (PyCFunction)pyBwGetChroms, METH_VARARGS, 101 "Return a chromosome: length dictionary. The order is typically not\n\ 102 alphabetical and the lengths are long (thus the 'L' suffix).\n\ 103 \n\ 104 Optional arguments:\n\ 105 chrom: An optional chromosome name\n\ 106 \n\ 107 Returns:\n\ 108 A list of chromosome lengths or a dictionary of them.\n\ 109 \n\ 110 >>> import pyBigWig\n\ 111 >>> bw = pyBigWig.open(\"test/test.bw\")\n\ 112 >>> bw.chroms()\n\ 113 {'1': 195471971L, '10': 130694993L}\n\ 114 \n\ 115 Note that you may optionally supply a specific chromosome:\n\ 116 \n\ 117 >>> bw.chroms(\"chr1\")\n\ 118 195471971L\n\ 119 \n\ 120 If you specify a non-existant chromosome then no output is produced:\n\ 121 \n\ 122 >>> bw.chroms(\"foo\")\n\ 123 >>>\n"}, 124 {"stats", (PyCFunction)pyBwGetStats, METH_VARARGS|METH_KEYWORDS, 125 "Return summary statistics for a given range. On error, this function throws a\n\ 126 runtime exception.\n\ 127 \n\ 128 Positional arguments:\n\ 129 chr: Chromosome name\n\ 130 \n\ 131 Keyword arguments:\n\ 132 start: Starting position\n\ 133 end: Ending position\n\ 134 type: Summary type (mean, min, max, coverage, std), default 'mean'.\n\ 135 nBins: Number of bins into which the range should be divided before\n\ 136 computing summary statistics. The default is 1.\n\ 137 exact: By default, pyBigWig uses the same method as Kent's tools from UCSC\n\ 138 for computing statistics. This means that 'zoom levels' may be\n\ 139 used, rather than actual values (please see the pyBigWig repository\n\ 140 on github for further information on this). To avoid this behaviour,\n\ 141 simply specify 'exact=True'. Note that values returned will then\n\ 142 differ from what UCSC, IGV, and similar other tools will report.\n\ 143 \n\ 144 >>> import pyBigWig\n\ 145 >>> bw = pyBigWig.open(\"test/test.bw\")\n\ 146 >>> bw.stats(\"1\", 0, 3)\n\ 147 [0.2000000054637591]\n\ 148 \n\ 149 This is the mean value over the range 1:1-3 (in 1-based coordinates). If\n\ 150 the start and end positions aren't given the entire chromosome is used.\n\ 151 There are additional optional parameters 'type' and 'nBins'. 'type'\n\ 152 specifies the type of summary information to calculate, which is 'mean'\n\ 153 by default. Other possibilites for 'type' are: 'min' (minimum value),\n\ 154 'max' (maximum value), 'coverage' (number of covered bases), and 'std'\n\ 155 (standard deviation). 'nBins' defines how many bins the region will be\n\ 156 divided into and defaults to 1.\n\ 157 \n\ 158 >>> bw.stats(\"1\", 0, 3, type=\"min\")\n\ 159 [0.10000000149011612]\n\ 160 >>> bw.stats(\"1\", 0, 3, type=\"max\")\n\ 161 [0.30000001192092896]\n\ 162 >>> bw.stats(\"1\", 0, 10, type=\"coverage\")\n\ 163 [0.30000000000000004]\n\ 164 >>> bw.stats(\"1\", 0, 3, type=\"std\")\n\ 165 [0.10000000521540645]\n\ 166 >>> bw.stats(\"1\",99,200, type=\"max\", nBins=2)\n\ 167 [1.399999976158142, 1.5]\n"}, 168 #ifdef WITHNUMPY 169 {"values", (PyCFunction)pyBwGetValues, METH_VARARGS|METH_KEYWORDS, 170 "Retrieve the value stored for each position (or None). On error, a runtime\n\ 171 exception is thrown.\n\ 172 \n\ 173 Positional arguments:\n\ 174 chr: Chromosome name\n\ 175 start: Starting position\n\ 176 end: Ending position\n\ 177 \n\ 178 Optional arguments:\n\ 179 numpy: If True, return a numpy array rather than a list of values. This\n\ 180 is generally more memory efficient. Note that this option is only\n\ 181 available if pyBigWig was installed with numpy support (check the\n\ 182 pyBigWig.numpy() function).\n\ 183 \n\ 184 >>> import pyBigWig\n\ 185 >>> bw = pyBigWig.open(\"test/test.bw\")\n\ 186 >>> bw.values(\"1\", 0, 3)\n\ 187 [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\ 188 \n\ 189 The length of the returned list will always match the length of the\n\ 190 range. Any uncovered bases will have a value of None.\n\ 191 \n\ 192 >>> bw.values(\"1\", 0, 4)\n\ 193 [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\ 194 \n"}, 195 #else 196 {"values", (PyCFunction)pyBwGetValues, METH_VARARGS, 197 "Retrieve the value stored for each position (or None). On error, a runtime\n\ 198 exception is thrown.\n\ 199 \n\ 200 Positional arguments:\n\ 201 chr: Chromosome name\n\ 202 start: Starting position\n\ 203 end: Ending position\n\ 204 \n\ 205 >>> import pyBigWig\n\ 206 >>> bw = pyBigWig.open(\"test/test.bw\")\n\ 207 >>> bw.values(\"1\", 0, 3)\n\ 208 [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]\n\ 209 \n\ 210 The length of the returned list will always match the length of the\n\ 211 range. Any uncovered bases will have a value of None.\n\ 212 \n\ 213 >>> bw.values(\"1\", 0, 4)\n\ 214 [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, None]\n\ 215 \n"}, 216 #endif 217 {"intervals", (PyCFunction)pyBwGetIntervals, METH_VARARGS|METH_KEYWORDS, 218 "Retrieve each interval covering a part of a chromosome/region. On error, a\n\ 219 runtime exception is thrown.\n\ 220 \n\ 221 Positional arguments:\n\ 222 chr: Chromosome name\n\ 223 \n\ 224 Keyword arguments:\n\ 225 start: Starting position\n\ 226 end: Ending position\n\ 227 \n\ 228 If start and end aren't specified, the entire chromosome is returned.\n\ 229 The returned object is a tuple containing the starting position, end\n\ 230 position, and value of each interval in the file. As with all bigWig\n\ 231 positions, those returned are 0-based half-open (e.g., a start of 0 and\n\ 232 end of 10 specifies the first 10 positions).\n\ 233 \n\ 234 >>> import pyBigWig\n\ 235 >>> bw = pyBigWig.open(\"test/test.bw\")\n\ 236 >>> bw.intervals(\"1\", 0, 3)\n\ 237 ((0, 1, 0.10000000149011612), (1, 2, 0.20000000298023224),\n\ 238 (2, 3, 0.30000001192092896))\n\ 239 >>> bw.close()"}, 240 {"entries", (PyCFunction) pyBBGetEntries, METH_VARARGS|METH_KEYWORDS, 241 "Retrieves entries from a bigBed file. These can optionally contain the string\n\ 242 associated with each entry.\n\ 243 \n\ 244 Positional arguments:\n\ 245 chr: Chromosome name\n\ 246 \n\ 247 Keyword arguments:\n\ 248 start: Starting position\n\ 249 end: Ending position\n\ 250 withString: If True, return the string associated with each entry.\n\ 251 Default True.\n\ 252 \n\ 253 The output is a list of tuples, with members \"start\", \"end\", and \"string\"\n\ 254 (assuming \"withString=True\"). If there are no overlapping entries, then None\n\ 255 is returned.\n\ 256 \n\ 257 >>> import pyBigWig\n\ 258 >>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\ 259 >>> print(bw.entries('chr1',10000000,10020000))\n\ 260 [(10009333, 10009640, '61035\t130\t-\t0.026\t0.42\t404'),\n\ 261 (10014007, 10014289, '61047\t136\t-\t0.029\t0.42\t404'),\n\ 262 (10014373, 10024307, '61048\t630\t-\t5.420\t0.00\t2672399')]\n\ 263 >>> print(bb.entries(\"chr1\", 10000000, 10000500, withString=False))\n\ 264 [(10009333, 10009640), (10014007, 10014289), (10014373, 10024307)]\n\ 265 \n"}, 266 {"SQL", (PyCFunction) pyBBGetSQL, METH_VARARGS, 267 "Returns the SQL string associated with the file. This is typically useful for\n\ 268 bigBed files, where this determines what is held in each column of the text\n\ 269 string associated with entries.\n\ 270 \n\ 271 If there is no SQL string, then None is returned.\n\ 272 \n\ 273 >>> import pyBigWig\n\ 274 >>> bb = pyBigWig.open(\"https://www.encodeproject.org/files/ENCFF001JBR/@@download/ENCFF001JBR.bigBed\")\n\ 275 >>> print(bb.SQL())\n\ 276 table RnaElements\n\ 277 \"BED6 + 3 scores for RNA Elements data \"\n\ 278 (\n\ 279 string chrom; \"Reference sequence chromosome or scaffold\"\n\ 280 uint chromStart; \"Start position in chromosome\"\n\ 281 uint chromEnd; \"End position in chromosome\"\n\ 282 string name; \"Name of item\"\n\ 283 uint score; \"Normalized score from 0-1000\"\n\ 284 char[1] strand; \"+ or - or . for unknown\"\n\ 285 float level; \"Expression level such as RPKM or FPKM. Set to -1 for no data.\"\n\ 286 float signif; \"Statistical significance such as IDR. Set to -1 for no data.\"\n\ 287 uint score2; \"Additional measurement/count e.g. number of reads. Set to 0 for no data.\"\n\ 288 )\n\ 289 \n\ 290 \n"}, 291 {"addHeader", (PyCFunction)pyBwAddHeader, METH_VARARGS|METH_KEYWORDS, 292 "Adds a header to a file opened for writing. This MUST be called before adding\n\ 293 any entries. On error, a runtime exception is thrown.\n\ 294 \n\ 295 Positional arguments:\n\ 296 cl: A chromosome list, of the form (('chr1', 1000), ('chr2', 2000), ...).\n\ 297 In other words, each element of the list is a tuple containing a\n\ 298 chromosome name and its associated length.\n\ 299 \n\ 300 Keyword arguments:\n\ 301 maxZooms: The maximum number of zoom levels. The value must be >=0. The\n\ 302 default is 10.\n\ 303 \n\ 304 >>> import pyBigWig\n\ 305 >>> import tempfile\n\ 306 >>> import os\n\ 307 >>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\ 308 >>> oname = ofile.name\n\ 309 >>> ofile.close()\n\ 310 >>> bw = pyBigWig.open(oname, 'w')\n\ 311 >>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)], maxZooms=0)\n\ 312 >>> bw.close()\n\ 313 >>> os.remove(oname)"}, 314 {"addEntries", (PyCFunction)pyBwAddEntries, METH_VARARGS|METH_KEYWORDS, 315 "Adds one or more entries to a bigWig file. This returns nothing, but throws a\n\ 316 runtime exception on error.\n\ 317 \n\ 318 This function always accepts an optional 'validate' option. If set to 'True',\n\ 319 which is the default, the input entries are checked to ensure that they come\n\ 320 after previously entered entries. This comes with significant overhead, so if\n\ 321 this is instead 'False' then this validation is not performed.\n\ 322 \n\ 323 There are three manners in which entries can be stored in bigWig files.\n\ 324 \n\ 325 \n\ 326 bedGraph-like entries (12 bytes each):\n\ 327 \n\ 328 Positional arguments:\n\ 329 chrom: A list of chromosome. These MUST match those added with addHeader().\n\ 330 starts: A list of start positions. These are 0-based.\n\ 331 \n\ 332 Keyword arguments:\n\ 333 ends: A list of end positions. These are 0-based half open, so a start of\n\ 334 0 and end of 10 specifies the first 10 bases.\n\ 335 values: A list of values.\n\ 336 \n\ 337 \n\ 338 Variable-step entries (8 bytes each):\n\ 339 \n\ 340 Positional arguments:\n\ 341 chrom: A chromosome name. This MUST match one added with addHeader().\n\ 342 starts: A list of start positions. These are 0-based.\n\ 343 \n\ 344 Keyword arguments:\n\ 345 values: A list of values.\n\ 346 span: A span width. This is an integer value and specifies how many bases\n\ 347 each entry describes. An entry with a start position of 0 and a span\n\ 348 of 10 describes the first 10 bases.\n\ 349 \n\ 350 \n\ 351 Fixed-step entries (4 bytes each):\n\ 352 \n\ 353 Positional arguments:\n\ 354 chrom: A chromosome name. This MUST match one added with addHeader().\n\ 355 starts: A start position. These are 0-based. The start position of each\n\ 356 entry starts 'step' after the previous and describes 'span' bases.\n\ 357 \n\ 358 Keyword arguments:\n\ 359 values: A list of values.\n\ 360 span: A span width. This is an integer value and specifies how many bases\n\ 361 each entry describes. An entry with a start position of 0 and a span\n\ 362 of 10 describes the first 10 bases.\n\ 363 step: A step width. Each subsequent entry begins this number of bases\n\ 364 after the previous. So if the first entry has a start of 0 and step\n\ 365 or 30, the second entry will start at 30.\n\ 366 \n\ 367 >>> import pyBigWig\n\ 368 >>> import tempfile\n\ 369 >>> import os\n\ 370 >>> ofile = tempfile.NamedTemporaryFile(delete=False)\n\ 371 >>> oname = ofile.name\n\ 372 >>> ofile.close()\n\ 373 >>> bw = pyBigWig.open(oname, 'w')\n\ 374 >>> bw.addHeader([(\"1\", 1000000), (\"2\", 1500000)])\n\ 375 >>> #Add some bedGraph-like entries\n\ 376 >>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0])\n\ 377 >>> #Variable-step entries, the span 500-520, 600-620, and 635-655\n\ 378 >>> bw.addEntries(\"1\", [500, 600, 635], values=[-2.0, 150.0, 25.0], span=20)\n\ 379 >>> #Fixed-step entries, the bases described are 900-920, 930-950, and 960-980\n\ 380 >>> bw.addEntries(\"1\", 900, values=[-5.0, -20.0, 25.0], span=20, step=30)\n\ 381 >>> #This only works due to using validate=False. Obviously the file is then corrupt.\n\ 382 >>> bw.addEntries([\"1\", \"1\", \"1\"], [0, 100, 125], ends=[5, 120, 126], values=[0.0, 1.0, 200.0], validate=False)\n\ 383 >>> bw.close()\n\ 384 >>> os.remove(oname)"}, 385 {"__enter__", (PyCFunction)pyBwEnter, METH_NOARGS, NULL}, 386 {"__exit__", (PyCFunction)pyBwClose, METH_VARARGS, NULL}, 387 {NULL, NULL, 0, NULL} 388 }; 389 390 #if PY_MAJOR_VERSION >= 3 391 struct pyBigWigmodule_state { 392 PyObject *error; 393 }; 394 395 #define GETSTATE(m) ((struct pyBigWigmodule_state*)PyModule_GetState(m)) 396 397 static PyModuleDef pyBigWigmodule = { 398 PyModuleDef_HEAD_INIT, 399 "pyBigWig", 400 "A python module for bigWig file access", 401 -1, 402 bwMethods, 403 NULL, NULL, NULL, NULL 404 }; 405 #endif 406 407 //Should set tp_dealloc, tp_print, tp_repr, tp_str, tp_members 408 static PyTypeObject bigWigFile = { 409 #if PY_MAJOR_VERSION >= 3 410 PyVarObject_HEAD_INIT(NULL, 0) 411 #else 412 PyObject_HEAD_INIT(NULL) 413 0, /*ob_size*/ 414 #endif 415 "pyBigWig.bigWigFile", /*tp_name*/ 416 sizeof(pyBigWigFile_t), /*tp_basicsize*/ 417 0, /*tp_itemsize*/ 418 (destructor)pyBwDealloc, /*tp_dealloc*/ 419 0, /*tp_print*/ 420 0, /*tp_getattr*/ 421 0, /*tp_setattr*/ 422 0, /*tp_compare*/ 423 0, /*tp_repr*/ 424 0, /*tp_as_number*/ 425 0, /*tp_as_sequence*/ 426 0, /*tp_as_mapping*/ 427 0, /*tp_hash*/ 428 0, /*tp_call*/ 429 0, /*tp_str*/ 430 PyObject_GenericGetAttr, /*tp_getattro*/ 431 PyObject_GenericSetAttr, /*tp_setattro*/ 432 0, /*tp_as_buffer*/ 433 #if PY_MAJOR_VERSION >= 3 434 Py_TPFLAGS_DEFAULT, /*tp_flags*/ 435 #else 436 Py_TPFLAGS_HAVE_CLASS, /*tp_flags*/ 437 #endif 438 "bigWig File", /*tp_doc*/ 439 0, /*tp_traverse*/ 440 0, /*tp_clear*/ 441 0, /*tp_richcompare*/ 442 0, /*tp_weaklistoffset*/ 443 0, /*tp_iter*/ 444 0, /*tp_iternext*/ 445 bwObjMethods, /*tp_methods*/ 446 0, /*tp_members*/ 447 0, /*tp_getset*/ 448 0, /*tp_base*/ 449 0, /*tp_dict*/ 450 0, /*tp_descr_get*/ 451 0, /*tp_descr_set*/ 452 0, /*tp_dictoffset*/ 453 0, /*tp_init*/ 454 0, /*tp_alloc*/ 455 0, /*tp_new*/ 456 0,0,0,0,0,0 457 }; 458