#! /usr/bin/python

'''pysam - a python module for reading, manipulating and writing
genomic data sets.

pysam is a lightweight wrapper of the htslib C-API and provides
facilities to read and write SAM/BAM/VCF/BCF/BED/GFF/GTF/FASTA/FASTQ
files as well as access to the command line functionality of the
samtools and bcftools packages. The module supports compression and
random access through indexing.

This module provides a low-level wrapper around the htslib C-API as
using cython and a high-level API for convenient access to the data
within standard genomic file formats.

See:
http://www.htslib.org
https://github.com/pysam-developers/pysam
http://pysam.readthedocs.org/en/stable

'''

import collections
import glob
import os
import platform
import re
import subprocess
import sys
import sysconfig
from contextlib import contextmanager
from distutils import log
from setuptools import setup, Command
from setuptools.command.sdist import sdist

from cy_build import CyExtension as Extension, cy_build_ext as build_ext
try:
    import cython
    HAVE_CYTHON = True
except ImportError:
    HAVE_CYTHON = False

IS_PYTHON3 = sys.version_info.major >= 3


@contextmanager
def changedir(path):
    """Context manager that runs its body with *path* as working directory.

    The previous working directory is restored when the body exits,
    whether normally or through an exception.
    """
    save_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(save_dir)


def run_configure(option):
    """Run ``./configure <option>`` in the current working directory.

    The command is handed to the shell as a single string, so *option*
    may contain several space-separated configure arguments.

    Returns True if configure exited with status 0, and False if it
    exited with any other status or could not be started (OSError).
    """
    # flush our own buffered output before the child process writes
    sys.stdout.flush()
    try:
        retcode = subprocess.call(
            " ".join(("./configure", option)),
            shell=True)
    except OSError:
        return False
    return retcode == 0


def run_make_print_config():
    """Run ``make -s print-config`` in the current working directory.

    Every output line containing ``=`` is split at the *first* ``=``
    into a key and a value; both are stripped of surrounding
    whitespace.  Returns the resulting dictionary.

    Raises OSError if neither ``gmake`` nor ``make`` can be started and
    subprocess.CalledProcessError if make exits with an error.
    """
    try:
        stdout = subprocess.check_output(["gmake", "-s", "print-config"])
    except OSError:
        # GNU make is not installed under the name "gmake" everywhere;
        # fall back to plain "make" (presumably GNU make on such
        # systems - TODO confirm for the platforms that matter).
        stdout = subprocess.check_output(["make", "-s", "print-config"])
    if IS_PYTHON3:
        stdout = stdout.decode("ascii")

    make_print_config = {}
    for line in stdout.splitlines():
        # Split at the first "=" only, so that values which themselves
        # contain "=" (e.g. "CFLAGS = -g -std=gnu99") are kept instead
        # of being dropped.
        key, sep, value = line.partition("=")
        if sep:
            make_print_config[key.strip()] = value.strip()
    return make_print_config


# This function emulates the way distutils combines settings from sysconfig,
# environment variables, and the extension being built. It returns a dictionary
# representing the usual set of variables, suitable for writing to a generated
# file or for running configure (provided the returned LIBS is ignored).
def build_config_dict(ext):
    """Return the build settings for *ext* as a dictionary of strings.

    *ext* must provide the attributes ``include_dirs``,
    ``define_macros``, ``undef_macros``, ``extra_compile_args``,
    ``library_dirs``, ``extra_link_args`` and ``libraries``.

    The returned dictionary has the keys ``CC``, ``CPPFLAGS``,
    ``CFLAGS``, ``LDFLAGS`` and ``LIBS``; each value is a single
    space-joined string.
    """
    def env(var):
        # value of environment variable `var` as a 0- or 1-element list
        return [os.environ[var]] if var in os.environ else []

    def sc(var):
        # value of sysconfig variable `var` as a 0- or 1-element list
        value = sysconfig.get_config_var(var)
        return [value] if value is not None else []

    def optionise(option, valuelist):
        # prefix each value with `option`; single-quote results that
        # contain a space
        def quote(s):
            return "'" + s + "'" if " " in s else s
        return [quote(option + v) for v in valuelist]

    def kvtuples(pairlist):
        # (name, None) -> "name"; (name, value) -> "name=value"
        return [t[0] if t[1] is None else t[0] + "=" + t[1]
                for t in pairlist]

    # For CC, select the first of these that is set
    cc = (env('CC') + sc('CC') + ['gcc'])[0]

    # distutils ignores sysconfig for CPPFLAGS
    cppflags = " ".join(env('CPPFLAGS') + optionise('-I', ext.include_dirs) +
                        optionise('-D', kvtuples(ext.define_macros)) +
                        optionise('-U', ext.undef_macros))

    cflags = " ".join(sc('CFLAGS') + env('CFLAGS') + ext.extra_compile_args)

    # distutils actually includes $CPPFLAGS here too, but that's weird and
    # unnecessary for us as we know the output LDFLAGS will be used correctly
    ldflags = " ".join(sc('LDFLAGS') + env('LDFLAGS') + env('CFLAGS') +
                       optionise('-L', ext.library_dirs) +
                       ext.extra_link_args)

    # ext.libraries is computed (incorporating $LIBS etc) during configure
    libs = " ".join(optionise('-l', ext.libraries))

    return {'CC': cc, 'CPPFLAGS': cppflags, 'CFLAGS': cflags,
            'LDFLAGS': ldflags, 'LIBS': libs}


def write_configvars_header(filename, ext, prefix):
    """Write the build settings of *ext* to *filename* as C ``#define``s.

    One line ``#define <prefix>_<VAR> "<value>"`` is written for each
    entry of ``build_config_dict(ext)``.  For any *prefix* other than
    ``'HTS'`` the entries ``HTSDIR`` and ``CURSES_LIB`` are added with
    the value ``(unused)``.  Values are written verbatim, without
    escaping.
    """
    config = build_config_dict(ext)
    if prefix != 'HTS':
        config['HTSDIR'] = '(unused)'
        config['CURSES_LIB'] = '(unused)'

    log.info("creating %s for '%s' extension", filename, ext.name)
    with open(filename, "w") as outf:
        for var, value in config.items():
            outf.write('#define {}_{} "{}"\n'.format(prefix, var, value))


@contextmanager
def set_compiler_envvars():
    """Context manager exporting CC, CFLAGS and LDFLAGS from sysconfig.

    A variable that is already present in the environment is left
    untouched.  A variable that is only known to sysconfig is copied
    into ``os.environ`` for the duration of the body and removed again
    on exit.  The source of every value is reported on stdout.
    """
    tmp_vars = []
    for var in ['CC', 'CFLAGS', 'LDFLAGS']:
        if var in os.environ:
            print ("# pysam: (env) {}={}".format(var, os.environ[var]))
        elif var in sysconfig.get_config_vars():
            value = sysconfig.get_config_var(var)
            print ("# pysam: (sysconfig) {}={}".format(var, value))
            os.environ[var] = value
            # remember it so that only variables set here are removed
            tmp_vars.append(var)

    try:
        yield
    finally:
        for var in tmp_vars:
            del os.environ[var]


def configure_library(library_dir, env_options=None, options=()):
    """Run the configure script in *library_dir*.

    *env_options* is tried first (if not None), followed by each entry
    of *options* in order; the first option string for which configure
    succeeds is returned.  Returns None if every attempt failed.

    When the environment variable READTHEDOCS equals ``"True"``,
    *env_options* is replaced by ``"--disable-bz2"``.

    Raises ValueError if *library_dir* contains no ``configure`` script.
    """
    configure_script = os.path.join(library_dir, "configure")

    on_rtd = os.environ.get("READTHEDOCS") == "True"
    # RTD has no bzip2 development libraries installed:
    if on_rtd:
        env_options = "--disable-bz2"

    if not os.path.exists(configure_script):
        raise ValueError(
            "configure script {} does not exist".format(configure_script))

    with changedir(library_dir), set_compiler_envvars():
        if env_options is not None:
            if run_configure(env_options):
                return env_options

        for option in options:
            if run_configure(option):
                return option

    return None


def distutils_dir_name(dname):
    """Returns the name of a distutils build directory
    see: http://stackoverflow.com/questions/14320220/
               testing-python-c-libraries-get-build-path

    The name has the form ``<dname>.<platform>-<major>.<minor>``, using
    ``sysconfig.get_platform()`` and the running interpreter's version.
    """
    f = "{dirname}.{platform}-{version[0]}.{version[1]}"
    return f.format(dirname=dname,
                    platform=sysconfig.get_platform(),
                    version=sys.version_info)


def get_pysam_version():
    """Return ``__version__`` from the module ``pysam/version.py``.

    The directory ``pysam`` is put at the front of ``sys.path`` so that
    the ``version`` module can be imported directly; the entry is left
    in place afterwards.
    """
    sys.path.insert(0, "pysam")
    import version
    return version.__version__


# Override sdist command to ensure Cythonized *.c files are included.
class cythonize_sdist(sdist):
    """sdist command that runs cythonize on the extension modules first."""

    # Remove when setuptools (as installed on GH runners) has these options
    _known_options = [entry[0] for entry in sdist.user_options]
    if 'owner=' not in _known_options:
        sdist.user_options.append(('owner=', 'u', 'Specify owner inside tar'))
    if 'group=' not in _known_options:
        sdist.user_options.append(('group=', 'g', 'Specify group inside tar'))
    # temporary helper only; do not leave it behind as a class attribute
    del _known_options

    def run(self):
        """Cythonize the distribution's ext_modules, then run sdist."""
        from Cython.Build import cythonize
        cythonize(self.distribution.ext_modules)
        super().run()


class clean_ext(Command):
    """Command removing Cython output and generated config headers."""

    description = "clean up Cython temporary files"
    user_options = []

    def initialize_options(self):
        """No options to initialise."""
        pass

    def finalize_options(self):
        """No options to finalise."""
        pass

    def run(self):
        """Delete pysam/libc*.c and */*config*.h of the bundled packages."""
        cython_outputs = glob.glob(os.path.join("pysam", "libc*.c"))
        if cython_outputs:
            log.info("removing 'pysam/libc*.c' (%s Cython objects)", len(cython_outputs))
        for path in cython_outputs:
            os.remove(path)

        generated_headers = []
        for subdir in ("htslib", "samtools", "bcftools"):
            generated_headers.extend(
                glob.glob(os.path.join(subdir, "*config*.h")))
        if generated_headers:
            log.info("removing '*/*config*.h' (%s generated headers)", len(generated_headers))
        for path in generated_headers:
            os.remove(path)


# How to link against HTSLIB
# shared:   build shared chtslib from builtin htslib code.
# external: use shared libhts.so compiled outside of
#           pysam
# separate: use included htslib and include in each extension
#           module. No dependencies between modules and works with
#           setup.py install, but wasteful in terms of memory and
#           compilation time. Fallback if shared module compilation
#           fails.

# Build configuration read from the environment.
HTSLIB_MODE = os.environ.get("HTSLIB_MODE", "shared")
HTSLIB_LIBRARY_DIR = os.environ.get("HTSLIB_LIBRARY_DIR", None)
HTSLIB_INCLUDE_DIR = os.environ.get("HTSLIB_INCLUDE_DIR", None)
HTSLIB_CONFIGURE_OPTIONS = os.environ.get("HTSLIB_CONFIGURE_OPTIONS", None)
HTSLIB_SOURCE = None

package_list = ['pysam',
                'pysam.include',
                'pysam.include.samtools',
                'pysam.include.bcftools']
package_dirs = {'pysam': 'pysam',
                'pysam.include.samtools': 'samtools',
                'pysam.include.bcftools': 'bcftools'}

# list of config files that will be automatically generated should
# they not already exist or be created by configure scripts in the
# subpackages.
config_headers = ["samtools/config.h",
                  "bcftools/config.h"]

# If cython is available, the pysam will be built using cython from
# the .pyx files. If no cython is available, the C-files included in the
# distribution will be used.
if HAVE_CYTHON:
    print ("# pysam: cython is available - using cythonize if necessary")
    source_pattern = "pysam/libc%s.pyx"
else:
    print ("# pysam: no cython available - using pre-compiled C")
    source_pattern = "pysam/libc%s.c"

# Exit if there are no pre-compiled files and no cython available
fn = source_pattern % "htslib"
if not os.path.exists(fn):
    raise ValueError(
        "no cython installed, but can not find {}. "
        "Make sure that cython is installed when building "
        "from the repository"
        .format(fn))

print ("# pysam: htslib mode is {}".format(HTSLIB_MODE))
print ("# pysam: HTSLIB_CONFIGURE_OPTIONS={}".format(
    HTSLIB_CONFIGURE_OPTIONS))
htslib_configure_options = None

if HTSLIB_MODE in ['shared', 'separate']:
    package_list += ['pysam.include.htslib',
                     'pysam.include.htslib.htslib']
    package_dirs.update({'pysam.include.htslib': 'htslib'})

    # try the user-supplied options first, then with and without libcurl
    htslib_configure_options = configure_library(
        "htslib",
        HTSLIB_CONFIGURE_OPTIONS,
        ["--enable-libcurl",
         "--disable-libcurl"])

    HTSLIB_SOURCE = "builtin"
    print ("# pysam: htslib configure options: {}".format(
        str(htslib_configure_options)))

    config_headers += ["htslib/config.h"]
    if htslib_configure_options is None:
        # create empty config.h file
        with open("htslib/config.h", "w") as outf:
            outf.write(
                "/* empty config.h created by pysam */\n")
            outf.write(
                "/* conservative compilation options */\n")

    with changedir("htslib"):
        htslib_make_options = run_make_print_config()

    for key, value in htslib_make_options.items():
        print ("# pysam: htslib_config {}={}".format(key, value))

    external_htslib_libraries = ['z']
    if "LIBS" in htslib_make_options:
        external_htslib_libraries.extend(
            [re.sub(r"^-l", "", x)
             for x in htslib_make_options["LIBS"].split(" ") if x.strip()])

    # Map every object file named by the htslib Makefile to its C source.
    # split() without argument ignores repeated blanks, and the anchored
    # pattern only rewrites the trailing ".o" extension.
    shared_htslib_sources = [re.sub(r"\.o$", ".c", os.path.join("htslib", x))
                             for x in
                             htslib_make_options["LIBHTS_OBJS"].split()]

    htslib_sources = []

if HTSLIB_LIBRARY_DIR:
    # linking against a shared, externally installed htslib version, no
    # sources required for htslib
    htslib_sources = []
    shared_htslib_sources = []
    chtslib_sources = []
    htslib_library_dirs = [HTSLIB_LIBRARY_DIR]
    # HTSLIB_INCLUDE_DIR is optional: leave the list empty when it is not
    # set, because the include directories are later passed through
    # os.path.abspath(), which does not accept None.
    htslib_include_dirs = [HTSLIB_INCLUDE_DIR] if HTSLIB_INCLUDE_DIR else []
    external_htslib_libraries = ['z', 'hts']
elif HTSLIB_MODE == 'separate':
    # add to each pysam component a separately compiled
    # htslib
    htslib_sources = shared_htslib_sources
    htslib_library_dirs = []
    htslib_include_dirs = ['htslib']
elif HTSLIB_MODE == 'shared':
    # link each pysam component against the same
    # htslib built from sources included in the pysam
    # package.
    htslib_library_dirs = [
        "pysam",  # when using setup.py develop?
        ".",  # when using setup.py develop?
        os.path.join("build", distutils_dir_name("lib"), "pysam")]

    htslib_include_dirs = ['htslib']
else:
    raise ValueError("unknown HTSLIB value '%s'" % HTSLIB_MODE)

# build config.py
with open(os.path.join("pysam", "config.py"), "w") as outf:
    outf.write('HTSLIB = "{}"\n'.format(HTSLIB_SOURCE))
    # options that are not defined in config.h are reported as 0
    config_values = collections.defaultdict(int)

    # NOTE(review): the feature flags are only written for the builtin
    # htslib - confirm this matches the intended nesting.
    if HTSLIB_SOURCE == "builtin":
        with open(os.path.join("htslib", "config.h")) as inf:
            for line in inf:
                if line.startswith("#define"):
                    match = re.match(r"#define (\S+)\s+(\S+)", line)
                    # a "#define NAME" line without a value does not
                    # match; skip it instead of failing on None
                    if match is not None:
                        key, value = match.groups()
                        config_values[key] = value
        for key in ["ENABLE_GCS",
                    "ENABLE_PLUGINS",
                    "ENABLE_S3",
                    "HAVE_COMMONCRYPTO",
                    "HAVE_HMAC",
                    "HAVE_LIBBZ2",
                    "HAVE_LIBCURL",
                    "HAVE_LIBDEFLATE",
                    "HAVE_LIBLZMA",
                    "HAVE_MMAP"]:
            outf.write("{} = {}\n".format(key, config_values[key]))
            print ("# pysam: config_option: {}={}".format(key, config_values[key]))

# create empty config.h files if they have not been created automatically
# or created by the user:
for fn in config_headers:
    if not os.path.exists(fn):
        with open(fn, "w") as outf:
            outf.write(
                "/* empty config.h created by pysam */\n")
            outf.write(
                "/* conservative compilation options */\n")

#######################################################
# Windows compatibility - untested
if platform.system() == 'Windows':
    include_os = ['win32']
    os_c_files = ['win32/getopt.c']
    extra_compile_args = []
else:
    include_os = []
    os_c_files = []
    # for python 3.4, see for example
    # http://stackoverflow.com/questions/25587039/
    # error-compiling-rpy2-on-python3-4-due-to-werror-
    # declaration-after-statement
    extra_compile_args = [
        "-Wno-unused",
        "-Wno-strict-prototypes",
        "-Wno-sign-compare",
        "-Wno-error=declaration-after-statement"]

define_macros = []

# file name suffix of extension modules; 'SO' is the fallback when
# 'EXT_SUFFIX' is not set
suffix = sysconfig.get_config_var('EXT_SUFFIX')
if not suffix:
    suffix = sysconfig.get_config_var('SO')

# Library names of the extension modules that other modules link
# against: the module file name with its last extension removed.
internal_htslib_libraries = [
    os.path.splitext("chtslib{}".format(suffix))[0]]
internal_samtools_libraries = [
    os.path.splitext("csamtools{}".format(suffix))[0],
    os.path.splitext("cbcftools{}".format(suffix))[0],
    ]
internal_pysamutil_libraries = [
    os.path.splitext("cutils{}".format(suffix))[0]]

libraries_for_pysam_module = external_htslib_libraries + internal_htslib_libraries + internal_pysamutil_libraries

# Order of modules matters in order to make sure that dependencies are resolved.
# The structures of dependencies is as follows:
# libchtslib: htslib utility functions and htslib itself if builtin is set.
# libcsamtools: samtools code (builtin)
# libcbcftools: bcftools code (builtin)
# libcutils: General utility functions, depends on all of the above
# libcXXX (pysam module): depends on libchtslib and libcutils

# The list below uses the union of include_dirs and library_dirs for
# reasons of simplicity.

def prebuild_libchtslib(ext, force):
    """Write htslib/config_vars.h when the bundled htslib is built."""
    if HTSLIB_MODE in ['shared', 'separate']:
        write_configvars_header("htslib/config_vars.h", ext, "HTS")


def prebuild_libcsamtools(ext, force):
    """Write samtools/samtools_config_vars.h for the samtools extension."""
    write_configvars_header("samtools/samtools_config_vars.h", ext, "SAMTOOLS")


# The first four extensions have individual source and library lists;
# the remaining pysam modules all follow the same recipe and are
# generated from their names.
modules = [
    {"name": "pysam.libchtslib",
     "prebuild_func": prebuild_libchtslib,
     "sources": ([source_pattern % "htslib", "pysam/htslib_util.c"]
                 + shared_htslib_sources + os_c_files),
     "libraries": external_htslib_libraries},
    {"name": "pysam.libcsamtools",
     "prebuild_func": prebuild_libcsamtools,
     "sources": ([source_pattern % "samtools"]
                 + glob.glob(os.path.join("samtools", "*.pysam.c"))
                 + [os.path.join("samtools", "lz4", "lz4.c")]
                 + htslib_sources + os_c_files),
     "libraries": external_htslib_libraries + internal_htslib_libraries},
    {"name": "pysam.libcbcftools",
     "sources": ([source_pattern % "bcftools"]
                 + glob.glob(os.path.join("bcftools", "*.pysam.c"))
                 + htslib_sources + os_c_files),
     "libraries": external_htslib_libraries + internal_htslib_libraries},
    {"name": "pysam.libcutils",
     "sources": ([source_pattern % "utils", "pysam/pysam_util.c"]
                 + htslib_sources + os_c_files),
     "libraries": (external_htslib_libraries + internal_htslib_libraries
                   + internal_samtools_libraries)},
] + [
    {"name": "pysam.libc" + stem,
     "sources": [source_pattern % stem] + htslib_sources + os_c_files,
     "libraries": libraries_for_pysam_module}
    for stem in ("alignmentfile",
                 "samfile",
                 "alignedsegment",
                 "tabix",
                 "faidx",
                 "bcf",
                 "bgzf",
                 "tabixproxies",
                 "vcf")
]

# for out-of-tree compilation, use absolute paths
common_options = {
    "language": "c",
    "extra_compile_args": extra_compile_args,
    "define_macros": define_macros,
    "library_dirs": list(map(os.path.abspath,
                             ["pysam"] + htslib_library_dirs)),
    "include_dirs": list(map(
        os.path.abspath,
        htslib_include_dirs
        + ["samtools", "samtools/lz4", "bcftools", "pysam", "."]
        + include_os)),
}

# add common options (in python >3.5, could use n = {**a, **b})
for module in modules:
    module.update(common_options)

classifiers = """
Development Status :: 4 - Beta
Intended Audience :: Science/Research
Intended Audience :: Developers
License :: OSI Approved
Programming Language :: Python
Topic :: Software Development
Topic :: Scientific/Engineering
Operating System :: POSIX
Operating System :: Unix
Operating System :: MacOS
"""

metadata = dict(
    name="pysam",
    version=get_pysam_version(),
    description="pysam",
    long_description=__doc__,
    author="Andreas Heger",
    author_email="andreas.heger@gmail.com",
    license="MIT",
    platforms=["POSIX", "UNIX", "MacOS"],
    # one classifier per non-empty line of the block above
    classifiers=list(filter(None, classifiers.split("\n"))),
    url="https://github.com/pysam-developers/pysam",
    packages=package_list,
    requires=['cython (>=0.29.12)'],
    ext_modules=[Extension(**opts) for opts in modules],
    cmdclass={'build_ext': build_ext, 'clean_ext': clean_ext, 'sdist': cythonize_sdist},
    package_dir=package_dirs,
    package_data={'': ['*.pxd', '*.h'], },
    # do not pack in order to permit linking to csamtools.so
    zip_safe=False,
)

if __name__ == '__main__':
    dist = setup(**metadata)