1#! /usr/bin/python
2
3'''pysam - a python module for reading, manipulating and writing
4genomic data sets.
5
6pysam is a lightweight wrapper of the htslib C-API and provides
7facilities to read and write SAM/BAM/VCF/BCF/BED/GFF/GTF/FASTA/FASTQ
8files as well as access to the command line functionality of the
9samtools and bcftools packages. The module supports compression and
10random access through indexing.
11
This module provides a low-level wrapper around the htslib C-API
using cython and a high-level API for convenient access to the data
within standard genomic file formats.
15
16See:
17http://www.htslib.org
18https://github.com/pysam-developers/pysam
19http://pysam.readthedocs.org/en/stable
20
21'''
22
23import collections
24import glob
25import os
26import platform
27import re
28import subprocess
29import sys
30import sysconfig
31from contextlib import contextmanager
32from distutils import log
33from setuptools import setup, Command
34from setuptools.command.sdist import sdist
35
36from cy_build import CyExtension as Extension, cy_build_ext as build_ext
37try:
38    import cython
39    HAVE_CYTHON = True
40except ImportError:
41    HAVE_CYTHON = False
42
# True when the interpreter's major version is 3 or later.
IS_PYTHON3 = sys.version_info[0] >= 3
44
45
@contextmanager
def changedir(path):
    """Temporarily make *path* the current working directory.

    The directory that was current on entry is restored when the
    ``with`` block exits, whether normally or through an exception.
    """
    original_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Always go back to where we started, even on error.
        os.chdir(original_dir)
54
55
def run_configure(option):
    """Run ``./configure <option>`` in the current directory.

    The command line is handed to the shell as a single string, so
    *option* may hold several space-separated configure arguments.

    Returns True when configure exits with status zero, and False when
    it exits with any other status or cannot be started (OSError).
    """
    sys.stdout.flush()
    command = " ".join(("./configure", option))
    try:
        status = subprocess.call(command, shell=True)
    except OSError:
        return False
    return status == 0
68
69
def run_make_print_config():
    """Return the variables reported by the Makefile's ``print-config`` target.

    Runs ``<make> -s print-config`` in the current directory and parses
    every ``NAME = value`` line of its output. ``gmake`` is tried first;
    ``make`` is used as a fallback when ``gmake`` cannot be launched.

    Returns:
        dict mapping each variable name to its value, both stripped of
        surrounding whitespace.

    Raises:
        OSError: if neither ``gmake`` nor ``make`` can be executed.
        subprocess.CalledProcessError: if make exits with a non-zero
            status.
    """
    try:
        stdout = subprocess.check_output(["gmake", "-s", "print-config"])
    except OSError:
        # No usable "gmake" executable: fall back to plain "make".
        stdout = subprocess.check_output(["make", "-s", "print-config"])
    if IS_PYTHON3:
        stdout = stdout.decode("ascii")

    make_print_config = {}
    for line in stdout.splitlines():
        # Split on the first "=" only, so that values which themselves
        # contain "=" (e.g. "-fvisibility=hidden") are kept rather than
        # silently dropped.
        name, separator, value = line.partition("=")
        if separator:
            make_print_config[name.strip()] = value.strip()
    return make_print_config
83
84
def build_config_dict(ext):
    """Collect the compiler settings that apply to the extension *ext*.

    Emulates the way distutils combines settings from sysconfig,
    environment variables, and the extension being built.

    Returns a dict with the keys ``CC``, ``CPPFLAGS``, ``CFLAGS``,
    ``LDFLAGS`` and ``LIBS``, each a single space-joined string. The
    result is suitable for writing to a generated file or for running
    configure (provided the returned LIBS is ignored).
    """
    def from_environ(name):
        # One-element list if the environment variable is set, else empty.
        if name in os.environ:
            return [os.environ[name]]
        return []

    def from_sysconfig(name):
        # One-element list if sysconfig knows the variable, else empty.
        setting = sysconfig.get_config_var(name)
        if setting is None:
            return []
        return [setting]

    def prefixed(flag, values):
        # Prepend the flag to each value; quote any result containing a space.
        words = []
        for item in values:
            word = flag + item
            words.append("'" + word + "'" if " " in word else word)
        return words

    def macro_definitions(macros):
        # (name, None) -> "name"; (name, value) -> "name=value"
        return [m[0] if m[1] is None else m[0] + "=" + m[1] for m in macros]

    # For CC, the first of these that is set wins.
    compiler_candidates = from_environ('CC') + from_sysconfig('CC') + ['gcc']

    # distutils ignores sysconfig for CPPFLAGS
    cppflag_words = (from_environ('CPPFLAGS') +
                     prefixed('-I', ext.include_dirs) +
                     prefixed('-D', macro_definitions(ext.define_macros)) +
                     prefixed('-U', ext.undef_macros))

    cflag_words = (from_sysconfig('CFLAGS') + from_environ('CFLAGS') +
                   ext.extra_compile_args)

    # distutils actually includes $CPPFLAGS here too, but that's weird and
    # unnecessary for us as we know the output LDFLAGS will be used correctly
    ldflag_words = (from_sysconfig('LDFLAGS') + from_environ('LDFLAGS') +
                    from_environ('CFLAGS') +
                    prefixed('-L', ext.library_dirs) +
                    ext.extra_link_args)

    # ext.libraries is computed (incorporating $LIBS etc) during configure
    lib_words = prefixed('-l', ext.libraries)

    return {
        'CC': compiler_candidates[0],
        'CPPFLAGS': " ".join(cppflag_words),
        'CFLAGS': " ".join(cflag_words),
        'LDFLAGS': " ".join(ldflag_words),
        'LIBS': " ".join(lib_words),
    }
126
127
def write_configvars_header(filename, ext, prefix):
    """Write the build settings of *ext* to *filename* as C ``#define`` lines.

    Every entry returned by build_config_dict(ext) is written as
    ``#define <prefix>_<NAME> "<value>"``. For any prefix other than
    'HTS', HTSDIR and CURSES_LIB are added with the value '(unused)'.
    """
    settings = build_config_dict(ext)
    if prefix != 'HTS':
        for placeholder in ('HTSDIR', 'CURSES_LIB'):
            settings[placeholder] = '(unused)'

    log.info("creating %s for '%s' extension", filename, ext.name)
    with open(filename, "w") as header:
        header.writelines(
            '#define {}_{} "{}"\n'.format(prefix, name, setting)
            for name, setting in settings.items())
138
139
@contextmanager
def set_compiler_envvars():
    """Expose sysconfig's CC, CFLAGS and LDFLAGS through the environment.

    A variable that is already present in os.environ is reported and
    left untouched. Otherwise, if sysconfig defines it, its value is
    reported and copied into os.environ for the duration of the ``with``
    block and deleted again on exit.
    """
    injected = []
    for name in ('CC', 'CFLAGS', 'LDFLAGS'):
        if name in os.environ:
            print("# pysam: (env) {}={}".format(name, os.environ[name]))
            continue
        if name not in sysconfig.get_config_vars():
            continue
        setting = sysconfig.get_config_var(name)
        print("# pysam: (sysconfig) {}={}".format(name, setting))
        os.environ[name] = setting
        injected.append(name)

    try:
        yield
    finally:
        # Only remove what this context manager added itself.
        for name in injected:
            del os.environ[name]
157
158
def configure_library(library_dir, env_options=None, options=[]):
    """Run the configure script in *library_dir* until one attempt succeeds.

    *env_options*, when not None, is tried first, followed by each entry
    of *options* in order. When the READTHEDOCS environment variable is
    "True", *env_options* is replaced by "--disable-bz2".

    Returns the option string of the first successful configure run, or
    None if every attempt failed.

    Raises ValueError if *library_dir* contains no configure script.
    """
    script = os.path.join(library_dir, "configure")

    # RTD has no bzip2 development libraries installed:
    if os.environ.get("READTHEDOCS") == "True":
        env_options = "--disable-bz2"

    if not os.path.exists(script):
        raise ValueError(
            "configure script {} does not exist".format(script))

    candidates = list(options)
    if env_options is not None:
        candidates.insert(0, env_options)

    with changedir(library_dir), set_compiler_envvars():
        for candidate in candidates:
            if run_configure(candidate):
                return candidate

    return None
182
183
def distutils_dir_name(dname):
    """Return ``<dname>.<platform>-<major>.<minor>`` for this interpreter.

    This follows the distutils build directory naming described at
    http://stackoverflow.com/questions/14320220/
               testing-python-c-libraries-get-build-path
    """
    major, minor = sys.version_info[:2]
    return "{}.{}-{}.{}".format(
        dname, sysconfig.get_platform(), major, minor)
193
194
def get_pysam_version():
    """Return ``__version__`` from the ``version`` module in ``pysam/``.

    "pysam" is put at the front of sys.path (and left there) so that
    the file can be imported as the top-level module ``version``.
    """
    sys.path.insert(0, "pysam")
    from version import __version__ as pysam_version
    return pysam_version
199
200
# Override sdist command to ensure Cythonized *.c files are included.
class cythonize_sdist(sdist):
    """sdist command that cythonizes the extension modules before packing."""

    # Remove when setuptools (as installed on GH runners) has these options
    if 'owner=' not in [opt[0] for opt in sdist.user_options]:
        sdist.user_options.append(('owner=', 'u', 'Specify owner inside tar'))
    if 'group=' not in [opt[0] for opt in sdist.user_options]:
        sdist.user_options.append(('group=', 'g', 'Specify group inside tar'))

    def run(self):
        """Cythonize all extension modules, then run the regular sdist."""
        # Imported here so that Cython is only needed when sdist is run.
        from Cython.Build import cythonize as run_cythonize
        extensions = self.distribution.ext_modules
        run_cythonize(extensions)
        super().run()
213
214
class clean_ext(Command):
    """Command removing Cython-generated C files and generated config headers."""

    description = "clean up Cython temporary files"
    user_options = []

    def initialize_options(self):
        """This command has no options to initialise."""

    def finalize_options(self):
        """This command has no options to finalise."""

    def run(self):
        """Delete pysam/libc*.c and */*config*.h in the bundled source dirs."""
        cython_outputs = glob.glob(os.path.join("pysam", "libc*.c"))
        if cython_outputs:
            log.info("removing 'pysam/libc*.c' (%s Cython objects)",
                     len(cython_outputs))
        for path in cython_outputs:
            os.remove(path)

        generated_headers = []
        for source_dir in ("htslib", "samtools", "bcftools"):
            generated_headers += glob.glob(
                os.path.join(source_dir, "*config*.h"))
        if generated_headers:
            log.info("removing '*/*config*.h' (%s generated headers)",
                     len(generated_headers))
        for path in generated_headers:
            os.remove(path)
239
240
# How to link against HTSLIB (environment variable HTSLIB_MODE):
# shared:   build shared chtslib from builtin htslib code.
# external: use shared libhts.so compiled outside of
#           pysam
# separate: use included htslib and include in each extension
#           module. No dependencies between modules and works with
#           setup.py install, but wasteful in terms of memory and
#           compilation time. Fallback if shared module compilation
#           fails.
HTSLIB_MODE = os.environ.get("HTSLIB_MODE", "shared")

# Optional overrides read from the environment; None when unset.
HTSLIB_LIBRARY_DIR = os.environ.get("HTSLIB_LIBRARY_DIR")
HTSLIB_INCLUDE_DIR = os.environ.get("HTSLIB_INCLUDE_DIR")
HTSLIB_CONFIGURE_OPTIONS = os.environ.get("HTSLIB_CONFIGURE_OPTIONS")

# Set to "builtin" further down when the bundled htslib is used.
HTSLIB_SOURCE = None

package_list = [
    'pysam',
    'pysam.include',
    'pysam.include.samtools',
    'pysam.include.bcftools',
]
package_dirs = {
    'pysam': 'pysam',
    'pysam.include.samtools': 'samtools',
    'pysam.include.bcftools': 'bcftools',
}

# Config headers that are generated automatically when they do not
# already exist and are not created by the configure scripts of the
# subpackages.
config_headers = [
    "samtools/config.h",
    "bcftools/config.h",
]
270
# If cython is available, pysam is built from the .pyx files using
# cython. Otherwise the C files included in the distribution are used.
if HAVE_CYTHON:
    print ("# pysam: cython is available - using cythonize if necessary")
    source_pattern = "pysam/libc%s.pyx"
else:
    print ("# pysam: no cython available - using pre-compiled C")
    source_pattern = "pysam/libc%s.c"

# Exit if the selected source file for libchtslib is missing, e.g. no
# pre-compiled C files and no cython available.
fn = source_pattern % "htslib"
if not os.path.exists(fn):
    # Note the space after the full stop: the message consists of two
    # separate sentences.
    raise ValueError(
        "no cython installed, but can not find {}. "
        "Make sure that cython is installed when building "
        "from the repository"
        .format(fn))

print ("# pysam: htslib mode is {}".format(HTSLIB_MODE))
print ("# pysam: HTSLIB_CONFIGURE_OPTIONS={}".format(
    HTSLIB_CONFIGURE_OPTIONS))
# Stays None unless the bundled htslib is configured successfully below.
htslib_configure_options = None
294
if HTSLIB_MODE in ['shared', 'separate']:
    # Both modes use the htslib sources bundled with pysam: register
    # the htslib directories as packages and configure the library.
    package_list += ['pysam.include.htslib',
                     'pysam.include.htslib.htslib']
    package_dirs.update({'pysam.include.htslib':'htslib'})

    # The user-supplied options (if any) are tried first, then
    # configure with and without libcurl.
    htslib_configure_options = configure_library(
        "htslib",
        HTSLIB_CONFIGURE_OPTIONS,
        ["--enable-libcurl",
         "--disable-libcurl"])

    HTSLIB_SOURCE = "builtin"
    print ("# pysam: htslib configure options: {}".format(
        str(htslib_configure_options)))

    config_headers += ["htslib/config.h"]
    if htslib_configure_options is None:
        # every configure attempt failed: create empty config.h file
        with open("htslib/config.h", "w") as outf:
            outf.write(
                "/* empty config.h created by pysam */\n")
            outf.write(
                "/* conservative compilation options */\n")

    with changedir("htslib"):
        htslib_make_options = run_make_print_config()

    for key, value in htslib_make_options.items():
        print ("# pysam: htslib_config {}={}".format(key, value))

    # Libraries htslib links against: always zlib, plus whatever the
    # Makefile reports in LIBS (with the "-l" prefix removed).
    external_htslib_libraries = ['z']
    if "LIBS" in htslib_make_options:
        external_htslib_libraries.extend(
            [re.sub("^-l", "", x)
             for x in htslib_make_options["LIBS"].split()])

    # Map every object file of libhts to its C source. split() without
    # an argument ignores repeated whitespace (no empty entries), and
    # the anchored raw pattern only rewrites the ".o" extension.
    shared_htslib_sources = [
        re.sub(r"\.o$", ".c", os.path.join("htslib", x))
        for x in htslib_make_options["LIBHTS_OBJS"].split()]

    htslib_sources = []
335
if HTSLIB_LIBRARY_DIR:
    # linking against a shared, externally installed htslib version, no
    # sources required for htslib
    htslib_sources = []
    shared_htslib_sources = []
    chtslib_sources = []
    htslib_library_dirs = [HTSLIB_LIBRARY_DIR]
    # HTSLIB_INCLUDE_DIR is optional. When it is unset, do not put None
    # into the list: every include directory is later passed to
    # os.path.abspath(), which raises TypeError for None.
    htslib_include_dirs = [HTSLIB_INCLUDE_DIR] if HTSLIB_INCLUDE_DIR else []
    external_htslib_libraries = ['z', 'hts']
elif HTSLIB_MODE == 'separate':
    # add to each pysam component a separately compiled
    # htslib
    htslib_sources = shared_htslib_sources
    htslib_library_dirs = []
    htslib_include_dirs = ['htslib']
elif HTSLIB_MODE == 'shared':
    # link each pysam component against the same
    # htslib built from sources included in the pysam
    # package.
    htslib_library_dirs = [
        "pysam",  # when using setup.py develop?
        ".",  # when using setup.py develop?
        os.path.join("build", distutils_dir_name("lib"), "pysam")]

    htslib_include_dirs = ['htslib']
else:
    raise ValueError("unknown HTSLIB value '%s'" % HTSLIB_MODE)
364
# build config.py: records where htslib comes from and, for the builtin
# htslib, the value of selected feature macros from htslib/config.h.
with open(os.path.join("pysam", "config.py"), "w") as outf:
    outf.write('HTSLIB = "{}"\n'.format(HTSLIB_SOURCE))
    # Macros that are not defined in config.h are reported as 0.
    config_values = collections.defaultdict(int)

    if HTSLIB_SOURCE == "builtin":
        with open(os.path.join("htslib", "config.h")) as inf:
            for line in inf:
                # Only "#define NAME VALUE" lines carry a setting. Lines
                # that do not match (including a value-less
                # "#define NAME") are skipped instead of raising.
                match = re.match(r"#define (\S+)\s+(\S+)", line)
                if match:
                    key, value = match.groups()
                    config_values[key] = value
        for key in ["ENABLE_GCS",
                    "ENABLE_PLUGINS",
                    "ENABLE_S3",
                    "HAVE_COMMONCRYPTO",
                    "HAVE_HMAC",
                    "HAVE_LIBBZ2",
                    "HAVE_LIBCURL",
                    "HAVE_LIBDEFLATE",
                    "HAVE_LIBLZMA",
                    "HAVE_MMAP"]:
            outf.write("{} = {}\n".format(key, config_values[key]))
            print ("# pysam: config_option: {}={}".format(key, config_values[key]))
389
# Any config header that has been created neither automatically nor by
# the user gets a placeholder consisting of two comment lines.
for fn in config_headers:
    if os.path.exists(fn):
        continue
    with open(fn, "w") as outf:
        outf.writelines((
            "/* empty config.h created by pysam */\n",
            "/* conservative compilation options */\n"))
399
#######################################################
# Platform-specific sources and compiler flags.
if platform.system() != 'Windows':
    include_os = []
    os_c_files = []
    # for python 3.4, see for example
    # http://stackoverflow.com/questions/25587039/
    # error-compiling-rpy2-on-python3-4-due-to-werror-
    # declaration-after-statement
    extra_compile_args = [
        "-Wno-unused",
        "-Wno-strict-prototypes",
        "-Wno-sign-compare",
        "-Wno-error=declaration-after-statement",
    ]
else:
    # Windows compatibility - untested
    include_os = ['win32']
    os_c_files = ['win32/getopt.c']
    extra_compile_args = []

# No preprocessor macros are defined for the extensions.
define_macros = []
420
# Suffix of compiled extension modules: 'EXT_SUFFIX', falling back to
# 'SO' when 'EXT_SUFFIX' is unset or empty.
suffix = (sysconfig.get_config_var('EXT_SUFFIX')
          or sysconfig.get_config_var('SO'))

# Library names of pysam's own extension modules: the module file name
# with its last extension removed.
internal_htslib_libraries = [
    os.path.splitext("chtslib{}".format(suffix))[0],
]
internal_samtools_libraries = [
    os.path.splitext("csamtools{}".format(suffix))[0],
    os.path.splitext("cbcftools{}".format(suffix))[0],
]
internal_pysamutil_libraries = [
    os.path.splitext("cutils{}".format(suffix))[0],
]
433
# Libraries every regular pysam extension module is linked against.
libraries_for_pysam_module = (
    external_htslib_libraries
    + internal_htslib_libraries
    + internal_pysamutil_libraries)

# The order of the modules matters to make sure that dependencies are
# resolved. The dependency structure is as follows:
# libchtslib: htslib utility functions and htslib itself if builtin is set.
# libcsamtools: samtools code (builtin)
# libcbcftools: bcftools code (builtin)
# libcutils: General utility functions, depends on all of the above
# libcXXX (pysam module): depends on libchtslib and libcutils

# The list below uses the union of include_dirs and library_dirs for
# reasons of simplicity.
446
def prebuild_libchtslib(ext, force):
    """Write htslib/config_vars.h for *ext* when htslib is built by pysam.

    Does nothing unless HTSLIB_MODE is 'shared' or 'separate'. *force*
    is accepted but not used.
    """
    if HTSLIB_MODE in ('shared', 'separate'):
        write_configvars_header("htslib/config_vars.h", ext, "HTS")
450
def prebuild_libcsamtools(ext, force):
    """Write samtools/samtools_config_vars.h for *ext*.

    *force* is accepted but not used.
    """
    header = "samtools/samtools_config_vars.h"
    write_configvars_header(header, ext, "SAMTOOLS")
453
# Keyword arguments for each extension, in build order. The first four
# entries have individual sources and libraries.
modules = [
    {
        "name": "pysam.libchtslib",
        "prebuild_func": prebuild_libchtslib,
        "sources": ([source_pattern % "htslib", "pysam/htslib_util.c"]
                    + shared_htslib_sources + os_c_files),
        "libraries": external_htslib_libraries,
    },
    {
        "name": "pysam.libcsamtools",
        "prebuild_func": prebuild_libcsamtools,
        "sources": ([source_pattern % "samtools"]
                    + glob.glob(os.path.join("samtools", "*.pysam.c"))
                    + [os.path.join("samtools", "lz4", "lz4.c")]
                    + htslib_sources + os_c_files),
        "libraries": external_htslib_libraries + internal_htslib_libraries,
    },
    {
        "name": "pysam.libcbcftools",
        "sources": ([source_pattern % "bcftools"]
                    + glob.glob(os.path.join("bcftools", "*.pysam.c"))
                    + htslib_sources + os_c_files),
        "libraries": external_htslib_libraries + internal_htslib_libraries,
    },
    {
        "name": "pysam.libcutils",
        "sources": ([source_pattern % "utils", "pysam/pysam_util.c"]
                    + htslib_sources + os_c_files),
        "libraries": (external_htslib_libraries + internal_htslib_libraries
                      + internal_samtools_libraries),
    },
]

# The remaining modules all follow one pattern: a single Cython source
# plus the htslib and OS-specific sources, linked against the common
# set of libraries.
modules += [
    {
        "name": "pysam.libc" + stem,
        "sources": [source_pattern % stem] + htslib_sources + os_c_files,
        "libraries": libraries_for_pysam_module,
    }
    for stem in ("alignmentfile",
                 "samfile",
                 "alignedsegment",
                 "tabix",
                 "faidx",
                 "bcf",
                 "bgzf",
                 "tabixproxies",
                 "vcf")
]
498
# Options shared by all extension modules.
common_options = {
    "language": "c",
    "extra_compile_args": extra_compile_args,
    "define_macros": define_macros,
    # for out-of-tree compilation, use absolute paths
    "library_dirs": list(map(os.path.abspath,
                             ["pysam"] + htslib_library_dirs)),
    "include_dirs": list(map(
        os.path.abspath,
        htslib_include_dirs
        + ["samtools", "samtools/lz4", "bcftools", "pysam", "."]
        + include_os)),
}

# add common options (in python >3.5, could use n = {**a, **b})
for module in modules:
    module.update(common_options)
511
# Trove classifiers, one per line; blank lines are dropped below.
classifiers = """
Development Status :: 4 - Beta
Intended Audience :: Science/Research
Intended Audience :: Developers
License :: OSI Approved
Programming Language :: Python
Topic :: Software Development
Topic :: Scientific/Engineering
Operating System :: POSIX
Operating System :: Unix
Operating System :: MacOS
"""

# Keyword arguments passed to setup().
metadata = dict(
    name="pysam",
    version=get_pysam_version(),
    description="pysam",
    long_description=__doc__,
    author="Andreas Heger",
    author_email="andreas.heger@gmail.com",
    license="MIT",
    platforms=["POSIX", "UNIX", "MacOS"],
    classifiers=list(filter(None, classifiers.split("\n"))),
    url="https://github.com/pysam-developers/pysam",
    packages=package_list,
    requires=['cython (>=0.29.12)'],
    ext_modules=[Extension(**opts) for opts in modules],
    cmdclass={
        'build_ext': build_ext,
        'clean_ext': clean_ext,
        'sdist': cythonize_sdist,
    },
    package_dir=package_dirs,
    package_data={'': ['*.pxd', '*.h']},
    # do not pack in order to permit linking to csamtools.so
    zip_safe=False,
)

if __name__ == '__main__':
    dist = setup(**metadata)
548