1#! /usr/bin/env python
2# -*- coding: utf-8 -*-
3
4##############################################################################
5##  DendroPy Phylogenetic Computing Library.
6##
7##  Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder.
8##  All rights reserved.
9##
10##  See "LICENSE.rst" for terms and conditions of usage.
11##
12##  If you use this work or any portion thereof in published work,
13##  please cite it as:
14##
15##     Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
16##     for phylogenetic computing. Bioinformatics 26: 1569-1571.
17##
18##############################################################################
19
20"""
21Wrappers for interacting with SeqGen. Originally part of PySeqGen.
22"""
23
24import subprocess
25import uuid
26import tempfile
27import socket
28import random
29import os
30import sys
31
32from optparse import OptionGroup
33from optparse import OptionParser
34
35import dendropy
36from dendropy.utility.messaging import get_logger
37from dendropy.utility import processio
38_LOG = get_logger("interop.seqgen")
39
40HOSTNAME = socket.gethostname()
41PID = os.getpid()
42
43def _get_strongly_unique_tempfile(dir=None):
44    return tempfile.NamedTemporaryFile(dir=dir, prefix="dendropy_tempfile-{0}-{1}-{2}".format(HOSTNAME, PID, uuid.uuid4()))
45
46def _get_tempfile(dir=None):
47    return tempfile.NamedTemporaryFile(dir=dir)
48
49class SeqGen(object):
50    """
51    This class wraps all attributes and input needed to make a call to SeqGen.
52    """
53
54    class SubstitutionModel(object):
55        def __init__(self, idstr):
56            self.idstr = idstr
57        def __str__(self):
58            return self.idstr
59
60    F84 = SubstitutionModel("F84")
61    HKY = SubstitutionModel("HKY")
62    GTR = SubstitutionModel("GTR")
63    JTT = SubstitutionModel("JTT")
64    WAG = SubstitutionModel("WAG")
65    PAM = SubstitutionModel("PAM")
66    BLOSUM = SubstitutionModel("BLOSUM")
67    MTREV = SubstitutionModel("MTREV")
68    CPREV = SubstitutionModel("CPREV")
69    GENERAL = SubstitutionModel("GENERAL")
70    MODELS = [F84, HKY, GTR, JTT, WAG, PAM, BLOSUM, MTREV, CPREV, GENERAL]
71    MODEL_IDS = [str(m) for m in MODELS]
72
73    def get_model(idstr):
74        for model in SeqGen.MODELS:
75            if idstr.upper() == model.idstr.upper():
76                return model
77        return None
78    get_model = staticmethod(get_model)
79
80    def __init__(self, strongly_unique_tempfiles=False):
81        """
82        Sets up all properties, which (generally) map directly to command
83        parameters of Seq-Gen.
84        """
85
86        # control tempfile generation
87        if strongly_unique_tempfiles:
88            self.get_tempfile = _get_strongly_unique_tempfile
89        else:
90            self.get_tempfile = _get_tempfile
91
92        # python object specific attributes
93        self.seqgen_path = 'seq-gen'
94        self.rng_seed = None
95        self._rng = None
96
97        # following are passed to seq-gen in one form or another
98        self.char_model = 'HKY'
99        self.seq_len = None
100        self.num_partitions = None
101        self.scale_branch_lens = None
102        self.scale_tree_len = None
103        self.codon_pos_rates = None
104        self.gamma_shape = None
105        self.gamma_cats = None
106        self.prop_invar = None
107        self.state_freqs = None
108        self.ti_tv = 0.5 # = kappa of 1.0, i.e. JC
109        self.general_rates = None
110        self.ancestral_seq = None
111        self.output_text_append = None
112        self.write_ancestral_seqs = False
113        self.write_site_rates = False
114
115    def _get_rng(self):
116        if self._rng is None:
117            self._rng = random.Random(self.rng_seed)
118        return self._rng
119    def _set_rng(self, rng):
120        self._rng = rng
121    rng = property(_get_rng, _set_rng)
122
123    def _get_kappa(self):
124        return float(self.ti_tv) / 2
125    def _set_kappa(self, kappa):
126        self.ti_tv = kappa * 2
127
128    def _compose_arguments(self):
129        """
130        Composes and returns a list of strings that make up the arguments to a Seq-Gen
131        call, based on the attribute values of the object.
132        """
133        args = []
134        args.append(self.seqgen_path)
135        if self.char_model:
136            args.append("-m%s" % str(self.char_model))
137        if self.seq_len:
138            args.append("-l%s" % self.seq_len)
139        if self.num_partitions:
140            args.append("-p%s" % self.num_partitions)
141        if self.scale_branch_lens:
142            args.append("-s%s" % self.scale_branch_lens)
143        if self.scale_tree_len:
144            args.append("-d%s" % self.scale_tree_len)
145        if self.codon_pos_rates:
146            args.append("-c%s" % (",".join(self.codon_pos_rates)))
147        if self.gamma_shape:
148            args.append("-a%s" % self.gamma_shape)
149        if self.gamma_cats:
150            args.append("-g%s" % self.gamma_cats)
151        if self.prop_invar:
152            args.append("-i%s" % self.prop_invar)
153        if self.state_freqs:
154            if isinstance(self.state_freqs, str):
155                args.append("-f%s" % self.state_freqs)
156            else:
157                args.append("-f%s" % (",".join([str(s) for s in self.state_freqs])))
158        if self.ti_tv and (self.char_model in ['HKY', 'F84']):
159            args.append("-t%s" % self.ti_tv)
160        if self.general_rates:
161            if isinstance(self.general_rates, str):
162                args.append("-r%s" % self.general_rates)
163            else:
164                args.append("-r%s" % (",".join([str(r) for r in self.general_rates])))
165        if self.ancestral_seq:
166            args.append("-k%s" % self.ancestral_seq)
167        if self.output_text_append:
168            args.append("-x'%s'" % self.output_text_append)
169        if self.write_ancestral_seqs:
170            args.append("-wa")
171        if self.write_site_rates:
172            args.append("-wr")
173
174        # following are controlled directly by the wrapper
175        # silent running
176        args.append("-q")
177        # we explicitly pass a random number seed on each call
178        args.append("-z%s" % self.rng.randint(0, sys.maxsize))
179        # force nexus
180        args.append("-on")
181        # force one dataset at a time
182        args.append("-n1")
183        return args
184
185    def generate(self, trees, dataset=None, taxon_namespace=None, **kwargs):
186        args=self._compose_arguments()
187        tree_inputf = self.get_tempfile()
188        trees.write_to_path(tree_inputf.name,
189                "newick",
190                suppress_rooting=True,
191                suppress_internal_node_labels=True)
192        tree_inputf.flush()
193        args.append(tree_inputf.name)
194        #_LOG.debug("seq-gen args: = %s" % " ".join(args))
195        run = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
196        stdout, stderr = processio.communicate(run)
197        if stderr or run.returncode != 0:
198            raise RuntimeError("Seq-gen error: %s" % stderr)
199        if taxon_namespace is None:
200            taxon_namespace = trees.taxon_namespace
201        if dataset is None:
202            dataset = dendropy.DataSet(**kwargs)
203            if taxon_namespace is not None:
204                dataset.attach_taxon_namespace(taxon_namespace)
205        dataset.read(data=stdout, schema="nexus")
206        return dataset
207
208
209
210