1#! /usr/bin/env python 2# -*- coding: utf-8 -*- 3 4############################################################################## 5## DendroPy Phylogenetic Computing Library. 6## 7## Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder. 8## All rights reserved. 9## 10## See "LICENSE.rst" for terms and conditions of usage. 11## 12## If you use this work or any portion thereof in published work, 13## please cite it as: 14## 15## Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library 16## for phylogenetic computing. Bioinformatics 26: 1569-1571. 17## 18############################################################################## 19 20""" 21Wrappers for interacting with SeqGen. Originally part of PySeqGen. 22""" 23 24import subprocess 25import uuid 26import tempfile 27import socket 28import random 29import os 30import sys 31 32from optparse import OptionGroup 33from optparse import OptionParser 34 35import dendropy 36from dendropy.utility.messaging import get_logger 37from dendropy.utility import processio 38_LOG = get_logger("interop.seqgen") 39 40HOSTNAME = socket.gethostname() 41PID = os.getpid() 42 43def _get_strongly_unique_tempfile(dir=None): 44 return tempfile.NamedTemporaryFile(dir=dir, prefix="dendropy_tempfile-{0}-{1}-{2}".format(HOSTNAME, PID, uuid.uuid4())) 45 46def _get_tempfile(dir=None): 47 return tempfile.NamedTemporaryFile(dir=dir) 48 49class SeqGen(object): 50 """ 51 This class wraps all attributes and input needed to make a call to SeqGen. 52 """ 53 54 class SubstitutionModel(object): 55 def __init__(self, idstr): 56 self.idstr = idstr 57 def __str__(self): 58 return self.idstr 59 60 F84 = SubstitutionModel("F84") 61 HKY = SubstitutionModel("HKY") 62 GTR = SubstitutionModel("GTR") 63 JTT = SubstitutionModel("JTT") 64 WAG = SubstitutionModel("WAG") 65 PAM = SubstitutionModel("PAM") 66 BLOSUM = SubstitutionModel("BLOSUM") 67 MTREV = SubstitutionModel("MTREV") 68 CPREV = SubstitutionModel("CPREV") 69 GENERAL = SubstitutionModel("GENERAL") 70 MODELS = [F84, HKY, GTR, JTT, WAG, PAM, BLOSUM, MTREV, CPREV, GENERAL] 71 MODEL_IDS = [str(m) for m in MODELS] 72 73 def get_model(idstr): 74 for model in SeqGen.MODELS: 75 if idstr.upper() == model.idstr.upper(): 76 return model 77 return None 78 get_model = staticmethod(get_model) 79 80 def __init__(self, strongly_unique_tempfiles=False): 81 """ 82 Sets up all properties, which (generally) map directly to command 83 parameters of Seq-Gen. 84 """ 85 86 # control tempfile generation 87 if strongly_unique_tempfiles: 88 self.get_tempfile = _get_strongly_unique_tempfile 89 else: 90 self.get_tempfile = _get_tempfile 91 92 # python object specific attributes 93 self.seqgen_path = 'seq-gen' 94 self.rng_seed = None 95 self._rng = None 96 97 # following are passed to seq-gen in one form or another 98 self.char_model = 'HKY' 99 self.seq_len = None 100 self.num_partitions = None 101 self.scale_branch_lens = None 102 self.scale_tree_len = None 103 self.codon_pos_rates = None 104 self.gamma_shape = None 105 self.gamma_cats = None 106 self.prop_invar = None 107 self.state_freqs = None 108 self.ti_tv = 0.5 # = kappa of 1.0, i.e. JC 109 self.general_rates = None 110 self.ancestral_seq = None 111 self.output_text_append = None 112 self.write_ancestral_seqs = False 113 self.write_site_rates = False 114 115 def _get_rng(self): 116 if self._rng is None: 117 self._rng = random.Random(self.rng_seed) 118 return self._rng 119 def _set_rng(self, rng): 120 self._rng = rng 121 rng = property(_get_rng, _set_rng) 122 123 def _get_kappa(self): 124 return float(self.ti_tv) / 2 125 def _set_kappa(self, kappa): 126 self.ti_tv = kappa * 2 127 128 def _compose_arguments(self): 129 """ 130 Composes and returns a list of strings that make up the arguments to a Seq-Gen 131 call, based on the attribute values of the object. 132 """ 133 args = [] 134 args.append(self.seqgen_path) 135 if self.char_model: 136 args.append("-m%s" % str(self.char_model)) 137 if self.seq_len: 138 args.append("-l%s" % self.seq_len) 139 if self.num_partitions: 140 args.append("-p%s" % self.num_partitions) 141 if self.scale_branch_lens: 142 args.append("-s%s" % self.scale_branch_lens) 143 if self.scale_tree_len: 144 args.append("-d%s" % self.scale_tree_len) 145 if self.codon_pos_rates: 146 args.append("-c%s" % (",".join(self.codon_pos_rates))) 147 if self.gamma_shape: 148 args.append("-a%s" % self.gamma_shape) 149 if self.gamma_cats: 150 args.append("-g%s" % self.gamma_cats) 151 if self.prop_invar: 152 args.append("-i%s" % self.prop_invar) 153 if self.state_freqs: 154 if isinstance(self.state_freqs, str): 155 args.append("-f%s" % self.state_freqs) 156 else: 157 args.append("-f%s" % (",".join([str(s) for s in self.state_freqs]))) 158 if self.ti_tv and (self.char_model in ['HKY', 'F84']): 159 args.append("-t%s" % self.ti_tv) 160 if self.general_rates: 161 if isinstance(self.general_rates, str): 162 args.append("-r%s" % self.general_rates) 163 else: 164 args.append("-r%s" % (",".join([str(r) for r in self.general_rates]))) 165 if self.ancestral_seq: 166 args.append("-k%s" % self.ancestral_seq) 167 if self.output_text_append: 168 args.append("-x'%s'" % self.output_text_append) 169 if self.write_ancestral_seqs: 170 args.append("-wa") 171 if self.write_site_rates: 172 args.append("-wr") 173 174 # following are controlled directly by the wrapper 175 # silent running 176 args.append("-q") 177 # we explicitly pass a random number seed on each call 178 args.append("-z%s" % self.rng.randint(0, sys.maxsize)) 179 # force nexus 180 args.append("-on") 181 # force one dataset at a time 182 args.append("-n1") 183 return args 184 185 def generate(self, trees, dataset=None, taxon_namespace=None, **kwargs): 186 args=self._compose_arguments() 187 tree_inputf = self.get_tempfile() 188 trees.write_to_path(tree_inputf.name, 189 "newick", 190 suppress_rooting=True, 191 suppress_internal_node_labels=True) 192 tree_inputf.flush() 193 args.append(tree_inputf.name) 194 #_LOG.debug("seq-gen args: = %s" % " ".join(args)) 195 run = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 196 stdout, stderr = processio.communicate(run) 197 if stderr or run.returncode != 0: 198 raise RuntimeError("Seq-gen error: %s" % stderr) 199 if taxon_namespace is None: 200 taxon_namespace = trees.taxon_namespace 201 if dataset is None: 202 dataset = dendropy.DataSet(**kwargs) 203 if taxon_namespace is not None: 204 dataset.attach_taxon_namespace(taxon_namespace) 205 dataset.read(data=stdout, schema="nexus") 206 return dataset 207 208 209 210