#!/usr/bin/env python

###############################################################################
#                                                                             #
#    This program is free software: you can redistribute it and/or modify     #
#    it under the terms of the GNU General Public License as published by     #
#    the Free Software Foundation, either version 3 of the License, or        #
#    (at your option) any later version.                                      #
#                                                                             #
#    This program is distributed in the hope that it will be useful,          #
#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the             #
#    GNU General Public License for more details.                             #
#                                                                             #
#    You should have received a copy of the GNU General Public License        #
#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
#                                                                             #
###############################################################################

__prog_desc__ = 'prune taxa with identical sequences from tree'

__author__ = 'Donovan Parks'
__copyright__ = 'Copyright 2013'
__credits__ = ['Donovan Parks']
__license__ = 'GPL3'
__version__ = '0.1'
__maintainer__ = 'Donovan Parks'
__email__ = 'donovan.parks@gmail.com'
__status__ = 'Development'

import argparse

import dendropy


class PruneTree(object):
    """Remove taxa listed in a duplicate-sequence file from a Newick tree."""

    def __init__(self):
        pass

    def __readDuplicateTaxa(self, dupSeqFile):
        """Collect the taxon labels to prune from a duplicate-sequence file.

        Every line is split on whitespace. The first token of a line is
        skipped and all remaining tokens are collected.
        NOTE(review): presumably the first token is the representative that
        stays in the tree and the rest are its duplicates -- confirm against
        the seqmagick output format.

        Args:
            dupSeqFile: path to the duplicate-sequence file.

        Returns:
            List of labels in file order. Blank lines and lines holding a
            single token contribute nothing.
        """
        dupTaxa = []

        # 'with' closes the handle even if reading raises part-way through.
        with open(dupSeqFile) as fin:
            for line in fin:
                # split() without a separator already discards surrounding
                # whitespace, so the tokens need no further stripping.
                dupTaxa.extend(line.split()[1:])

        return dupTaxa

    def run(self, dupSeqFile, inputTree, outputTree):
        """Prune the taxa named in dupSeqFile from a tree and save the result.

        Args:
            dupSeqFile: path to the duplicate-sequence file.
            inputTree: path to the Newick tree to prune.
            outputTree: path the pruned Newick tree is written to.
        """
        # get list of taxa with duplicate sequences
        dupTaxa = self.__readDuplicateTaxa(dupSeqFile)

        # A single parenthesised argument prints the same text under both
        # Python 2 (print statement) and Python 3 (print function).
        print('Pruning %d taxa.' % len(dupTaxa))

        # prune duplicate taxa from tree
        tree = dendropy.Tree.get_from_path(inputTree,
                                           schema='newick',
                                           as_rooted=True,
                                           preserve_underscores=True)

        tree.prune_taxa_with_labels(dupTaxa)

        tree.write_to_path(outputTree, schema='newick', suppress_rooting=True)


if __name__ == '__main__':
    print('PruneTree v' + __version__ + ': ' + __prog_desc__)
    print('  by ' + __author__ + ' (' + __email__ + ')' + '\n')

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('duplicate_seq_file',
                        help='file indicating duplicate sequences as determined with seqmagick')
    parser.add_argument('input_tree', help='tree to prune')
    parser.add_argument('output_tree', help='output tree')

    args = parser.parse_args()

    # Bind the instance to its own name so the PruneTree class stays reachable.
    pruner = PruneTree()
    pruner.run(args.duplicate_seq_file, args.input_tree, args.output_tree)