1#!/usr/bin/env python
2
3###############################################################################
4#                                                                             #
5#    This program is free software: you can redistribute it and/or modify     #
6#    it under the terms of the GNU General Public License as published by     #
7#    the Free Software Foundation, either version 3 of the License, or        #
8#    (at your option) any later version.                                      #
9#                                                                             #
10#    This program is distributed in the hope that it will be useful,          #
11#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
12#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
13#    GNU General Public License for more details.                             #
14#                                                                             #
15#    You should have received a copy of the GNU General Public License        #
16#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
17#                                                                             #
18###############################################################################
19
# Short description of the tool; shown in the banner printed at start-up.
__prog_desc__ = 'prune taxa with identical sequences from tree'

# Authorship, licensing and release metadata for this script.
# __version__, __author__ and __email__ are also shown in the start-up banner.
__author__ = 'Donovan Parks'
__copyright__ = 'Copyright 2013'
__credits__ = ['Donovan Parks']
__license__ = 'GPL3'
__version__ = '0.1'
__maintainer__ = 'Donovan Parks'
__email__ = 'donovan.parks@gmail.com'
__status__ = 'Development'
30
31import argparse
32
33import dendropy
34
class PruneTree(object):
    """Prune taxa with identical sequences from a Newick tree."""

    def __init__(self):
        """Create a pruner; the object holds no state."""
        pass

    def __readDuplicateTaxa(self, dupSeqFile):
        """Collect the labels of taxa to prune from a duplicate-sequence file.

        Every line is split on whitespace. The first field of each line is
        skipped and all remaining fields are returned, in file order.
        Presumably the first field is the representative taxon that stays in
        the tree -- confirm against the tool that produced the file.

        Args:
            dupSeqFile: path to the text file listing duplicate sequences.

        Returns:
            List of taxon labels (strings); empty if no line has more than
            one field.

        Raises:
            IOError/OSError: if the file cannot be opened or read.
        """
        dupTaxa = []

        # 'with' closes the file handle even if reading fails part-way.
        with open(dupSeqFile) as fin:
            for line in fin:
                # Blank or single-field lines yield an empty slice and
                # therefore contribute nothing.
                dupTaxa.extend(line.split()[1:])

        return dupTaxa

    def run(self, dupSeqFile, inputTree, outputTree):
        """Remove the duplicate taxa from a tree and write the result.

        Args:
            dupSeqFile: path to the file listing duplicate sequences.
            inputTree: path to the Newick tree to prune.
            outputTree: path the pruned Newick tree is written to.
        """
        # get list of taxa with duplicate sequences
        dupTaxa = self.__readDuplicateTaxa(dupSeqFile)

        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print('Pruning %d taxa.' % len(dupTaxa))

        # prune duplicate taxa from tree
        tree = dendropy.Tree.get_from_path(inputTree, schema='newick', as_rooted=True, preserve_underscores=True)

        tree.prune_taxa_with_labels(dupTaxa)

        tree.write_to_path(outputTree, schema='newick', suppress_rooting=True)
62
if __name__ == '__main__':
    # Start-up banner: tool name, version, description and author contact.
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print('PruneTree v' + __version__ + ': ' + __prog_desc__)
    print('  by ' + __author__ + ' (' + __email__ + ')' + '\n')

    # Three required positional arguments: duplicate list, input tree, output tree.
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('duplicate_seq_file', help='file indicating duplicate sequences as determined with seqmagick')
    parser.add_argument('input_tree', help='tree to prune')
    parser.add_argument('output_tree', help='output tree')

    args = parser.parse_args()

    # Bind the instance to its own name so the PruneTree class stays usable.
    pruneTree = PruneTree()
    pruneTree.run(args.duplicate_seq_file, args.input_tree, args.output_tree)
76