1#! /usr/bin/env python
2# -*- coding: utf-8 -*-
3
4##############################################################################
5##  DendroPy Phylogenetic Computing Library.
6##
7##  Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder.
8##  All rights reserved.
9##
10##  See "LICENSE.rst" for terms and conditions of usage.
11##
12##  If you use this work or any portion thereof in published work,
13##  please cite it as:
14##
15##     Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
16##     for phylogenetic computing. Bioinformatics 26: 1569-1571.
17##
18##############################################################################
19
20"""
21Implementation of NEXUS-schema tree iterator.
22"""
23
24import sys
25if not (sys.version_info.major >= 3 and sys.version_info.minor >= 4):
26    from dendropy.utility.filesys import pre_py34_open as open
27from dendropy.utility import textprocessing
28from dendropy.dataio import ioservice
29from dendropy.dataio import nexusreader
30from dendropy.dataio import nexusprocessing
31
class NexusTreeDataYielder(
        ioservice.TreeDataYielder,
        nexusreader.NexusReader):
    """
    Generator-style reader that yields trees one at a time from NEXUS
    sources, optionally falling back to treating a source as plain Newick
    when it does not start with the ``#NEXUS`` token.
    """

    def __init__(self,
            files=None,
            taxon_namespace=None,
            tree_type=None,
            **kwargs):
        r"""

        Parameters
        ----------
        files : iterable of sources
            Iterable of sources, which can either be strings specifying file
            paths or file-like objects open for reading. If a source element is
            a string (``isinstance(i,str) == True``), then it is assumed to be
            a path to a file. Otherwise, the source is assumed to be a file-like
            object.
        taxon_namespace : |TaxonNamespace| instance
            The operational taxonomic unit concept namespace to use to manage
            taxon definitions.
        tree_type : type
            Passed through unchanged to `ioservice.TreeDataYielder`.
        assume_newick_if_not_nexus : bool
            Optional keyword argument (default: `False`). If `True`, a source
            whose first token is not ``#NEXUS`` is read as a sequence of
            Newick tree statements instead of raising an error. This keyword
            is consumed here and is *not* forwarded to the base reader.
        \*\*kwargs : keyword arguments
            These will be passed directly to the base `nexusreader.NexusReader`
            class. See `nexusreader.NexusReader` for details. Note that
            ``attached_taxon_namespace`` is always overwritten with this
            object's own ``attached_taxon_namespace`` before forwarding.
        """
        ioservice.TreeDataYielder.__init__(self,
                files=files,
                taxon_namespace=taxon_namespace,
                tree_type=tree_type)
        # Must be removed from kwargs before they reach NexusReader.__init__.
        self.assume_newick_if_not_nexus = kwargs.pop("assume_newick_if_not_nexus", False)
        kwargs["attached_taxon_namespace"] = self.attached_taxon_namespace
        nexusreader.NexusReader.__init__(self, **kwargs)
        # Set *after* base-class initialization so that these values win over
        # anything established there: only trees are of interest here.
        self.exclude_chars = True
        self.exclude_trees = False

    ###########################################################################
    ## Implementation of DataYielder interface

    def _yield_items_from_stream(self, stream):
        """
        Yields each tree found in ``stream``.

        The tokenizer is created on first use and re-targeted to ``stream``
        on subsequent calls. If the first token is not ``#NEXUS`` (or the
        stream yields no token at all):

            - when ``assume_newick_if_not_nexus`` is set, trees are read as
              Newick statements until the builder returns `None` (an empty
              stream simply yields nothing);
            - otherwise a `NotNexusFileError` is raised.

        Afterwards the stream is scanned block by block: TAXA blocks are
        parsed, trees in TREES blocks are yielded, and all other blocks are
        skipped.
        """
        if self._nexus_tokenizer is None:
            self.create_tokenizer(stream,
                preserve_unquoted_underscores=self.preserve_underscores)
        else:
            self._nexus_tokenizer.set_stream(stream)
        token = self._nexus_tokenizer.next_token()
        # ``token`` may be None (no token available, e.g. an empty source);
        # guard before calling ``.upper()`` to avoid an AttributeError.
        if token is None or token.upper() != "#NEXUS":
            if not self.assume_newick_if_not_nexus:
                raise self._nexus_error("Expecting '#NEXUS', but found '{}'".format(token),
                        nexusreader.NexusReader.NotNexusFileError)
            if token is not None:
                taxon_symbol_mapper = self._get_taxon_symbol_mapper(
                        taxon_namespace=self.attached_taxon_namespace,
                        enable_lookup_by_taxon_number=False,
                        )
                while True:
                    tree = self._build_tree_from_newick_tree_string(
                            tree_factory=self.tree_factory,
                            taxon_symbol_mapper=taxon_symbol_mapper)
                    if tree is None:
                        break
                    yield tree
            # Falls through to the block scanner below, which does nothing
            # further if the tokenizer is already at the end of the stream.
        while not self._nexus_tokenizer.is_eof():
            # Advance to the next 'BEGIN' (or to the end of the stream).
            token = self._nexus_tokenizer.next_token_ucase()
            while token is not None and token != 'BEGIN' and not self._nexus_tokenizer.is_eof():
                token = self._nexus_tokenizer.next_token_ucase()
            self._nexus_tokenizer.process_and_clear_comments_for_item(
                    self._global_annotations_target,
                    self.extract_comment_metadata)
            # Token following 'BEGIN' names the block.
            token = self._nexus_tokenizer.next_token_ucase()
            if token == 'TAXA':
                self._parse_taxa_block()
            elif token == 'TREES':
                for tree in self._yield_from_trees_block():
                    yield tree
            elif token == 'BEGIN':
                raise self._nexus_error("'BEGIN' found without completion of previous block",
                        nexusreader.NexusReader.IncompleteBlockError)
            else:
                # unknown block
                self._consume_to_end_of_block(token)

    ###########################################################################
    ## Supporting Functions

    def _yield_from_trees_block(self):
        """
        Yields each tree defined in the current TREES block.

        Expectations:
            - current token: "TREES" [part of "BEGIN TREES"]

        If ``exclude_trees`` is set, the block is consumed and nothing is
        yielded. LINK, TITLE and TRANSLATE statements are processed as they
        are encountered; the taxon namespace and taxon symbol mapper are
        resolved lazily on the first TRANSLATE or TREE statement.
        """
        token = self._nexus_tokenizer.cast_current_token_to_ucase()
        if token != "TREES":
            raise self._nexus_error("Expecting 'TREES' token, but instead found '{}'".format(token))
        if self.exclude_trees:
            self._consume_to_end_of_block(self._nexus_tokenizer.current_token)
            return
        self._nexus_tokenizer.skip_to_semicolon() # move past "BEGIN TREES" command
        link_title = None
        taxon_namespace = None
        taxon_symbol_mapper = None
        while ((not self._nexus_tokenizer.is_eof())
                and token is not None
                and token != 'END'
                and token != 'ENDBLOCK'):
            token = self._nexus_tokenizer.next_token_ucase()
            if token == 'LINK':
                link_title = self._parse_link_statement().get("taxa")
            elif token == 'TITLE':
                # Title value is not used here; the statement still has to
                # be parsed so that the tokenizer moves past it.
                self._parse_title_statement()
                token = "" # clear; repopulate at start of loop
            elif token == 'TRANSLATE':
                if taxon_namespace is None:
                    taxon_namespace = self._get_taxon_namespace(link_title)
                taxon_symbol_mapper = self._parse_translate_statement(taxon_namespace)
                token = "" # clear; repopulate at start of loop
            elif token == 'TREE':
                if taxon_namespace is None:
                    taxon_namespace = self._get_taxon_namespace(link_title)
                if taxon_symbol_mapper is None:
                    taxon_symbol_mapper = self._get_taxon_symbol_mapper(taxon_namespace=taxon_namespace)
                # Result intentionally discarded; call retained from the
                # original code (presumably it drains comments captured
                # before the first tree statement -- confirm against the
                # tokenizer implementation).
                self._nexus_tokenizer.pull_captured_comments()
                tree_factory = self.tree_factory
                while True:
                    ## After the following, the current token
                    ## will be the token immediately following
                    ## the terminating semi-colon of a tree
                    ## statement. Typically, this will be
                    ## 'TREE' if there is another tree, or
                    ## 'END'/'ENDBLOCK'.
                    tree = self._parse_tree_statement(
                            tree_factory=tree_factory,
                            taxon_symbol_mapper=taxon_symbol_mapper)
                    yield tree
                    if self._nexus_tokenizer.is_eof() or not self._nexus_tokenizer.current_token:
                        break
                    if self._nexus_tokenizer.cast_current_token_to_ucase() != "TREE":
                        # Hand the token back to the outer loop so that it
                        # can detect 'END'/'ENDBLOCK'.
                        token = self._nexus_tokenizer.current_token
                        break
            elif token == 'BEGIN':
                raise self._nexus_error("'BEGIN' found without completion of previous block",
                        nexusreader.NexusReader.IncompleteBlockError)
        self._nexus_tokenizer.skip_to_semicolon() # move past END command
177
class NexusNewickTreeDataYielder(NexusTreeDataYielder):
    """
    Variant of `NexusTreeDataYielder` in which ``assume_newick_if_not_nexus``
    defaults to `True` unless the caller explicitly supplies a value.
    """

    def __init__(self,
            files=None,
            taxon_namespace=None,
            tree_type=None,
            **kwargs):
        """
        Accepts the same arguments as `NexusTreeDataYielder`; only the
        default of the ``assume_newick_if_not_nexus`` keyword differs.
        """
        # Respect an explicitly passed value; otherwise enable the fallback.
        kwargs.setdefault("assume_newick_if_not_nexus", True)
        NexusTreeDataYielder.__init__(
                self,
                files=files,
                taxon_namespace=taxon_namespace,
                tree_type=tree_type,
                **kwargs)
191