#! /usr/bin/env python
# -*- coding: utf-8 -*-

##############################################################################
##  DendroPy Phylogenetic Computing Library.
##
##  Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder.
##  All rights reserved.
##
##  See "LICENSE.rst" for terms and conditions of usage.
##
##  If you use this work or any portion thereof in published work,
##  please cite it as:
##
##     Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
##     for phylogenetic computing. Bioinformatics 26: 1569-1571.
##
##############################################################################

"""
Implementation of NEXUS-schema tree iterator.
"""

import sys
# Compare as a tuple: testing ``major >= 3 and minor >= 4`` separately would
# wrongly select the compatibility shim on any X.0-X.3 release with X > 3.
if sys.version_info[:2] < (3, 4):
    from dendropy.utility.filesys import pre_py34_open as open
from dendropy.utility import textprocessing
from dendropy.dataio import ioservice
from dendropy.dataio import nexusreader
from dendropy.dataio import nexusprocessing

class NexusTreeDataYielder(
        ioservice.TreeDataYielder,
        nexusreader.NexusReader):
    """
    Tree data yielder that walks NEXUS-formatted sources block by block and
    yields the trees found in each TREES block one at a time.
    """

    def __init__(self,
            files=None,
            taxon_namespace=None,
            tree_type=None,
            **kwargs):
        r"""

        Parameters
        ----------
        files : iterable of sources
            Iterable of sources, which can either be strings specifying file
            paths or file-like objects open for reading. If a source element is
            a string (``isinstance(i,str) == True``), then it is assumed to be
            a path to a file. Otherwise, the source is assumed to be a file-like
            object.
        taxon_namespace : |TaxonNamespace| instance
            The operational taxonomic unit concept namespace to use to manage
            taxon definitions.
        tree_type : type
            Forwarded unchanged to `ioservice.TreeDataYielder`.
            NOTE(review): presumably the class used to build yielded trees;
            confirm against `ioservice.TreeDataYielder`.
        assume_newick_if_not_nexus : bool
            Optional keyword, removed from ``kwargs`` before they are passed
            on (default `False`). If `True`, a source that does not start
            with the ``#NEXUS`` token is parsed as a sequence of NEWICK tree
            statements instead of raising an error.
        \*\*kwargs : keyword arguments
            These will be passed directly to the base `nexusreader.NexusReader`
            class. See `nexusreader.NexusReader` for details.
        """
        ioservice.TreeDataYielder.__init__(self,
                files=files,
                taxon_namespace=taxon_namespace,
                tree_type=tree_type)
        self.assume_newick_if_not_nexus = kwargs.pop("assume_newick_if_not_nexus", False)
        # The reader base class must share the namespace attached by the
        # yielder base class, so it is injected into the reader's keywords.
        kwargs["attached_taxon_namespace"] = self.attached_taxon_namespace
        nexusreader.NexusReader.__init__(self, **kwargs)
        # Set after the reader is initialized so that these values win over
        # whatever the reader's constructor assigned. ``exclude_trees`` is
        # consulted in ``_yield_from_trees_block``.
        # NOTE(review): ``exclude_chars`` is not read in this module;
        # presumably it is honored by the base reader -- confirm.
        self.exclude_chars = True
        self.exclude_trees = False

    ###########################################################################
    ## Implementation of DataYielder interface

    def _yield_items_from_stream(self, stream):
        """
        Yields the trees found in ``stream``.

        Parameters
        ----------
        stream : file-like object
            Source handed to the tokenizer (a new tokenizer is created on
            first use; afterwards the existing one is re-pointed at the new
            stream).

        Raises
        ------
        nexusreader.NexusReader.NotNexusFileError
            If the first token is not ``#NEXUS`` and
            ``assume_newick_if_not_nexus`` is not set.
        nexusreader.NexusReader.IncompleteBlockError
            If ``BEGIN`` is immediately followed by another ``BEGIN``.
        """
        if self._nexus_tokenizer is None:
            self.create_tokenizer(stream,
                preserve_unquoted_underscores=self.preserve_underscores)
        else:
            self._nexus_tokenizer.set_stream(stream)
        token = self._nexus_tokenizer.next_token()
        if token is None:
            # NOTE(review): other loops in this class test tokens against
            # None, so the tokenizer is assumed to return None when the
            # stream holds no tokens -- confirm. Without this guard the
            # ``token.upper()`` call below fails with an AttributeError.
            if self.assume_newick_if_not_nexus:
                return
            raise self._nexus_error("Expecting '#NEXUS', but found '{}'".format(token),
                    nexusreader.NexusReader.NotNexusFileError)
        if token.upper() != "#NEXUS":
            if not self.assume_newick_if_not_nexus:
                raise self._nexus_error("Expecting '#NEXUS', but found '{}'".format(token),
                        nexusreader.NexusReader.NotNexusFileError)
            taxon_symbol_mapper = self._get_taxon_symbol_mapper(
                    taxon_namespace=self.attached_taxon_namespace,
                    enable_lookup_by_taxon_number=False,
                    )
            # Keep building trees until the builder reports that none is left.
            while True:
                tree = self._build_tree_from_newick_tree_string(
                        tree_factory=self.tree_factory,
                        taxon_symbol_mapper=taxon_symbol_mapper)
                if tree is None:
                    break
                yield tree
            # Control deliberately falls through to the block loop below, as
            # in the original implementation.
        while not self._nexus_tokenizer.is_eof():
            # Scan forward to the next 'BEGIN' (or to the end of the input).
            token = self._nexus_tokenizer.next_token_ucase()
            while (token is not None
                    and token != 'BEGIN'
                    and not self._nexus_tokenizer.is_eof()):
                token = self._nexus_tokenizer.next_token_ucase()
            self._nexus_tokenizer.process_and_clear_comments_for_item(
                    self._global_annotations_target,
                    self.extract_comment_metadata)
            # The token following 'BEGIN' names the block.
            token = self._nexus_tokenizer.next_token_ucase()
            if token == 'TAXA':
                self._parse_taxa_block()
            elif token == 'TREES':
                for tree in self._yield_from_trees_block():
                    yield tree
            elif token == 'BEGIN':
                raise self._nexus_error("'BEGIN' found without completion of previous block",
                        nexusreader.NexusReader.IncompleteBlockError)
            else:
                # unknown block
                token = self._consume_to_end_of_block(token)

    ###########################################################################
    ## Supporting Functions

    def _yield_from_trees_block(self):
        """
        Yields each tree declared in the TREES block the tokenizer is
        currently positioned on.

        Expectations:
            - current token: "TREES" [part of "BEGIN TREES"]

        If ``self.exclude_trees`` is set, the block is consumed without
        yielding anything.

        Raises
        ------
        Error produced by ``self._nexus_error``
            If the current token is not ``TREES``, or (as
            `nexusreader.NexusReader.IncompleteBlockError`) if a ``BEGIN``
            token is met before the block has ended.
        """
        token = self._nexus_tokenizer.cast_current_token_to_ucase()
        if token != "TREES":
            raise self._nexus_error("Expecting 'TREES' token, but instead found '{}'".format(token))
        if self.exclude_trees:
            self._consume_to_end_of_block(self._nexus_tokenizer.current_token)
            return
        self._nexus_tokenizer.skip_to_semicolon() # move past "BEGIN TREES" command
        link_title = None
        taxon_namespace = None
        taxon_symbol_mapper = None
        while ((not self._nexus_tokenizer.is_eof())
                and token is not None
                and token != 'END'
                and token != 'ENDBLOCK'):
            token = self._nexus_tokenizer.next_token_ucase()
            if token == 'LINK':
                link_title = self._parse_link_statement().get("taxa")
            elif token == 'TITLE':
                # The parsed title itself is not used by this method.
                self._parse_title_statement()
                token = "" # clear; repopulate at start of loop
            elif token == 'TRANSLATE':
                if taxon_namespace is None:
                    taxon_namespace = self._get_taxon_namespace(link_title)
                taxon_symbol_mapper = self._parse_translate_statement(taxon_namespace)
                token = "" # clear; repopulate at start of loop
            elif token == 'TREE':
                # Resolve the namespace and symbol mapper lazily, so a
                # preceding LINK or TRANSLATE statement takes effect.
                if taxon_namespace is None:
                    taxon_namespace = self._get_taxon_namespace(link_title)
                if taxon_symbol_mapper is None:
                    taxon_symbol_mapper = self._get_taxon_symbol_mapper(taxon_namespace=taxon_namespace)
                # The returned comments are not used here; the call is kept
                # because the original made it at this point.
                # NOTE(review): presumably this drains comments captured
                # before the first tree statement -- confirm.
                self._nexus_tokenizer.pull_captured_comments()
                tree_factory = self.tree_factory
                while True:
                    ## After the following, the current token
                    ## will be the token immediately following
                    ## the terminating semi-colon of a tree
                    ## statement. Typically, this will be
                    ## 'TREE' if there is another tree, or
                    ## 'END'/'ENDBLOCK'.
                    tree = self._parse_tree_statement(
                            tree_factory=tree_factory,
                            taxon_symbol_mapper=taxon_symbol_mapper)
                    yield tree
                    if self._nexus_tokenizer.is_eof() or not self._nexus_tokenizer.current_token:
                        break
                    if self._nexus_tokenizer.cast_current_token_to_ucase() != "TREE":
                        # Hand the token back to the outer loop's condition.
                        token = self._nexus_tokenizer.current_token
                        break
            elif token == 'BEGIN':
                raise self._nexus_error("'BEGIN' found without completion of previous block",
                        nexusreader.NexusReader.IncompleteBlockError)
        self._nexus_tokenizer.skip_to_semicolon() # move past END command

class NexusNewickTreeDataYielder(NexusTreeDataYielder):
    """
    Variant of `NexusTreeDataYielder` in which ``assume_newick_if_not_nexus``
    defaults to `True` instead of `False`.
    """

    def __init__(self,
            files=None,
            taxon_namespace=None,
            tree_type=None,
            **kwargs):
        """
        Accepts the same arguments as `NexusTreeDataYielder`; an explicitly
        passed ``assume_newick_if_not_nexus`` value is respected, otherwise
        it is set to `True`.
        """
        kwargs.setdefault("assume_newick_if_not_nexus", True)
        NexusTreeDataYielder.__init__(self,
                files=files,
                taxon_namespace=taxon_namespace,
                tree_type=tree_type,
                **kwargs)