1#! /usr/bin/env python
2# -*- coding: utf-8 -*-
3
4##############################################################################
5##  DendroPy Phylogenetic Computing Library.
6##
7##  Copyright 2010-2015 Jeet Sukumaran and Mark T. Holder.
8##  All rights reserved.
9##
10##  See "LICENSE.rst" for terms and conditions of usage.
11##
12##  If you use this work or any portion thereof in published work,
13##  please cite it as:
14##
15##     Sukumaran, J. and M. T. Holder. 2010. DendroPy: a Python library
16##     for phylogenetic computing. Bioinformatics 26: 1569-1571.
17##
18##############################################################################
19
20"""
21Taxon management.
22
23Operational taxonomic unit concepts are essentially names for taxa in the "real
24world". Operational taxonomic unit concepts are organized into taxonomic
25namespaces. A taxonomic namespace is a self-contained and
26functionally-complete collection of mutually-distinct operational taxonomic
27unit concepts, and provide the semantic context in which operational taxonomic
28units from across various data sources of different formats and provenances can
29be related through correct interpretation of their taxon labels.
30
31    * Operational taxonomic units are modeled by a |Taxon| object.
32
33    * Taxonomic namespaces, in which operational taxonomic units are organized,
34      are modeled by a |TaxonNamespace| object.
35
36    * A |TaxonNamespace| manages a collection of |Taxon| objects, where each
37      object represents a distinct operational taxonomic unit concept within
38      the taxonomic namespace represented by that |TaxonNamespace| object.
39
40    * Each |Taxon| object can belong to one and only one |TaxonNamespace|:
41      |Taxon| objects are not shared across |TaxonNamespace| objects.
42
43    * Each |Taxon| object has an attribute, ``label``, whose (string) value
44      is the name of the operational taxon unit concept that it represents.
45
46    * Different |Taxon| objects represent different operational taxonomic
47      unit concepts, even if they have the same label value.
48
49    * All client objects (`TaxonNamespaceAssociated` objects) that reference
50      the same |TaxonNamespace| reference the same "universe" or domain of
51      operational taxonomic unit concepts.
52
53    * Operational taxonomic units from across different data sources are mapped
54      to distinct |Taxon| objects within a particular |TaxonNamespace| based on
55      matching the string values of labels of the |Taxon| object.
56
57    * A particular taxonomic unit concept in one data source will only be
58      correctly related to the same taxonomic unit concept (i.e, the same
59      |Taxon| object) in another data source only if they have both
60      been parsed with reference to the same taxonomic namespace (i.e., the
61      same |TaxonNamespace| has been used).
62
    * A |TaxonNamespace| assigns an "accession index" to every |Taxon| object
      added to it. This is a stable and unique number within the context of any
      given |TaxonNamespace| object (though a |Taxon| object may have different
      accession indexes in different |TaxonNamespace| objects if it
      belongs to multiple namespaces). This number will be used to
68      calculate the "split bitmask" hash of the trivial split or external edge
69      subtending the node to which this |Taxon| object is assigned on a tree.
70      The concept of a "split bitmask" hash is fundamental to DendroPy's tree
71      operations. The split bitmask is a hash that uniquely identifies every
72      split on a tree.  It is calculated by OR'ing the split bitmask of all the
73      child splits of the given split. Terminal edges, of course, do not have
74      child edges, and their split bitmask is given by the accession index of
75      the |Taxon| object at their head or target nodes.
76"""
77
78
79import warnings
80import collections
81import copy
82from dendropy.utility.textprocessing import StringIO
83from dendropy.datamodel import basemodel
84from dendropy.utility import bitprocessing
85from dendropy.utility import textprocessing
86from dendropy.utility import container
87from dendropy.utility import error
88from dendropy.utility import deprecate
89
90##############################################################################
91## Helper functions
92
def taxon_set_deprecation_warning(stacklevel=6):
    """
    Issue the DendroPy deprecation warning for use of the legacy 'taxon_set'
    name in place of 'taxon_namespace'.

    Parameters
    ----------
    stacklevel : int, optional
        Forwarded unchanged as the ``stacklevel`` argument of
        ``deprecate.dendropy_deprecation_warning``.
    """
    notice = "Deprecated since DendroPy 4: 'taxon_set' will no longer be supported in future releases; use 'taxon_namespace' instead"
    deprecate.dendropy_deprecation_warning(message=notice, stacklevel=stacklevel)
97
def process_kwargs_dict_for_taxon_namespace(kwargs_dict, default=None):
    """
    Extract (and remove) the taxon namespace argument from a keyword
    argument dictionary, honoring the legacy 'taxon_set' spelling.

    Parameters
    ----------
    kwargs_dict : dict
        Keyword arguments to inspect. The 'taxon_namespace' or 'taxon_set'
        entry, whichever is present, is popped from it.
    default : object, optional
        Value returned when neither key is present.

    Returns
    -------
    object
        The value stored under 'taxon_namespace' (or under 'taxon_set', after
        a deprecation warning has been issued), or ``default``.

    Raises
    ------
    TypeError
        If both 'taxon_namespace' and 'taxon_set' are given.
    """
    # Common case first: no legacy keyword, so just consume the modern one.
    if "taxon_set" not in kwargs_dict:
        return kwargs_dict.pop("taxon_namespace", default)
    if "taxon_namespace" in kwargs_dict:
        raise TypeError("Cannot specify both 'taxon_namespace' and 'taxon_set' (legacy support) simultaneously")
    # Legacy keyword only: warn, then consume it.
    taxon_set_deprecation_warning()
    return kwargs_dict.pop("taxon_set", default)
107
def process_attached_taxon_namespace_directives(kwargs_dict):
    """
    Reject the retired keyword arguments that used to bind a data object to a
    single taxon namespace at construction time.

    None of the following keywords is accepted any longer; the presence of
    any of them in ``kwargs_dict`` raises an error directing the caller to
    the instance method ``attach_taxon_namespace()`` instead:

        - ``taxon_namespace``
        - ``attach_taxon_namespace``
        - ``attached_taxon_namespace``
        - ``taxon_set``
        - ``attach_taxon_set``
        - ``attached_taxon_set``

    Parameters
    ----------
    kwargs_dict : dict
        Keyword arguments to validate. It is not modified.

    Returns
    -------
    tuple
        Always ``(False, None)``: the "attach taxon namespace" flag and the
        taxon namespace to attach. Because every directive keyword is
        rejected above, no other result is possible.

    Raises
    ------
    TypeError
        If any of the retired keywords is present in ``kwargs_dict``. When
        several are present, the first one in the order listed above is the
        one reported.
    """
    retired_keywords = (
            "taxon_namespace",
            "attach_taxon_namespace",
            "attached_taxon_namespace",
            "taxon_set",
            "attach_taxon_set",
            "attached_taxon_set",
            )
    for kw in retired_keywords:
        if kw in kwargs_dict:
            raise TypeError("'{}' is no longer supported as a keyword argument. Use the instance method 'attach_taxon_namespace()' of the data object instead to bind the object to a single TaxonNamespace".format(kw))
    # The legacy handling that used to follow this check could never run
    # (each keyword it examined is rejected above), so the only reachable
    # outcome is "do not attach, no namespace".
    return (False, None)
168
169##############################################################################
170## TaxonNamespaceAssociated
171
class TaxonNamespaceAssociated(object):
    """
    Provides infrastructure for the maintenance of references to taxa.

    Instances hold a reference to a taxon namespace in ``_taxon_namespace``,
    exposed through the ``taxon_namespace`` property (and, for legacy code,
    the deprecated ``taxon_set`` property). Derived classes are expected to
    override ``reconstruct_taxon_namespace()``, ``update_taxon_namespace()``,
    ``poll_taxa()`` and ``reindex_subcomponent_taxa()``, all of which raise
    ``NotImplementedError`` here.
    """

    def __init__(self, taxon_namespace=None):
        """
        Parameters
        ----------
        taxon_namespace : |TaxonNamespace|, optional
            The namespace to reference. If |None|, a new, empty
            |TaxonNamespace| is created.
        """
        if taxon_namespace is None:
            self._taxon_namespace = TaxonNamespace()
        else:
            self._taxon_namespace = taxon_namespace
        # When True, assigning a different namespace to ``taxon_namespace``
        # triggers ``migrate_taxon_namespace()`` rather than a plain rebind.
        self.automigrate_taxon_namespace_on_assignment = False

    def _get_taxon_namespace(self):
        return self._taxon_namespace
    def _set_taxon_namespace(self, tns):
        if self.automigrate_taxon_namespace_on_assignment:
            if tns is not None and self._taxon_namespace is not tns:
                self.migrate_taxon_namespace(tns)
            elif tns is None:
                self._taxon_namespace = None
            # (assigning the namespace already referenced is a no-op)
        else:
            self._taxon_namespace = tns
    def _del_taxon_namespace(self):
        raise TypeError("Cannot delete 'taxon_namespace' attribute")
    taxon_namespace = property(_get_taxon_namespace, _set_taxon_namespace, _del_taxon_namespace)

    # Legacy alias: every access warns and then defers to ``taxon_namespace``.
    def _get_taxon_set(self):
        taxon_set_deprecation_warning()
        return self.taxon_namespace
    def _set_taxon_set(self, v):
        taxon_set_deprecation_warning()
        self.taxon_namespace = v
    def _del_taxon_set(self):
        # Deletion only warns; nothing is removed.
        taxon_set_deprecation_warning()
    taxon_set = property(_get_taxon_set, _set_taxon_set, _del_taxon_set)

    def migrate_taxon_namespace(self,
            taxon_namespace,
            unify_taxa_by_label=True,
            taxon_mapping_memo=None):
        """
        Move this object and all members to a new operational taxonomic unit
        concept namespace scope.

        Current :attr:`self.taxon_namespace` value will be replaced with value
        given in ``taxon_namespace`` if this is not |None|, or a new
        |TaxonNamespace| object. Following this,
        ``reconstruct_taxon_namespace()`` will be called: each distinct
        |Taxon| object associated with ``self`` or members of ``self`` that
        is not already in ``taxon_namespace`` will be replaced with a new
        |Taxon| object that will be created with the same label and
        added to :attr:`self.taxon_namespace`.  Calling this method results in
        the object (and all its member objects) being associated with a new,
        independent taxon namespace.

        Label mapping case sensitivity follows the
        ``self.taxon_namespace.is_case_sensitive`` setting. If
        |False| and ``unify_taxa_by_label`` is also |True|, then the
        establishment of correspondence between |Taxon| objects in the
        old and new namespaces will be based on case-insensitive matching of
        labels. E.g., if there are four |Taxon| objects with labels
        'Foo', 'Foo', 'FOO', and 'FoO' in the old namespace, then all objects
        that reference these will reference a single new |Taxon| object
        in the new namespace (with a label some existing casing variant of
        'foo'). If |True|: if ``unify_taxa_by_label`` is |True|,
        |Taxon| objects with labels identical except in case will be
        considered distinct.

        Parameters
        ----------
        taxon_namespace : |TaxonNamespace|
            The |TaxonNamespace| into the scope of which this object
            will be moved. If |None|, a new |TaxonNamespace| is created.

        unify_taxa_by_label : boolean, optional
            If |True|, then references to distinct |Taxon| objects with
            identical labels in the current namespace will be replaced with a
            reference to a single |Taxon| object in the new namespace.
            If |False|: references to distinct |Taxon| objects will
            remain distinct, even if the labels are the same.

        taxon_mapping_memo : dictionary
            Similar to ``memo`` of deepcopy, this is a dictionary that maps
            |Taxon| objects in the old namespace to corresponding
            |Taxon| objects in the new namespace. Mostly for internal
            use when migrating complex data to a new namespace. Note that
            any mappings here take precedence over all other options: if a
            |Taxon| object in the old namespace is found in this
            dictionary, the counterpart in the new namespace will be whatever
            value is mapped, regardless of, e.g. label values.

        Examples
        --------
        Use this method to move an object from one taxon namespace to
        another.

        For example, to get a copy of an object associated with another taxon
        namespace and associate it with a different namespace::

            # Get handle to the new TaxonNamespace
            other_taxon_namespace = some_other_data.taxon_namespace

            # Get a taxon-namespace scoped copy of a tree
            # in another namespace
            t2 = Tree(t1)

            # Replace taxon namespace of copy
            t2.migrate_taxon_namespace(other_taxon_namespace)

        You can also use this method to get a copy of a structure and then
        move it to a new namespace::

            t2 = Tree(t1)
            t2.migrate_taxon_namespace(TaxonNamespace())

            # Note: the same effect can be achieved by:
            t3 = copy.deepcopy(t1)

        See Also
        --------
        reconstruct_taxon_namespace

        """
        if taxon_namespace is None:
            # TaxonNamespace is defined in this module; there is no ``taxon``
            # name in scope to qualify it with.
            taxon_namespace = TaxonNamespace()
        # Bind directly to the private attribute so that the property setter
        # (which may itself call this method) is bypassed.
        self._taxon_namespace = taxon_namespace
        self.reconstruct_taxon_namespace(
                unify_taxa_by_label=unify_taxa_by_label,
                taxon_mapping_memo=taxon_mapping_memo)

    def reconstruct_taxon_namespace(self,
            unify_taxa_by_label=True,
            taxon_mapping_memo=None):
        """
        Repopulates the current taxon namespace with new taxon objects,
        preserving labels. Each distinct |Taxon| object associated with
        ``self`` or members of ``self`` that is not already in
        ``self.taxon_namespace`` will be replaced with a new |Taxon|
        object that will be created with the same label and added to
        :attr:`self.taxon_namespace`.

        Label mapping case sensitivity follows the
        ``self.taxon_namespace.is_case_sensitive`` setting. If
        |False| and ``unify_taxa_by_label`` is also |True|, then the
        establishment of correspondence between |Taxon| objects in the
        old and new namespaces will be based on case-insensitive matching of
        labels. E.g., if there are four |Taxon| objects with labels
        'Foo', 'Foo', 'FOO', and 'FoO' in the old namespace, then all objects
        that reference these will reference a single new |Taxon| object
        in the new namespace (with a label some existing casing variant of
        'foo'). If |True|: if ``unify_taxa_by_label`` is |True|,
        |Taxon| objects with labels identical except in case will be
        considered distinct.

        Note
        ----
        Existing |Taxon| objects in ``self.taxon_namespace`` are *not*
        removed. This method should thus be called *only* when
        ``self.taxon_namespace`` has been changed. In fact, typical usage would
        not involve calling this method directly, but rather through
        ``migrate_taxon_namespace()``.

        This base implementation raises ``NotImplementedError``; derived
        classes must override it.

        Parameters
        ----------
        unify_taxa_by_label : boolean, optional
            If |True|, then references to distinct |Taxon| objects with
            identical labels in the current namespace will be replaced with a
            reference to a single |Taxon| object in the new namespace.
            If |False|: references to distinct |Taxon| objects will
            remain distinct, even if the labels are the same.

        taxon_mapping_memo : dictionary
            Similar to ``memo`` of deepcopy, this is a dictionary that maps
            |Taxon| objects in the old namespace to corresponding
            |Taxon| objects in the new namespace. Mostly for internal
            use when migrating complex data to a new namespace.
        """
        raise NotImplementedError()

    def update_taxon_namespace(self):
        """
        All |Taxon| objects associated with ``self`` or members of ``self``
        that are not in ``self.taxon_namespace`` will be added. Note that, unlike
        ``reconstruct_taxon_namespace``, no new |Taxon| objects
        will be created.

        This base implementation raises ``NotImplementedError``; derived
        classes must override it.
        """
        raise NotImplementedError()

    def purge_taxon_namespace(self):
        """
        Remove all |Taxon| instances in ``self.taxon_namespace`` that are
        not associated with ``self`` or any item in ``self``.

        The set of associated taxa is obtained from ``poll_taxa()``.
        """
        taxa = self.poll_taxa()
        # Collect first, then remove: the namespace must not be mutated
        # while it is being iterated over.
        to_remove = [t for t in self.taxon_namespace if t not in taxa]
        for t in to_remove:
            self.taxon_namespace.remove_taxon(t)

    def poll_taxa(self, taxa=None):
        """
        Returns a set populated with all of |Taxon| instances associated
        with ``self``.

        This base implementation raises ``NotImplementedError``; derived
        classes must override it.

        Parameters
        ----------
        taxa : set()
            Set to populate. If not specified, a new one will be created.

        Returns
        -------
        taxa : set[|Taxon|]
            Set of taxa associated with ``self``.
        """
        raise NotImplementedError()

    def reindex_taxa(self, taxon_namespace=None, clear=False):
        """
        DEPRECATED: Use `migrate_taxon_namespace()` instead.
        Rebuilds ``taxon_namespace`` from scratch, or assigns |Taxon| objects from
        given |TaxonNamespace| object ``taxon_namespace`` based on label values.

        Parameters
        ----------
        taxon_namespace : |TaxonNamespace|, optional
            If not |None|, assigned to ``self.taxon_namespace`` before
            reindexing.
        clear : boolean, optional
            If |True|, ``self.taxon_namespace.clear()`` is called before
            reindexing.

        Returns
        -------
        The value of ``self.taxon_namespace`` after reindexing.
        """
        deprecate.dendropy_deprecation_warning(
                message="Deprecated since DendroPy 4: '{class_name}.reindex_taxa()' will no longer be supported in future releases; use '{class_name}.migrate_taxon_namespace()' instead".format(class_name=self.__class__.__name__),
                stacklevel=3)
        if taxon_namespace is not None:
            self.taxon_namespace = taxon_namespace
        if clear:
            self.taxon_namespace.clear()
        self.reindex_subcomponent_taxa()
        return self.taxon_namespace

    def reindex_subcomponent_taxa(self):
        """
        DEPRECATED: Use :meth:`reconstruct_taxon_namespace()` instead.
        Derived classes should override this to ensure that their various
        components, attributes and members all refer to the same |TaxonNamespace|
        object as ``self.taxon_namespace``, and that ``self.taxon_namespace`` has all
        the |Taxon| objects in the various members.

        This base implementation raises ``NotImplementedError``.
        """
        raise NotImplementedError()
422
423
424##############################################################################
425## TaxonNamespace
426
427class TaxonNamespace(
428        basemodel.Deserializable,
429        basemodel.MultiReadable,
430        basemodel.Serializable,
431        basemodel.DataObject,
432        basemodel.Annotable):
433
434    """
435    A collection of |Taxon| objects representing a self-contained and complete
436    domain of distinct operational taxonomic unit definitions.
437    Provides the common semantic context in which operational taxonomic units
438    referenced by various phylogenetic data objects (e.g., trees or alignments)
439    can be related.
440    """
441
442    ### Life-cycle
443
    def __init__(self, *args, **kwargs):
        r"""
        Parameters
        ----------

        \*args : positional arguments, optional
            Accepts a single iterable as an optional positional argument.
            Each |Taxon| object in the iterable is passed to ``add_taxon()``
            as-is (the object itself, not a copy), while every other element
            is passed to ``new_taxon()`` as the label of a |Taxon| to be
            created. If the iterable is itself a |TaxonNamespace| object,
            then, in addition, its remaining instance attributes and its
            annotations are deep-copied onto this one.

        \*\*kwargs : keyword arguments
            label : string
                The label or name for this namespace.
            is_mutable : boolean, optional (default = |True|)
                If |True| (default), then |Taxon| objects can be added to this
                namespace. If |False|, then adding |Taxon| objects will result
                in an error.
            is_case_sensitive : boolean, optional (default = |False|)
                Whether or not taxon names are considered case sensitive or
                insensitive.

        Raises
        ------
        TypeError
            If more than one positional argument is given, or if any keyword
            argument other than those listed above is given.

        Notes
        -----
        An empty |TaxonNamespace| can be created (with optional) label and |Taxon|
        objects added later:

        >>> tns = dendropy.TaxonNamespace(label="taxa")
        >>> t1 = Taxon("a")
        >>> tns.add_taxon(t1)
        >>> t2 = Taxon("b")
        >>> tns.add_taxon(t2)
        >>> tns.add_taxon("c")
        >>> tns
        <TaxonNamespace 0x106509090 'taxa': [<Taxon 0x10661f050 'a'>, <Taxon 0x10651c590 'b'>, <Taxon 0x106642a90 'c'>]>

        Alternatively, an iterable can be passed in as an initializer, and all
        |Taxon| objects will be added directly while, for each string, a new
        |Taxon| object will be created and added. So, the below are all equivalent
        to the above:

        >>> tns = dendropy.TaxonNamespace(["a", "b", "c"], label="taxa")

        >>> taxa = [Taxon(n) for n in ["a", "b", "c"]]
        >>> tns = dendropy.TaxonNamespace(taxa, label="taxa")

        >>> t1 = Taxon("a")
        >>> t2 = Taxon("b")
        >>> taxa = [t1, t2, "c"]
        >>> tns = dendropy.TaxonNamespace(taxa, label="taxa")

        If a |TaxonNamespace| object is passed as the
        initializer argument, a *shallow* copy of the object is constructed
        (a ``label`` keyword argument, if given, overrides the copied label):

        >>> tns1 = dendropy.TaxonNamespace(["a", "b", "c"], label="taxa1")
        >>> tns1
        <TaxonNamespace 0x1097275d0 'taxa1': [<Taxon 0x109727610 'a'>, <Taxon 0x109727e10 'b'>, <Taxon 0x109727e90 'c'>]>
        >>> tns2 = dendropy.TaxonNamespace(tns1, label="2")
        >>> tns2
        <TaxonNamespace 0x109727d50 '2': [<Taxon 0x109727610 'a'>, <Taxon 0x109727e10 'b'>, <Taxon 0x109727e90 'c'>]>

        Thus, while "``tns1``" and "``tns2``" are independent collections, and
        addition/deletion of |Taxon| instances to one will not affect
        the other, the label of a |Taxon| instance that is an element in
        one will of course affect the same instance if it is in the other:

        >>> print(tns1[0].label)
        a
        >>> print(tns2[0].label)
        a
        >>> tns1[0].label = "Z"
        >>> print(tns1[0].label)
        Z
        >>> print(tns2[0].label)
        Z

        In contrast to actual data (i.e., the |Taxon| objects), all
        metadata associated with "``tns2``" (i.e., the |AnnotationSet| object,
        in the :attr:`TaxonNamespace.annotations` attribute), will be a full,
        independent deep-copy.

        If what is needed is a true deep-copy of the data of a particular
        |TaxonNamespace| object, including copies of the member
        |Taxon| instances, then this can be achieved using
        :func:`copy.deepcopy()`.

        >>> import copy
        >>> tns1 = dendropy.TaxonNamespace(["a", "b", "c"], label="taxa1")
        >>> tns2 = copy.deepcopy(tns1)
        """
        # Consume the recognized keyword arguments; anything left over in
        # ``kwargs`` is reported as an error at the end.
        kwargs_set_label = kwargs.pop("label", None)
        self.comments = []
        self.is_mutable = kwargs.pop('is_mutable', True)
        self.is_case_sensitive = kwargs.pop('is_case_sensitive', False)
        self._accession_index_taxon_map = {}
        self._taxa = []
        self._taxon_accession_index_map = {}
        self._taxon_bitmask_map = {}
        # self._split_bitmask_taxon_map = {}
        self._current_accession_count = 0
        if len(args) > 1:
            raise TypeError("TaxonNamespace() takes at most 1 non-keyword argument ({} given)".format(len(args)))
        elif len(args) == 1:
            # special case: construct from argument
            basemodel.DataObject.__init__(self, label=kwargs_set_label)
            other = args[0]
            # Taxon instances are added as-is; anything else is treated as a
            # label for a newly-created Taxon.
            for i in other:
                if isinstance(i, Taxon):
                    self.add_taxon(i)
                else:
                    self.new_taxon(label=i)
            if isinstance(other, TaxonNamespace):
                # Seed the deepcopy memo so that references to ``other``, its
                # taxon list and its member taxa resolve to ``self``, this
                # object's taxon list and the taxa just added, instead of
                # being copied again.
                memo = { id(other): self, id(other._taxa): self._taxa }
                for t1, t2 in zip(self._taxa, other._taxa):
                    memo[id(t2)] = t1
                # Every other instance attribute of ``other`` is deep-copied
                # over the value set above. NOTE(review): this presumably
                # includes ``is_mutable`` / ``is_case_sensitive``, so keyword
                # values for those would be replaced by ``other``'s -- confirm.
                for k in other.__dict__:
                    if k == "_annotations" or k == "_taxa":
                        continue
                    self.__dict__[k] = copy.deepcopy(other.__dict__[k], memo)
                self.deep_copy_annotations_from(other, memo=memo)
                # self.copy_annotations_from(other, attribute_object_mapper=memo)
            # An explicitly-passed label takes precedence over whatever was
            # set (or copied from ``other``) above.
            if kwargs_set_label is not None:
                self.label = kwargs_set_label
        else:
            basemodel.DataObject.__init__(self, label=kwargs_set_label)
        # All supported keywords were popped above.
        if kwargs:
            raise TypeError("Unrecognized or unsupported arguments: {}".format(kwargs))
576
577    def __copy__(self):
578        return TaxonNamespace(self)
579
580    def taxon_namespace_scoped_copy(self, memo=None):
581        self.populate_memo_for_taxon_namespace_scoped_copy(memo=memo)
582        return self
583
584    def __deepcopy__(self, memo):
585        if memo is None:
586            memo = {}
587        o = self.__class__.__new__(self.__class__)
588        memo[id(self)] = o
589        o._taxa = []
590        memo[id(self._taxa)] = o._taxa
591        for t in self._taxa:
592            o._taxa.append(copy.deepcopy(t, memo))
593        for k in self.__dict__:
594            if k == "_annotations" or k == "_taxa":
595                continue
596            o.__dict__[k] = copy.deepcopy(self.__dict__[k], memo)
597        o.deep_copy_annotations_from(self, memo=memo)
598        # o.copy_annotations_from(self, attribute_object_mapper=memo)
599        return o
600
601    def populate_memo_for_taxon_namespace_scoped_copy(self, memo):
602        if memo is not None:
603            memo[id(self)] = self
604            for taxon in self._taxa:
605                memo[id(taxon)] = taxon
606        return memo
607
608    ### Identity and Comparison
609
610    def __str__(self):
611        return "[{}]".format(", ".join([str(i) for i in self._taxa]))
612
613    def __repr__(self):
614        return "<{} {} '{}': [{}]>".format(self.__class__.__name__, hex(id(self)), self.label, ", ".join(repr(i) for i in self._taxa))
615
616    def __hash__(self):
617        return id(self)
618
619    def __lt__(self, other):
620        return self._taxa < o._taxa
621
622    def __eq__(self, other):
623        # enforce non-equivalence of non-identical namespaces
624        return self is other
625        # if not isinstance(other, self.__class__):
626        #     return False
627        # return (self.label == other.label
628        #         and self._taxa == other._taxa
629        #         and basemodel.Annotable.__eq__(self, other))
630
631    ### Collection Iteration
632
633    def __iter__(self):
634        return iter(self._taxa)
635
636    def __reversed__(self):
637        return reversed(self._taxa)
638
639    ### Collection Data
640
641    def __len__(self):
642        """
643        Returns number of |Taxon| objects in this |TaxonNamespace|.
644        """
645        return len(self._taxa)
646
647    ### Collection Access and Management
648
649    def __getitem__(self, key):
650        """
651        Returns |Taxon| object with index or slice given by ``key``.
652        """
653        if isinstance(key, int) or isinstance(key, slice):
654            return self._taxa[key]
655        raise ValueError("'TaxonNamespace[]' now only accepts indexes or slices. To access Taxon objects by label, use 'TaxonNamespace.get_taxon()' or 'TaxonNamespace.findall()'")
656
657    def __setitem__(self, key, value):
658        raise NotImplementedError("Item assignment not supported")
659
660    def __delitem__(self, key):
661        self.remove_taxon(self[key])
662
663    def __contains__(self, taxon):
664        """
665        Returns |True| if Taxon object ``taxon`` is in self.
666        """
667        # look-up in dictionary for O(1) instead of O(n) in list
668        return taxon in self._taxon_accession_index_map
669
670    def _lookup_label(self,
671            label,
672            is_case_sensitive=None,
673            first_match_only=False,
674            error_if_not_found=False,
675            ):
676        """
677        Return |Taxon| object(s) with label matching ``label``.
678
679        Parameters
680        ----------
681        label : str
682            The label for which to search.
683        is_case_sensitive : |None| or bool
684            By default, label lookup will use the
685            ``is_case_sensitive`` attribute of ``self`` to decide
686            whether or not to respect case when trying to match labels to
687            operational taxonomic unit names represented by |Taxon|
688            instances. This can be over-ridden by specifying
689            ``is_case_sensitive`` to |True| (forcing case-sensitivity) or |False|
690            (forcing case-insensitivity).
691        first_match_only : bool
692            If |False|, then the entire namespace will be searched and *all*
693            |Taxon| objects with the matching labels will be returned
694            as a list. If |True| then the function will return after
695            processing the first |Taxon| object with a matching label
696            (i.e., the entire namespace is not searched). Setting this
697            argument to |True| will be more efficient and should be preferred
698            if there are no redundant or duplicate labels.
699        error_if_not_found : bool
700            If |True|, then a LookupError is raised if there are no matches.
701
702        Returns
703        -------
704        t : |None| or |Taxon| instance or list[|Taxon|]
705            If no |Taxon| instances have ``label`` attributes that match
706            the ``label`` argument, then |None|. Otherise, if
707            `first_match_only==True`, then a |Taxon| instance with
708            ``label`` attribute matching the value of the ``label`` argument; if
709            `first_match_only==False`, a list of one or more |Taxon|
710            instances with a ``label`` attribute matching the ``label`` argument.
711        """
712        taxa = []
713        if is_case_sensitive is True or (is_case_sensitive is None and self.is_case_sensitive):
714            for taxon in self._taxa:
715                if label == taxon.label:
716                    if first_match_only:
717                        return taxon
718                    else:
719                        taxa.append(taxon)
720        else:
721            label = str(label).lower()
722            for taxon in self._taxa:
723                if label == taxon.lower_cased_label:
724                    if first_match_only:
725                        return taxon
726                    else:
727                        taxa.append(taxon)
728        if len(taxa) == 0:
729            if error_if_not_found:
730                raise LookupError(label)
731            else:
732                return None
733        return taxa
734
735    ### Adding Taxa
736
737    def add_taxon(self, taxon):
738        """
739        Adds a new |Taxon| object to ``self``.
740
741        If ``taxon`` is not already in the collection of |Taxon| objects in this
742        namespace, and this namespace is mutable, it is added to the
743        collection. If it is already in the collection, then nothing happens.
744        If it is not already in the collection, but the namespace is not
745        mutable, then TypeError is raised.
746
747        Parameters
748        ----------
749        taxon : |Taxon|
750            The |Taxon| object to be accessioned or registered in this
751            collection.
752
753        Raises
754        ------
755        TypeError
756            If this namespace is immutable (i.e.
757            :attr:`TaxonNamespace.is_mutable` is |False|).
758
759        """
760        # NOTE
761        # Previously, this was:
762        #
763        #     if taxon in self._taxa:
764        #
765        # Changing the membership lookup to dictionaries resulted in 10x
766        # increase in speed!!!!
767        if taxon in self._taxon_accession_index_map:
768            return
769        if not self.is_mutable:
770            raise error.ImmutableTaxonNamespaceError("Taxon '{}' cannot be added to an immutable TaxonNamespace".format((taxon.label)))
771        self._taxa.append(taxon)
772        self._accession_index_taxon_map[self._current_accession_count] = taxon
773        self._taxon_accession_index_map[taxon] = self._current_accession_count
774        self._current_accession_count += 1
775
776    def append(self, taxon):
777        """
778        LEGACY. Use 'add_taxon()' instead.
779        """
780        return self.add_taxon(taxon)
781
782    def add_taxa(self, taxa):
783        """
784        Adds multiple |Taxon| objects to self.
785
786        Each |Taxon| object in ``taxa`` that is not already in the collection of
787        |Taxon| objects in this namespace is added to it. If any of the |Taxon|
788        objects are already in the collection, then nothing happens. If the
789        namespace is immutable, then TypeError is raised when trying
790        to add |Taxon| objects.
791
792        Parameters
793        ----------
794        taxa : collections.Iterable [|Taxon|]
795            A list of |Taxon| objects to be accessioned or registered in this
796            collection.
797
798        Raises
799        ------
800        TypeError
801            If this namespace is immutable (i.e. :attr:`TaxonNamespace.is_mutable` is
802            |False|).
803        """
804        for t in taxa:
805            self.add_taxon(t)
806
807    def new_taxon(self, label):
808        """
809        Creates, adds, and returns a new |Taxon| object with corresponding
810        label.
811
812        Parameters
813        ----------
814        label : string or string-like
815            The name or label of the new operational taxonomic unit concept.
816
817        Returns
818        -------
819        taxon: |Taxon|
820            The new |Taxon| object,
821
822        """
823        if not self.is_mutable:
824            raise error.ImmutableTaxonNamespaceError("Taxon '{}' cannot be added to an immutable TaxonNamespace".format(label))
825        taxon = Taxon(label=label)
826        self.add_taxon(taxon)
827        return taxon
828
829    def new_taxa(self, labels):
830        """
831        Creates and add a new |Taxon| with corresponding label for each label
832        in ``labels``. Returns list of |Taxon| objects created.
833
834        Parameters
835        ----------
836        labels : ``collections.Iterable`` [string]
837            The values of the ``label`` attributes of the new |Taxon| objects to
838            be created, added to this namespace collection, and returned.
839
840        Returns
841        -------
842        taxa : ``collections.Iterable`` [|Taxon|]
843            A list of |Taxon| objects created and added.
844
845        Raises
846        ------
847        TypeError
848            If this namespace is immutable (i.e.
849            :attr:`TaxonNamespace.is_mutable` is |False|).
850
851        """
852        if not self.is_mutable:
853            raise error.ImmutableTaxonNamespaceError("Taxon objects cannot be added to an immutable TaxonNamespace")
854        taxa = []
855        for label in labels:
856            taxa.append(self.new_taxon(label=label))
857        return taxa
858
859    ### Removing Taxa
860
861    def remove_taxon(self, taxon):
862        """
863        Removes specified |Taxon| object from the collection in this namespace.
864
865        Parameters
866        ----------
867        taxon : a |Taxon| object
868            The |Taxon| object to be removed.
869
870        Raises
871        ------
872        ValueError
873            If ``taxon`` is not in the collection of this namespace.
874        """
875        if taxon not in self._taxa:
876            raise ValueError(taxon)
877        self._taxa.remove(taxon)
878        # assert taxon not in self._taxa
879        while taxon in self._taxa:
880            self._taxa.remove(taxon)
881        idx = self._taxon_accession_index_map.pop(taxon, None)
882        if idx is not None:
883            self._accession_index_taxon_map.pop(idx, None)
884            self._taxon_accession_index_map.pop(taxon, None)
885        bm = self._taxon_bitmask_map.pop(taxon, None)
886        if bm is not None:
887            # self._split_bitmask_taxon_map.pop(bm, None)
888            self._taxon_accession_index_map.pop(taxon, None)
889
890    def remove(self, taxon):
891        deprecate.dendropy_deprecation_warning(
892                message="Deprecated since DendroPy 4: 'TaxonNamespace.remove()'; use 'TaxonNamespace.remove_taxon()' instead",
893                stacklevel=3)
894        return self.remove_taxon(taxon)
895
896    def remove_taxon_label(self,
897            label,
898            is_case_sensitive=None,
899            first_match_only=False,
900            ):
901        """
902        Removes *all* |Taxon| objects with label matching ``label`` from the
903        collection in this namespace.
904
905        Parameters
906        ----------
907        label : string or string-like
908            The value of the |Taxon| object label to remove.
909        is_case_sensitive : |None| or bool
910            By default, label lookup will use the
911            ``is_case_sensitive`` attribute of ``self`` to decide
912            whether or not to respect case when trying to match labels to
913            operational taxonomic unit names represented by |Taxon|
914            instances. This can be over-ridden by specifying
915            ``is_case_sensitive`` to |True| (forcing case-sensitivity) or |False|
916            (forcing case-insensitivity).
917        first_match_only : bool
918            If |False|, then the entire namespace will be searched and *all*
919            |Taxon| objects with the matching labels will be remove. If
920            |True| then only the first |Taxon| object with a matching
921            label will be removed (i.e., the entire namespace is not searched).
922            Setting this argument to |True| will be more efficient and should
923            be preferred if there are no redundant or duplicate labels.
924
925        Raises
926        ------
927        LookupError
928            If no |Taxon| objects are found with matching label(s).
929
930        See Also
931        --------
932        :meth:`TaxonNamespace.discard_taxon_labels`
933            Similar, but does not raise an error if no matching |Taxon|
934            objects are found.
935        """
936        taxa = self._lookup_label(label,
937                is_case_sensitive=is_case_sensitive,
938                first_match_only=first_match_only,
939                error_if_not_found=True,
940                )
941        for taxon in taxa:
942            self.remove_taxon(taxon)
943
944    def discard_taxon_label(self,
945            label,
946            is_case_sensitive=None,
947            first_match_only=False,
948            ):
949        """
950        Removes *all* |Taxon| objects with label matching ``label`` from the
951        collection in this namespace.
952
953        Parameters
954        ----------
955        label : string or string-like
956            The value of the |Taxon| object label to remove.
957        is_case_sensitive : |None| or bool
958            By default, label lookup will use the
959            ``is_case_sensitive`` attribute of ``self`` to decide
960            whether or not to respect case when trying to match labels to
961            operational taxonomic unit names represented by |Taxon|
962            instances. This can be over-ridden by specifying
963            ``is_case_sensitive`` to |True| (forcing case-sensitivity) or |False|
964            (forcing case-insensitivity).
965        first_match_only : bool
966            If |False|, then the entire namespace will be searched and *all*
967            |Taxon| objects with the matching labels will be remove. If
968            |True| then only the first |Taxon| object with a matching
969            label will be removed (i.e., the entire namespace is not searched).
970            Setting this argument to |True| will be more efficient and should
971            be preferred if there are no redundant or duplicate labels.
972
973        See Also
974        --------
975        :meth:`TaxonNamespace.remove_taxon_label` : Similar, but
976            raises an error if no matching |Taxon| objects are found.
977        """
978        taxa = self._lookup_label(label,
979                is_case_sensitive=is_case_sensitive,
980                first_match_only=first_match_only,
981                error_if_not_found=False,
982                )
983        if taxa is None:
984            return
985        for taxon in taxa:
986            self.remove_taxon(taxon)
987
988    def clear(self):
989        """
990        Removes all |Taxon| objects from this namespace.
991        """
992        # self._taxa.clear() # Python 2 ``list`` class does not have `clear()` method
993        del self._taxa[:]
994        self._accession_index_taxon_map.clear()
995        self._taxon_accession_index_map.clear()
996        self._taxon_bitmask_map.clear()
997        # self._split_bitmask_taxon_map.clear()
998
999    ### Look-up and Retrieval of Taxa
1000
1001    def findall(self, label, is_case_sensitive=None):
1002        """
1003        Return list of |Taxon| object(s) with label matching ``label``.
1004
1005        Parameters
1006        ----------
1007        label : string or string-like
1008            The value which the ``label`` attribute of the |Taxon| object(s)
1009            to be returned must match.
1010        is_case_sensitive : |None| or bool
1011            By default, label lookup will use the
1012            ``is_case_sensitive`` attribute of ``self`` to decide
1013            whether or not to respect case when trying to match labels to
1014            operational taxonomic unit names represented by |Taxon|
1015            instances. This can be over-ridden by specifying
1016            ``is_case_sensitive`` to |True| (forcing case-sensitivity) or |False|
1017            (forcing case-insensitivity).
1018
1019        Returns
1020        -------
1021        taxa : ``list`` [|Taxon|]
1022            A list containing zero or more |Taxon| objects with labels
1023            matching ``label``.
1024
1025        """
1026        taxa = self._lookup_label(label=label,
1027                is_case_sensitive=is_case_sensitive,
1028                first_match_only=False,
1029                error_if_not_found=False,
1030                )
1031        if taxa is None:
1032            return []
1033        else:
1034            return taxa
1035
1036    def has_taxon_label(self, label, is_case_sensitive=None):
1037        """
1038        Checks for presence of a |Taxon| object with the given label.
1039
1040        Parameters
1041        ----------
1042        label : string or string-like
1043            The value of the |Taxon| object label to match.
1044        is_case_sensitive : |None| or bool
1045            By default, label lookup will use the
1046            ``is_case_sensitive`` attribute of ``self`` to decide
1047            whether or not to respect case when trying to match labels to
1048            operational taxonomic unit names represented by |Taxon|
1049            instances. This can be over-ridden by specifying
1050            ``is_case_sensitive`` to |True| (forcing case-sensitivity) or |False|
1051            (forcing case-insensitivity).
1052
1053        Returns
1054        -------
1055        b : boolean
1056            |True| if there is at least one |Taxon| object in this namespace
1057            with a label matching the value of ``label``. Otherwise, |False|.
1058        """
1059        t = self._lookup_label(
1060                label=label,
1061                is_case_sensitive=is_case_sensitive,
1062                first_match_only=True,
1063                error_if_not_found=False,
1064                )
1065        return t is not None
1066
1067    def has_taxa_labels(self, labels, is_case_sensitive=None):
1068        """
1069        Checks for presence of |Taxon| objects with the given labels.
1070
1071        Parameters
1072        ----------
1073        labels : ``collections.Iterable`` [string]
1074            The values of the |Taxon| object labels to match.
1075        is_case_sensitive : |None| or bool
1076            By default, label lookup will use the
1077            ``is_case_sensitive`` attribute of ``self`` to decide
1078            whether or not to respect case when trying to match labels to
1079            operational taxonomic unit names represented by |Taxon|
1080            instances. This can be over-ridden by specifying
1081            ``is_case_sensitive`` to |True| (forcing case-sensitivity) or |False|
1082            (forcing case-insensitivity).
1083
1084        Returns
1085        -------
1086        b : boolean
1087            Returns |True| if, for every element in the iterable ``labels``,
1088            there is at least one |Taxon| object that has a label attribute
1089            that matches this. |False| otherwise.
1090        """
1091        for label in labels:
1092            f = self._lookup_label(label=label,
1093                    is_case_sensitive=is_case_sensitive,
1094                    first_match_only=False,
1095                    error_if_not_found=False,
1096                    )
1097            if f is None:
1098                return False
1099        return True
1100
1101    def get_taxon(self, label, is_case_sensitive=None):
1102        """
1103        Retrieves a |Taxon| object with the given label.
1104
1105        If multiple |Taxon| objects exist with labels that match
1106        ``label``, then only the first one is returned.  If no |Taxon|
1107        object is found in this namespace with the specified critieria,
1108        |None| is returned.
1109
1110        Parameters
1111        ----------
1112        label : string or string-like
1113            The value which the ``label`` attribute of the |Taxon| object
1114            to be returned must match.
1115        is_case_sensitive : |None| or bool
1116            By default, label lookup will use the
1117            ``is_case_sensitive`` attribute of ``self`` to decide
1118            whether or not to respect case when trying to match labels to
1119            operational taxonomic unit names represented by |Taxon|
1120            instances. This can be over-ridden by specifying
1121            ``is_case_sensitive`` to |True| (forcing case-sensitivity) or |False|
1122            (forcing case-insensitivity).
1123
1124        Returns
1125        -------
1126        taxon : |Taxon| object or |None|
1127            The first |Taxon| object in this namespace collection with a label
1128            matching ``label``, or |None| if no such |Taxon| object exists.
1129        """
1130        return self._lookup_label(label=label,
1131                is_case_sensitive=is_case_sensitive,
1132                first_match_only=True,
1133                error_if_not_found=False,
1134                )
1135
1136    def get_taxa(self, labels, is_case_sensitive=None, first_match_only=False):
1137        """
1138        Retrieves list of |Taxon| objects with given labels.
1139
1140        Parameters
1141        ----------
1142        labels : ``collections.Iterable`` [string]
1143            Any |Taxon| object in this namespace collection that has a label
1144            attribute that matches any value in ``labels`` will be included in
1145            the list returned.
1146        is_case_sensitive : |None| or bool
1147            By default, label lookup will use the
1148            ``is_case_sensitive`` attribute of ``self`` to decide
1149            whether or not to respect case when trying to match labels to
1150            operational taxonomic unit names represented by |Taxon|
1151            instances. This can be over-ridden by specifying
1152            ``is_case_sensitive`` to |True| (forcing case-sensitivity) or |False|
1153            (forcing case-insensitivity).
1154        first_match_only : bool
1155            If |False|, then for *each* label in ``labels``, the entire namespace
1156            will be searched and *all* |Taxon| objects with the matches
1157            will be added to the lest. If |True| then, for each label in
1158            ``labels``, only the first |Taxon| object with a matching
1159            label will be added to the list (i.e., the entire namespace is not
1160            searched). Setting this argument to |True| will be more
1161            efficient and should be preferred if there are no redundant or
1162            duplicate labels.
1163
1164        Returns
1165        -------
1166        taxa : ``list`` [|Taxon|]
1167            A list containing zero or more |Taxon| objects with labels
1168            matching ``label``.
1169        """
1170        taxa = []
1171        for label in labels:
1172            tt = self._lookup_label(label=label,
1173                    is_case_sensitive=is_case_sensitive,
1174                    first_match_only=first_match_only,
1175                    error_if_not_found=False,
1176                    )
1177            if tt is None:
1178                continue
1179            if first_match_only:
1180                taxa.append(tt)
1181            else:
1182                for t in tt:
1183                    if t not in taxa:
1184                        taxa.append(t)
1185        return taxa
1186
1187    def require_taxon(self, label, is_case_sensitive=None):
1188        """
1189        Retrieves a |Taxon| object with the given label, creating it if
1190        necessary.
1191
1192        Retrieves a Taxon object with the label, ``label``.
1193        If multiple |Taxon| objects exist with labels that match
1194        ``label``, then only the first one is returned.  If no such
1195        |Taxon| object exists in the current namespace and the
1196        |TaxonNamespace| is NOT mutable, an exception is raised.  If no
1197        such |Taxon| object exists in the current namespace and
1198        |TaxonNamespace| is mutable, then a new |Taxon| is
1199        created, added, and returned.
1200
1201        Parameters
1202        ----------
1203        label : string or string-like
1204            The value which the ``label`` attribute of the |Taxon| object
1205            to be returned must match.
1206        is_case_sensitive : |None| or bool
1207            By default, label lookup will use the
1208            ``is_case_sensitive`` attribute of ``self`` to decide
1209            whether or not to respect case when trying to match labels to
1210            operational taxonomic unit names represented by |Taxon|
1211            instances. This can be over-ridden by specifying
1212            ``is_case_sensitive`` to |True| (forcing case-sensitivity) or |False|
1213            (forcing case-insensitivity).
1214
1215        Returns
1216        -------
1217        taxon : |Taxon| object or |None|
1218            A |Taxon| object in this namespace collection with a label
1219            matching ``label``.
1220
1221        Raises
1222        ------
1223        TypeError
1224            If no |Taxon| object is currently in the collection with a label
1225            matching the input ``label`` and the ``is_mutable`` attribute of self
1226            is |False|.
1227        """
1228        taxon = self._lookup_label(label=label,
1229                is_case_sensitive=is_case_sensitive,
1230                first_match_only=True,
1231                error_if_not_found=False,
1232                )
1233        if taxon is not None:
1234            return taxon
1235        if not self.is_mutable:
1236            raise error.ImmutableTaxonNamespaceError("Taxon '{}' not in TaxonNamespace, and cannot be created because TaxonNamespace is immutable".format(label))
1237        taxon = self.new_taxon(label=label)
1238        return taxon
1239
1240    ### Taxon Ordering
1241
1242    def sort(self, key=None, reverse=False):
1243        """
1244        Sorts |Taxon| objects in collection. If ``key`` is not given, defaults
1245        to sorting by label (i.e., ``key = lambda x: x.label``).
1246
1247        Parameters
1248        ----------
1249        key : key function object, optional
1250            Function that takes a |Taxon| object as an argument and
1251            returns the value that determines its sort order. Defaults to
1252            sorting by label.
1253        reverse : boolean, optional
1254            If |True|, sort will be in reverse order.
1255        """
1256        if key is None:
1257            key = lambda x: x.label
1258        self._taxa.sort(key=key, reverse=reverse)
1259
1260    def reverse(self):
1261        """
1262        Reverses order of |Taxon| objects in collection.
1263        """
1264        self._taxa.reverse()
1265
1266    ### Summarization of Collection
1267
1268    def labels(self):
1269        """
1270        Returns list of labels of all |Taxon| objects in ``self``.
1271
1272        Returns
1273        -------
1274        labels : ``list`` [string]
1275            List of :attr:`Taxon.label` values of |Taxon| objects in
1276            ``self``.
1277        """
1278        return [t.label for t in self._taxa]
1279
1280    def label_taxon_map(self, is_case_sensitive=None):
1281        """
1282        Returns dictionary with taxon labels as keys and corresponding |Taxon|
1283        objects as values.
1284
1285        If the |TaxonNamespace| is currently case-insensitive, then the
1286        dictionary returned will have case-insensitive keys, other the
1287        dictionary will be case-sensitive. You can override this by explicitly
1288        specifying ``is_case_sensitive`` to |False| or |True|.
1289
1290        No attempt is made to handle collisions.
1291
1292        Returns
1293        -------
1294        d : dictonary-like
1295            Dictionary with :attr:`Taxon.label` values of |Taxon| objects in
1296            ``self`` as keys and corresponding |Taxon| objects as values.
1297        """
1298        if is_case_sensitive is True or (is_case_sensitive is None and self.is_case_sensitive):
1299            d = {}
1300        else:
1301            d = container.CaseInsensitiveDict()
1302        for t in self._taxa:
1303            d[t.label] = t
1304        return d
1305
1306    ### Split Management
1307
1308    # def complement_bitmask(self, bitmask):
1309    #     """
1310    #     Returns complement of the given split or clade bitmask.
1311
1312    #     Parameters
1313    #     ----------
1314    #     bitmask : integer
1315    #         Bitmask to be complemented.
1316
1317    #     Returns
1318    #     -------
1319    #     h : integer
1320    #         Complement of ``bitmask``.
1321    #     """
1322    #     return (~bitmask) & self.all_taxa_bitmask()
1323
1324    # def normalize_bitmask(self, bitmask):
1325    #     """
1326    #     "Normalizes" split, by ensuring that the least-significant bit is
1327    #     always 1 (used on unrooted trees to establish split identity
1328    #     independent of rotation).
1329
1330    #     Parameters
1331    #     ----------
1332    #     bitmask : integer
1333    #         Split bitmask hash to be normalized.
1334
1335    #     Returns
1336    #     -------
1337    #     h : integer
1338    #         Normalized split bitmask.
1339    #     """
1340    #     return container.NormalizedBitmaskDict.normalize(bitmask, self.all_taxa_bitmask(), 1)
1341
1342    def all_taxa_bitmask(self):
1343        """
1344        Returns mask of all taxa.
1345
1346        Returns
1347        -------
1348        h : integer
1349            Bitmask spanning all |Taxon| objects in self.
1350        """
1351        #return pow(2, len(self)) - 1
1352        b = 1 << self._current_accession_count
1353        return b - 1
1354
1355    def taxon_bitmask(self, taxon):
1356        """
1357        Returns bitmask value of split hash for split subtending node with
1358        ``taxon``.
1359
1360        Parameters
1361        ----------
1362        taxon : |Taxon|
1363            |Taxon| object for which to calculate split hash bitmask.
1364
1365        Returns
1366        -------
1367        h : integer
1368            Split hash bitmask value for node associated with |Taxon| object ``taxon``.
1369        """
1370        # i = self._taxa.index(taxon)
1371        # m = 1 << i
1372        # return m
1373        try:
1374            return self._taxon_bitmask_map[taxon]
1375        except KeyError:
1376            i = self._taxon_accession_index_map[taxon]
1377            # i = self._taxa.index(taxon)
1378            m = 1 << i
1379            self._taxon_bitmask_map[taxon] = m
1380            # self._split_bitmask_taxon_map[m] = taxon
1381            return m
1382
1383    def accession_index(self, taxon):
1384        """
1385        Returns the accession index of ``taxon``. Note that this may not be the
1386        same as the list index of the taxon if taxa have been deleted from the
1387        namespace.
1388
1389        Parameters
1390        ----------
1391        taxon : |Taxon|
1392            |Taxon| object for which to return the accession index.
1393
1394        Returns
1395        -------
1396        h : integer
1397            The accession index.
1398        """
1399        return self._taxon_accession_index_map[taxon]
1400
1401    def taxa_bitmask(self, **kwargs):
1402        """
1403        Retrieves the list of split hash bitmask values representing all taxa
1404        specified by keyword-specified list of taxon objects (``taxa=``) or
1405        labels (``labels=``).
1406
1407        Parameters
1408        ----------
1409        \*\*kwargs : keyword arguments
1410            Requires one of:
1411
1412                taxa : ``collections.Iterable`` [|Taxon|]
1413                    Iterable of |Taxon| objects.
1414                labels : ``collections.Iterable`` [string]
1415                    Iterable of |Taxon| label values.
1416
1417        Returns
1418        -------
1419        b : ``list`` [integer]
1420            List of split hash bitmask values for specified |Taxon|
1421            objects.
1422        """
1423        if "taxa" in kwargs:
1424            taxa = kwargs["taxa"]
1425        else:
1426            taxa = self.get_taxa(**kwargs)
1427        bitmask = 0
1428        for taxon in taxa:
1429            bitmask |= self.taxon_bitmask(taxon)
1430        return bitmask
1431
1432    def taxa_bipartition(self,
1433            **kwargs):
1434        """
1435        Returns a bipartition that represents all taxa specified by
1436        keyword-specified list of taxon objects (``taxa=``) or labels
1437        (``labels=``).
1438
1439        Parameters
1440        ----------
1441        \*\*kwargs : keyword arguments
1442            Requires one of:
1443
1444                taxa : ``collections.Iterable`` [|Taxon|]
1445                    Iterable of |Taxon| objects.
1446                labels : ``collections.Iterable`` [string]
1447                    Iterable of |Taxon| label values.
1448
1449        Returns
1450        -------
1451        b : ``list`` [integer]
1452            List of split hash bitmask values for specified |Taxon|
1453            objects.
1454        """
1455        tree_leafset_bitmask = kwargs.get("tree_leafset_bitmask")
1456        if tree_leafset_bitmask is None:
1457            tree_leafset_bitmask = self.all_taxa_bitmask()
1458        from dendropy.datamodel.treemodel import Bipartition
1459        bitmask = self.taxa_bitmask(**kwargs)
1460        return Bipartition(
1461                bitmask=bitmask,
1462                tree_leafset_bitmask=tree_leafset_bitmask,
1463                compile_bipartition=True,
1464                is_rooted=kwargs.get("is_rooted", None))
1465
1466    def get_taxa_bitmask(self, **kwargs):
1467        """
1468        LEGACY. Use 'taxa_bitmask' instead.
1469        """
1470        return self.taxa_bitmask(**kwargs)
1471
1472    def bitmask_taxa_list(self, bitmask, index=0):
1473        """
1474        Returns list of |Taxon| objects represented by split
1475        ``bitmask``.
1476
1477        Parameters
1478        ----------
1479        bitmask : integer
1480            Split hash bitmask value.
1481        index : integer, optional
1482            Start from this |Taxon| object instead of the first
1483            |Taxon| object in the collection.
1484
1485        Returns
1486        -------
1487        taxa : ``list`` [|Taxon|]
1488            List of |Taxon| objects specified or spanned by
1489            ``bitmask``.
1490        """
1491        taxa = []
1492        while bitmask:
1493            if bitmask & 1:
1494                taxa.append(self._accession_index_taxon_map[index])
1495            bitmask = bitmask >> 1
1496            index += 1
1497        return taxa
1498
1499    def bitmask_as_newick_string(self,
1500            bitmask,
1501            preserve_spaces=False,
1502            quote_underscores=True):
1503        """
1504        Represents a split as a newick string.
1505
1506        Parameters
1507        ----------
1508        bitmask : integer
1509            Split hash bitmask value.
1510        preserve_spaces : boolean, optional
1511            If |False| (default), then spaces in taxon labels will be replaced
1512            by underscores. If |True|, then taxon labels with spaces will be
1513            wrapped in quotes.
1514        quote_underscores : boolean, optional
1515            If |True| (default), then taxon labels with underscores will be
1516            wrapped in quotes. If |False|, then the labels will not be wrapped
1517            in quotes.
1518
1519        Returns
1520        -------
1521        s : string
1522            NEWICK representation of split specified by ``bitmask``.
1523        """
1524        from dendropy.dataio import nexusprocessing
1525        return nexusprocessing.bitmask_as_newick_string(
1526                bitmask,
1527                self,
1528                preserve_spaces=preserve_spaces,
1529                quote_underscores=quote_underscores)
1530
1531    def split_as_newick_string(self,
1532            split,
1533            preserve_spaces=False,
1534            quote_underscores=True):
1535        """
1536        Represents a split as a newick string.
1537
1538        Parameters
1539        ----------
1540        bitmask : integer
1541            Split hash bitmask value.
1542        preserve_spaces : boolean, optional
1543            If |False| (default), then spaces in taxon labels will be replaced
1544            by underscores. If |True|, then taxon labels with spaces will be
1545            wrapped in quotes.
1546        quote_underscores : boolean, optional
1547            If |True| (default), then taxon labels with underscores will be
1548            wrapped in quotes. If |False|, then the labels will not be wrapped
1549            in quotes.
1550
1551        Returns
1552        -------
1553        s : string
1554            NEWICK representation of split specified by ``bitmask``.
1555        """
1556        return self.bitmask_as_newick_string(
1557                bitmask=split,
1558                preserve_spaces=preserve_spaces,
1559                quote_underscores=quote_underscores)
1560
1561    def bitmask_as_bitstring(self, b):
1562        return bitprocessing.int_as_bitstring(b, length=self._current_accession_count)
1563
1564    def split_as_string(self, b):
1565        deprecate.dendropy_deprecation_warning(
1566                message="Deprecated since DendroPy 4: 'TaxonNamespace.split_as_string()'; use 'TaxonNamespace.bitmask_as_bitstring()' instead",
1567                stacklevel=3)
1568        return self.bitmask_as_bitstring(b)
1569
1570    def description(self, depth=1, indent=0, itemize="", output=None, **kwargs):
1571        """
1572        Returns description of object, up to level ``depth``.
1573        """
1574        if depth is None or depth < 0:
1575            return ""
1576        output_strio = StringIO()
1577        if self.label is None:
1578            label = str(self.label)
1579        output_strio.write('%s%sTaxonNamespace object at %s%s'
1580                % (indent*' ',
1581                   itemize,
1582                   hex(id(self)),
1583                   label))
1584        if depth >= 1:
1585            output_strio.write(': %d Taxa' % len(self))
1586            if depth >= 2 and len(self) > 0:
1587                for i, t in enumerate(self):
1588                    output_strio.write('\n')
1589                    t.description(depth=depth-1, indent=indent+4, itemize="[%d]" % (i), output=output_strio, **kwargs)
1590        s = output_strio.getvalue()
1591        if output is not None:
1592            output.write(s)
1593        return s
1594
1595    ### I/O
1596
1597    def _format_and_write_to_stream(self, stream, schema, **kwargs):
1598        """
1599        Writes out ``self`` in ``schema`` format to a destination given by
1600        file-like object ``stream``.
1601
1602        Parameters
1603        ----------
1604        stream : file or file-like object
1605            Destination for data.
1606        schema : string
1607            Must be a recognized and tree file schema, such as "nexus",
1608            "newick", etc, for which a specialized tree list writer is
1609            available. If this is not implemented for the schema specified, then
1610            a UnsupportedSchemaError is raised.
1611
1612        \*\*kwargs : keyword arguments, optional
1613            Keyword arguments will be passed directly to the writer for the
1614            specified schema. See documentation for details on keyword
1615            arguments supported by writers of various schemas.
1616
1617        """
1618        from dendropy import dataio
1619        writer = dataio.get_writer(schema, **kwargs)
1620        writer._write(
1621                stream=stream,
1622                taxon_namespaces=[self],)
1623
1624##############################################################################
1625## TaxonSet
1626
class TaxonSet(TaxonNamespace):
    """
    Legacy (DendroPy 3.x) name for |TaxonNamespace|, retained temporarily so
    that old code keeps running. It will be removed in future versions: all
    new code should be written using |TaxonNamespace|, and old code needs to
    be updated to use |TaxonNamespace|.
    """

    def __init__(self, *args, **kwargs):
        """
        Issues a deprecation warning, then initializes exactly as
        |TaxonNamespace| does with the same arguments.
        """
        warning_text = "Deprecated since DendroPy 4: 'TaxonSet' will no longer be supported in future releases; use 'TaxonNamespace' instead"
        deprecate.dendropy_deprecation_warning(
                message=warning_text,
                stacklevel=3)
        TaxonNamespace.__init__(self, *args, **kwargs)
1640
1641##############################################################################
1642## Taxon
1643
class Taxon(
        basemodel.DataObject,
        basemodel.Annotable):
    """
    A taxon associated with a sequence or a node on a tree.

    Instances hash and compare equal by identity only; ``<`` orders by
    ``label``. Shallow copies are refused, and deep copies go through
    ``__deepcopy__``.
    """

    def __init__(self, label=None):
        """
        Parameters
        ----------
        label : string or |Taxon| object
            Label or name of this operational taxonomic unit concept. If a
            string, then the ``label`` attribute of ``self`` is set to this value.
            If a |Taxon| object, then the ``label`` attribute of ``self`` is
            set to the same value as the ``label`` attribute the other
            |Taxon| object and all annotations/metadata are copied.
        """
        if not isinstance(label, Taxon):
            basemodel.DataObject.__init__(self, label=label)
            self._lower_cased_label = None
        else:
            # Copy-construction: deep-copy every instance attribute of the
            # source taxon (the label included), mapping the source to
            # ``self`` in the memo so that self-references resolve to the
            # new object. Annotations are copied separately.
            source = label
            memo = {id(source): self}
            for attr_name, attr_value in source.__dict__.items():
                if attr_name == "_annotations":
                    continue
                self.__dict__[attr_name] = copy.deepcopy(attr_value, memo=memo)
            self.deep_copy_annotations_from(source, memo=memo)
        # Comments always start out empty, on either construction path.
        self.comments = []

    def _get_label(self):
        return self._label

    def _set_label(self, v):
        self._label = v
        # Invalidate the cached lower-cased form.
        self._lower_cased_label = None

    label = property(_get_label, _set_label)

    def _get_lower_cased_label(self):
        # Lazily computed, cached lower-case version of the label.
        if self._label is None:
            return None
        cached = self._lower_cased_label
        if cached is None:
            cached = str(self._label).lower()
            self._lower_cased_label = cached
        return cached

    lower_cased_label = property(_get_lower_cased_label)

    def __copy__(self):
        # Shallow copies are not supported.
        raise TypeError("Cannot shallow-copy Taxon")

    def taxon_namespace_scoped_copy(self, memo=None):
        """
        Returns ``self`` (no new object is created); if ``memo`` is given,
        ``self`` is recorded in it as its own copy.
        """
        if memo is not None:
            memo[id(self)] = self
        return self

    def __deepcopy__(self, memo=None):
        """
        Deep-copies all instance attributes, and separately the annotations,
        into the object registered for ``self`` in ``memo``, creating (and
        registering) a fresh uninitialized instance if there is none.
        """
        if memo is None:
            memo = {}
        key = id(self)
        if key in memo:
            clone = memo[key]
        else:
            clone = self.__class__.__new__(self.__class__)
            memo[key] = clone
        for attr_name, attr_value in self.__dict__.items():
            if attr_name != "_annotations":
                clone.__dict__[attr_name] = copy.deepcopy(attr_value, memo)
        clone.deep_copy_annotations_from(self, memo)
        return clone

    def __hash__(self):
        # Identity-based, consistent with ``__eq__``.
        return id(self)

    def __eq__(self, other):
        # Two taxa are equal only if they are the same object.
        return self is other

    def __lt__(self, other):
        # Ordering is by label.
        return self.label < other.label

    def __str__(self):
        "String representation of self = taxon name."
        return "'{}'".format(self._label)

    def __repr__(self):
        class_name = self.__class__.__name__
        address = hex(id(self))
        return "<{} {} '{}'>".format(class_name, address, self._label)

    def description(self, depth=1, indent=0, itemize="", output=None, **kwargs):
        """
        Returns description of object, up to level ``depth``.

        A |None| or negative ``depth`` gives an empty string; otherwise a
        single line is composed from ``indent`` spaces, ``itemize``, the
        object's address and its (quoted) label. If ``output`` is given, the
        line is also written to it.
        """
        if depth is None or depth < 0:
            return ""
        shown_label = "<Unnamed Taxon>" if self._label is None else "'{}'".format(self._label)
        buf = StringIO()
        buf.write('{}{} Taxon object at {}: {}'.format(indent*' ', itemize, hex(id(self)), shown_label))
        text = buf.getvalue()
        if output is not None:
            output.write(text)
        return text
1748
1749##############################################################################
1750## TaxonNamespacePartition
1751
class TaxonNamespacePartition(TaxonNamespaceAssociated):
    """
    Manages a partition of a TaxonNamespace (i.e., a set of mutually-exclusive
    and exhaustive subsets of a TaxonNamespace).

    The subsets are kept in ``subset_map``, a dictionary mapping each subset
    identifier to a |TaxonNamespace| holding the members of that subset.
    """

    def __init__(self, taxon_namespace, **kwargs):
        """
        __init__ uses one of the following keyword arguments:

            - ``membership_fn``
                A function that takes a |Taxon| object as an argument and
                returns a population membership identifier or flag
                (e.g., a string, an integer).
            - ``membership_attr_name``
                Name of an attribute of |Taxon| objects that serves as an
                identifier for subset membership.
            - ``membership_dict``
                A dictionary with |Taxon| objects as keys and population
                membership identifier or flag as values (e.g., a string,
                an integer).
            - ``membership_lists``
                A container of containers of |Taxon| objects, with every
                |Taxon| object in ``taxon_namespace`` represented once and only
                once in the sub-containers.

        If none of these are specified, defaults to a partition consisting of
        a single subset with all the objects in ``taxon_namespace``, keyed by
        the label of ``taxon_namespace``. If ``taxon_namespace`` is |None|,
        the partition is left empty.
        """
        TaxonNamespaceAssociated.__init__(self,
                taxon_namespace=taxon_namespace)
        self.subset_map = {}
        if taxon_namespace is not None:
            if len(kwargs) > 0:
                self.apply(**kwargs)
            else:
                ss = TaxonNamespace(self.taxon_namespace)
                self.subset_map = { self.taxon_namespace.label : ss}

    def subsets(self):
        """
        Return subsets of partition.
        """
        return set(self.subset_map.values())

    def __len__(self):
        """
        Number of subsets.
        """
        return len(self.subset_map)

    def __iter__(self):
        """
        Iterate over subsets.
        """
        for subset in self.subset_map.values():
            yield subset

    def __getitem__(self, label):
        """
        Get subset with specified label.
        """
        return self.subset_map[label]

    def apply(self, **kwargs):
        """
        Builds the subsets of the linked TaxonNamespace resulting from the
        partitioning scheme specified by one of the following keyword arguments:

            ``membership_fn``
                A function that takes a |Taxon| object as an argument and
                returns a population membership identifier or flag
                (e.g., a string, an integer).

            ``membership_attr_name``
                Name of an attribute of |Taxon| objects that serves as an
                identifier for subset membership.

            ``membership_dict``
                A dictionary with |Taxon| objects as keys and population
                membership identifier or flag as values (e.g., a string,
                an integer).

            ``membership_lists``
                A container of containers of |Taxon| objects, with every
                |Taxon| object in ``taxon_namespace`` represented once and only
                once in the sub-containers.

        If more than one is given, the first in the order listed above is
        used. Raises ``TypeError`` if none is given.
        """
        if "membership_fn" in kwargs:
            self.apply_membership_fn(kwargs["membership_fn"])
        elif  "membership_attr_name" in kwargs:
            self.apply_membership_attr_name(kwargs["membership_attr_name"])
        elif  "membership_dict" in kwargs:
            self.apply_membership_dict(kwargs["membership_dict"])
        elif "membership_lists" in kwargs:
            self.apply_membership_lists(kwargs["membership_lists"])
        else:
            # List every scheme that is actually accepted above.
            raise TypeError("Must specify partitioning scheme using one of: " \
                + "'membership_fn', 'membership_attr_name', 'membership_dict', or 'membership_lists'")

    def apply_membership_fn(self, mfunc):
        """
        Constructs subsets based on function ``mfunc``, which should take a
        |Taxon| object as an argument and return a population membership
        identifier or flag (e.g., a string, an integer).

        Any existing subsets are discarded. Returns the new set of subsets.
        """
        self.subset_map = {}
        for t in self.taxon_namespace:
            subset_id = mfunc(t)
            if subset_id not in self.subset_map:
                self.subset_map[subset_id] = TaxonNamespace(label=subset_id)
            self.subset_map[subset_id].add_taxon(t)
        return self.subsets()

    def apply_membership_attr_name(self, attr_name):
        """
        Constructs subsets based on attribute ``attr_name`` of each
        |Taxon| object.
        """
        return self.apply_membership_fn(lambda x: getattr(x, attr_name))

    def apply_membership_dict(self, mdict):
        """
        Constructs subsets based on dictionary ``mdict``, which should be
        dictionary with |Taxon| objects as keys and population membership
        identifier or flag as values (e.g., a string, an integer).
        """
        return self.apply_membership_fn(lambda x: mdict[x])

    def apply_membership_lists(self, mlists, subset_labels=None):
        """
        Constructs subsets based on ``mlists``, which should be an iterable
        of iterables of |Taxon| objects, with every |Taxon| object in
        ``taxon_namespace`` represented once and only once in the sub-containers.

        ``subset_labels``, if given, supplies the identifier for each subset
        (in the same order as ``mlists``) and must contain exactly as many
        items as ``mlists``, otherwise ``ValueError`` is raised. If not
        given, the subsets are identified by their position (0, 1, 2, ...).

        Any existing subsets are discarded. Returns the new set of subsets.
        """
        # Materialize so that any iterable (including a generator) can be
        # measured and paired up with its label.
        mlists = list(mlists)
        if subset_labels is not None:
            subset_labels = list(subset_labels)
            if len(subset_labels) != len(mlists):
                raise ValueError('Length of subset label list must equal to number of subsets')
        else:
            subset_labels = range(len(mlists))
        self.subset_map = {}
        for subset_id, mlist in zip(subset_labels, mlists):
            subset = TaxonNamespace(label=subset_id)
            self.subset_map[subset_id] = subset
            for t in mlist:
                subset.add_taxon(t)
        return self.subsets()
1899
1900##############################################################################
1901## TaxonNamespaceMapping
1902
class TaxonNamespaceMapping(
        basemodel.DataObject,
        basemodel.Annotable):
    """
    A many-to-one mapping of |Taxon| objects (e.g., gene taxa to population/species taxa).

    ``forward`` maps each domain |Taxon| to its range |Taxon|; ``reverse``
    maps each range |Taxon| to the set of domain |Taxon| objects mapped to it.
    """

    @staticmethod
    def create_contained_taxon_mapping(containing_taxon_namespace,
            num_contained,
            contained_taxon_label_prefix=None,
            contained_taxon_label_separator=' ',
            contained_taxon_label_fn=None):
        """
        Creates and returns a TaxonNamespaceMapping object that maps multiple
        "contained" Taxon objects (e.g., genes) to Taxon objects in
        ``containing_taxon_namespace`` (e.g., populations or species).

            ``containing_taxon_namespace``
                A TaxonNamespace object that defines a Taxon for each population or
                species.

            ``num_contained``
                The number of genes per population of species. The value of
                this attribute can be a scalar integer, in which case each
                species or population taxon will get the same fixed number
                of genes. Or it can be a list, in which case the list has
                to have as many elements as there are members in
                ``containing_taxon_namespace``, and each element will specify the
                number of genes that the corresponding species or population
                Taxon will get.

            ``contained_taxon_label_prefix``
                If specified, then each gene Taxon label will begin with this.
                Otherwise, each gene Taxon label will begin with the same label
                as its corresponding species/population taxon label. Note
                that with a fixed prefix the generated labels repeat across
                the containing taxa (the Taxon objects remain distinct).

            ``contained_taxon_label_separator``
                String used to separate gene Taxon label prefix from its index.

            ``contained_taxon_label_fn``
                If specified, should be a function that takes two arguments: a
                Taxon object from ``containing_taxon_namespace`` and an integer
                specifying the contained gene index (0-based). It should
                return a string which will be used as the label for the
                corresponding gene taxon. If not None, this will bypass the
                ``contained_taxon_label_prefix`` and
                ``contained_taxon_label_separator`` arguments.

        Labels generated without ``contained_taxon_label_fn`` have the form
        ``<prefix><separator><index>``, with the index starting at 1.
        """
        if isinstance(num_contained, int):
            _num_contained = [num_contained] * len(containing_taxon_namespace)
        else:
            _num_contained = num_contained
        contained_to_containing = {}
        contained_taxa = TaxonNamespace()
        for cidx, containing_taxon in enumerate(containing_taxon_namespace):
            num_new = _num_contained[cidx]
            # Honor the documented prefix; it used to be silently ignored.
            if contained_taxon_label_prefix is None:
                label_prefix = containing_taxon.label
            else:
                label_prefix = contained_taxon_label_prefix
            for new_idx in range(num_new):
                if contained_taxon_label_fn is not None:
                    label = contained_taxon_label_fn(containing_taxon,
                            new_idx)
                else:
                    label = "%s%s%d" % (label_prefix,
                            contained_taxon_label_separator,
                            new_idx+1)
                contained_taxon = Taxon(label=label)
                contained_to_containing[contained_taxon] = containing_taxon
                contained_taxa.append(contained_taxon)
        contained_to_containing_map = TaxonNamespaceMapping(domain_taxon_namespace=contained_taxa,
                range_taxon_namespace=containing_taxon_namespace,
                mapping_dict=contained_to_containing)
        return contained_to_containing_map

    def __init__(self, **kwargs):
        """
        __init__ uses one of the following keyword arguments:

            - ``mapping_fn``
                A function that takes a |Taxon| object from the domain taxa
                as an argument and returns the corresponding |Taxon| object
                from the range taxa. If this argument is given, then a
                |TaxonNamespace| or some other container of |Taxon| objects needs
                to be passed using the ``domain_taxon_namespace`` argument.
            - ``mapping_attr_name``
                Name of an attribute of |Taxon| object of the domain taxa
                that references the corresponding |Taxon| object from the
                range taxa. If this argument is given, then a |TaxonNamespace| or
                some other container of |Taxon| objects needs to be passed
                using the ``domain_taxon_namespace`` argument.
            - ``mapping_dict``
                A dictionary with |Taxon| objects from the domain taxa as
                keys, and the corresponding |Taxon| object from the range
                taxa as values. ``domain_taxon_namespace`` is optional in
                this case.

        Additionally accepted: ``range_taxon_namespace`` (optional container
        of range taxa) and ``label`` (label of this mapping object).

        Raises ``TypeError`` if no mapping scheme is given, or if
        ``domain_taxon_namespace`` is missing where it is required.
        """
        basemodel.DataObject.__init__(self, label=kwargs.pop("label", None))
        self.forward = {}
        self.reverse = {}
        if "mapping_fn" in kwargs:
            if "domain_taxon_namespace" not in kwargs:
                raise TypeError("Must specify 'domain_taxon_namespace'")
            self.apply_mapping_fn(kwargs["mapping_fn"],
                    domain_taxon_namespace=kwargs["domain_taxon_namespace"],
                    range_taxon_namespace=kwargs.get("range_taxon_namespace", None))
        elif "mapping_attr_name" in kwargs:
            if "domain_taxon_namespace" not in kwargs:
                raise TypeError("Must specify 'domain_taxon_namespace'")
            self.apply_mapping_attr_name(kwargs["mapping_attr_name"],
                    domain_taxon_namespace=kwargs["domain_taxon_namespace"],
                    range_taxon_namespace=kwargs.get("range_taxon_namespace", None))
        elif "mapping_dict" in kwargs:
            self.apply_mapping_dict(kwargs["mapping_dict"],
                    domain_taxon_namespace=kwargs.get("domain_taxon_namespace", None),
                    range_taxon_namespace=kwargs.get("range_taxon_namespace", None))
        else:
            raise TypeError("Must specify at least one of: 'mapping_fn', 'mapping_attr_name', or 'mapping_dict'")

    def __len__(self):
        """
        Number of domain taxa that have a mapping.
        """
        return len(self.forward)

    def __iter__(self):
        """
        Iterate over the mapped domain taxa.
        """
        for k in self.forward:
            yield k

    def items(self):
        """
        (domain taxon, range taxon) pairs of the forward mapping.
        """
        return self.forward.items()

    def keys(self):
        """
        Domain taxa of the forward mapping.
        """
        return self.forward.keys()

    def __getitem__(self, taxon):
        """
        Get mapping for specified taxon.
        """
        return self.forward[taxon]

    def _get_domain_taxon_namespace(self):
        return self._domain_taxon_namespace

    def _set_domain_taxon_namespace(self, taxa):
        # A non-empty container that is not already a TaxonNamespace is
        # wrapped in one; anything else is stored as given.
        if taxa and not isinstance(taxa, TaxonNamespace):
            self._domain_taxon_namespace = TaxonNamespace(taxa)
        else:
            self._domain_taxon_namespace = taxa

    domain_taxon_namespace = property(_get_domain_taxon_namespace, _set_domain_taxon_namespace)

    def _get_range_taxon_namespace(self):
        return self._range_taxon_namespace

    def _set_range_taxon_namespace(self, taxa):
        # Same wrapping rule as for the domain taxon namespace.
        if taxa and not isinstance(taxa, TaxonNamespace):
            self._range_taxon_namespace = TaxonNamespace(taxa)
        else:
            self._range_taxon_namespace = taxa

    range_taxon_namespace = property(_get_range_taxon_namespace, _set_range_taxon_namespace)

    def apply_mapping_fn(self, mfunc, domain_taxon_namespace, range_taxon_namespace=None):
        """
        Constructs forward and reverse mapping dictionaries based on ``mfunc``,
        which should take a |Taxon| object in ``domain_taxon_namespace`` as an argument
        and return another |Taxon| object.

        Any existing mapping is discarded. If ``range_taxon_namespace`` is
        not given, a new empty |TaxonNamespace| is used; in either case,
        range taxa returned by ``mfunc`` that are not yet in the range
        namespace are added to it.
        """
        self.forward = {}
        self.reverse = {}
        self.domain_taxon_namespace = domain_taxon_namespace
        if range_taxon_namespace is None:
            self.range_taxon_namespace = TaxonNamespace()
        else:
            self.range_taxon_namespace = range_taxon_namespace
        for dt in self.domain_taxon_namespace:
            rt = mfunc(dt)
            if rt not in self.range_taxon_namespace:
                self.range_taxon_namespace.add_taxon(rt)
            self.forward[dt] = rt
            # Group the domain taxa under the range taxon they map to.
            self.reverse.setdefault(rt, set()).add(dt)

    def apply_mapping_attr_name(self, attr_name, domain_taxon_namespace, range_taxon_namespace=None):
        """
        Constructs mapping based on attribute ``attr_name`` of each
        |Taxon| object in ``domain_taxon_namespace``.
        """
        return self.apply_mapping_fn(lambda x: getattr(x, attr_name), domain_taxon_namespace=domain_taxon_namespace, range_taxon_namespace=range_taxon_namespace)

    def apply_mapping_dict(self, mdict, domain_taxon_namespace=None, range_taxon_namespace=None):
        """
        Constructs mapping based on dictionary ``mdict``, which should have
        domain taxa as keys and range taxa as values.

        If ``domain_taxon_namespace`` is not given, one is built from the
        keys of ``mdict``.
        """
        if domain_taxon_namespace is None:
            domain_taxon_namespace = TaxonNamespace(mdict.keys())
        return self.apply_mapping_fn(lambda x: mdict[x], domain_taxon_namespace=domain_taxon_namespace, range_taxon_namespace=range_taxon_namespace)

    def mesquite_association_rows(self):
        """
        Returns the body rows of a Mesquite ``TaxaAssociation`` block as a
        single string: one ``<range taxon> / <domain taxa ...>`` row per
        range taxon, with the domain taxon labels sorted, and rows joined by
        a comma and newline. Labels are passed through
        ``nexusprocessing.escape_nexus_token``.
        """
        from dendropy.dataio import nexusprocessing
        rows = []
        for rt, domain_taxa in self.reverse.items():
            x1 = nexusprocessing.escape_nexus_token(rt.label)
            dt_labels = sorted(dt.label for dt in domain_taxa)
            x2 = " ".join([nexusprocessing.escape_nexus_token(d) for d in dt_labels])
            rows.append("        %s / %s" % (x1, x2))
        return ",\n".join(rows)

    def write_mesquite_association_block(self, out, domain_taxon_namespace_title=None, range_taxon_namespace_title=None):
        """
        For debugging purposes ...

        Writes a Mesquite ``TaxaAssociation`` block for this mapping to the
        file-like object ``out``. Titles that are not supplied are taken
        from the label of the corresponding object or, if it has no label,
        derived from the object's ``id``.
        """
        def _compose_title(b):
            # Prefer the object's label; fall back to an id-based title.
            if b.label:
                return b.label
            else:
                return "d{}".format(id(b))
        from dendropy.dataio import nexusprocessing
        out.write("BEGIN TaxaAssociation;\n")
        title = _compose_title(self)
        out.write("    TITLE %s;\n"  % nexusprocessing.escape_nexus_token(title))
        if domain_taxon_namespace_title is None:
            domain_taxon_namespace_title = _compose_title(self.domain_taxon_namespace)
        if range_taxon_namespace_title is None:
            range_taxon_namespace_title = _compose_title(self.range_taxon_namespace)
        # The range namespace title is written first, then the domain title.
        out.write("    TAXA %s, %s;\n" % (
            nexusprocessing.escape_nexus_token(range_taxon_namespace_title),
            nexusprocessing.escape_nexus_token(domain_taxon_namespace_title)
            ))
        out.write("    ASSOCIATES\n")
        out.write(self.mesquite_association_rows() + "\n")
        out.write("    ;\n")
        out.write("END;\n")
2142