1 #ifndef ADVANCED_HPP
2 #define ADVANCED_HPP
3 
4 #include <functional>
5 #include <iosfwd>
6 
7 #include "bigint.hpp"
8 #include "bitmatrix.hpp"
9 #include "constraints.hpp"
10 #include "trees.hpp"
11 
12 namespace terraces {
13 
14 /**
15  * Contains the necessary data to describe all possible supertrees equivalent to a certain tree.
16  */
17 struct supertree_data {
18 	/** The lca constraints describing the structure of the possible supertrees */
19 	terraces::constraints constraints;
20 	/** The number of leaves of the supertree. */
21 	index num_leaves;
22 	/** The index of the 'root leaf' in leaf-based numbering.
23 	 * It will be placed as a leaf below the root of all supertrees.
24 	 * In terms of phylogenetic trees, this means the index of a comprehensive taxon. */
25 	index root;
26 };
27 
28 /**
29  * Returns the index of the first comprehensive taxon in a occurrence bitmatrix.
30  * @param data The occurrence bitmatrix.
31  * @return The (row) index of the first comprehensive taxon in the input matrix
32  *         or @ref none if none exists.
33  */
34 index find_comprehensive_taxon(const bitmatrix& data);
35 
36 /**
37  * Extracts a maximum size subset of the columns
38  * so the resulting matrix contains a comprehensive taxon.
39  * @param data The occurrence matrix
40  * @return The output matrix containing a subset of the columns of the input matrix.
41  */
42 bitmatrix maximum_comprehensive_columnset(const bitmatrix& data);
43 
44 /**
45  * Computes the necessary data to enumerate the supertrees of the given tree and missing data
46  * matrix.
47  * \param tree The phylogenetic tree.
48  * \param data The missing data matrix. It must contain only data for the leaves!
49  * \returns \ref supertree_data object describing all possible supertrees equivalent to the input
50  * tree.
51  */
52 supertree_data create_supertree_data(const tree& tree, const bitmatrix& data);
53 
54 /**
55  * Checks if a phylogenetic tree lies on a phylogenetic terrace.
56  * \param data The constraints extracted from the tree and missing data matrix describing all
57  * possible supertrees.
58  * \return True if and only if the tree lies on a phylogenetic terrace,
59  *         i.e. there are at least 2 different trees (called supertrees)
60  *         that are equivalent with respect to the missing data matrix.
61  */
62 bool check_terrace(const supertree_data& data);
63 
64 /**
65  * Checks if a phylogenetic tree lies on a phylogenetic terrace by computing a lower bound to the
66  * terrace size.
67  * \param data The constraints extracted from the tree and missing data matrix describing all
68  * possible supertrees.
69  * \return A lower bound to the number of trees on the phylogenetic terrace.
70  */
71 index fast_count_terrace(const supertree_data& data);
72 
73 /**
74  * Counts all trees on a terrace around a phylogenetic tree.
75  * Note that this number might not be representable as a 32/64 bit integer, and might thus be
76  * clamped.
77  * \param data The constraints extracted from the tree and missing data matrix describing all
78  * possible supertrees.
79  * \return The number of trees on the phylogenetic terrace containing the input tree.
80  *         Note that if this result is UINT32/64_MAX = 2^32/64 - 1, the computations resulted in an
81  * overflow,
82  *         i.e. the result is only a lower bound on the number of trees on this terrace.
83  */
84 index count_terrace(const supertree_data& data);
85 
86 /**
87  * Counts all trees on a terrace around a phylogenetic tree.
88  * \param data The constraints extracted from the tree and missing data matrix describing all
89  * possible supertrees.
90  * \return The number of trees on the phylogenetic terrace containing the input tree.
91  */
92 big_integer count_terrace_bigint(const supertree_data& data);
93 
94 /**
95  * Enumerates all trees on a terrace around a phylogenetic tree.
96  * The trees will be printed in a compressed <b>multitree format</b>,
97  * which is an extension of the normal Newick format:
98  * In place of a subtree <i>T</i>, there may be two additional types of nodes:
99  * <ul>
100  * <li>An <b>alternative list</b> of all possible sub(-multi-)trees that appear on the terrace:<br>
101  *     <i>T1</i>|<i>T2</i>|<i>T3</i>|...</li>
102  * <li>An <b>unconstrained subtree</b> where all possible subtrees with the given leaves are part of
103  * the terrace:<br>
104  * {<i>leaf1</i>,<i>leaf2</i>,<i>leaf3</i>,...}
105  * </li>
106  * </ul>
107  * \param data The constraints extracted from the tree and missing data matrix describing all
108  * possible supertrees.
109  * \param names The name map containing only leaf names. It will be used to output the multitree.
110  * \param output The output stream into which the multitree will be written.
111  * \return The number of trees on the phylogenetic terrace containing the input tree.
112  */
113 big_integer print_terrace_compressed(const supertree_data& data, const name_map& names,
114                                      std::ostream& output);
115 
116 /**
117  * Enumerates all trees on a terrace around a phylogenetic tree.
118  * The trees will be printed in Newick format, one tree per line
119  * \param data The constraints extracted from the tree and missing data matrix describing all
120  * possible supertrees.
121  * \param names The name map containing only leaf names. It will be used to output the multitree.
122  * \param output The output stream into which the trees will be written.
123  * \return The number of trees on the phylogenetic terrace containing the input tree.
124  */
125 big_integer print_terrace(const supertree_data& data, const name_map& names, std::ostream& output);
126 
127 /**
128  * Enumerates all trees on a terrace around a phylogenetic tree.
129  * The given callback function will be called with every tree on the terrace as a parameter.
130  * \param data The constraints extracted from the tree and missing data matrix describing all
131  * possible supertrees.
132  * \param callback The callback function taking a tree as a parameter.
133  */
134 void enumerate_terrace(const supertree_data& data, std::function<void(const tree&)> callback);
135 
136 } // namespace terraces
137 
138 #endif // ADVANCED_HPP
139