1 #include "trees_impl.hpp"
2 #include "utils.hpp"
3 #include <terraces/errors.hpp>
4 #include <terraces/subtree_extraction.hpp>
5
6 using std::vector;
7 using std::stack;
8 using std::pair;
9 using std::make_pair;
10
11 namespace terraces {
12
compute_node_occ(const tree & t,const bitmatrix & occ)13 std::pair<bitmatrix, std::vector<index>> compute_node_occ(const tree& t, const bitmatrix& occ) {
14 auto num_nodes = num_nodes_from_leaves(occ.rows());
15 auto num_sites = occ.cols();
16 utils::ensure<bad_input_error>(t.size() == num_nodes,
17 bad_input_error_type::tree_mismatching_size);
18 check_rooted_tree(t);
19 auto node_occ = bitmatrix{t.size(), occ.cols()};
20 std::vector<index> num_leaves_per_site(occ.cols(), 0);
21
22 // compute occurrences on inner nodes: bitwise or of the children
23 foreach_postorder(t, [&](index i) {
24 auto node = t[i];
25 if (is_leaf(node)) {
26 // copy data from taxon occurrence matrix
27 utils::ensure<bad_input_error>(node.taxon() != none,
28 bad_input_error_type::tree_unnamed_leaf);
29 for (index site = 0; site < num_sites; ++site) {
30 auto has_leaf = occ.get(node.taxon(), site);
31 node_occ.set(i, site, has_leaf);
32 num_leaves_per_site[site] += has_leaf;
33 }
34 } else {
35 node_occ.row_or(node.lchild(), node.rchild(), i);
36 }
37 });
38 return {std::move(node_occ), std::move(num_leaves_per_site)};
39 }
40
induced_lca(const tree & t,const bitmatrix & node_occ,index column)41 index induced_lca(const tree& t, const bitmatrix& node_occ, index column) {
42 index lca = 0;
43 while (!is_leaf(t[lca])) {
44 auto present = [&](index node) { return node_occ.get(node, column); };
45 assert(present(lca));
46 auto node = t[lca];
47 if (present(node.lchild()) && present(node.rchild())) {
48 return lca;
49 } else {
50 lca = present(node.lchild()) ? node.lchild() : node.rchild();
51 }
52 }
53 return lca;
54 }
55
subtree(const tree & t,const bitmatrix & node_occ,const std::vector<index> & num_leaves_per_site,index site)56 tree subtree(const tree& t, const bitmatrix& node_occ,
57 const std::vector<index>& num_leaves_per_site, index site) {
58 auto root = induced_lca(t, node_occ, site);
59 if (is_leaf(t[root])) {
60 // tree containing only a single leaf
61 return {{none, none, none, t[root].taxon()}};
62 }
63
64 auto present = [&](index node) { return node_occ.get(node, site); };
65 assert(present(t[root].lchild()) && present(t[root].rchild()));
66
67 tree out_tree;
68 out_tree.reserve(num_nodes_from_leaves(num_leaves_per_site[site]));
69 out_tree.emplace_back(); // root node
70
71 stack<index> boundary;
72 auto callback = [&](index i) {
73 auto node = t[i];
74 bool leaf_occ = is_leaf(node) && present(i);
75 bool inner_occ = !is_leaf(node) && present(node.lchild()) && present(node.rchild());
76
77 if (leaf_occ || (inner_occ && i != root)) {
78 // fires if the tree is trivial (i.e. only one edge!)
79 // this can only happen with sites for which only one
80 // species has data, which we should have caught earlier.
81 assert(!boundary.empty());
82 auto parent = boundary.top();
83 out_tree.emplace_back(parent, none, none, node.taxon());
84 if (out_tree[parent].lchild() == none) {
85 out_tree[parent].lchild() = out_tree.size() - 1;
86 } else {
87 assert(out_tree[parent].rchild() == none);
88 out_tree[parent].rchild() = out_tree.size() - 1;
89 boundary.pop();
90 }
91 }
92 if (inner_occ) {
93 boundary.push(out_tree.size() - 1);
94 }
95 };
96 foreach_preorder(t, callback, root);
97
98 return out_tree;
99 }
100
subtrees(const tree & t,const bitmatrix & occ)101 std::vector<tree> subtrees(const tree& t, const bitmatrix& occ) {
102 auto num_sites = occ.cols();
103 // TODO naming of compute_node_occ
104 const auto node_occ_pair = compute_node_occ(t, occ);
105 const auto& node_occ = node_occ_pair.first;
106 const auto& num_leaves_per_site = node_occ_pair.second;
107
108 // collect leaves and inner nodes: bitwise and of the children
109 vector<tree> out_trees;
110
111 for (index site = 0; site < num_sites; ++site) {
112 out_trees.push_back(subtree(t, node_occ, num_leaves_per_site, site));
113 }
114
115 return out_trees;
116 }
117
118 } // namespace terraces
119