1 #include "trees_impl.hpp"
2 #include "utils.hpp"
3 #include <terraces/errors.hpp>
4 #include <terraces/subtree_extraction.hpp>
5 
6 using std::vector;
7 using std::stack;
8 using std::pair;
9 using std::make_pair;
10 
11 namespace terraces {
12 
compute_node_occ(const tree & t,const bitmatrix & occ)13 std::pair<bitmatrix, std::vector<index>> compute_node_occ(const tree& t, const bitmatrix& occ) {
14 	auto num_nodes = num_nodes_from_leaves(occ.rows());
15 	auto num_sites = occ.cols();
16 	utils::ensure<bad_input_error>(t.size() == num_nodes,
17 	                               bad_input_error_type::tree_mismatching_size);
18 	check_rooted_tree(t);
19 	auto node_occ = bitmatrix{t.size(), occ.cols()};
20 	std::vector<index> num_leaves_per_site(occ.cols(), 0);
21 
22 	// compute occurrences on inner nodes: bitwise or of the children
23 	foreach_postorder(t, [&](index i) {
24 		auto node = t[i];
25 		if (is_leaf(node)) {
26 			// copy data from taxon occurrence matrix
27 			utils::ensure<bad_input_error>(node.taxon() != none,
28 			                               bad_input_error_type::tree_unnamed_leaf);
29 			for (index site = 0; site < num_sites; ++site) {
30 				auto has_leaf = occ.get(node.taxon(), site);
31 				node_occ.set(i, site, has_leaf);
32 				num_leaves_per_site[site] += has_leaf;
33 			}
34 		} else {
35 			node_occ.row_or(node.lchild(), node.rchild(), i);
36 		}
37 	});
38 	return {std::move(node_occ), std::move(num_leaves_per_site)};
39 }
40 
induced_lca(const tree & t,const bitmatrix & node_occ,index column)41 index induced_lca(const tree& t, const bitmatrix& node_occ, index column) {
42 	index lca = 0;
43 	while (!is_leaf(t[lca])) {
44 		auto present = [&](index node) { return node_occ.get(node, column); };
45 		assert(present(lca));
46 		auto node = t[lca];
47 		if (present(node.lchild()) && present(node.rchild())) {
48 			return lca;
49 		} else {
50 			lca = present(node.lchild()) ? node.lchild() : node.rchild();
51 		}
52 	}
53 	return lca;
54 }
55 
subtree(const tree & t,const bitmatrix & node_occ,const std::vector<index> & num_leaves_per_site,index site)56 tree subtree(const tree& t, const bitmatrix& node_occ,
57              const std::vector<index>& num_leaves_per_site, index site) {
58 	auto root = induced_lca(t, node_occ, site);
59 	if (is_leaf(t[root])) {
60 		// tree containing only a single leaf
61 		return {{none, none, none, t[root].taxon()}};
62 	}
63 
64 	auto present = [&](index node) { return node_occ.get(node, site); };
65 	assert(present(t[root].lchild()) && present(t[root].rchild()));
66 
67 	tree out_tree;
68 	out_tree.reserve(num_nodes_from_leaves(num_leaves_per_site[site]));
69 	out_tree.emplace_back(); // root node
70 
71 	stack<index> boundary;
72 	auto callback = [&](index i) {
73 		auto node = t[i];
74 		bool leaf_occ = is_leaf(node) && present(i);
75 		bool inner_occ = !is_leaf(node) && present(node.lchild()) && present(node.rchild());
76 
77 		if (leaf_occ || (inner_occ && i != root)) {
78 			// fires if the tree is trivial (i.e. only one edge!)
79 			// this can only happen with sites for which only one
80 			// species has data, which we should have caught earlier.
81 			assert(!boundary.empty());
82 			auto parent = boundary.top();
83 			out_tree.emplace_back(parent, none, none, node.taxon());
84 			if (out_tree[parent].lchild() == none) {
85 				out_tree[parent].lchild() = out_tree.size() - 1;
86 			} else {
87 				assert(out_tree[parent].rchild() == none);
88 				out_tree[parent].rchild() = out_tree.size() - 1;
89 				boundary.pop();
90 			}
91 		}
92 		if (inner_occ) {
93 			boundary.push(out_tree.size() - 1);
94 		}
95 	};
96 	foreach_preorder(t, callback, root);
97 
98 	return out_tree;
99 }
100 
subtrees(const tree & t,const bitmatrix & occ)101 std::vector<tree> subtrees(const tree& t, const bitmatrix& occ) {
102 	auto num_sites = occ.cols();
103 	// TODO naming of compute_node_occ
104 	const auto node_occ_pair = compute_node_occ(t, occ);
105 	const auto& node_occ = node_occ_pair.first;
106 	const auto& num_leaves_per_site = node_occ_pair.second;
107 
108 	// collect leaves and inner nodes: bitwise and of the children
109 	vector<tree> out_trees;
110 
111 	for (index site = 0; site < num_sites; ++site) {
112 		out_trees.push_back(subtree(t, node_occ, num_leaves_per_site, site));
113 	}
114 
115 	return out_trees;
116 }
117 
118 } // namespace terraces
119