1 #include "similar_subtree_analysis.hh"
2 #include "../tree/node_tree.hh"
3 #include "../tree/layout.hh"
4 #include "../utils/tree_utils.hh"
5 #include "../utils/perf_helper.hh"
6 #include "../tree/shape.hh"
7 #include "../tree/visual_flags.hh"
8 #include "../tree/layout_computer.hh"
9 
10 #include <set>
11 
12 namespace cpprofiler
13 {
14 
15 namespace analysis
16 {
17 
18 using std::vector;
19 using tree::Layout;
20 using tree::NodeStatus;
21 using tree::NodeTree;
22 using tree::Shape;
23 
24 struct ShapeInfo
25 {
26     NodeID nid;
27     const Shape &shape;
28 };
29 
30 struct CompareShapes
31 {
32   public:
operator ()cpprofiler::analysis::CompareShapes33     bool operator()(const ShapeInfo &si1, const ShapeInfo &si2) const
34     {
35 
36         const auto &s1 = si1.shape;
37         const auto &s2 = si2.shape;
38 
39         if (s1.height() < s2.height())
40             return true;
41         if (s1.height() > s2.height())
42             return false;
43 
44         for (int i = 0; i < s1.height(); ++i)
45         {
46             if (s1[i].l < s2[i].l)
47                 return false;
48             if (s1[i].l > s2[i].l)
49                 return true;
50             if (s1[i].r < s2[i].r)
51                 return true;
52             if (s1[i].r > s2[i].r)
53                 return false;
54         }
55         return false;
56     }
57 };
58 
set_as_processed(const NodeTree & nt,Partition & partition,int h,std::vector<int> & height_info)59 static void set_as_processed(const NodeTree &nt, Partition &partition, int h, std::vector<int> &height_info)
60 {
61 
62     /// Note that in general a group can contain subtrees of
63     /// different height; however, since the subtrees of height 'h'
64     /// have already been processed, any group that contains such a
65     /// subtree is processed and contains only subtrees of height 'h'.
66     auto should_stay = [&nt, &height_info, h](const Group &g) {
67         auto nid = g.at(0);
68 
69         return height_info.at(nid) != h;
70     };
71 
72     auto &rem = partition.remaining;
73 
74     auto first_to_move = std::partition(rem.begin(), rem.end(), should_stay);
75 
76     move(first_to_move, rem.end(), std::back_inserter(partition.processed));
77     rem.resize(distance(rem.begin(), first_to_move));
78 }
79 
separate_marked(const vector<Group> & rem_groups,const vector<NodeID> & marked)80 static vector<Group> separate_marked(const vector<Group> &rem_groups, const vector<NodeID> &marked)
81 {
82 
83     vector<Group> res_groups;
84 
85     for (auto &group : rem_groups)
86     {
87 
88         Group g1; /// marked
89         Group g2; /// not marked
90 
91         for (auto nid : group)
92         {
93             if (std::find(marked.begin(), marked.end(), nid) != marked.end())
94             {
95                 g1.push_back(nid);
96             }
97             else
98             {
99                 g2.push_back(nid);
100             }
101         }
102 
103         if (g1.size() > 0)
104         {
105             res_groups.push_back(std::move(g1));
106         }
107         if (g2.size() > 0)
108         {
109             res_groups.push_back(std::move(g2));
110         }
111     }
112 
113     return res_groups;
114 }
115 
116 /// go through processed groups of height 'h' and
117 /// mark their parent nodes as candidates for separating
partition_step(const NodeTree & nt,Partition & p,int h,vector<int> & height_info)118 static void partition_step(const NodeTree &nt, Partition &p, int h, vector<int> &height_info)
119 {
120 
121     for (auto &group : p.processed)
122     {
123 
124         /// Check if the group should be skipped
125         /// Note that processed nodes are assumed to be of the same height
126         if (group.size() == 0 || height_info.at(group[0]) != h)
127         {
128             continue;
129         }
130 
131         const auto max_kids = std::accumulate(group.begin(), group.end(), 0, [&nt](int cur, NodeID nid) {
132             auto pid = nt.getParent(nid);
133             return std::max(cur, nt.childrenCount(pid));
134         });
135 
136         int alt = 0;
137         for (auto alt = 0; alt < max_kids; ++alt)
138         {
139 
140             vector<NodeID> marked;
141 
142             for (auto nid : group)
143             {
144                 if (nt.getAlternative(nid) == alt)
145                 {
146                     /// Mark the parent of nid to separate
147                     auto pid = nt.getParent(nid);
148                     marked.push_back(pid);
149                 }
150             }
151 
152             /// separate marked nodes from their groups
153             p.remaining = separate_marked(p.remaining, marked);
154         }
155     }
156 }
157 
initialPartition(const NodeTree & nt)158 static Partition initialPartition(const NodeTree &nt)
159 {
160     /// separate leaf nodes from internal
161     /// NOTE: this assumes that branch nodes have at least one child!
162     Group failed_nodes;
163     Group solution_nodes;
164     Group branch_nodes;
165     {
166         auto nodes = utils::any_order(nt);
167 
168         for (auto nid : nodes)
169         {
170             auto status = nt.getStatus(nid);
171             switch (status)
172             {
173             case NodeStatus::FAILED:
174             {
175                 failed_nodes.push_back(nid);
176             }
177             break;
178             case NodeStatus::SOLVED:
179             {
180                 solution_nodes.push_back(nid);
181             }
182             break;
183             case NodeStatus::BRANCH:
184             {
185                 branch_nodes.push_back(nid);
186             }
187             break;
188             default:
189             {
190                 /// ignore other nodes for now
191             }
192             break;
193             }
194         }
195     }
196 
197     Partition result{{}, {}};
198 
199     if (failed_nodes.size() > 0)
200     {
201         result.processed.push_back(std::move(failed_nodes));
202     }
203 
204     if (solution_nodes.size() > 0)
205     {
206         result.processed.push_back(std::move(solution_nodes));
207     }
208 
209     if (branch_nodes.size() > 0)
210     {
211         result.remaining.push_back(std::move(branch_nodes));
212     }
213 
214     return result;
215 }
216 
calculateHeightOf(NodeID nid,const NodeTree & nt,vector<int> & height_info)217 static int calculateHeightOf(NodeID nid, const NodeTree &nt, vector<int> &height_info)
218 {
219 
220     int cur_max = 0;
221 
222     auto kids = nt.childrenCount(nid);
223 
224     if (kids == 0)
225     {
226         cur_max = 1;
227     }
228     else
229     {
230         for (auto alt = 0; alt < nt.childrenCount(nid); ++alt)
231         {
232             auto child = nt.getChild(nid, alt);
233             cur_max = std::max(cur_max, calculateHeightOf(child, nt, height_info) + 1);
234         }
235     }
236 
237     height_info[nid] = cur_max;
238 
239     return cur_max;
240 }
241 
242 /// TODO: make sure the structure isn't changing anymore
243 /// TODO: this does not work correctly for n-ary trees yet
runIdenticalSubtrees(const NodeTree & nt)244 vector<SubtreePattern> runIdenticalSubtrees(const NodeTree &nt)
245 {
246 
247     auto label_opt = LabelOption::IGNORE_LABEL;
248 
249     vector<int> height_info(nt.nodeCount());
250 
251     auto max_height = calculateHeightOf(nt.getRoot(), nt, height_info);
252 
253     auto max_depth = nt.depth();
254 
255     auto cur_height = 1;
256 
257     auto sizes = utils::calc_subtree_sizes(nt);
258 
259     // 0) Initial Partition
260     auto partition = initialPartition(nt);
261 
262     while (cur_height != max_height)
263     {
264 
265         partition_step(nt, partition, cur_height, height_info);
266 
267         /// By this point, all subtrees of height 'cur_height + 1'
268         /// should have been processed
269         set_as_processed(nt, partition, cur_height + 1, height_info);
270 
271         ++cur_height;
272     }
273 
274     /// Construct the result in the appropriate form
275     vector<SubtreePattern> result;
276     result.reserve(partition.processed.size());
277 
278     for (auto &&group : partition.processed)
279     {
280 
281         if (group.empty())
282             continue;
283 
284         auto height = height_info.at(group[0]);
285 
286         const auto first = group[0];
287 
288         SubtreePattern pattern(height);
289 
290         pattern.m_nodes = std::move(group);
291         pattern.setSize(sizes.at(first));
292 
293         result.push_back(std::move(pattern));
294     }
295 
296     return result;
297 }
298 
299 /// SIMILAR SHAPE ANALYSIS
300 
runSimilarShapes(const NodeTree & tree,const Layout & lo)301 std::vector<SubtreePattern> runSimilarShapes(const NodeTree &tree, const Layout &lo)
302 {
303 
304     std::multiset<ShapeInfo, CompareShapes> shape_set;
305 
306     auto node_order = utils::any_order(tree);
307 
308     for (const auto nid : node_order)
309     {
310         shape_set.insert({nid, *lo.getShape(nid)});
311     }
312 
313     auto sizes = utils::calc_subtree_sizes(tree);
314 
315     std::vector<SubtreePattern> shapes;
316 
317     auto it = shape_set.begin();
318     auto end = shape_set.end();
319 
320     while (it != end)
321     {
322         auto upper = shape_set.upper_bound(*it);
323 
324         const int height = it->shape.height();
325 
326         SubtreePattern pattern(height);
327         for (; it != upper; ++it)
328         {
329             pattern.m_nodes.emplace_back(it->nid);
330         }
331 
332         pattern.setSize(sizes.at(pattern.first()));
333 
334         shapes.push_back(std::move(pattern));
335     }
336 
337     return shapes;
338 }
339 
340 } // namespace analysis
341 
342 } // namespace cpprofiler