1 // Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2010 - 2013
2 //
3 // (c) 2010-2012 Goethe-Universität Frankfurt
4 //
5 // This program is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by the
7 // Free Software Foundation; either version 2, or (at your option) any
8 // later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 //
19 // An optimal, polynomial-time register allocator.
20 //
21 // For details, see:
22 //
23 // Philipp Klaus Krause,
24 // "Optimal Register Allocation in Polynomial Time",
25 // Compiler Construction - 22nd International Conference, CC 2013, Held as Part of the European Joint Conferences on Theory and Practice of Software, ETAPS 2013. Proceedings,
26 // Lecture Notes in Computer Science, volume 7791, pp. 1-20.
27 // Springer,
28 // 2013.
29 //
30 // To use this from a port do the following:
31 //
32 // 1) Supply a cost function
33 // template <class G_t, class I_t>
34 // float instruction_cost(const assignment &a, unsigned short int i, const G_t &G, const I_t &I);
35 // Which can range from
36 // simple, e.g. cost 1 for each byte accessed in a register, cost 4 for each byte accessed in memory
37 // to
38 // quite involved, e.g. the number of bytes of code the code generator would generate.
39 //
40 // 2) Call
41 // create_cfg(), thorup_tree_decomposition(), nicify(), alive_tree_dec(), tree_dec_ralloc_nodes().
42 //
43 // The Z80 port can serve as an example, see z80_ralloc2_cc() in z80/ralloc2.cc.
44
45 #ifndef SDCCRALLOC_HH
46 #define SDCCRALLOC_HH 1
47
48 #include <iostream>
49 #include <limits>
50 #include <utility>
51 #include <sstream>
52 #include <fstream>
53
54 // Workaround for boost bug #11880
55 #include <boost/version.hpp>
56 #if BOOST_VERSION == 106000
57 #include <boost/type_traits/ice.hpp>
58 #endif
59
60 #include <boost/graph/graphviz.hpp>
61 #include <boost/graph/adjacency_matrix.hpp>
62 #include <boost/graph/connected_components.hpp>
63 #include <boost/container/flat_set.hpp>
64 #include <boost/container/flat_map.hpp>
65
66 #include "common.h"
67
68 extern "C"
69 {
70 #include "SDCCbtree.h"
71 }
72
73 typedef short int var_t;
74 typedef signed char reg_t;
75
76 // Integer constant upper bound on port->num_regs
77 #define MAX_NUM_REGS 9
78
79 // Assignment at an instruction
80 struct i_assignment_t
81 {
82 var_t registers[MAX_NUM_REGS][2];
83
i_assignment_ti_assignment_t84 i_assignment_t(void)
85 {
86 for (reg_t r = 0; r < MAX_NUM_REGS; r++)
87 for (unsigned int i = 0; i < 2; i++)
88 registers[r][i] = -1;
89 }
90
91 #if 0
92 bool operator<(const i_assignment_t &i_a) const
93 {
94 for (reg_t r = 0; r < port->num_regs; r++)
95 for (unsigned int i = 0; i < 2; i++)
96 {
97 if (registers[r][i] < i_a.registers[r][i])
98 return(true);
99 else if (registers[r][i] > i_a.registers[r][i])
100 return(false);
101 }
102 return(false);
103 }
104 #endif
105
add_vari_assignment_t106 void add_var(var_t v, reg_t r)
107 {
108 if (registers[r][1] < v)
109 {
110 registers[r][0] = registers[r][1];
111 registers[r][1] = v;
112 }
113 else
114 registers[r][0] = v;
115 }
116
remove_vari_assignment_t117 void remove_var(var_t v)
118 {
119 for (reg_t r = 0; r < port->num_regs; r++)
120 {
121 if (registers[r][1] == v)
122 {
123 registers[r][1] = registers[r][0];
124 registers[r][0] = -1;
125 }
126 else if (registers[r][0] == v)
127 registers[r][0] = -1;
128 }
129 }
130 };
131
132 typedef std::vector<var_t> varset_t; // Faster than std::set, std::tr1::unordered_set and stx::btree_set here.
133
134 typedef boost::container::flat_map<int, float> icosts_t; // Faster than std::map and stx::btree_map here.
135
136 typedef std::vector<var_t> cfg_alive_t; // Faster than stx::btree_set here .
137 typedef boost::container::flat_set<var_t> cfg_dying_t; // Faster than stx::btree_set and std::set here.
138
139 struct assignment
140 {
141 float s;
142
143 varset_t local; // Entries: var
144 std::vector<reg_t> global; // Entries: global[var] = reg (-1 if no reg assigned)
145 icosts_t i_costs; // Costs for all instructions in bag (needed to avoid double counting costs at join nodes)
146 i_assignment_t i_assignment; // Assignment at the instruction currently being added in an introduce node;
147
148 bool marked;
149
operator <assignment150 bool operator<(const assignment& a) const
151 {
152 varset_t::const_iterator i, ai, i_end, ai_end;
153
154 i_end = local.end();
155 ai_end = a.local.end();
156
157 for (i = local.begin(), ai = a.local.begin();; ++i, ++ai)
158 {
159 if (i == i_end && ai == ai_end)
160 return(false);
161 if (i == i_end)
162 return(true);
163 if (ai == ai_end)
164 return(false);
165
166 if (*i < *ai)
167 return(true);
168 if (*i > *ai)
169 return(false);
170
171 if (global[*i] < a.global[*ai])
172 return(true);
173 if (global[*i] > a.global[*ai])
174 return(false);
175 }
176 }
177 };
178
179 typedef std::list<assignment> assignment_list_t;
180 //typedef std::vector<assignment> assignment_list_t; // Probably faster, but would require some code reorganization.
181
182 struct tree_dec_node
183 {
184 std::set<unsigned int> bag;
185 std::set<var_t> alive;
186 assignment_list_t assignments;
187 unsigned weight; // The weight is the number of nodes at which intermediate results need to be remembered. In general, to minimize memory consumption, at join nodes the child with maximum weight should be processed first.
188 };
189
190 typedef boost::container::flat_multimap<int, var_t> operand_map_t; // Faster than std::multimap<int, var_t> and stx::btree_multimap<int, var_t> here.
191
192 struct cfg_node
193 {
194 iCode *ic;
195 operand_map_t operands;
196 cfg_alive_t alive;
197 cfg_dying_t dying;
198
199 std::set<var_t> stack_alive;
200
201 #ifdef DEBUG_SEGV
202 cfg_node(void);
203 #endif
204 };
205
206 #ifdef DEBUG_SEGV
207 // This only exists to track down #3506333 and #3475617.
208 bool default_constructor_of_cfg_node_called;
cfg_node(void)209 cfg_node::cfg_node(void)
210 {
211 default_constructor_of_cfg_node_called = true;
212 }
213 #endif
214
215 struct con_node
216 {
217 int v;
218 int byte;
219 int size;
220 char *name;
221 };
222
223 typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, tree_dec_node> tree_dec_t;
224 typedef boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, con_node> con_t;
225 typedef boost::adjacency_matrix<boost::undirectedS, con_node> con2_t;
226 typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, cfg_node> cfg_t;
227 typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::undirectedS> cfg_sym_t;
228
229 #ifdef HAVE_TREEDEC_COMBINATIONS_HPP
230 #include <treedec/treedec_traits.hpp>
231 TREEDEC_TREEDEC_BAG_TRAITS(tree_dec_t, bag);
232 #endif
233
234 #include "SDCCtree_dec.hpp"
235
236 // Cost function. Port-specific.
237 template <class G_t, class I_t>
238 static float instruction_cost(const assignment &a, unsigned short int i, const G_t &G, const I_t &I);
239
240 // For early removel of assignments that cannot be extended to valid assignments. Port-specific.
241 template <class G_t, class I_t>
242 static bool assignment_hopeless(const assignment &a, unsigned short int i, const G_t &G, const I_t &I, const var_t lastvar);
243
244 // Rough cost estimate. Port-specific.
245 template <class G_t, class I_t>
246 static float rough_cost_estimate(const assignment &a, unsigned short int i, const G_t &G, const I_t &I);
247
248 // Avoid overwriting operands that are still needed by the result. Port-specific.
249 template <class I_t>
250 static void add_operand_conflicts_in_node(const cfg_node &n, I_t &I);
251
252 // Port-specific
253 template <class T_t>
254 static void get_best_local_assignment_biased(assignment &a, typename boost::graph_traits<T_t>::vertex_descriptor t, const T_t &T);
255
256 // Code for another ic is generated when generating this one. Mark the other as generated. Port-specific.
257 static void extra_ic_generated(iCode *ic);
258
259 inline void
add_operand_to_cfg_node(cfg_node & n,operand * o,std::map<std::pair<int,reg_t>,var_t> & sym_to_index)260 add_operand_to_cfg_node(cfg_node &n, operand *o, std::map<std::pair<int, reg_t>, var_t> &sym_to_index)
261 {
262 reg_t k;
263 if (o && IS_SYMOP(o) && sym_to_index.find(std::pair<int, reg_t>(OP_SYMBOL_CONST(o)->key, 0)) != sym_to_index.end())
264 {
265 if (n.operands.find(OP_SYMBOL_CONST(o)->key) == n.operands.end())
266 for (k = 0; k < OP_SYMBOL_CONST(o)->nRegs; k++)
267 n.operands.insert(std::pair<int, var_t>(OP_SYMBOL_CONST(o)->key, sym_to_index[std::pair<int, reg_t>(OP_SYMBOL_CONST(o)->key, k)]));
268 }
269 }
270
271 // Check if the live-range of variable i is connected
272 #if 0
273 // This check was too expensive - Profiling shows that compiling the Dhrystone benchmark for stm8 with default options, we spent about a quarter of compiler runtime in here!
274 // Profiling shows that we spent a significant amount of time on the first call to copy_graph()
275 // Todo: Improve efficiency, e.g. using subgraph or filtered_graph to avoid the costly first call to copy_graph()
276 // Issues to solve: cfg2 is undirected, cfg is bidirectional; this makes use of subgraph or filtered_graph harder.
277 static bool liverange_connected(cfg_t &cfg, var_t i)
278 {
279 cfg_sym_t cfg2;
280 boost::copy_graph(cfg, cfg2, boost::vertex_copy(forget_properties()).edge_copy(forget_properties())); // This call to copy_graph is expensive!
281 for (int j = boost::num_vertices(cfg) - 1; j >= 0; j--)
282 {
283 if (std::find(cfg[j].alive.begin(), cfg[j].alive.end(), i) == cfg[j].alive.end())
284 {
285 boost::clear_vertex(j, cfg2);
286 boost::remove_vertex(j, cfg2);
287 }
288 }
289
290 std::vector<boost::graph_traits<cfg_t>::vertices_size_type> component(num_vertices(cfg2));
291
292 return(boost::connected_components(cfg2, &component[0]) <= 1);
293 }
294 #else
295 // A not very elegant, but faster check
component_size_impl(const cfg_t & cfg,const std::vector<bool> & life,var_t v,int i,std::vector<bool> & visited)296 static inline int component_size_impl(const cfg_t &cfg, const std::vector<bool> &life, var_t v, int i, std::vector<bool>& visited)
297 {
298 typename boost::graph_traits<cfg_t>::in_edge_iterator in, in_end;
299 typename boost::graph_traits<cfg_t>::out_edge_iterator out, out_end;
300
301 int size = 1;
302 visited[i] = true;
303
304 for(boost::tie(in, in_end) = boost::in_edges(i, cfg); in != in_end; ++in)
305 if(life[boost::source(*in, cfg)] && !visited[boost::source(*in, cfg)])
306 size += component_size_impl(cfg, life, v, boost::source(*in, cfg), visited);
307
308 for(boost::tie(out, out_end) = boost::out_edges(i, cfg); out != out_end; ++out)
309 if(life[boost::target(*out, cfg)] && !visited[boost::target(*out, cfg)])
310 size += component_size_impl(cfg, life, v, boost::target(*out, cfg), visited);
311
312 return(size);
313 }
314
component_size(const cfg_t & cfg,const std::vector<bool> & life,var_t v,int i)315 static inline int component_size(const cfg_t &cfg, const std::vector<bool> &life, var_t v, int i)
316 {
317 std::vector<bool> visited(boost::num_vertices(cfg));
318
319 return(component_size_impl(cfg, life, v, i, visited));
320 }
321
liverange_connected(const cfg_t & cfg,var_t v)322 static bool liverange_connected(const cfg_t &cfg, var_t v)
323 {
324 std::vector<bool> life(boost::num_vertices(cfg));
325 int num_life = 0;
326 int last_life;
327
328 for(int i = 0; i < boost::num_vertices (cfg); i++)
329 if(std::find(cfg[i].alive.begin(), cfg[i].alive.end(), v) != cfg[i].alive.end())
330 {
331 life[i] = true;
332 num_life++;
333 last_life = i;
334 }
335
336 if(!num_life)
337 return(true);
338
339 return(component_size(cfg, life, v, last_life) >= num_life);
340 }
341 #endif
342
343 // A quick-and-dirty function to get the CFG from sdcc.
344 static iCode *
create_cfg(cfg_t & cfg,con_t & con,ebbIndex * ebbi)345 create_cfg(cfg_t &cfg, con_t &con, ebbIndex *ebbi)
346 {
347 eBBlock **ebbs = ebbi->bbOrder;
348 iCode *start_ic;
349 iCode *ic;
350
351 std::map<int, unsigned int> key_to_index;
352 std::map<std::pair<int, reg_t>, var_t> sym_to_index;
353
354 if(currFunc)
355 currFunc->funcDivFlagSafe = 1;
356
357 start_ic = iCodeLabelOptimize(iCodeFromeBBlock (ebbs, ebbi->count));
358 {
359 int i;
360 var_t j;
361 wassertl (!boost::num_vertices(cfg), "CFG non-empty before creation.");
362 for (ic = start_ic, i = 0, j = 0; ic; ic = ic->next, i++)
363 {
364 if (currFunc)
365 currFunc->funcDivFlagSafe &= !(ic->op == INLINEASM || ic->op == '/' || ic->op == '%' || ic->op == PCALL ||
366 ic->op == CALL && (IS_OP_LITERAL (IC_LEFT (ic)) || !OP_SYMBOL(IC_LEFT (ic))->funcDivFlagSafe) ||
367 ic->op == RIGHT_OP && IS_OP_LITERAL (IC_RIGHT (ic))); // Right shift might be implemented using division.
368
369 #ifdef DEBUG_SEGV
370 default_constructor_of_cfg_node_called = false;
371 #endif
372 boost::add_vertex(cfg);
373
374 #ifdef DEBUG_SEGV
375 wassertl (default_constructor_of_cfg_node_called, "add_vertex failed to call default constructor of cfg_node!");
376 #endif
377 wassertl (cfg[i].alive.empty(), "Alive set non-empty upon creation.");
378 key_to_index[ic->key] = i;
379
380 if(ic->op == SEND && ic->builtinSEND) // Ensure that only the very first send iCode is active.
381 {
382 operand *bi_parms[MAX_BUILTIN_ARGS];
383 int nbi_parms;
384 getBuiltinParms(ic, &nbi_parms, bi_parms);
385 }
386
387 extra_ic_generated(ic);
388
389 cfg[i].ic = ic;
390 ic->rSurv = newBitVect(port->num_regs); // Never freed. Memory leak?
391 ic->rMask = newBitVect(port->num_regs); // Never freed. Memory leak?
392
393 if (ic->generated)
394 continue;
395
396 for (int j2 = 0; j2 <= operandKey; j2++)
397 {
398 if (bitVectBitValue(ic->rlive, j2))
399 {
400 symbol *sym = (symbol *)(hTabItemWithKey(liveRanges, j2));
401
402 if (!sym->for_newralloc)
403 continue;
404
405 // Add node to conflict graph:
406 if (sym_to_index.find(std::pair<int, reg_t>(j2, 0)) != sym_to_index.end())
407 continue;
408
409 // Other parts of the allocator may rely on the variables corresponding to bytes from the same sdcc variable to have subsequent numbers.
410 for (reg_t k = 0; k < sym->nRegs; k++)
411 {
412 boost::add_vertex(con);
413 con[j].v = j2;
414 con[j].byte = k;
415 con[j].size = sym->nRegs;
416 con[j].name = sym->name;
417 sym_to_index[std::pair<int, reg_t>(j2, k)] = j;
418 for (reg_t l = 0; l < k; l++)
419 boost::add_edge(j - l - 1, j, con);
420 j++;
421 }
422 }
423 }
424 }
425 }
426
427 // Get control flow graph from sdcc.
428 for (ic = start_ic; ic; ic = ic->next)
429 {
430 wassertl (key_to_index[ic->key] < boost::num_vertices(cfg), "Node not in CFG.");
431
432 if (ic->op != GOTO && ic->op != RETURN && ic->op != JUMPTABLE && ic->next)
433 {
434 wassertl (key_to_index[ic->next->key] < boost::num_vertices(cfg), "Next node not in CFG.");
435 boost::add_edge(key_to_index[ic->key], key_to_index[ic->next->key], cfg);
436 }
437
438 if (ic->op == GOTO)
439 {
440 wassertl (key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key] < boost::num_vertices(cfg), "GOTO target not in CFG.");
441 boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key], cfg);
442 }
443 else if (ic->op == RETURN)
444 {
445 wassertl (key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key] < boost::num_vertices(cfg), "RETURN target not in CFG.");
446 boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key], cfg);
447 }
448 else if (ic->op == IFX)
449 {
450 wassertl (key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? IC_TRUE(ic) : IC_FALSE(ic))->sch->key] < boost::num_vertices(cfg), "IFX target not in CFG.");
451 boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? IC_TRUE(ic) : IC_FALSE(ic))->sch->key], cfg);
452 }
453 else if (ic->op == JUMPTABLE)
454 for (symbol *lbl = (symbol *)(setFirstItem (IC_JTLABELS (ic))); lbl; lbl = (symbol *)(setNextItem (IC_JTLABELS (ic))))
455 {
456 wassertl (key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key] < boost::num_vertices(cfg), "GOTO target not in CFG.");
457 boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key], cfg);
458 }
459
460 for (int i = 0; i <= operandKey; i++)
461 {
462 if (sym_to_index.find(std::pair<int, reg_t>(i, 0)) == sym_to_index.end())
463 continue;
464
465 if (bitVectBitValue(ic->rlive, i))
466 {
467 symbol *isym = (symbol *)(hTabItemWithKey(liveRanges, i));
468 for (reg_t k = 0; k < isym->nRegs; k++)
469 {
470 wassert (key_to_index.find(ic->key) != key_to_index.end());
471 wassert (sym_to_index.find(std::pair<int, int>(i, k)) != sym_to_index.end());
472 wassertl (key_to_index[ic->key] < boost::num_vertices(cfg), "Node not in CFG.");
473 cfg[key_to_index[ic->key]].alive.push_back(sym_to_index[std::pair<int, int>(i, k)]);
474 }
475
476 // TODO: Move this to a place where it also works when using the old allocator!
477 isym->block = btree_lowest_common_ancestor(isym->block, ic->block);
478 // If this symbol has a spill location, ensure the spill location is also allocated in a compatible block
479 if (SYM_SPIL_LOC(isym))
480 SYM_SPIL_LOC(isym)->block = btree_lowest_common_ancestor(SYM_SPIL_LOC(isym)->block, isym->block);
481 }
482 }
483
484 if (ic->op == IFX)
485 add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_COND(ic), sym_to_index);
486 else if (ic->op == JUMPTABLE)
487 add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_JTCOND(ic), sym_to_index);
488 else
489 {
490 add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_RESULT(ic), sym_to_index);
491 add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_LEFT(ic), sym_to_index);
492 add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_RIGHT(ic), sym_to_index);
493 }
494
495 // TODO: Extend live-ranges of returns of built-in function calls back to first SEND.
496
497 add_operand_conflicts_in_node(cfg[key_to_index[ic->key]], con);
498 }
499
500 #if 0
501 // Get conflict graph from sdcc
502 for (var_t i = 0; static_cast<boost::graph_traits<cfg_t>::vertices_size_type>(i) < num_vertices(con); i++)
503 {
504 symbol *isym = (symbol *)(hTabItemWithKey(liveRanges, con[i].v));
505 for (int j = 0; j <= operandKey; j++)
506 if (bitVectBitValue(isym->clashes, j))
507 {
508 symbol *jsym = (symbol *)(hTabItemWithKey(liveRanges, j));
509 if (sym_to_index.find(std::pair<int, reg_t>(j, 0)) == sym_to_index.end())
510 continue;
511 for (reg_t k = 0; k < jsym->nRegs; k++)
512 boost::add_edge(i, sym_to_index[std::pair<int, reg_t>(j, k)], con);
513 }
514 }
515 #endif
516
517 // Check for unconnected live ranges, some might have survived earlier stages.
518 for (var_t i = (var_t)boost::num_vertices(con) - 1; i >= 0; i--)
519 if (!liverange_connected(cfg, i))
520 {
521 // Non-connected CFGs are created by at least GCSE and lospre. We now have a live-range splitter that fixes them, so this should no longer be necessary, but we leave this code here for now, so in case one gets through, we can still generate correct code.
522 std::cerr << "Warning: Non-connected liverange found and extended to connected component of the CFG:" << con[i].name << ". Please contact sdcc authors with source code to reproduce.\n";
523
524 cfg_sym_t cfg2;
525 boost::copy_graph(cfg, cfg2, boost::vertex_copy(forget_properties()).edge_copy(forget_properties()));
526 std::vector<boost::graph_traits<cfg_t>::vertices_size_type> component(num_vertices(cfg2));
527 boost::connected_components(cfg2, &component[0]);
528
529 for (boost::graph_traits<cfg_t>::vertices_size_type j = 0; j < boost::num_vertices(cfg) - 1; j++)
530 {
531 if (std::find(cfg[j].alive.begin(), cfg[j].alive.end(), i) == cfg[j].alive.end())
532 continue;
533
534 for (boost::graph_traits<cfg_t>::vertices_size_type k = 0; k < boost::num_vertices(cfg) - 1; k++)
535 if (component[j] == component[k] && std::find(cfg[k].alive.begin(), cfg[k].alive.end(), i) == cfg[k].alive.end())
536 cfg[k].alive.push_back(i);
537 }
538 }
539
540 // Sort alive and setup dying.
541 for (boost::graph_traits<cfg_t>::vertices_size_type i = 0; i < num_vertices(cfg); i++)
542 {
543 std::sort(cfg[i].alive.begin(), cfg[i].alive.end());
544 cfg[i].dying = cfg_dying_t(cfg[i].alive.begin(), cfg[i].alive.end());;
545 typedef boost::graph_traits<cfg_t>::adjacency_iterator adjacency_iter_t;
546 adjacency_iter_t j, j_end;
547 for (boost::tie(j, j_end) = adjacent_vertices(i, cfg); j != j_end; ++j)
548 {
549 cfg_alive_t::const_iterator v, v_end;
550 for (v = cfg[*j].alive.begin(), v_end = cfg[*j].alive.end(); v != v_end; ++v)
551 {
552 const symbol *const vsym = (symbol *)(hTabItemWithKey(liveRanges, con[*v].v));
553
554 const operand *const left = IC_LEFT(cfg[*j].ic);
555 const operand *const right = IC_RIGHT(cfg[*j].ic);
556 const operand *const result = IC_RESULT(cfg[*j].ic);
557
558 if (!(POINTER_SET(cfg[*j].ic) || cfg[*j].ic->op == SET_VALUE_AT_ADDRESS) &&
559 (!left || !IS_SYMOP(left) || OP_SYMBOL_CONST(left)->key != vsym->key) &&
560 (!right || !IS_SYMOP(right) || OP_SYMBOL_CONST(right)->key != vsym->key) &&
561 result && IS_SYMOP(result) && OP_SYMBOL_CONST(result)->key == vsym->key)
562 continue;
563
564 cfg[i].dying.erase(*v);
565 }
566 }
567 }
568
569 // Construct conflict graph
570 for (boost::graph_traits<cfg_t>::vertices_size_type i = 0; i < num_vertices(cfg); i++)
571 {
572 cfg_alive_t::const_iterator v, v_end;
573 const iCode *ic = cfg[i].ic;
574
575 for (v = cfg[i].alive.begin(), v_end = cfg[i].alive.end(); v != v_end; ++v)
576 {
577 cfg_alive_t::const_iterator v2, v2_end;
578
579 // Conflict between operands are handled by add_operand_conflicts_in_node().
580 if (cfg[i].dying.find (*v) != cfg[i].dying.end())
581 continue;
582 if (ic->op != IFX && ic->op != JUMPTABLE && IC_RESULT(ic) && IS_SYMOP(IC_RESULT(ic)))
583 {
584 operand_map_t::const_iterator oi, oi_end;
585 for(boost::tie(oi, oi_end) = cfg[i].operands.equal_range(OP_SYMBOL_CONST(IC_RESULT(ic))->key); oi != oi_end; ++oi)
586 if(oi->second == *v)
587 goto next_var;
588 }
589
590 // Here, v is a variable that survives cfg[i].
591 // TODO: Check if we can use v, ++v2 instead of cfg[i].alive.begin() to speed things up.
592 for (v2 = cfg[i].alive.begin(), v2_end = cfg[i].alive.end(); v2 != v2_end; ++v2)
593 {
594 if(*v == *v2)
595 continue;
596 if (cfg[i].dying.find (*v2) != cfg[i].dying.end())
597 continue;
598
599 boost::add_edge(*v, *v2, con);
600 }
601
602 next_var:
603 ;
604 }
605 }
606
607 return(start_ic);
608 }
609
610 // Computes live ranges for tree decomposition from live ranges from cfg.
alive_tree_dec(tree_dec_t & tree_dec,const cfg_t & cfg)611 inline void alive_tree_dec(tree_dec_t &tree_dec, const cfg_t &cfg)
612 {
613 for (unsigned int i = 0; i < num_vertices(tree_dec); i++)
614 {
615 std::set<unsigned int>::const_iterator v;
616 for (v = tree_dec[i].bag.begin(); v != tree_dec[i].bag.end(); ++v)
617 tree_dec[i].alive.insert(cfg[*v].alive.begin(), cfg[*v].alive.end());
618 }
619 }
620
621 #if defined(DEBUG_RALLOC_DEC) || defined (DEBUG_RALLOC_DEC_ASS)
print_assignment(const assignment & a)622 static void print_assignment(const assignment &a)
623 {
624 varset_t::const_iterator i;
625 std::cout << "[";
626 for (i = a.local.begin(); i != a.local.end(); ++i)
627 std::cout << "(" << int(*i) << ", " << int(a.global[*i]) << "), ";
628 std::cout << "c: " << a.s << "]";
629 }
630 #endif
631
632 template <class I_t>
assignment_conflict(const assignment & a,const I_t & I,var_t v,reg_t r)633 bool assignment_conflict(const assignment &a, const I_t &I, var_t v, reg_t r)
634 {
635 varset_t::const_iterator i, i_end;
636
637 for (i = a.local.begin(), i_end = a.local.end(); i != i_end; ++i)
638 {
639 if (a.global[*i] != r)
640 continue;
641 if (boost::edge(*i, v, I).second)
642 return(true);
643 }
644
645 return(false);
646 }
647
648 template<class G_t>
assignments_introduce_instruction(assignment_list_t & alist,unsigned short int i,const G_t & G)649 void assignments_introduce_instruction(assignment_list_t &alist, unsigned short int i, const G_t &G)
650 {
651 assignment_list_t::iterator ai, ai_end;
652
653 #if !defined(_MSC_VER) // Efficient code - reduces total SDCC runtime by about 5.5% vs. code below, but doesn't work with MSVC++ (at least up to MSVC++ 2015)
654 struct inserter_t
655 {
656 explicit inserter_t(const std::vector<reg_t>& g, i_assignment_t& a) : global(g), ia(a)
657 {
658 }
659 inserter_t& operator=(var_t v)
660 {
661 if (global[v] >= 0)
662 ia.add_var(v, global[v]);
663 return(*this);
664 }
665 inserter_t& operator*()
666 {
667 return(*this);
668 }
669 inserter_t& operator++()
670 {
671 return(*this);
672 }
673 inserter_t& operator++(int i)
674 {
675 i;
676 return(*this);
677 }
678 private:
679 const std::vector<reg_t>& global;
680 i_assignment_t& ia;
681 };
682
683 for (ai = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai)
684 {
685 i_assignment_t ia;
686
687 std::set_intersection(ai->local.begin(), ai->local.end(), G[i].alive.begin(), G[i].alive.end(), inserter_t(ai->global, ia));
688
689 ai->i_assignment = ia;
690 }
691 #else // Human-readable code
692 for (ai = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai)
693 {
694 varset_t i_variables;
695
696 std::set_intersection(ai->local.begin(), ai->local.end(), G[i].alive.begin(), G[i].alive.end(), std::inserter(i_variables, i_variables.end()));
697
698 i_assignment_t ia;
699
700 varset_t::const_iterator v, v_end;
701
702 for (v = i_variables.begin(), v_end = i_variables.end(); v != v_end; ++v)
703 if (ai->global[*v] >= 0)
704 ia.add_var(*v, ai->global[*v]);
705
706 ai->i_assignment = ia;
707 }
708 #endif
709 }
710
711 template <class G_t, class I_t>
assignments_introduce_variable(assignment_list_t & alist,unsigned short int i,short int v,const G_t & G,const I_t & I)712 static void assignments_introduce_variable(assignment_list_t &alist, unsigned short int i, short int v, const G_t &G, const I_t &I)
713 {
714 assignment_list_t::iterator ai;
715 bool a_initialized;
716 assignment a;
717 size_t c, c_end;
718
719 for (ai = alist.begin(), c = 0, c_end = alist.size(); c < c_end; c++, ai++)
720 {
721 a_initialized = false;
722
723 for (reg_t r = 0; r < port->num_regs; r++)
724 {
725 if (!assignment_conflict(*ai, I, v, r))
726 {
727 if(!a_initialized)
728 {
729 a = *ai;
730 ai->marked = true;
731 a.marked = false;
732 varset_t::iterator i = std::lower_bound(a.local.begin(), a.local.end(), v);
733 if (i == a.local.end() || *i != v)
734 a.local.insert(i, v);
735 }
736 a.global[v] = r;
737 a.i_assignment.add_var(v, r);
738 if(!assignment_hopeless(a, i, G, I, v))
739 alist.push_back(a);
740 a.i_assignment.remove_var(v);
741 }
742 }
743 }
744 }
745
746 struct assignment_rep
747 {
748 assignment_list_t::iterator i;
749 float s;
750
operator <assignment_rep751 bool operator<(const assignment_rep& a) const
752 {
753 return(s < a.s);
754 }
755 };
756
757 template <class I_t>
compability_cost(const assignment & a,const assignment & ac,const I_t & I)758 float compability_cost(const assignment& a, const assignment& ac, const I_t &I)
759 {
760 typedef typename boost::graph_traits<I_t>::adjacency_iterator adjacency_iter_t;
761
762 float c = 0.0f;
763
764 varset_t::const_iterator vi, vi_end;
765
766 for(vi = ac.local.begin(), vi_end = ac.local.end(); vi != vi_end; ++vi)
767 {
768 const var_t v = *vi;
769 if(a.global[v] != ac.global[v])
770 {
771 c += 1000.0f;
772 continue;
773 }
774 #if 0 // This improves the quality of assignments, but it has a big runtime overhead for some cases.
775 adjacency_iter_t j, j_end;
776 for (boost::tie(j, j_end) = adjacent_vertices(v, I); j != j_end; ++j)
777 if(ac.global[v] != -1 && a.global[*j] == ac.global[v])
778 {
779 c += 1000.0f;
780 break;
781 }
782 #endif
783 }
784
785 return(c);
786 }
787
788 // Ensure that we never get more than options.max_allocs_per_node assignments at a single node of the tree decomposition.
789 // Tries to drop the worst ones first (but never drop the empty assignment, as it's the only one guaranteed to be always valid).
790 template <class G_t, class I_t>
drop_worst_assignments(assignment_list_t & alist,unsigned short int i,const G_t & G,const I_t & I,const assignment & ac,bool * const assignment_optimal)791 static void drop_worst_assignments(assignment_list_t &alist, unsigned short int i, const G_t &G, const I_t &I, const assignment& ac, bool *const assignment_optimal)
792 {
793 unsigned int n;
794 size_t alist_size;
795 assignment_list_t::iterator ai, an;
796
797 if ((alist_size = alist.size()) * port->num_regs <= static_cast<size_t>(options.max_allocs_per_node) || alist_size <= 1)
798 return;
799
800 *assignment_optimal = false;
801
802 #ifdef DEBUG_RALLOC_DEC
803 std::cout << "Too many assignments here (" << i << "):" << alist_size << " > " << options.max_allocs_per_node / port->num_regs << ". Dropping some.\n"; std::cout.flush();
804 #endif
805
806 #if 0
807 assignment_rep *arep = new assignment_rep[alist_size];
808
809 for (n = 0, ai = alist.begin(); n < alist_size; ++ai, n++)
810 {
811 arep[n].i = ai;
812 arep[n].s = ai->s + rough_cost_estimate(*ai, i, G, I) + compability_cost(*ai, ac, I);
813 }
814
815 std::nth_element(arep + 1, arep + options.max_allocs_per_node / port->num_regs, arep + alist_size);
816
817 //std::cout << "nth elem. est. cost: " << arep[options.max_allocs_per_node / port->num_regs].s << "\n"; std::cout.flush();
818
819 for (n = options.max_allocs_per_node / port->num_regs + 1; n < alist_size; n++)
820 alist.erase(arep[n].i);
821 #else // More efficient, reduces total SDCC runtime by about 1%.
822
823 size_t endsize = options.max_allocs_per_node / port->num_regs + 1;
824 size_t arep_maxsize = std::min(alist_size, endsize * 2) + 1;
825 size_t m, k;
826 float bound = std::numeric_limits<float>::infinity();
827
828 assignment_rep *arep = new assignment_rep[arep_maxsize];
829
830 for(m = 0, n = 1, ai = alist.begin(), ++ai; n < alist_size; n++)
831 {
832 float s = ai->s;
833
834 if(s > bound)
835 {
836 alist.erase(ai++);
837 continue;
838 }
839 s += compability_cost(*ai, ac, I);
840 if(s > bound)
841 {
842 alist.erase(ai++);
843 continue;
844 }
845 s += rough_cost_estimate(*ai, i, G, I);
846 if(s > bound)
847 {
848 alist.erase(ai++);
849 continue;
850 }
851
852 if(m >= arep_maxsize - 1)
853 {
854 std::nth_element(arep, arep + (endsize - 1), arep + m);
855 for(k = endsize; k < m; k++)
856 alist.erase(arep[k].i);
857 bound = arep[endsize - 1].s;
858
859 m = endsize;
860 }
861
862 arep[m].i = ai;
863 arep[m].s = s;
864
865 m++;
866
867 ++ai;
868 }
869
870 std::nth_element(arep, arep + (endsize - 1), arep + m);
871
872 for (n = endsize; n < m; n++)
873 alist.erase(arep[n].i);
874 #endif
875
876 delete[] arep;
877 }
878
879 // Handle Leaf nodes in the nice tree decomposition
880 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_leaf(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I)881 static void tree_dec_ralloc_leaf(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I)
882 {
883 #ifdef DEBUG_RALLOC_DEC
884 std::cout << "Leaf (" << t << "):\n"; std::cout.flush();
885 #endif
886
887 assignment a;
888 assignment_list_t &alist = T[t].assignments;
889
890 a.s = 0;
891 a.global.resize(boost::num_vertices(I), -1);
892 alist.push_back(a);
893
894 #ifdef DEBUG_RALLOC_DEC_ASS
895 assignment_list_t::iterator ai;
896 for(ai = alist.begin(); ai != alist.end(); ++ai)
897 {
898 print_assignment(*ai);
899 std::cout << "\n";
900 }
901 assignment best;
902 get_best_local_assignment(best, t, T);
903 std::cout << "Best: "; print_assignment(best); std::cout << "\n";
904 #endif
905 }
906
907 // Handle introduce nodes in the nice tree decomposition
908 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_introduce(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I,const assignment & ac,bool * const assignment_optimal)909 static void tree_dec_ralloc_introduce(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I, const assignment& ac, bool *const assignment_optimal)
910 {
911 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
912 adjacency_iter_t c, c_end;
913 assignment_list_t::iterator ai;
914 boost::tie(c, c_end) = adjacent_vertices(t, T);
915
916 #ifdef DEBUG_RALLOC_DEC
917 std::cout << "Introduce (" << t << "):\n"; std::cout.flush();
918 std::cout << "ac: "; print_assignment(ac); std::cout << "\n";
919 #endif
920
921 assignment_list_t &alist = T[t].assignments;
922
923 std::swap(alist, T[*c].assignments);
924
925 std::set<var_t> new_vars;
926 std::set_difference(T[t].alive.begin(), T[t].alive.end(), T[*c].alive.begin(), T[*c].alive.end(), std::inserter(new_vars, new_vars.end()));
927
928 std::set<unsigned short> new_inst;
929 std::set_difference(T[t].bag.begin(), T[t].bag.end(), T[*c].bag.begin(), T[*c].bag.end(), std::inserter(new_inst, new_inst.end()));
930 unsigned short int i = *(new_inst.begin());
931
932 // Extend to new instruction.
933 assignments_introduce_instruction(alist, i, G);
934
935 std::set<var_t>::const_iterator v;
936 for (v = new_vars.begin(); v != new_vars.end(); ++v)
937 {
938 drop_worst_assignments(alist, i, G, I, ac, assignment_optimal);
939 assignments_introduce_variable(alist, i, *v, G, I);
940 }
941
942 // Summation of costs and early removal of assignments.
943 for (ai = alist.begin(); ai != alist.end();)
944 {
945 if ((ai->s += (ai->i_costs[i] = instruction_cost(*ai, i, G, I))) == std::numeric_limits<float>::infinity())
946 ai = alist.erase(ai);
947 else
948 ++ai;
949 }
950
951 // Free memory in the std::set<var_t, boost::pool_allocator<var_t> > that live in the assignments in the list.
952 //boost::singleton_pool<boost::fast_pool_allocator_tag, sizeof(var_t)>::release_memory();
953
954 #ifdef DEBUG_RALLOC_DEC_ASS
955 for(ai = alist.begin(); ai != alist.end(); ++ai)
956 {
957 print_assignment(*ai);
958 std::cout << "\n";
959 }
960
961 assignment best;
962 get_best_local_assignment(best, t, T);
963 std::cout << "Best: "; print_assignment(best); std::cout << "\n";
964 #endif
965 }
966
assignments_locally_same(const assignment & a1,const assignment & a2)967 static bool assignments_locally_same(const assignment &a1, const assignment &a2)
968 {
969 if (a1.local != a2.local)
970 return(false);
971
972 varset_t::const_iterator i, i_end;
973 for (i = a1.local.begin(), i_end = a1.local.end(); i != i_end; ++i)
974 if (a1.global[*i] != a2.global[*i])
975 return(false);
976
977 return(true);
978 }
979
980 // Handle forget nodes in the nice tree decomposition
981 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_forget(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I)982 static void tree_dec_ralloc_forget(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I)
983 {
984 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
985 adjacency_iter_t c, c_end;
986 boost::tie(c, c_end) = adjacent_vertices(t, T);
987
988 #ifdef DEBUG_RALLOC_DEC
989 std::cout << "Forget (" << t << "):\n"; std::cout.flush();
990 #endif
991
992 assignment_list_t &alist = T[t].assignments;
993
994 std::swap(alist, T[*c].assignments);
995
996 std::set<unsigned short int> old_inst;
997 std::set_difference(T[*c].bag.begin(), T[*c].bag.end(), T[t].bag.begin(), T[t].bag.end(), std::inserter(old_inst, old_inst.end()));
998 wassert(old_inst.size() == 1);
999 unsigned short int i = *(old_inst.begin());
1000
1001 varset_t old_vars;
1002 std::set_difference(T[*c].alive.begin(), T[*c].alive.end(), T[t].alive.begin(), T[t].alive.end(), std::inserter(old_vars, old_vars.end()));
1003
1004 assignment_list_t::iterator ai, aif;
1005
1006 // Restrict assignments (locally) to current variables.
1007 varset_t newlocal;
1008 for (ai = alist.begin(); ai != alist.end(); ++ai)
1009 {
1010 newlocal.clear();
1011 std::set_difference(ai->local.begin(), ai->local.end(), old_vars.begin(), old_vars.end(), std::inserter(newlocal, newlocal.end()));
1012 std::swap(ai->local, newlocal);
1013
1014 ai->i_costs.erase(i);
1015 }
1016
1017 alist.sort();
1018
1019 // Collapse (locally) identical assignments.
1020 for (ai = alist.begin(); ai != alist.end();)
1021 {
1022 aif = ai;
1023
1024 for (++ai; ai != alist.end() && assignments_locally_same(*aif, *ai);)
1025 {
1026 if (aif->s > ai->s)
1027 {
1028 alist.erase(aif);
1029 aif = ai;
1030 ++ai;
1031 }
1032 else
1033 ai = alist.erase(ai);
1034 }
1035 }
1036
1037 // Free memory in the std::set<var_t, boost::pool_allocator<var_t> > that live in the assignments in the list.
1038 //boost::singleton_pool<boost::fast_pool_allocator_tag, sizeof(var_t)>::release_memory();
1039
1040 #ifdef DEBUG_RALLOC_DEC
1041 std::cout << "Remaining assignments: " << alist.size() << "\n"; std::cout.flush();
1042 #endif
1043
1044 #ifdef DEBUG_RALLOC_DEC_ASS
1045 for(ai = alist.begin(); ai != alist.end(); ++ai)
1046 {
1047 print_assignment(*ai);
1048 std::cout << "\n";
1049 }
1050
1051 assignment best;
1052 get_best_local_assignment(best, t, T);
1053 std::cout << "Best: "; print_assignment(best); std::cout << "\n";
1054 #endif
1055 }
1056
1057 // Handle join nodes in the nice tree decomposition
1058 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_join(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I)1059 static void tree_dec_ralloc_join(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I)
1060 {
1061 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1062 adjacency_iter_t c, c_end, c2, c3;
1063 boost::tie(c, c_end) = adjacent_vertices(t, T);
1064
1065 #ifdef DEBUG_RALLOC_DEC
1066 std::cout << "Join (" << t << "):\n"; std::cout.flush();
1067 #endif
1068
1069 c2 = c;
1070 ++c;
1071 c3 = c;
1072
1073 assignment_list_t &alist = T[t].assignments;
1074 assignment_list_t &alist2 = T[*c2].assignments;
1075 std::swap(alist, T[*c3].assignments);
1076
1077 alist.sort();
1078 alist2.sort();
1079
1080 assignment_list_t::iterator ai, ai2;
1081 for (ai = alist.begin(), ai2 = alist2.begin(); ai != alist.end() && ai2 != alist2.end();)
1082 {
1083 if (assignments_locally_same(*ai, *ai2))
1084 {
1085 ai->s += ai2->s;
1086 // Avoid double-counting instruction costs.
1087 std::set<unsigned int>::iterator bi;
1088 for (bi = T[t].bag.begin(); bi != T[t].bag.end(); ++bi)
1089 ai->s -= ai->i_costs[*bi];
1090 for (size_t i = 0; i < ai->global.size(); i++)
1091 ai->global[i] = ((ai->global[i] != -1) ? ai->global[i] : ai2->global[i]);
1092 ++ai;
1093 ++ai2;
1094 }
1095 else if (*ai < *ai2)
1096 ai = alist.erase(ai);
1097 else if (*ai2 < *ai)
1098 ++ai2;
1099 }
1100 while(ai != alist.end())
1101 ai = alist.erase(ai);
1102
1103 alist2.clear();
1104
1105 #ifdef DEBUG_RALLOC_DEC
1106 std::cout << "Remaining assignments: " << alist.size() << "\n"; std::cout.flush();
1107 #endif
1108
1109 #ifdef DEBUG_RALLOC_DEC_ASS
1110 for(std::list<assignment>::iterator ai = alist.begin(); ai != alist.end(); ++ai)
1111 {
1112 print_assignment(*ai);
1113 std::cout << "\n";
1114 }
1115 #endif
1116 }
1117
1118 template <class T_t>
get_best_local_assignment(assignment & a,typename boost::graph_traits<T_t>::vertex_descriptor t,const T_t & T)1119 void get_best_local_assignment(assignment &a, typename boost::graph_traits<T_t>::vertex_descriptor t, const T_t &T)
1120 {
1121 const assignment_list_t &alist = T[t].assignments;
1122
1123 assignment_list_t::const_iterator ai, ai_end, ai_best;
1124 for(ai = ai_best = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai)
1125 if(ai->s < ai_best->s)
1126 ai_best = ai;
1127
1128 a = *ai_best;
1129 }
1130
1131 // Handle nodes in the tree decomposition, by detecting their type and calling the appropriate function. Recurses.
1132 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_nodes(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I,const assignment & ac,bool * const assignment_optimal)1133 static void tree_dec_ralloc_nodes(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I, const assignment& ac, bool *const assignment_optimal)
1134 {
1135 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1136
1137 adjacency_iter_t c, c_end;
1138 typename boost::graph_traits<T_t>::vertex_descriptor c0, c1;
1139
1140 boost::tie(c, c_end) = adjacent_vertices(t, T);
1141
1142 switch (out_degree(t, T))
1143 {
1144 case 0:
1145 tree_dec_ralloc_leaf(T, t, G, I);
1146 break;
1147 case 1:
1148 c0 = *c;
1149 tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal);
1150 T[c0].bag.size() < T[t].bag.size() ? tree_dec_ralloc_introduce(T, t, G, I, ac, assignment_optimal) : tree_dec_ralloc_forget(T, t, G, I);
1151 break;
1152 case 2:
1153 c0 = *c++;
1154 c1 = *c;
1155
1156 if (T[c0].weight < T[c1].weight) // Minimize memory consumption needed for keeping intermediate results. As a side effect, this also helps the ac mechanism in the heuristic.
1157 std::swap (c0, c1);
1158
1159 tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal);
1160 {
1161 assignment *ac2 = new assignment;
1162 get_best_local_assignment_biased(*ac2, c0, T);
1163 tree_dec_ralloc_nodes(T, c1, G, I, *ac2, assignment_optimal);
1164 delete ac2;
1165 }
1166 tree_dec_ralloc_join(T, t, G, I);
1167 break;
1168 default:
1169 std::cerr << "Not nice.\n";
1170 break;
1171 }
1172 }
1173
1174 // Find the best root selecting from t_old and the leafs under t.
1175 template <class T_t>
find_best_root(const T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,size_t t_s,typename boost::graph_traits<T_t>::vertex_descriptor t_old,size_t t_old_s)1176 static std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t> find_best_root(const T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, size_t t_s, typename boost::graph_traits<T_t>::vertex_descriptor t_old, size_t t_old_s)
1177 {
1178 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1179 adjacency_iter_t c, c_end;
1180 typename boost::graph_traits<T_t>::vertex_descriptor c0, c1, t0;
1181 size_t t0_s;
1182
1183 boost::tie(c, c_end) = adjacent_vertices(t, T);
1184
1185 switch (out_degree(t, T))
1186 {
1187 case 0:
1188 return(t_s > t_old_s ? std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t>(t, t_s) : std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t>(t_old, t_old_s));
1189 case 1:
1190 return(find_best_root(T, *c, T[*c].alive.size() ? T[*c].alive.size() : t_s, t_old, t_old_s));
1191 case 2:
1192 c0 = *c++;
1193 c1 = *c;
1194 boost::tie(t0, t0_s) = find_best_root(T, c0, T[c0].alive.size() ? T[c0].alive.size() : t_s, t_old, t_old_s);
1195 return(find_best_root(T, c1, T[c1].alive.size() ? T[c1].alive.size() : t_s, t0_s > t_old_s ? t0 : t_old, t0_s > t_old_s ? t0_s : t_old_s));
1196 break;
1197 default:
1198 std::cerr << "Not nice.\n";
1199 break;
1200 }
1201
1202 return(std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t>(t_old, t_old_s));
1203 }
1204
1205 // Change the root to t.
1206 template <class T_t>
re_root(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t)1207 static void re_root(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t)
1208 {
1209 typename boost::graph_traits<T_t>::vertex_descriptor s0, s1, s2;
1210 typename boost::graph_traits<T_t>::in_edge_iterator e, e_end;
1211
1212 boost::tie(e, e_end) = boost::in_edges(t, T);
1213 if (e == e_end)
1214 return;
1215
1216 s0 = t;
1217 s1 = boost::source(*e, T);
1218
1219 for (boost::tie(e, e_end) = boost::in_edges(s1, T); e != e_end; boost::tie(e, e_end) = boost::in_edges(s1, T))
1220 {
1221 s2 = boost::source(*e, T);
1222 boost::remove_edge(s1, s0, T);
1223 boost::add_edge(s0, s1, T);
1224 s0 = s1;
1225 s1 = s2;
1226 }
1227 boost::remove_edge(s1, s0, T);
1228 boost::add_edge(s0, s1, T);
1229 }
1230
1231 // Change the root to improve the assignment removal heuristic.
1232 template <class T_t>
good_re_root(T_t & T)1233 static void good_re_root(T_t &T)
1234 {
1235 typename boost::graph_traits<T_t>::vertex_descriptor t;
1236
1237 typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1238 adjacency_iter_t c, c_end;
1239
1240 t = find_root(T);
1241
1242 for (boost::tie(c, c_end) = boost::adjacent_vertices(t, T); c != c_end && !T[*c].alive.size();)
1243 boost::tie(c, c_end) = boost::adjacent_vertices(*c, T);
1244
1245 size_t t_s = (c != c_end ? T[*c].alive.size() : 0);
1246 t = find_best_root(T, t, t_s, t, t_s).first;
1247
1248 if (T[t].alive.size())
1249 {
1250 std::cerr << "Error: Invalid root.\n";
1251 return;
1252 }
1253
1254 re_root(T, t);
1255 }
1256
1257 // Dump conflict graph, with numbered nodes, show live variables at each node.
dump_con(const con_t & con)1258 static void dump_con(const con_t &con)
1259 {
1260 if (!currFunc)
1261 return;
1262
1263 std::ofstream dump_file((std::string(dstFileName) + ".dumpralloccon" + currFunc->rname + ".dot").c_str());
1264
1265 std::string *name = new std::string[num_vertices(con)];
1266 for (var_t i = 0; static_cast<boost::graph_traits<cfg_t>::vertices_size_type>(i) < boost::num_vertices(con); i++)
1267 {
1268 std::ostringstream os;
1269 os << i;
1270 if (con[i].name)
1271 os << " : " << con[i].name << ":" << con[i].byte;
1272 name[i] = os.str();
1273 }
1274 boost::write_graphviz(dump_file, con, boost::make_label_writer(name));
1275 delete[] name;
1276 }
1277
1278 // Dump cfg, with numbered nodes, show live variables at each node.
dump_cfg(const cfg_t & cfg)1279 static void dump_cfg(const cfg_t &cfg)
1280 {
1281 if (!currFunc)
1282 return;
1283
1284 std::ofstream dump_file((std::string(dstFileName) + ".dumpralloccfg" + currFunc->rname + ".dot").c_str());
1285
1286 std::string *name = new std::string[num_vertices(cfg)];
1287 for (unsigned int i = 0; i < boost::num_vertices(cfg); i++)
1288 {
1289 std::ostringstream os;
1290 os << i << ", " << cfg[i].ic->key << ": ";
1291 cfg_alive_t::const_iterator v;
1292 for (v = cfg[i].alive.begin(); v != cfg[i].alive.end(); ++v)
1293 os << *v << " ";
1294 name[i] = os.str();
1295 }
1296
1297 boost::write_graphviz(dump_file, cfg, boost::make_label_writer(name), boost::default_writer(), cfg_titlewriter(currFunc->rname, "register allocator"));
1298 delete[] name;
1299 }
1300
1301 // Dump tree decomposition, show bag and live variables at each node.
dump_tree_decomposition(const tree_dec_t & tree_dec)1302 static void dump_tree_decomposition(const tree_dec_t &tree_dec)
1303 {
1304 if (!currFunc)
1305 return;
1306
1307 std::ofstream dump_file((std::string(dstFileName) + ".dumprallocdec" + currFunc->rname + ".dot").c_str());
1308
1309 unsigned int w = 0;
1310
1311 std::string *name = new std::string[num_vertices(tree_dec)];
1312 for (unsigned int i = 0; i < boost::num_vertices(tree_dec); i++)
1313 {
1314 if (tree_dec[i].bag.size() > w)
1315 w = tree_dec[i].bag.size();
1316 std::ostringstream os;
1317 std::set<unsigned int>::const_iterator v1;
1318 os << i << " | ";
1319 for (v1 = tree_dec[i].bag.begin(); v1 != tree_dec[i].bag.end(); ++v1)
1320 os << *v1 << " ";
1321 os << ": ";
1322 std::set<var_t>::const_iterator v2;
1323 for (v2 = tree_dec[i].alive.begin(); v2 != tree_dec[i].alive.end(); ++v2)
1324 os << *v2 << " ";
1325 name[i] = os.str();
1326 }
1327
1328 boost::write_graphviz(dump_file, tree_dec, boost::make_label_writer(name), boost::default_writer(), dec_titlewriter(w - 1, currFunc->rname, "register allocator"));
1329 delete[] name;
1330
1331 #ifdef D_RALLOC_DEC
1332 std::cout << "Width: " << (w - 1) << "(" << currFunc->name << ")\n";
1333 #endif
1334 }
1335
1336 #endif
1337
1338