1 // Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2010 - 2013
2 //
3 // (c) 2010-2012 Goethe-Universität Frankfurt
4 //
5 // This program is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by the
7 // Free Software Foundation; either version 2, or (at your option) any
8 // later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 //
19 // An optimal, polynomial-time register allocator.
20 //
21 // For details, see:
22 //
23 // Philipp Klaus Krause,
24 // "Optimal Register Allocation in Polynomial Time",
25 // Compiler Construction - 22nd International Conference, CC 2013, Held as Part of the European Joint Conferences on Theory and Practice of Software, ETAPS 2013. Proceedings,
26 // Lecture Notes in Computer Science, volume 7791, pp. 1-20.
27 // Springer,
28 // 2013.
29 //
30 // To use this from a port do the following:
31 //
32 // 1) Supply a cost function
33 // template <class G_t, class I_t>
34 // float instruction_cost(const assignment &a, unsigned short int i, const G_t &G, const I_t &I);
35 // Which can range from
36 // simple, e.g. cost 1 for each byte accessed in a register, cost 4 for each byte accessed in memory
37 // to
38 // quite involved, e.g. the number of bytes of code the code generator would generate.
39 //
40 // 2) Call
41 // create_cfg(), thorup_tree_decomposition(), nicify(), alive_tree_dec(), tree_dec_ralloc_nodes().
42 //
43 // The Z80 port can serve as an example, see z80_ralloc2_cc() in z80/ralloc2.cc.
44 
45 #ifndef SDCCRALLOC_HH
46 #define SDCCRALLOC_HH 1
47 
48 #include <iostream>
49 #include <limits>
50 #include <utility>
51 #include <sstream>
52 #include <fstream>
53 
54 // Workaround for boost bug #11880
55 #include <boost/version.hpp>
56 #if BOOST_VERSION == 106000
57    #include <boost/type_traits/ice.hpp>
58 #endif
59 
60 #include <boost/graph/graphviz.hpp>
61 #include <boost/graph/adjacency_matrix.hpp>
62 #include <boost/graph/connected_components.hpp>
63 #include <boost/container/flat_set.hpp>
64 #include <boost/container/flat_map.hpp>
65 
66 #include "common.h"
67 
68 extern "C"
69 {
70 #include "SDCCbtree.h"
71 }
72 
73 typedef short int var_t;
74 typedef signed char reg_t;
75 
76 // Integer constant upper bound on port->num_regs
77 #define MAX_NUM_REGS 9
78 
79 // Assignment at an instruction
80 struct i_assignment_t
81 {
82   var_t registers[MAX_NUM_REGS][2];
83 
i_assignment_ti_assignment_t84   i_assignment_t(void)
85   {
86     for (reg_t r = 0; r < MAX_NUM_REGS; r++)
87       for (unsigned int i = 0; i < 2; i++)
88         registers[r][i] = -1;
89   }
90 
91 #if 0
92   bool operator<(const i_assignment_t &i_a) const
93   {
94     for (reg_t r = 0; r < port->num_regs; r++)
95       for (unsigned int i = 0; i < 2; i++)
96         {
97           if (registers[r][i] < i_a.registers[r][i])
98             return(true);
99           else if (registers[r][i] > i_a.registers[r][i])
100             return(false);
101         }
102     return(false);
103   }
104 #endif
105 
add_vari_assignment_t106   void add_var(var_t v, reg_t r)
107   {
108     if (registers[r][1] < v)
109       {
110         registers[r][0] = registers[r][1];
111         registers[r][1] = v;
112       }
113     else
114       registers[r][0] = v;
115   }
116 
remove_vari_assignment_t117   void remove_var(var_t v)
118   {
119     for (reg_t r = 0; r < port->num_regs; r++)
120       {
121         if (registers[r][1] == v)
122           {
123             registers[r][1] = registers[r][0];
124             registers[r][0] = -1;
125           }
126         else if (registers[r][0] == v)
127           registers[r][0] = -1;
128       }
129   }
130 };
131 
132 typedef std::vector<var_t> varset_t; // Faster than std::set,  std::tr1::unordered_set and stx::btree_set here.
133 
134 typedef boost::container::flat_map<int, float> icosts_t; // Faster than std::map and stx::btree_map here.
135 
136 typedef std::vector<var_t> cfg_alive_t; // Faster than stx::btree_set here .
137 typedef boost::container::flat_set<var_t> cfg_dying_t; // Faster than stx::btree_set and std::set here.
138 
139 struct assignment
140 {
141   float s;
142 
143   varset_t local;               // Entries: var
144   std::vector<reg_t> global;    // Entries: global[var] = reg (-1 if no reg assigned)
145   icosts_t i_costs;             // Costs for all instructions in bag (needed to avoid double counting costs at join nodes)
146   i_assignment_t i_assignment;  // Assignment at the instruction currently being added in an introduce node;
147 
148   bool marked;
149 
operator <assignment150   bool operator<(const assignment& a) const
151   {
152     varset_t::const_iterator i, ai, i_end, ai_end;
153 
154     i_end = local.end();
155     ai_end = a.local.end();
156 
157     for (i = local.begin(), ai = a.local.begin();; ++i, ++ai)
158       {
159         if (i == i_end && ai == ai_end)
160           return(false);
161         if (i == i_end)
162           return(true);
163         if (ai == ai_end)
164           return(false);
165 
166         if (*i < *ai)
167           return(true);
168         if (*i > *ai)
169           return(false);
170 
171         if (global[*i] < a.global[*ai])
172           return(true);
173         if (global[*i] > a.global[*ai])
174           return(false);
175       }
176   }
177 };
178 
179 typedef std::list<assignment> assignment_list_t;
180 //typedef std::vector<assignment> assignment_list_t; // Probably faster, but would require some code reorganization.
181 
182 struct tree_dec_node
183 {
184   std::set<unsigned int> bag;
185   std::set<var_t> alive;
186   assignment_list_t assignments;
187   unsigned weight; // The weight is the number of nodes at which intermediate results need to be remembered. In general, to minimize memory consumption, at join nodes the child with maximum weight should be processed first.
188 };
189 
190 typedef boost::container::flat_multimap<int, var_t> operand_map_t; // Faster than std::multimap<int, var_t> and stx::btree_multimap<int, var_t> here.
191 
192 struct cfg_node
193 {
194   iCode *ic;
195   operand_map_t operands;
196   cfg_alive_t alive;
197   cfg_dying_t dying;
198 
199   std::set<var_t> stack_alive;
200 
201 #ifdef DEBUG_SEGV
202   cfg_node(void);
203 #endif
204 };
205 
206 #ifdef DEBUG_SEGV
207 // This only exists to track down #3506333 and #3475617.
208 bool default_constructor_of_cfg_node_called;
cfg_node(void)209 cfg_node::cfg_node(void)
210 {
211   default_constructor_of_cfg_node_called = true;
212 }
213 #endif
214 
215 struct con_node
216 {
217   int v;
218   int byte;
219   int size;
220   char *name;
221 };
222 
223 typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, tree_dec_node> tree_dec_t;
224 typedef boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, con_node> con_t;
225 typedef boost::adjacency_matrix<boost::undirectedS, con_node> con2_t;
226 typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, cfg_node> cfg_t;
227 typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::undirectedS> cfg_sym_t;
228 
229 #ifdef HAVE_TREEDEC_COMBINATIONS_HPP
230 #include <treedec/treedec_traits.hpp>
231 TREEDEC_TREEDEC_BAG_TRAITS(tree_dec_t, bag);
232 #endif
233 
234 #include "SDCCtree_dec.hpp"
235 
236 // Cost function. Port-specific.
237 template <class G_t, class I_t>
238 static float instruction_cost(const assignment &a, unsigned short int i, const G_t &G, const I_t &I);
239 
240 // For early removel of assignments that cannot be extended to valid assignments. Port-specific.
241 template <class G_t, class I_t>
242 static bool assignment_hopeless(const assignment &a, unsigned short int i, const G_t &G, const I_t &I, const var_t lastvar);
243 
244 // Rough cost estimate. Port-specific.
245 template <class G_t, class I_t>
246 static float rough_cost_estimate(const assignment &a, unsigned short int i, const G_t &G, const I_t &I);
247 
248 // Avoid overwriting operands that are still needed by the result. Port-specific.
249 template <class I_t>
250 static void add_operand_conflicts_in_node(const cfg_node &n, I_t &I);
251 
252 // Port-specific
253 template <class T_t>
254 static void get_best_local_assignment_biased(assignment &a, typename boost::graph_traits<T_t>::vertex_descriptor t, const T_t &T);
255 
256 // Code for another ic is generated when generating this one. Mark the other as generated. Port-specific.
257 static void extra_ic_generated(iCode *ic);
258 
259 inline void
add_operand_to_cfg_node(cfg_node & n,operand * o,std::map<std::pair<int,reg_t>,var_t> & sym_to_index)260 add_operand_to_cfg_node(cfg_node &n, operand *o, std::map<std::pair<int, reg_t>, var_t> &sym_to_index)
261 {
262   reg_t k;
263   if (o && IS_SYMOP(o) && sym_to_index.find(std::pair<int, reg_t>(OP_SYMBOL_CONST(o)->key, 0)) != sym_to_index.end())
264     {
265       if (n.operands.find(OP_SYMBOL_CONST(o)->key) == n.operands.end())
266         for (k = 0; k < OP_SYMBOL_CONST(o)->nRegs; k++)
267           n.operands.insert(std::pair<int, var_t>(OP_SYMBOL_CONST(o)->key, sym_to_index[std::pair<int, reg_t>(OP_SYMBOL_CONST(o)->key, k)]));
268     }
269 }
270 
271 // Check if the live-range of variable i is connected
272 #if 0
273 // This check was too expensive - Profiling shows that compiling the Dhrystone benchmark for stm8 with default options, we spent about a quarter of compiler runtime in here!
274 // Profiling shows that we spent a significant amount of time on the first call to copy_graph()
275 // Todo: Improve efficiency, e.g. using subgraph or filtered_graph to avoid the costly first call to copy_graph()
276 // Issues to solve: cfg2 is undirected, cfg is bidirectional; this makes use of subgraph or filtered_graph harder.
277 static bool liverange_connected(cfg_t &cfg, var_t i)
278 {
279   cfg_sym_t cfg2;
280   boost::copy_graph(cfg, cfg2, boost::vertex_copy(forget_properties()).edge_copy(forget_properties())); // This call to copy_graph is expensive!
281   for (int j = boost::num_vertices(cfg) - 1; j >= 0; j--)
282     {
283       if (std::find(cfg[j].alive.begin(), cfg[j].alive.end(), i) == cfg[j].alive.end())
284         {
285           boost::clear_vertex(j, cfg2);
286           boost::remove_vertex(j, cfg2);
287         }
288     }
289 
290   std::vector<boost::graph_traits<cfg_t>::vertices_size_type> component(num_vertices(cfg2));
291 
292   return(boost::connected_components(cfg2, &component[0]) <= 1);
293 }
294 #else
295 // A not very elegant, but faster check
component_size_impl(const cfg_t & cfg,const std::vector<bool> & life,var_t v,int i,std::vector<bool> & visited)296 static inline int component_size_impl(const cfg_t &cfg, const std::vector<bool> &life, var_t v, int i, std::vector<bool>& visited)
297 {
298   typename boost::graph_traits<cfg_t>::in_edge_iterator in, in_end;
299   typename boost::graph_traits<cfg_t>::out_edge_iterator out, out_end;
300 
301   int size = 1;
302   visited[i] = true;
303 
304   for(boost::tie(in, in_end) = boost::in_edges(i, cfg); in != in_end; ++in)
305     if(life[boost::source(*in, cfg)] && !visited[boost::source(*in, cfg)])
306       size += component_size_impl(cfg, life, v, boost::source(*in, cfg), visited);
307 
308   for(boost::tie(out, out_end) = boost::out_edges(i, cfg); out != out_end; ++out)
309     if(life[boost::target(*out, cfg)] && !visited[boost::target(*out, cfg)])
310       size += component_size_impl(cfg, life, v, boost::target(*out, cfg), visited);
311 
312   return(size);
313 }
314 
component_size(const cfg_t & cfg,const std::vector<bool> & life,var_t v,int i)315 static inline int component_size(const cfg_t &cfg, const std::vector<bool> &life, var_t v, int i)
316 {
317   std::vector<bool> visited(boost::num_vertices(cfg));
318 
319   return(component_size_impl(cfg, life, v, i, visited));
320 }
321 
liverange_connected(const cfg_t & cfg,var_t v)322 static bool liverange_connected(const cfg_t &cfg, var_t v)
323 {
324   std::vector<bool> life(boost::num_vertices(cfg));
325   int num_life = 0;
326   int last_life;
327 
328   for(int i = 0; i < boost::num_vertices (cfg); i++)
329     if(std::find(cfg[i].alive.begin(), cfg[i].alive.end(), v) != cfg[i].alive.end())
330       {
331         life[i] = true;
332         num_life++;
333         last_life = i;
334       }
335 
336   if(!num_life)
337     return(true);
338 
339   return(component_size(cfg, life, v, last_life) >= num_life);
340 }
341 #endif
342 
343 // A quick-and-dirty function to get the CFG from sdcc.
344 static iCode *
create_cfg(cfg_t & cfg,con_t & con,ebbIndex * ebbi)345 create_cfg(cfg_t &cfg, con_t &con, ebbIndex *ebbi)
346 {
347   eBBlock **ebbs = ebbi->bbOrder;
348   iCode *start_ic;
349   iCode *ic;
350 
351   std::map<int, unsigned int> key_to_index;
352   std::map<std::pair<int, reg_t>, var_t> sym_to_index;
353 
354   if(currFunc)
355     currFunc->funcDivFlagSafe = 1;
356 
357   start_ic = iCodeLabelOptimize(iCodeFromeBBlock (ebbs, ebbi->count));
358   {
359     int i;
360     var_t j;
361     wassertl (!boost::num_vertices(cfg), "CFG non-empty before creation.");
362     for (ic = start_ic, i = 0, j = 0; ic; ic = ic->next, i++)
363       {
364         if (currFunc)
365           currFunc->funcDivFlagSafe &= !(ic->op == INLINEASM || ic->op == '/' || ic->op == '%' || ic->op == PCALL ||
366             ic->op == CALL && (IS_OP_LITERAL (IC_LEFT (ic)) || !OP_SYMBOL(IC_LEFT (ic))->funcDivFlagSafe) ||
367             ic->op == RIGHT_OP && IS_OP_LITERAL (IC_RIGHT (ic))); // Right shift might be implemented using division.
368 
369 #ifdef DEBUG_SEGV
370         default_constructor_of_cfg_node_called = false;
371 #endif
372         boost::add_vertex(cfg);
373 
374 #ifdef DEBUG_SEGV
375         wassertl (default_constructor_of_cfg_node_called, "add_vertex failed to call default constructor of cfg_node!");
376 #endif
377         wassertl (cfg[i].alive.empty(), "Alive set non-empty upon creation.");
378         key_to_index[ic->key] = i;
379 
380         if(ic->op == SEND && ic->builtinSEND) // Ensure that only the very first send iCode is active.
381           {
382             operand *bi_parms[MAX_BUILTIN_ARGS];
383             int nbi_parms;
384             getBuiltinParms(ic, &nbi_parms, bi_parms);
385           }
386 
387         extra_ic_generated(ic);
388 
389         cfg[i].ic = ic;
390         ic->rSurv = newBitVect(port->num_regs); // Never freed. Memory leak?
391         ic->rMask = newBitVect(port->num_regs); // Never freed. Memory leak?
392 
393         if (ic->generated)
394           continue;
395 
396         for (int j2 = 0; j2 <= operandKey; j2++)
397           {
398             if (bitVectBitValue(ic->rlive, j2))
399               {
400                 symbol *sym = (symbol *)(hTabItemWithKey(liveRanges, j2));
401 
402                 if (!sym->for_newralloc)
403                   continue;
404 
405                 // Add node to conflict graph:
406                 if (sym_to_index.find(std::pair<int, reg_t>(j2, 0)) != sym_to_index.end())
407                   continue;
408 
409                 // Other parts of the allocator may rely on the variables corresponding to bytes from the same sdcc variable to have subsequent numbers.
410                 for (reg_t k = 0; k < sym->nRegs; k++)
411                   {
412                     boost::add_vertex(con);
413                     con[j].v = j2;
414                     con[j].byte = k;
415                     con[j].size = sym->nRegs;
416                     con[j].name = sym->name;
417                     sym_to_index[std::pair<int, reg_t>(j2, k)] = j;
418                     for (reg_t l = 0; l < k; l++)
419                       boost::add_edge(j - l - 1, j, con);
420                     j++;
421                   }
422               }
423           }
424       }
425   }
426 
427   // Get control flow graph from sdcc.
428   for (ic = start_ic; ic; ic = ic->next)
429     {
430       wassertl (key_to_index[ic->key] < boost::num_vertices(cfg), "Node not in CFG.");
431 
432       if (ic->op != GOTO && ic->op != RETURN && ic->op != JUMPTABLE && ic->next)
433         {
434           wassertl (key_to_index[ic->next->key] < boost::num_vertices(cfg), "Next node not in CFG.");
435           boost::add_edge(key_to_index[ic->key], key_to_index[ic->next->key], cfg);
436         }
437 
438       if (ic->op == GOTO)
439         {
440           wassertl (key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key] < boost::num_vertices(cfg), "GOTO target not in CFG.");
441           boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key], cfg);
442         }
443       else if (ic->op == RETURN)
444         {
445           wassertl (key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key] < boost::num_vertices(cfg), "RETURN target not in CFG.");
446           boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key], cfg);
447         }
448       else if (ic->op == IFX)
449         {
450           wassertl (key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? IC_TRUE(ic) : IC_FALSE(ic))->sch->key] < boost::num_vertices(cfg), "IFX target not in CFG.");
451           boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? IC_TRUE(ic) : IC_FALSE(ic))->sch->key], cfg);
452         }
453       else if (ic->op == JUMPTABLE)
454         for (symbol *lbl = (symbol *)(setFirstItem (IC_JTLABELS (ic))); lbl; lbl = (symbol *)(setNextItem (IC_JTLABELS (ic))))
455           {
456             wassertl (key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key] < boost::num_vertices(cfg), "GOTO target not in CFG.");
457             boost::add_edge(key_to_index[ic->key], key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key], cfg);
458           }
459 
460       for (int i = 0; i <= operandKey; i++)
461         {
462           if (sym_to_index.find(std::pair<int, reg_t>(i, 0)) == sym_to_index.end())
463             continue;
464 
465           if (bitVectBitValue(ic->rlive, i))
466             {
467               symbol *isym = (symbol *)(hTabItemWithKey(liveRanges, i));
468               for (reg_t k = 0; k < isym->nRegs; k++)
469                 {
470                   wassert (key_to_index.find(ic->key) != key_to_index.end());
471                   wassert (sym_to_index.find(std::pair<int, int>(i, k)) != sym_to_index.end());
472                   wassertl (key_to_index[ic->key] < boost::num_vertices(cfg), "Node not in CFG.");
473                   cfg[key_to_index[ic->key]].alive.push_back(sym_to_index[std::pair<int, int>(i, k)]);
474                 }
475 
476               // TODO: Move this to a place where it also works when using the old allocator!
477               isym->block = btree_lowest_common_ancestor(isym->block, ic->block);
478               // If this symbol has a spill location, ensure the spill location is also allocated in a compatible block
479               if (SYM_SPIL_LOC(isym))
480                 SYM_SPIL_LOC(isym)->block = btree_lowest_common_ancestor(SYM_SPIL_LOC(isym)->block, isym->block);
481             }
482         }
483 
484       if (ic->op == IFX)
485         add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_COND(ic), sym_to_index);
486       else if (ic->op == JUMPTABLE)
487         add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_JTCOND(ic), sym_to_index);
488       else
489         {
490           add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_RESULT(ic), sym_to_index);
491           add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_LEFT(ic), sym_to_index);
492           add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_RIGHT(ic), sym_to_index);
493         }
494 
495       // TODO: Extend live-ranges of returns of built-in function calls back to first SEND.
496 
497       add_operand_conflicts_in_node(cfg[key_to_index[ic->key]], con);
498     }
499 
500 #if 0
501   // Get conflict graph from sdcc
502   for (var_t i = 0; static_cast<boost::graph_traits<cfg_t>::vertices_size_type>(i) < num_vertices(con); i++)
503     {
504       symbol *isym = (symbol *)(hTabItemWithKey(liveRanges, con[i].v));
505       for (int j = 0; j <= operandKey; j++)
506         if (bitVectBitValue(isym->clashes, j))
507           {
508             symbol *jsym = (symbol *)(hTabItemWithKey(liveRanges, j));
509             if (sym_to_index.find(std::pair<int, reg_t>(j, 0)) == sym_to_index.end())
510               continue;
511             for (reg_t k = 0; k < jsym->nRegs; k++)
512               boost::add_edge(i, sym_to_index[std::pair<int, reg_t>(j, k)], con);
513           }
514     }
515 #endif
516 
517   // Check for unconnected live ranges, some might have survived earlier stages.
518   for (var_t i = (var_t)boost::num_vertices(con) - 1; i >= 0; i--)
519     if (!liverange_connected(cfg, i))
520       {
521         // Non-connected CFGs are created by at least GCSE and lospre. We now have a live-range splitter that fixes them, so this should no longer be necessary, but we leave this code here for now, so in case one gets through, we can still generate correct code.
522         std::cerr << "Warning: Non-connected liverange found and extended to connected component of the CFG:" << con[i].name << ". Please contact sdcc authors with source code to reproduce.\n";
523 
524         cfg_sym_t cfg2;
525         boost::copy_graph(cfg, cfg2, boost::vertex_copy(forget_properties()).edge_copy(forget_properties()));
526         std::vector<boost::graph_traits<cfg_t>::vertices_size_type> component(num_vertices(cfg2));
527         boost::connected_components(cfg2, &component[0]);
528 
529         for (boost::graph_traits<cfg_t>::vertices_size_type j = 0; j < boost::num_vertices(cfg) - 1; j++)
530           {
531             if (std::find(cfg[j].alive.begin(), cfg[j].alive.end(), i) == cfg[j].alive.end())
532               continue;
533 
534             for (boost::graph_traits<cfg_t>::vertices_size_type k = 0; k < boost::num_vertices(cfg) - 1; k++)
535               if (component[j] == component[k] && std::find(cfg[k].alive.begin(), cfg[k].alive.end(), i) == cfg[k].alive.end())
536                 cfg[k].alive.push_back(i);
537           }
538       }
539 
540   // Sort alive and setup dying.
541   for (boost::graph_traits<cfg_t>::vertices_size_type i = 0; i < num_vertices(cfg); i++)
542     {
543       std::sort(cfg[i].alive.begin(), cfg[i].alive.end());
544       cfg[i].dying = cfg_dying_t(cfg[i].alive.begin(), cfg[i].alive.end());;
545       typedef boost::graph_traits<cfg_t>::adjacency_iterator adjacency_iter_t;
546       adjacency_iter_t j, j_end;
547       for (boost::tie(j, j_end) = adjacent_vertices(i, cfg); j != j_end; ++j)
548         {
549           cfg_alive_t::const_iterator v, v_end;
550           for (v = cfg[*j].alive.begin(), v_end = cfg[*j].alive.end(); v != v_end; ++v)
551             {
552               const symbol *const vsym = (symbol *)(hTabItemWithKey(liveRanges, con[*v].v));
553 
554               const operand *const left = IC_LEFT(cfg[*j].ic);
555               const operand *const right = IC_RIGHT(cfg[*j].ic);
556               const operand *const result = IC_RESULT(cfg[*j].ic);
557 
558               if (!(POINTER_SET(cfg[*j].ic) || cfg[*j].ic->op == SET_VALUE_AT_ADDRESS) &&
559                 (!left || !IS_SYMOP(left) || OP_SYMBOL_CONST(left)->key != vsym->key) &&
560                 (!right || !IS_SYMOP(right) || OP_SYMBOL_CONST(right)->key != vsym->key) &&
561                 result && IS_SYMOP(result) && OP_SYMBOL_CONST(result)->key == vsym->key)
562                 continue;
563 
564               cfg[i].dying.erase(*v);
565             }
566         }
567     }
568 
569   // Construct conflict graph
570   for (boost::graph_traits<cfg_t>::vertices_size_type i = 0; i < num_vertices(cfg); i++)
571     {
572       cfg_alive_t::const_iterator v, v_end;
573       const iCode *ic = cfg[i].ic;
574 
575       for (v = cfg[i].alive.begin(), v_end = cfg[i].alive.end(); v != v_end; ++v)
576         {
577           cfg_alive_t::const_iterator v2, v2_end;
578 
579           // Conflict between operands are handled by add_operand_conflicts_in_node().
580           if (cfg[i].dying.find (*v) != cfg[i].dying.end())
581             continue;
582           if (ic->op != IFX && ic->op != JUMPTABLE && IC_RESULT(ic) && IS_SYMOP(IC_RESULT(ic)))
583             {
584               operand_map_t::const_iterator oi, oi_end;
585               for(boost::tie(oi, oi_end) = cfg[i].operands.equal_range(OP_SYMBOL_CONST(IC_RESULT(ic))->key); oi != oi_end; ++oi)
586                 if(oi->second == *v)
587                   goto next_var;
588             }
589 
590           // Here, v is a variable that survives cfg[i].
591           // TODO: Check if we can use v, ++v2 instead of cfg[i].alive.begin() to speed things up.
592           for (v2 = cfg[i].alive.begin(), v2_end = cfg[i].alive.end(); v2 != v2_end; ++v2)
593             {
594               if(*v == *v2)
595                 continue;
596               if (cfg[i].dying.find (*v2) != cfg[i].dying.end())
597                 continue;
598 
599               boost::add_edge(*v, *v2, con);
600             }
601 
602           next_var:
603             ;
604         }
605     }
606 
607   return(start_ic);
608 }
609 
610 // Computes live ranges for tree decomposition from live ranges from cfg.
alive_tree_dec(tree_dec_t & tree_dec,const cfg_t & cfg)611 inline void alive_tree_dec(tree_dec_t &tree_dec, const cfg_t &cfg)
612 {
613   for (unsigned int i = 0; i < num_vertices(tree_dec); i++)
614     {
615       std::set<unsigned int>::const_iterator v;
616       for (v = tree_dec[i].bag.begin(); v != tree_dec[i].bag.end(); ++v)
617         tree_dec[i].alive.insert(cfg[*v].alive.begin(), cfg[*v].alive.end());
618     }
619 }
620 
621 #if defined(DEBUG_RALLOC_DEC) || defined (DEBUG_RALLOC_DEC_ASS)
print_assignment(const assignment & a)622 static void print_assignment(const assignment &a)
623 {
624   varset_t::const_iterator i;
625   std::cout << "[";
626   for (i = a.local.begin(); i != a.local.end(); ++i)
627     std::cout << "(" << int(*i) << ", " << int(a.global[*i]) << "), ";
628   std::cout << "c: " << a.s << "]";
629 }
630 #endif
631 
632 template <class I_t>
assignment_conflict(const assignment & a,const I_t & I,var_t v,reg_t r)633 bool assignment_conflict(const assignment &a, const I_t &I, var_t v, reg_t r)
634 {
635   varset_t::const_iterator i, i_end;
636 
637   for (i = a.local.begin(), i_end = a.local.end(); i != i_end; ++i)
638     {
639       if (a.global[*i] != r)
640         continue;
641       if (boost::edge(*i, v, I).second)
642         return(true);
643     }
644 
645   return(false);
646 }
647 
648 template<class G_t>
assignments_introduce_instruction(assignment_list_t & alist,unsigned short int i,const G_t & G)649 void assignments_introduce_instruction(assignment_list_t &alist, unsigned short int i, const G_t &G)
650 {
651   assignment_list_t::iterator ai, ai_end;
652 
653 #if !defined(_MSC_VER) // Efficient code - reduces total SDCC runtime by about 5.5% vs. code below, but doesn't work with MSVC++ (at least up to MSVC++ 2015)
654   struct inserter_t
655     {
656       explicit inserter_t(const std::vector<reg_t>& g, i_assignment_t& a) : global(g), ia(a)
657         {
658 	}
659       inserter_t& operator=(var_t v)
660         {
661           if (global[v] >= 0)
662             ia.add_var(v, global[v]);
663           return(*this);
664         }
665       inserter_t& operator*()
666         {
667           return(*this);
668         }
669       inserter_t& operator++()
670         {
671           return(*this);
672         }
673       inserter_t& operator++(int i)
674         {
675           i;
676           return(*this);
677         }
678       private:
679         const std::vector<reg_t>& global;
680         i_assignment_t& ia;
681     };
682 
683   for (ai = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai)
684     {
685       i_assignment_t ia;
686 
687       std::set_intersection(ai->local.begin(), ai->local.end(), G[i].alive.begin(), G[i].alive.end(), inserter_t(ai->global, ia));
688 
689       ai->i_assignment = ia;
690     }
691 #else // Human-readable code
692   for (ai = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai)
693     {
694       varset_t i_variables;
695 
696       std::set_intersection(ai->local.begin(), ai->local.end(), G[i].alive.begin(), G[i].alive.end(), std::inserter(i_variables, i_variables.end()));
697 
698       i_assignment_t ia;
699 
700       varset_t::const_iterator v, v_end;
701 
702       for (v = i_variables.begin(), v_end = i_variables.end(); v != v_end; ++v)
703         if (ai->global[*v] >= 0)
704           ia.add_var(*v, ai->global[*v]);
705 
706       ai->i_assignment = ia;
707     }
708 #endif
709 }
710 
711 template <class G_t, class I_t>
assignments_introduce_variable(assignment_list_t & alist,unsigned short int i,short int v,const G_t & G,const I_t & I)712 static void assignments_introduce_variable(assignment_list_t &alist, unsigned short int i, short int v, const G_t &G, const I_t &I)
713 {
714   assignment_list_t::iterator ai;
715   bool a_initialized;
716   assignment a;
717   size_t c, c_end;
718 
719   for (ai = alist.begin(), c = 0, c_end = alist.size(); c < c_end; c++, ai++)
720     {
721       a_initialized = false;
722 
723       for (reg_t r = 0; r < port->num_regs; r++)
724         {
725           if (!assignment_conflict(*ai, I, v, r))
726             {
727               if(!a_initialized)
728                 {
729                   a = *ai;
730                   ai->marked = true;
731                   a.marked = false;
732                   varset_t::iterator i = std::lower_bound(a.local.begin(), a.local.end(), v);
733                   if (i == a.local.end() || *i != v)
734                     a.local.insert(i, v);
735                 }
736               a.global[v] = r;
737               a.i_assignment.add_var(v, r);
738               if(!assignment_hopeless(a, i, G, I, v))
739                 alist.push_back(a);
740               a.i_assignment.remove_var(v);
741             }
742         }
743     }
744 }
745 
746 struct assignment_rep
747 {
748   assignment_list_t::iterator i;
749   float s;
750 
operator <assignment_rep751   bool operator<(const assignment_rep& a) const
752   {
753     return(s < a.s);
754   }
755 };
756 
757 template <class I_t>
compability_cost(const assignment & a,const assignment & ac,const I_t & I)758 float compability_cost(const assignment& a, const assignment& ac, const I_t &I)
759 {
760   typedef typename boost::graph_traits<I_t>::adjacency_iterator adjacency_iter_t;
761 
762   float c = 0.0f;
763 
764   varset_t::const_iterator vi, vi_end;
765 
766   for(vi = ac.local.begin(), vi_end = ac.local.end(); vi != vi_end; ++vi)
767     {
768       const var_t v = *vi;
769       if(a.global[v] != ac.global[v])
770       {
771         c += 1000.0f;
772         continue;
773       }
774 #if 0 // This improves the quality of assignments, but it has a big runtime overhead for some cases.
775       adjacency_iter_t j, j_end;
776       for (boost::tie(j, j_end) = adjacent_vertices(v, I); j != j_end; ++j)
777         if(ac.global[v] != -1 && a.global[*j] == ac.global[v])
778         {
779           c += 1000.0f;
780           break;
781         }
782 #endif
783     }
784 
785   return(c);
786 }
787 
788 // Ensure that we never get more than options.max_allocs_per_node assignments at a single node of the tree decomposition.
789 // Tries to drop the worst ones first (but never drop the empty assignment, as it's the only one guaranteed to be always valid).
790 template <class G_t, class I_t>
drop_worst_assignments(assignment_list_t & alist,unsigned short int i,const G_t & G,const I_t & I,const assignment & ac,bool * const assignment_optimal)791 static void drop_worst_assignments(assignment_list_t &alist, unsigned short int i, const G_t &G, const I_t &I, const assignment& ac, bool *const assignment_optimal)
792 {
793   unsigned int n;
794   size_t alist_size;
795   assignment_list_t::iterator ai, an;
796 
797   if ((alist_size = alist.size()) * port->num_regs <= static_cast<size_t>(options.max_allocs_per_node) || alist_size <= 1)
798     return;
799 
800   *assignment_optimal = false;
801 
802 #ifdef DEBUG_RALLOC_DEC
803   std::cout << "Too many assignments here (" << i << "):" << alist_size << " > " << options.max_allocs_per_node / port->num_regs << ". Dropping some.\n"; std::cout.flush();
804 #endif
805 
806 #if 0
807   assignment_rep *arep = new assignment_rep[alist_size];
808 
809   for (n = 0, ai = alist.begin(); n < alist_size; ++ai, n++)
810     {
811       arep[n].i = ai;
812       arep[n].s = ai->s + rough_cost_estimate(*ai, i, G, I) + compability_cost(*ai, ac, I);
813     }
814 
815   std::nth_element(arep + 1, arep + options.max_allocs_per_node / port->num_regs, arep + alist_size);
816 
817   //std::cout << "nth elem. est. cost: " << arep[options.max_allocs_per_node / port->num_regs].s << "\n"; std::cout.flush();
818 
819   for (n = options.max_allocs_per_node / port->num_regs + 1; n < alist_size; n++)
820     alist.erase(arep[n].i);
821 #else // More efficient, reduces total SDCC runtime by about 1%.
822 
823   size_t endsize = options.max_allocs_per_node / port->num_regs + 1;
824   size_t arep_maxsize = std::min(alist_size, endsize * 2) + 1;
825   size_t m, k;
826   float bound = std::numeric_limits<float>::infinity();
827 
828   assignment_rep *arep = new assignment_rep[arep_maxsize];
829 
830   for(m = 0, n = 1, ai = alist.begin(), ++ai; n < alist_size; n++)
831     {
832       float s = ai->s;
833 
834       if(s > bound)
835         {
836           alist.erase(ai++);
837           continue;
838         }
839       s += compability_cost(*ai, ac, I);
840       if(s > bound)
841         {
842           alist.erase(ai++);
843           continue;
844         }
845       s += rough_cost_estimate(*ai, i, G, I);
846       if(s > bound)
847         {
848           alist.erase(ai++);
849           continue;
850         }
851 
852       if(m >= arep_maxsize - 1)
853       {
854         std::nth_element(arep, arep + (endsize - 1), arep + m);
855         for(k = endsize; k < m; k++)
856           alist.erase(arep[k].i);
857         bound = arep[endsize - 1].s;
858 
859         m = endsize;
860       }
861 
862       arep[m].i = ai;
863       arep[m].s = s;
864 
865       m++;
866 
867       ++ai;
868     }
869 
870   std::nth_element(arep, arep + (endsize - 1), arep + m);
871 
872   for (n = endsize; n < m; n++)
873     alist.erase(arep[n].i);
874 #endif
875 
876   delete[] arep;
877 }
878 
879 // Handle Leaf nodes in the nice tree decomposition
880 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_leaf(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I)881 static void tree_dec_ralloc_leaf(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I)
882 {
883 #ifdef DEBUG_RALLOC_DEC
884   std::cout << "Leaf (" << t << "):\n"; std::cout.flush();
885 #endif
886 
887   assignment a;
888   assignment_list_t &alist = T[t].assignments;
889 
890   a.s = 0;
891   a.global.resize(boost::num_vertices(I), -1);
892   alist.push_back(a);
893 
894 #ifdef DEBUG_RALLOC_DEC_ASS
895   assignment_list_t::iterator ai;
896   for(ai = alist.begin(); ai != alist.end(); ++ai)
897     {
898       print_assignment(*ai);
899       std::cout << "\n";
900     }
901   assignment best;
902   get_best_local_assignment(best, t, T);
903   std::cout << "Best: "; print_assignment(best); std::cout << "\n";
904 #endif
905 }
906 
907 // Handle introduce nodes in the nice tree decomposition
908 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_introduce(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I,const assignment & ac,bool * const assignment_optimal)909 static void tree_dec_ralloc_introduce(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I, const assignment& ac, bool *const assignment_optimal)
910 {
911   typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
912   adjacency_iter_t c, c_end;
913   assignment_list_t::iterator ai;
914   boost::tie(c, c_end) = adjacent_vertices(t, T);
915 
916 #ifdef DEBUG_RALLOC_DEC
917   std::cout << "Introduce (" << t << "):\n"; std::cout.flush();
918   std::cout << "ac: "; print_assignment(ac); std::cout << "\n";
919 #endif
920 
921   assignment_list_t &alist = T[t].assignments;
922 
923   std::swap(alist, T[*c].assignments);
924 
925   std::set<var_t> new_vars;
926   std::set_difference(T[t].alive.begin(), T[t].alive.end(), T[*c].alive.begin(), T[*c].alive.end(), std::inserter(new_vars, new_vars.end()));
927 
928   std::set<unsigned short> new_inst;
929   std::set_difference(T[t].bag.begin(), T[t].bag.end(), T[*c].bag.begin(), T[*c].bag.end(), std::inserter(new_inst, new_inst.end()));
930   unsigned short int i = *(new_inst.begin());
931 
932   // Extend to new instruction.
933   assignments_introduce_instruction(alist, i, G);
934 
935   std::set<var_t>::const_iterator v;
936   for (v = new_vars.begin(); v != new_vars.end(); ++v)
937     {
938       drop_worst_assignments(alist, i, G, I, ac, assignment_optimal);
939       assignments_introduce_variable(alist, i, *v, G, I);
940     }
941 
942   // Summation of costs and early removal of assignments.
943   for (ai = alist.begin(); ai != alist.end();)
944     {
945       if ((ai->s += (ai->i_costs[i] = instruction_cost(*ai, i, G, I))) == std::numeric_limits<float>::infinity())
946         ai = alist.erase(ai);
947       else
948         ++ai;
949     }
950 
951   // Free memory in the std::set<var_t, boost::pool_allocator<var_t> > that live in the assignments in the list.
952   //boost::singleton_pool<boost::fast_pool_allocator_tag, sizeof(var_t)>::release_memory();
953 
954 #ifdef DEBUG_RALLOC_DEC_ASS
955   for(ai = alist.begin(); ai != alist.end(); ++ai)
956     {
957       print_assignment(*ai);
958       std::cout << "\n";
959     }
960 
961   assignment best;
962   get_best_local_assignment(best, t, T);
963   std::cout << "Best: "; print_assignment(best); std::cout << "\n";
964 #endif
965 }
966 
assignments_locally_same(const assignment & a1,const assignment & a2)967 static bool assignments_locally_same(const assignment &a1, const assignment &a2)
968 {
969   if (a1.local != a2.local)
970     return(false);
971 
972   varset_t::const_iterator i, i_end;
973   for (i = a1.local.begin(), i_end = a1.local.end(); i != i_end; ++i)
974     if (a1.global[*i] != a2.global[*i])
975       return(false);
976 
977   return(true);
978 }
979 
980 // Handle forget nodes in the nice tree decomposition
981 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_forget(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I)982 static void tree_dec_ralloc_forget(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I)
983 {
984   typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
985   adjacency_iter_t c, c_end;
986   boost::tie(c, c_end) = adjacent_vertices(t, T);
987 
988 #ifdef DEBUG_RALLOC_DEC
989   std::cout << "Forget (" << t << "):\n"; std::cout.flush();
990 #endif
991 
992   assignment_list_t &alist = T[t].assignments;
993 
994   std::swap(alist, T[*c].assignments);
995 
996   std::set<unsigned short int> old_inst;
997   std::set_difference(T[*c].bag.begin(), T[*c].bag.end(), T[t].bag.begin(), T[t].bag.end(), std::inserter(old_inst, old_inst.end()));
998   wassert(old_inst.size() == 1);
999   unsigned short int i = *(old_inst.begin());
1000 
1001   varset_t old_vars;
1002   std::set_difference(T[*c].alive.begin(), T[*c].alive.end(), T[t].alive.begin(), T[t].alive.end(), std::inserter(old_vars, old_vars.end()));
1003 
1004   assignment_list_t::iterator ai, aif;
1005 
1006   // Restrict assignments (locally) to current variables.
1007   varset_t newlocal;
1008   for (ai = alist.begin(); ai != alist.end(); ++ai)
1009     {
1010       newlocal.clear();
1011       std::set_difference(ai->local.begin(), ai->local.end(), old_vars.begin(), old_vars.end(), std::inserter(newlocal, newlocal.end()));
1012       std::swap(ai->local, newlocal);
1013 
1014       ai->i_costs.erase(i);
1015     }
1016 
1017   alist.sort();
1018 
1019   // Collapse (locally) identical assignments.
1020   for (ai = alist.begin(); ai != alist.end();)
1021     {
1022       aif = ai;
1023 
1024       for (++ai; ai != alist.end() && assignments_locally_same(*aif, *ai);)
1025         {
1026           if (aif->s > ai->s)
1027             {
1028               alist.erase(aif);
1029               aif = ai;
1030               ++ai;
1031             }
1032           else
1033             ai = alist.erase(ai);
1034         }
1035     }
1036 
1037   // Free memory in the std::set<var_t, boost::pool_allocator<var_t> > that live in the assignments in the list.
1038   //boost::singleton_pool<boost::fast_pool_allocator_tag, sizeof(var_t)>::release_memory();
1039 
1040 #ifdef DEBUG_RALLOC_DEC
1041   std::cout << "Remaining assignments: " << alist.size() << "\n"; std::cout.flush();
1042 #endif
1043 
1044 #ifdef DEBUG_RALLOC_DEC_ASS
1045   for(ai = alist.begin(); ai != alist.end(); ++ai)
1046     {
1047       print_assignment(*ai);
1048       std::cout << "\n";
1049     }
1050 
1051   assignment best;
1052   get_best_local_assignment(best, t, T);
1053   std::cout << "Best: "; print_assignment(best); std::cout << "\n";
1054 #endif
1055 }
1056 
1057 // Handle join nodes in the nice tree decomposition
1058 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_join(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I)1059 static void tree_dec_ralloc_join(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I)
1060 {
1061   typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1062   adjacency_iter_t c, c_end, c2, c3;
1063   boost::tie(c, c_end) = adjacent_vertices(t, T);
1064 
1065 #ifdef DEBUG_RALLOC_DEC
1066   std::cout << "Join (" << t << "):\n"; std::cout.flush();
1067 #endif
1068 
1069   c2 = c;
1070   ++c;
1071   c3 = c;
1072 
1073   assignment_list_t &alist = T[t].assignments;
1074   assignment_list_t &alist2 = T[*c2].assignments;
1075   std::swap(alist, T[*c3].assignments);
1076 
1077   alist.sort();
1078   alist2.sort();
1079 
1080   assignment_list_t::iterator ai, ai2;
1081   for (ai = alist.begin(), ai2 = alist2.begin(); ai != alist.end() && ai2 != alist2.end();)
1082     {
1083       if (assignments_locally_same(*ai, *ai2))
1084         {
1085           ai->s += ai2->s;
1086           // Avoid double-counting instruction costs.
1087           std::set<unsigned int>::iterator bi;
1088           for (bi = T[t].bag.begin(); bi != T[t].bag.end(); ++bi)
1089             ai->s -= ai->i_costs[*bi];
1090           for (size_t i = 0; i < ai->global.size(); i++)
1091             ai->global[i] = ((ai->global[i] != -1) ? ai->global[i] : ai2->global[i]);
1092           ++ai;
1093           ++ai2;
1094         }
1095       else if (*ai < *ai2)
1096         ai = alist.erase(ai);
1097       else if (*ai2 < *ai)
1098         ++ai2;
1099     }
1100   while(ai != alist.end())
1101     ai = alist.erase(ai);
1102 
1103   alist2.clear();
1104 
1105 #ifdef DEBUG_RALLOC_DEC
1106   std::cout << "Remaining assignments: " << alist.size() << "\n"; std::cout.flush();
1107 #endif
1108 
1109 #ifdef DEBUG_RALLOC_DEC_ASS
1110   for(std::list<assignment>::iterator ai = alist.begin(); ai != alist.end(); ++ai)
1111     {
1112       print_assignment(*ai);
1113       std::cout << "\n";
1114     }
1115 #endif
1116 }
1117 
1118 template <class T_t>
get_best_local_assignment(assignment & a,typename boost::graph_traits<T_t>::vertex_descriptor t,const T_t & T)1119 void get_best_local_assignment(assignment &a, typename boost::graph_traits<T_t>::vertex_descriptor t, const T_t &T)
1120 {
1121   const assignment_list_t &alist = T[t].assignments;
1122 
1123   assignment_list_t::const_iterator ai, ai_end, ai_best;
1124   for(ai = ai_best = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai)
1125     if(ai->s < ai_best->s)
1126       ai_best = ai;
1127 
1128   a = *ai_best;
1129 }
1130 
1131 // Handle nodes in the tree decomposition, by detecting their type and calling the appropriate function. Recurses.
1132 template <class T_t, class G_t, class I_t>
tree_dec_ralloc_nodes(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,const G_t & G,const I_t & I,const assignment & ac,bool * const assignment_optimal)1133 static void tree_dec_ralloc_nodes(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, const G_t &G, const I_t &I, const assignment& ac, bool *const assignment_optimal)
1134 {
1135   typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1136 
1137   adjacency_iter_t c, c_end;
1138   typename boost::graph_traits<T_t>::vertex_descriptor c0, c1;
1139 
1140   boost::tie(c, c_end) = adjacent_vertices(t, T);
1141 
1142   switch (out_degree(t, T))
1143     {
1144     case 0:
1145       tree_dec_ralloc_leaf(T, t, G, I);
1146       break;
1147     case 1:
1148       c0 = *c;
1149       tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal);
1150       T[c0].bag.size() < T[t].bag.size() ? tree_dec_ralloc_introduce(T, t, G, I, ac, assignment_optimal) : tree_dec_ralloc_forget(T, t, G, I);
1151       break;
1152     case 2:
1153       c0 = *c++;
1154       c1 = *c;
1155 
1156       if (T[c0].weight < T[c1].weight) // Minimize memory consumption needed for keeping intermediate results. As a side effect, this also helps the ac mechanism in the heuristic.
1157         std::swap (c0, c1);
1158 
1159       tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal);
1160         {
1161           assignment *ac2 = new assignment;
1162           get_best_local_assignment_biased(*ac2, c0, T);
1163           tree_dec_ralloc_nodes(T, c1, G, I, *ac2, assignment_optimal);
1164           delete ac2;
1165         }
1166       tree_dec_ralloc_join(T, t, G, I);
1167       break;
1168     default:
1169       std::cerr << "Not nice.\n";
1170       break;
1171     }
1172 }
1173 
1174 // Find the best root selecting from t_old and the leafs under t.
1175 template <class T_t>
find_best_root(const T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t,size_t t_s,typename boost::graph_traits<T_t>::vertex_descriptor t_old,size_t t_old_s)1176 static std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t> find_best_root(const T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t, size_t t_s, typename boost::graph_traits<T_t>::vertex_descriptor t_old, size_t t_old_s)
1177 {
1178   typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1179   adjacency_iter_t c, c_end;
1180   typename boost::graph_traits<T_t>::vertex_descriptor c0, c1, t0;
1181   size_t t0_s;
1182 
1183   boost::tie(c, c_end) = adjacent_vertices(t, T);
1184 
1185   switch (out_degree(t, T))
1186     {
1187     case 0:
1188       return(t_s > t_old_s ? std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t>(t, t_s) : std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t>(t_old, t_old_s));
1189     case 1:
1190       return(find_best_root(T, *c, T[*c].alive.size() ? T[*c].alive.size() : t_s, t_old, t_old_s));
1191     case 2:
1192       c0 = *c++;
1193       c1 = *c;
1194       boost::tie(t0, t0_s) = find_best_root(T, c0, T[c0].alive.size() ? T[c0].alive.size() : t_s, t_old, t_old_s);
1195       return(find_best_root(T, c1, T[c1].alive.size() ? T[c1].alive.size() : t_s, t0_s > t_old_s ? t0 : t_old, t0_s > t_old_s ? t0_s : t_old_s));
1196       break;
1197     default:
1198       std::cerr << "Not nice.\n";
1199       break;
1200     }
1201 
1202   return(std::pair<typename boost::graph_traits<T_t>::vertex_descriptor, size_t>(t_old, t_old_s));
1203 }
1204 
1205 // Change the root to t.
1206 template <class T_t>
re_root(T_t & T,typename boost::graph_traits<T_t>::vertex_descriptor t)1207 static void re_root(T_t &T, typename boost::graph_traits<T_t>::vertex_descriptor t)
1208 {
1209   typename boost::graph_traits<T_t>::vertex_descriptor s0, s1, s2;
1210   typename boost::graph_traits<T_t>::in_edge_iterator e, e_end;
1211 
1212   boost::tie(e, e_end) = boost::in_edges(t, T);
1213   if (e == e_end)
1214     return;
1215 
1216   s0 = t;
1217   s1 = boost::source(*e, T);
1218 
1219   for (boost::tie(e, e_end) = boost::in_edges(s1, T); e != e_end; boost::tie(e, e_end) = boost::in_edges(s1, T))
1220     {
1221       s2 = boost::source(*e, T);
1222       boost::remove_edge(s1, s0, T);
1223       boost::add_edge(s0, s1, T);
1224       s0 = s1;
1225       s1 = s2;
1226     }
1227   boost::remove_edge(s1, s0, T);
1228   boost::add_edge(s0, s1, T);
1229 }
1230 
1231 // Change the root to improve the assignment removal heuristic.
1232 template <class T_t>
good_re_root(T_t & T)1233 static void good_re_root(T_t &T)
1234 {
1235   typename boost::graph_traits<T_t>::vertex_descriptor t;
1236 
1237   typedef typename boost::graph_traits<T_t>::adjacency_iterator adjacency_iter_t;
1238   adjacency_iter_t c, c_end;
1239 
1240   t = find_root(T);
1241 
1242   for (boost::tie(c, c_end) = boost::adjacent_vertices(t, T); c != c_end && !T[*c].alive.size();)
1243     boost::tie(c, c_end) = boost::adjacent_vertices(*c, T);
1244 
1245   size_t t_s = (c != c_end ? T[*c].alive.size() : 0);
1246   t = find_best_root(T, t, t_s, t, t_s).first;
1247 
1248   if (T[t].alive.size())
1249     {
1250       std::cerr << "Error: Invalid root.\n";
1251       return;
1252     }
1253 
1254   re_root(T, t);
1255 }
1256 
1257 // Dump conflict graph, with numbered nodes, show live variables at each node.
dump_con(const con_t & con)1258 static void dump_con(const con_t &con)
1259 {
1260   if (!currFunc)
1261     return;
1262 
1263   std::ofstream dump_file((std::string(dstFileName) + ".dumpralloccon" + currFunc->rname + ".dot").c_str());
1264 
1265   std::string *name = new std::string[num_vertices(con)];
1266   for (var_t i = 0; static_cast<boost::graph_traits<cfg_t>::vertices_size_type>(i) < boost::num_vertices(con); i++)
1267     {
1268       std::ostringstream os;
1269       os << i;
1270       if (con[i].name)
1271         os << " : " << con[i].name << ":" << con[i].byte;
1272       name[i] = os.str();
1273     }
1274   boost::write_graphviz(dump_file, con, boost::make_label_writer(name));
1275   delete[] name;
1276 }
1277 
1278 // Dump cfg, with numbered nodes, show live variables at each node.
dump_cfg(const cfg_t & cfg)1279 static void dump_cfg(const cfg_t &cfg)
1280 {
1281   if (!currFunc)
1282     return;
1283 
1284   std::ofstream dump_file((std::string(dstFileName) + ".dumpralloccfg" + currFunc->rname + ".dot").c_str());
1285 
1286   std::string *name = new std::string[num_vertices(cfg)];
1287   for (unsigned int i = 0; i < boost::num_vertices(cfg); i++)
1288     {
1289       std::ostringstream os;
1290       os << i << ", " << cfg[i].ic->key << ": ";
1291       cfg_alive_t::const_iterator v;
1292       for (v = cfg[i].alive.begin(); v != cfg[i].alive.end(); ++v)
1293         os << *v << " ";
1294       name[i] = os.str();
1295     }
1296 
1297   boost::write_graphviz(dump_file, cfg, boost::make_label_writer(name), boost::default_writer(), cfg_titlewriter(currFunc->rname, "register allocator"));
1298   delete[] name;
1299 }
1300 
1301 // Dump tree decomposition, show bag and live variables at each node.
dump_tree_decomposition(const tree_dec_t & tree_dec)1302 static void dump_tree_decomposition(const tree_dec_t &tree_dec)
1303 {
1304   if (!currFunc)
1305     return;
1306 
1307   std::ofstream dump_file((std::string(dstFileName) + ".dumprallocdec" + currFunc->rname + ".dot").c_str());
1308 
1309   unsigned int w = 0;
1310 
1311   std::string *name = new std::string[num_vertices(tree_dec)];
1312   for (unsigned int i = 0; i < boost::num_vertices(tree_dec); i++)
1313     {
1314       if (tree_dec[i].bag.size() > w)
1315         w = tree_dec[i].bag.size();
1316       std::ostringstream os;
1317       std::set<unsigned int>::const_iterator v1;
1318       os << i << " | ";
1319       for (v1 = tree_dec[i].bag.begin(); v1 != tree_dec[i].bag.end(); ++v1)
1320         os << *v1 << " ";
1321       os << ": ";
1322       std::set<var_t>::const_iterator v2;
1323       for (v2 = tree_dec[i].alive.begin(); v2 != tree_dec[i].alive.end(); ++v2)
1324         os << *v2 << " ";
1325       name[i] = os.str();
1326     }
1327 
1328   boost::write_graphviz(dump_file, tree_dec, boost::make_label_writer(name), boost::default_writer(), dec_titlewriter(w - 1, currFunc->rname, "register allocator"));
1329   delete[] name;
1330 
1331 #ifdef D_RALLOC_DEC
1332   std::cout << "Width: " << (w  - 1) << "(" << currFunc->name << ")\n";
1333 #endif
1334 }
1335 
1336 #endif
1337 
1338