1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19 
20 /**
21  * @file
 * @brief       Belady's spill algorithm.
23  * @author      Daniel Grund, Matthias Braun
24  * @date        20.09.2005
25  */
26 #include "config.h"
27 
28 #include <stdbool.h>
29 
30 #include "obst.h"
31 #include "irprintf_t.h"
32 #include "irgraph.h"
33 #include "irnode.h"
34 #include "irmode.h"
35 #include "irgwalk.h"
36 #include "irloop.h"
37 #include "iredges_t.h"
38 #include "ircons_t.h"
39 #include "irprintf.h"
40 #include "irnodeset.h"
41 #include "irtools.h"
42 #include "util.h"
43 
44 #include "beutil.h"
45 #include "bearch.h"
46 #include "beuses.h"
47 #include "besched.h"
48 #include "beirgmod.h"
49 #include "belive_t.h"
50 #include "benode.h"
51 #include "bechordal_t.h"
52 #include "bespill.h"
53 #include "beloopana.h"
54 #include "beirg.h"
55 #include "bespillutil.h"
56 #include "bemodule.h"
57 
/* Debug mask bits. Each value is a distinct power of two; the ones used in
 * this file are passed as the mask argument of the DB(()) calls. */
#define DBG_SPILL     1
#define DBG_WSETS     2
#define DBG_FIX       4
#define DBG_DECIDE    8
#define DBG_START    16
#define DBG_SLOTS    32
#define DBG_TRACE    64
#define DBG_WORKSET 128
/* debug module handle; registered in be_init_spillbelady() */
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/* Placeholder that workset_insert() stores in loc_t::time for a freshly
 * inserted value. */
#define TIME_UNDEFINED 6666
69 
/**
 * An association between a node and a point in time.
 * Worksets hold one loc_t per value they contain.
 */
typedef struct loc_t {
	ir_node          *node;     /**< the value this entry describes */
	unsigned          time;     /**< A use time (see beuses.h). */
	bool              spilled;  /**< value was already spilled on this path */
} loc_t;
78 
/**
 * A set of values together with their use times.
 * The entries live in a flexible array member; the allocation helpers in
 * this file always reserve room for n_regs entries.
 */
typedef struct workset_t {
	unsigned len;     /**< current length */
	loc_t    vals[];  /**< array of the values/distances in this working set */
} workset_t;
83 
/* File-scope state of one spiller run; everything up to blocklist is
 * (re)initialised by be_spill_belady(). */
static struct obstack               obst;   /**< storage for worksets and block infos */
static const arch_register_class_t *cls;    /**< register class being processed */
static const be_lv_t               *lv;     /**< liveness information of the graph */
static be_loopana_t                *loop_ana; /**< loop pressure analysis */
static unsigned                     n_regs; /**< number of allocatable registers of
	                                             cls; capacity of every workset */
static workset_t                   *ws;     /**< the main workset used while
	                                             processing a block. */
static be_uses_t                   *uses;   /**< env for the next-use magic */
static ir_node                     *instr;  /**< current instruction */
static spill_env_t                 *senv;   /**< see bespill.h */
static ir_node                    **blocklist; /**< blocks in cfg postorder */

/* Tunables exposed through the option table below. */
/* gates the be_add_spill() calls in displace() and fix_block_borders() */
static int                          move_spills      = true;
/* lets to_take_or_not_to_take() delay values whose next use lies in a
 * shallower loop */
static int                          respectloopdepth = true;
/* lets to_take_or_not_to_take() decide early from predecessor worksets */
static int                          improve_known_preds = true;
/* factor to weight the different costs of reloading/rematerializing a node
   (see bespill.h be_get_reload_costs_no_weight) */
static int                          remat_bonus      = 10;
102 
/* Command line options of the spiller; the table is added to the
 * "be" -> "belady" option group in be_init_spillbelady(). Each entry binds an
 * option name to one of the static tunables above. */
static const lc_opt_table_entry_t options[] = {
	LC_OPT_ENT_BOOL   ("movespills", "try to move spills out of loops", &move_spills),
	LC_OPT_ENT_BOOL   ("respectloopdepth", "outermost loop cutting", &respectloopdepth),
	LC_OPT_ENT_BOOL   ("improveknownpreds", "known preds cutting", &improve_known_preds),
	LC_OPT_ENT_INT    ("rematbonus", "give bonus to rematerialisable nodes", &remat_bonus),
	LC_OPT_LAST
};
110 
111 /**
112  * Alloc a new workset on obstack @p ob with maximum size @p max
113  */
new_workset(void)114 static workset_t *new_workset(void)
115 {
116 	return OALLOCFZ(&obst, workset_t, vals, n_regs);
117 }
118 
119 /**
120  * Alloc a new instance on obstack and make it equal to @param workset
121  */
workset_clone(workset_t * workset)122 static workset_t *workset_clone(workset_t *workset)
123 {
124 	workset_t *res = OALLOCF(&obst, workset_t, vals, n_regs);
125 	memcpy(res, workset, sizeof(*res) + n_regs * sizeof(res->vals[0]));
126 	return res;
127 }
128 
129 /**
130  * Copy workset @param src to @param tgt
131  */
workset_copy(workset_t * dest,const workset_t * src)132 static void workset_copy(workset_t *dest, const workset_t *src)
133 {
134 	size_t size = sizeof(*src) + n_regs * sizeof(src->vals[0]);
135 	memcpy(dest, src, size);
136 }
137 
138 /**
139  * Overwrites the current content array of @param ws with the
140  * @param count locations given at memory @param locs.
141  * Set the length of @param ws to count.
142  */
workset_bulk_fill(workset_t * workset,int count,const loc_t * locs)143 static void workset_bulk_fill(workset_t *workset, int count, const loc_t *locs)
144 {
145 	workset->len = count;
146 	memcpy(&(workset->vals[0]), locs, count * sizeof(locs[0]));
147 }
148 
149 /**
150  * Inserts the value @p val into the workset, iff it is not
151  * already contained. The workset must not be full.
152  */
workset_insert(workset_t * workset,ir_node * val,bool spilled)153 static void workset_insert(workset_t *workset, ir_node *val, bool spilled)
154 {
155 	loc_t    *loc;
156 	unsigned  i;
157 	/* check for current regclass */
158 	assert(arch_irn_consider_in_reg_alloc(cls, val));
159 
160 	/* check if val is already contained */
161 	for (i = 0; i < workset->len; ++i) {
162 		loc = &workset->vals[i];
163 		if (loc->node == val) {
164 			if (spilled) {
165 				loc->spilled = true;
166 			}
167 			return;
168 		}
169 	}
170 
171 	/* insert val */
172 	assert(workset->len < n_regs && "Workset already full!");
173 	loc           = &workset->vals[workset->len];
174 	loc->node     = val;
175 	loc->spilled  = spilled;
176 	loc->time     = TIME_UNDEFINED;
177 	workset->len++;
178 }
179 
180 /**
181  * Removes all entries from this workset
182  */
workset_clear(workset_t * workset)183 static void workset_clear(workset_t *workset)
184 {
185 	workset->len = 0;
186 }
187 
188 /**
189  * Removes the value @p val from the workset if present.
190  */
workset_remove(workset_t * workset,ir_node * val)191 static void workset_remove(workset_t *workset, ir_node *val)
192 {
193 	unsigned i;
194 	for (i = 0; i < workset->len; ++i) {
195 		if (workset->vals[i].node == val) {
196 			workset->vals[i] = workset->vals[--workset->len];
197 			return;
198 		}
199 	}
200 }
201 
workset_contains(const workset_t * ws,const ir_node * val)202 static const loc_t *workset_contains(const workset_t *ws, const ir_node *val)
203 {
204 	unsigned i;
205 	for (i = 0; i < ws->len; ++i) {
206 		if (ws->vals[i].node == val)
207 			return &ws->vals[i];
208 	}
209 
210 	return NULL;
211 }
212 
loc_compare(const void * a,const void * b)213 static int loc_compare(const void *a, const void *b)
214 {
215 	const loc_t   *p  = ((const loc_t*) a);
216 	const loc_t   *q  = ((const loc_t*) b);
217 	const unsigned pt = p->time;
218 	const unsigned qt = q->time;
219 
220 	if (pt < qt)
221 		return -1;
222 	if (pt > qt)
223 		return 1;
224 
225 	return get_irn_node_nr(p->node) - get_irn_node_nr(q->node);
226 }
227 
/**
 * Sorts the entries of @p workset with loc_compare(), i.e. by increasing
 * time and then by node number.
 *
 * @param workset  the workset to sort in place
 */
static void workset_sort(workset_t *workset)
{
	loc_t  *entries = workset->vals;
	size_t  count   = workset->len;

	qsort(entries, count, sizeof(entries[0]), loc_compare);
}
232 
workset_get_time(const workset_t * workset,unsigned idx)233 static inline unsigned workset_get_time(const workset_t *workset, unsigned idx)
234 {
235 	return workset->vals[idx].time;
236 }
237 
/**
 * Stores @p time in entry @p idx of @p workset.
 *
 * @param workset  the workset to modify
 * @param idx      index of the entry (not range-checked)
 * @param time     the new time value
 */
static inline void workset_set_time(workset_t *workset, unsigned idx,
                                    unsigned time)
{
	loc_t *entry = &workset->vals[idx];
	entry->time = time;
}
243 
workset_get_length(const workset_t * workset)244 static inline unsigned workset_get_length(const workset_t *workset)
245 {
246 	return workset->len;
247 }
248 
/**
 * Sets the number of entries of @p workset to @p len without touching the
 * entry slots (used to cut off entries at the end).
 *
 * @param workset  the workset to modify
 * @param len      the new length
 */
static inline void workset_set_length(workset_t *workset, unsigned len)
{
	const unsigned new_len = len;
	workset->len = new_len;
}
253 
workset_get_val(const workset_t * workset,unsigned idx)254 static inline ir_node *workset_get_val(const workset_t *workset, unsigned idx)
255 {
256 	return workset->vals[idx].node;
257 }
258 
/**
 * Iterates over all values in the working set.
 * After the loop ran to completion @p v is NULL and @p i equals the length.
 * All arguments are evaluated several times, so they must be free of side
 * effects.
 *
 * @p ws The workset to iterate (a workset_t pointer expression)
 * @p v  A variable to put the current value in
 * @p i  An unsigned variable that receives the index of the current value
 */
#define workset_foreach(ws, v, i) \
	for ((i) = 0; \
	     (v) = ((i) < (ws)->len) ? (ws)->vals[(i)].node : NULL, (i) < (ws)->len; \
	     ++(i))
267 
/**
 * Per-block result of process_block(); attached to the block through its
 * link field (see set_block_info()).
 */
typedef struct block_info_t {
	workset_t *start_workset;  /**< copy of the workset at the start of the block */
	workset_t *end_workset;    /**< copy of the workset at the end of the block */
} block_info_t;
272 
new_block_info(void)273 static block_info_t *new_block_info(void)
274 {
275 	return OALLOCZ(&obst, block_info_t);
276 }
277 
get_block_info(const ir_node * block)278 static inline block_info_t *get_block_info(const ir_node *block)
279 {
280 	return (block_info_t*)get_irn_link(block);
281 }
282 
/**
 * Stores @p info in the link field of @p block.
 *
 * @param block  the block to annotate
 * @param info   the block info to attach
 */
static inline void set_block_info(ir_node *block, block_info_t *info)
{
	block_info_t *const payload = info;
	set_irn_link(block, payload);
}
287 
288 /**
289  * @return The distance to the next use or 0 if irn has dont_spill flag set
290  */
get_distance(ir_node * from,const ir_node * def,int skip_from_uses)291 static unsigned get_distance(ir_node *from, const ir_node *def, int skip_from_uses)
292 {
293 	be_next_use_t use;
294 	unsigned      costs;
295 	unsigned      time;
296 
297 	assert(!arch_irn_is_ignore(def));
298 
299 	use  = be_get_next_use(uses, from, def, skip_from_uses);
300 	time = use.time;
301 	if (USES_IS_INFINITE(time))
302 		return USES_INFINITY;
303 
304 	/* We have to keep nonspillable nodes in the workingset */
305 	if (arch_get_irn_flags(skip_Proj_const(def)) & arch_irn_flags_dont_spill)
306 		return 0;
307 
308 	/* give some bonus to rematerialisable nodes */
309 	if (remat_bonus > 0) {
310 		costs = be_get_reload_costs_no_weight(senv, def, use.before);
311 		assert(costs * remat_bonus < 1000);
312 		time  += 1000 - (costs * remat_bonus);
313 	}
314 
315 	return time;
316 }
317 
318 /**
319  * Performs the actions necessary to grant the request that:
320  * - new_vals can be held in registers
321  * - as few as possible other values are disposed
322  * - the worst values get disposed
323  *
324  * @p is_usage indicates that the values in new_vals are used (not defined)
325  * In this case reloads must be performed
326  */
displace(workset_t * new_vals,int is_usage)327 static void displace(workset_t *new_vals, int is_usage)
328 {
329 	ir_node **to_insert = ALLOCAN(ir_node*, n_regs);
330 	bool     *spilled   = ALLOCAN(bool,     n_regs);
331 	ir_node  *val;
332 	int       i;
333 	int       len;
334 	int       spills_needed;
335 	int       demand;
336 	unsigned  iter;
337 
338 	/* 1. Identify the number of needed slots and the values to reload */
339 	demand = 0;
340 	workset_foreach(new_vals, val, iter) {
341 		bool reloaded = false;
342 
343 		if (! workset_contains(ws, val)) {
344 			DB((dbg, DBG_DECIDE, "    insert %+F\n", val));
345 			if (is_usage) {
346 				DB((dbg, DBG_SPILL, "Reload %+F before %+F\n", val, instr));
347 				be_add_reload(senv, val, instr, cls, 1);
348 				reloaded = true;
349 			}
350 		} else {
351 			DB((dbg, DBG_DECIDE, "    %+F already in workset\n", val));
352 			assert(is_usage);
353 			/* remove the value from the current workset so it is not accidently
354 			 * spilled */
355 			workset_remove(ws, val);
356 		}
357 		spilled[demand]   = reloaded;
358 		to_insert[demand] = val;
359 		++demand;
360 	}
361 
362 	/* 2. Make room for at least 'demand' slots */
363 	len           = workset_get_length(ws);
364 	spills_needed = len + demand - n_regs;
365 	assert(spills_needed <= len);
366 
367 	/* Only make more free room if we do not have enough */
368 	if (spills_needed > 0) {
369 		DB((dbg, DBG_DECIDE, "    disposing %d values\n", spills_needed));
370 
371 		/* calculate current next-use distance for live values */
372 		for (i = 0; i < len; ++i) {
373 			ir_node  *val  = workset_get_val(ws, i);
374 			unsigned  dist = get_distance(instr, val, !is_usage);
375 			workset_set_time(ws, i, dist);
376 		}
377 
378 		/* sort entries by increasing nextuse-distance*/
379 		workset_sort(ws);
380 
381 		for (i = len - spills_needed; i < len; ++i) {
382 			ir_node *val = ws->vals[i].node;
383 
384 			DB((dbg, DBG_DECIDE, "    disposing node %+F (%u)\n", val,
385 			     workset_get_time(ws, i)));
386 
387 			if (move_spills) {
388 				if (!USES_IS_INFINITE(ws->vals[i].time)
389 						&& !ws->vals[i].spilled) {
390 					ir_node *after_pos = sched_prev(instr);
391 					DB((dbg, DBG_DECIDE, "Spill %+F after node %+F\n", val,
392 						after_pos));
393 					be_add_spill(senv, val, after_pos);
394 				}
395 			}
396 		}
397 
398 		/* kill the last 'demand' entries in the array */
399 		workset_set_length(ws, len - spills_needed);
400 	}
401 
402 	/* 3. Insert the new values into the workset */
403 	for (i = 0; i < demand; ++i) {
404 		ir_node *val = to_insert[i];
405 
406 		workset_insert(ws, val, spilled[i]);
407 	}
408 }
409 
/** Result of checking the end worksets of a block's predecessors for a value. */
enum {
	AVAILABLE_EVERYWHERE = 0, /**< contained in every predecessor workset */
	AVAILABLE_NOWHERE    = 1, /**< contained in no predecessor workset */
	AVAILABLE_PARTLY     = 2, /**< contained in some predecessor worksets */
	AVAILABLE_UNKNOWN    = 3  /**< not all predecessor worksets are known yet */
};
416 
/**
 * Checks in how many of the given predecessor worksets a value is contained.
 *
 * @param pred_worksets    the end worksets of the predecessors (none NULL)
 * @param n_pred_worksets  number of worksets, must be greater than 0
 * @param value            the value to look for
 * @param is_local_phi     if true, @p value is a Phi and for predecessor i
 *                         its i-th operand is looked up instead of the Phi
 * @return AVAILABLE_EVERYWHERE, AVAILABLE_NOWHERE or AVAILABLE_PARTLY
 */
static unsigned available_in_all_preds(workset_t* const* pred_worksets,
                                       size_t n_pred_worksets,
                                       const ir_node *value, bool is_local_phi)
{
	size_t n_found = 0;
	size_t pred;

	assert(n_pred_worksets > 0);

	/* count the predecessors that hold the value */
	for (pred = 0; pred < n_pred_worksets; ++pred) {
		const ir_node *wanted = value;

		if (is_local_phi) {
			assert(is_Phi(value));
			wanted = get_irn_n(value, pred);
		}

		if (workset_contains(pred_worksets[pred], wanted) != NULL)
			++n_found;
	}

	if (n_found == n_pred_worksets) {
		assert(n_found > 0);
		return AVAILABLE_EVERYWHERE;
	}
	if (n_found == 0)
		return AVAILABLE_NOWHERE;
	return AVAILABLE_PARTLY;
}
467 
468 /** Decides whether a specific node should be in the start workset or not
469  *
470  * @param env      belady environment
471  * @param first
472  * @param node     the node to test
473  * @param loop     the loop of the node
474  */
to_take_or_not_to_take(ir_node * first,ir_node * node,ir_loop * loop,unsigned available)475 static loc_t to_take_or_not_to_take(ir_node* first, ir_node *node,
476                                     ir_loop *loop, unsigned available)
477 {
478 	be_next_use_t next_use;
479 	loc_t         loc;
480 
481 	loc.time    = USES_INFINITY;
482 	loc.node    = node;
483 	loc.spilled = false;
484 
485 	if (!arch_irn_consider_in_reg_alloc(cls, node)) {
486 		loc.time = USES_INFINITY;
487 		return loc;
488 	}
489 
490 	/* We have to keep nonspillable nodes in the workingset */
491 	if (arch_get_irn_flags(skip_Proj_const(node)) & arch_irn_flags_dont_spill) {
492 		loc.time = 0;
493 		DB((dbg, DBG_START, "    %+F taken (dontspill node)\n", node, loc.time));
494 		return loc;
495 	}
496 
497 	next_use = be_get_next_use(uses, first, node, 0);
498 	if (USES_IS_INFINITE(next_use.time)) {
499 		/* the nodes marked as live in shouldn't be dead, so it must be a phi */
500 		assert(is_Phi(node));
501 		loc.time = USES_INFINITY;
502 		DB((dbg, DBG_START, "    %+F not taken (dead)\n", node));
503 		return loc;
504 	}
505 
506 	loc.time = next_use.time;
507 
508 	if (improve_known_preds) {
509 		if (available == AVAILABLE_EVERYWHERE) {
510 			DB((dbg, DBG_START, "    %+F taken (%u, live in all preds)\n",
511 			    node, loc.time));
512 			return loc;
513 		} else if (available == AVAILABLE_NOWHERE) {
514 			DB((dbg, DBG_START, "    %+F not taken (%u, live in no pred)\n",
515 			    node, loc.time));
516 			loc.time = USES_INFINITY;
517 			return loc;
518 		}
519 	}
520 
521 	if (!respectloopdepth || next_use.outermost_loop >= get_loop_depth(loop)) {
522 		DB((dbg, DBG_START, "    %+F taken (%u, loop %d)\n", node, loc.time,
523 		    next_use.outermost_loop));
524 	} else {
525 		loc.time = USES_PENDING;
526 		DB((dbg, DBG_START, "    %+F delayed (outerdepth %d < loopdepth %d)\n",
527 		    node, next_use.outermost_loop, get_loop_depth(loop)));
528 	}
529 
530 	return loc;
531 }
532 
/**
 * Computes the start-workset for a block with multiple predecessors. We assume
 * that at least 1 of the predecessors is a back-edge which means we're at the
 * beginning of a loop. We try to reload as many values as possible now so they
 * don't get reloaded inside the loop.
 *
 * Called by process_block() for blocks with more than one control flow
 * predecessor. The result is written to the main workset ws; Phis of the
 * block that do not make it into the start workset are passed to
 * be_spill_phi().
 *
 * @param block  the block whose start workset is computed
 */
static void decide_start_workset(const ir_node *block)
{
	ir_loop    *loop = get_irn_loop(block);
	ir_node    *first;
	loc_t       loc;
	loc_t      *starters;
	loc_t      *delayed;
	unsigned    len;
	unsigned    i;
	unsigned    ws_count;
	int         free_slots, free_pressure_slots;
	unsigned    pressure;
	int         arity;
	workset_t **pred_worksets;
	bool        all_preds_known;

	/* check predecessors: a predecessor without block info has not been
	 * processed yet, its end workset is unknown */
	arity           = get_irn_arity(block);
	pred_worksets   = ALLOCAN(workset_t*, arity);
	all_preds_known = true;
	for (int in = 0; in < arity; ++in) {
		ir_node      *pred_block = get_Block_cfgpred_block(block, in);
		block_info_t *pred_info  = get_block_info(pred_block);

		if (pred_info == NULL) {
			pred_worksets[in] = NULL;
			all_preds_known   = false;
		} else {
			pred_worksets[in] = pred_info->end_workset;
		}
	}

	/* Collect all values living at start of block:
	 * starters = candidates taken right away,
	 * delayed  = candidates whose time is USES_PENDING */
	starters = NEW_ARR_F(loc_t, 0);
	delayed  = NEW_ARR_F(loc_t, 0);

	DB((dbg, DBG_START, "Living at start of %+F:\n", block));
	first = sched_first(block);

	/* check all Phis first (the loop stops at the first non-Phi node) */
	sched_foreach(block, node) {
		unsigned available;

		if (! is_Phi(node))
			break;
		if (!arch_irn_consider_in_reg_alloc(cls, node))
			continue;

		if (all_preds_known) {
			available = available_in_all_preds(pred_worksets, arity, node, true);
		} else {
			available = AVAILABLE_UNKNOWN;
		}

		loc = to_take_or_not_to_take(first, node, loop, available);

		if (! USES_IS_INFINITE(loc.time)) {
			if (USES_IS_PENDING(loc.time))
				ARR_APP1(loc_t, delayed, loc);
			else
				ARR_APP1(loc_t, starters, loc);
		} else {
			/* a Phi that is not taken is spilled as a whole */
			be_spill_phi(senv, node);
		}
	}

	/* check all Live-Ins */
	be_lv_foreach(lv, block, be_lv_state_in, node) {
		unsigned available;

		if (all_preds_known) {
			available = available_in_all_preds(pred_worksets, arity, node, false);
		} else {
			available = AVAILABLE_UNKNOWN;
		}

		loc = to_take_or_not_to_take(first, node, loop, available);

		if (! USES_IS_INFINITE(loc.time)) {
			if (USES_IS_PENDING(loc.time))
				ARR_APP1(loc_t, delayed, loc);
			else
				ARR_APP1(loc_t, starters, loc);
		}
	}

	/* number of delayed values that may be taken: limited by the room left
	 * in the workset and by the register pressure of the loop */
	pressure            = be_get_loop_pressure(loop_ana, cls, loop);
	assert(ARR_LEN(delayed) <= pressure);
	free_slots          = n_regs - ARR_LEN(starters);
	free_pressure_slots = n_regs - (pressure - ARR_LEN(delayed));
	free_slots          = MIN(free_slots, free_pressure_slots);

	/* so far we only put nodes into the starters list that are used inside
	 * the loop. If register pressure in the loop is low then we can take some
	 * values and let them live through the loop */
	DB((dbg, DBG_START, "Loop pressure %d, taking %d delayed vals\n",
	    pressure, free_slots));
	if (free_slots > 0) {
		/* NOTE: i, arity and loc below shadow the function-level
		 * variables of the same name */
		size_t i;

		qsort(delayed, ARR_LEN(delayed), sizeof(delayed[0]), loc_compare);

		for (i = 0; i < ARR_LEN(delayed) && free_slots > 0; ++i) {
			int    p, arity;
			loc_t *loc = & delayed[i];

			if (!is_Phi(loc->node)) {
				/* don't use values which are dead in a known predecessors
				 * to not induce unnecessary reloads */
				arity = get_irn_arity(block);
				for (p = 0; p < arity; ++p) {
					ir_node      *pred_block = get_Block_cfgpred_block(block, p);
					block_info_t *pred_info  = get_block_info(pred_block);

					if (pred_info == NULL)
						continue;

					if (!workset_contains(pred_info->end_workset, loc->node)) {
						DB((dbg, DBG_START,
							"    delayed %+F not live at pred %+F\n", loc->node,
							pred_block));
						goto skip_delayed;
					}
				}
			}

			DB((dbg, DBG_START, "    delayed %+F taken\n", loc->node));
			ARR_APP1(loc_t, starters, *loc);
			/* mark the entry as taken for the phi-spilling loop below */
			loc->node = NULL;
			--free_slots;
		skip_delayed:
			;
		}
	}

	/* spill phis (the actual phis not just their values) that are in this block
	 * but not in the start workset */
	len = ARR_LEN(delayed);
	for (i = 0; i < len; ++i) {
		ir_node *node = delayed[i].node;
		if (node == NULL || !is_Phi(node) || get_nodes_block(node) != block)
			continue;

		DB((dbg, DBG_START, "    spilling delayed phi %+F\n", node));
		be_spill_phi(senv, node);
	}
	DEL_ARR_F(delayed);

	/* Sort start values by first use */
	qsort(starters, ARR_LEN(starters), sizeof(starters[0]), loc_compare);

	/* Copy the best ones from starters to start workset */
	ws_count = MIN((unsigned) ARR_LEN(starters), n_regs);
	workset_clear(ws);
	workset_bulk_fill(ws, ws_count, starters);

	/* spill phis (the actual phis not just their values) that are in this block
	 * but not in the start workset */
	len = ARR_LEN(starters);
	for (i = ws_count; i < len; ++i) {
		ir_node *node = starters[i].node;
		if (! is_Phi(node) || get_nodes_block(node) != block)
			continue;

		DB((dbg, DBG_START, "    spilling phi %+F\n", node));
		be_spill_phi(senv, node);
	}

	DEL_ARR_F(starters);

	/* determine spill status of the values: If there's 1 pred block (which
	 * is no backedge) where the value is spilled then we must set it to
	 * spilled here. */
	for (i = 0; i < ws_count; ++i) {
		loc_t   *loc     = &ws->vals[i];
		ir_node *value   = loc->node;
		bool     spilled;
		int      n;

		/* phis from this block aren't spilled */
		if (get_nodes_block(value) == block) {
			assert(is_Phi(value));
			loc->spilled = false;
			continue;
		}

		/* determine if value was spilled on any predecessor; unknown
		 * predecessors (NULL workset) are skipped */
		spilled = false;
		for (n = 0; n < arity; ++n) {
			workset_t *pred_workset = pred_worksets[n];
			int        p_len;
			int        p;

			if (pred_workset == NULL)
				continue;

			p_len = workset_get_length(pred_workset);
			for (p = 0; p < p_len; ++p) {
				loc_t *l = &pred_workset->vals[p];

				if (l->node != value)
					continue;

				if (l->spilled) {
					spilled = true;
				}
				break;
			}
		}

		loc->spilled = spilled;
	}
}
752 
/**
 * For the given block @p block, decide for each value
 * whether it is used from a register or is reloaded
 * before the use.
 *
 * Builds the start workset in ws, walks the schedule of the block while
 * calling displace() for the operands and the results of every non-Phi
 * instruction, and stores copies of the start and end workset in a new
 * block_info_t attached to the block.
 *
 * @param block  the block to process; must not have a block info yet
 */
static void process_block(ir_node *block)
{
	workset_t    *new_vals;
	unsigned      iter;
	block_info_t *block_info;
	int           arity;

	/* no need to process a block twice */
	assert(get_block_info(block) == NULL);

	/* construct start workset */
	arity = get_Block_n_cfgpreds(block);
	if (arity == 0) {
		/* no predecessor -> empty set */
		workset_clear(ws);
	} else if (arity == 1) {
		/* one predecessor, copy its end workset */
		ir_node      *pred_block = get_Block_cfgpred_block(block, 0);
		block_info_t *pred_info  = get_block_info(pred_block);

		assert(pred_info != NULL);
		workset_copy(ws, pred_info->end_workset);
	} else {
		/* multiple predecessors, do more advanced magic :) */
		decide_start_workset(block);
	}

	DB((dbg, DBG_DECIDE, "\n"));
	DB((dbg, DBG_DECIDE, "Decide for %+F\n", block));

	/* attach the info only now: decide_start_workset() treats blocks
	 * without info as not processed yet */
	block_info = new_block_info();
	set_block_info(block, block_info);

	DB((dbg, DBG_WSETS, "Start workset for %+F:\n", block));
	{
		ir_node *irn;
		workset_foreach(ws, irn, iter) {
			DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(ws, iter)));
		}
	}

	block_info->start_workset = workset_clone(ws);

	/* process the block from start to end */
	DB((dbg, DBG_WSETS, "Processing...\n"));
	/* TODO: this leaks (into the obstack)... */
	new_vals = new_workset();

	sched_foreach(block, irn) {
		/* NOTE: this arity shadows the function-level variable */
		int i, arity;
		assert(workset_get_length(ws) <= n_regs);

		/* Phis are no real instructions; for blocks with several
		 * predecessors they are handled in decide_start_workset() */
		if (is_Phi(irn)) {
			continue;
		}
		DB((dbg, DBG_DECIDE, "  ...%+F\n", irn));

		/* set instruction in the workset */
		instr = irn;

		/* allocate all values _used_ by this instruction */
		workset_clear(new_vals);
		for (i = 0, arity = get_irn_arity(irn); i < arity; ++i) {
			ir_node *in = get_irn_n(irn, i);
			if (!arch_irn_consider_in_reg_alloc(cls, in))
				continue;

			/* (note that "spilled" is irrelevant here) */
			workset_insert(new_vals, in, false);
		}
		displace(new_vals, 1);

		/* allocate all values _defined_ by this instruction */
		workset_clear(new_vals);
		be_foreach_definition(irn, cls, value,
			assert(req_->width == 1);
			workset_insert(new_vals, value, false);
		);
		displace(new_vals, 0);
	}

	/* Remember end-workset for this block */
	block_info->end_workset = workset_clone(ws);
	DB((dbg, DBG_WSETS, "End workset for %+F:\n", block));
	{
		ir_node *irn;
		workset_foreach(ws, irn, iter)
			DB((dbg, DBG_WSETS, "  %+F (%u)\n", irn, workset_get_time(ws, iter)));
	}
}
849 
/**
 * 'decide' is block-local and makes assumptions
 * about the set of live-ins. Thus we must adapt the
 * live-outs to the live-ins at each block-border.
 *
 * Block walker callback: for every predecessor of @p block the end workset
 * of the predecessor is compared with the start workset of @p block and
 * the missing spills and reloads are added to the spill environment.
 *
 * @param block  the block whose incoming edges are fixed
 * @param data   unused walker environment
 */
static void fix_block_borders(ir_node *block, void *data)
{
	workset_t *start_workset;
	int        arity;
	int        i;
	unsigned   iter;
	(void) data;

	DB((dbg, DBG_FIX, "\n"));
	DB((dbg, DBG_FIX, "Fixing %+F\n", block));

	arity = get_irn_arity(block);
	/* can happen for endless loops */
	if (arity == 0)
		return;

	/* NOTE(review): assumes block and all its predecessors were handled by
	 * process_block(), otherwise get_block_info() returns NULL here */
	start_workset = get_block_info(block)->start_workset;

	/* process all pred blocks */
	for (i = 0; i < arity; ++i) {
		ir_node   *pred = get_Block_cfgpred_block(block, i);
		workset_t *pred_end_workset = get_block_info(pred)->end_workset;
		ir_node   *node;

		DB((dbg, DBG_FIX, "  Pred %+F\n", pred));

		/* spill all values not used anymore */
		workset_foreach(pred_end_workset, node, iter) {
			ir_node *n2;
			unsigned iter2;
			bool     found = false;
			workset_foreach(start_workset, n2, iter2) {
				if (n2 == node) {
					found = true;
					break;
				}
				/* note that we do not look at phi inputs, because the values
				 * will be either live-end and need no spill or
				 * they have other users which must be somewhere else in the
				 * workset */
			}

			if (found)
				continue;

			/* the value leaves the registers on this edge: it needs a spill
			 * if it is still live and has none on this path yet */
			if (move_spills && be_is_live_in(lv, block, node)
					&& !pred_end_workset->vals[iter].spilled) {
				ir_node *insert_point;
				if (arity > 1) {
					insert_point = be_get_end_of_block_insertion_point(pred);
					insert_point = sched_prev(insert_point);
				} else {
					insert_point = block;
				}
				DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
				     insert_point));
				be_add_spill(senv, node, insert_point);
			}
		}

		/* reload missing values in predecessors, add missing spills */
		workset_foreach(start_workset, node, iter) {
			const loc_t *l    = &start_workset->vals[iter];
			const loc_t *pred_loc;

			/* if node is a phi of the current block we reload
			 * the corresponding argument, else node itself */
			if (is_Phi(node) && get_nodes_block(node) == block) {
				node = get_irn_n(node, i);
				assert(!l->spilled);

				/* we might have unknowns as argument for the phi */
				if (!arch_irn_consider_in_reg_alloc(cls, node))
					continue;
			}

			/* check if node is in a register at end of pred */
			pred_loc = workset_contains(pred_end_workset, node);
			if (pred_loc != NULL) {
				/* we might have to spill value on this path */
				if (move_spills && !pred_loc->spilled && l->spilled) {
					ir_node *insert_point
						= be_get_end_of_block_insertion_point(pred);
					insert_point = sched_prev(insert_point);
					DB((dbg, DBG_SPILL, "Spill %+F after %+F\n", node,
					    insert_point));
					be_add_spill(senv, node, insert_point);
				}
			} else {
				/* node is not in register at the end of pred -> reload it */
				DB((dbg, DBG_FIX, "    reload %+F\n", node));
				DB((dbg, DBG_SPILL, "Reload %+F before %+F,%d\n", node, block, i));
				be_add_reload_on_edge(senv, node, block, i, cls, 1);
			}
		}
	}
}
952 
/**
 * Entry point of the spiller: decides spills and reloads for all values of
 * the register class @p rcls in @p irg and inserts them into the graph.
 *
 * Sets up the file-scope state, runs process_block() on every block in
 * reverse postorder, fixes the block borders and finally lets the spill
 * environment materialise the collected spills and reloads.
 *
 * @param irg   the graph to work on
 * @param rcls  the register class to spill for
 */
static void be_spill_belady(ir_graph *irg, const arch_register_class_t *rcls)
{
	int i;

	be_assure_live_sets(irg);

	stat_ev_tim_push();
	assure_loopinfo(irg);
	stat_ev_tim_pop("belady_time_backedges");

	/* the link fields carry the block infos; a cleared link means
	 * "block not processed yet" */
	stat_ev_tim_push();
	be_clear_links(irg);
	stat_ev_tim_pop("belady_time_clear_links");

	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);

	/* init belady env */
	stat_ev_tim_push();
	obstack_init(&obst);
	cls       = rcls;
	lv        = be_get_irg_liveness(irg);
	n_regs    = be_get_n_allocatable_regs(irg, cls);
	ws        = new_workset();
	uses      = be_begin_uses(irg, lv);
	loop_ana  = be_new_loop_pressure(irg, cls);
	senv      = be_new_spill_env(irg);
	blocklist = be_get_cfgpostorder(irg);
	stat_ev_tim_pop("belady_time_init");

	stat_ev_tim_push();
	/* walk blocks in reverse postorder */
	for (i = ARR_LEN(blocklist) - 1; i >= 0; --i) {
		process_block(blocklist[i]);
	}
	DEL_ARR_F(blocklist);
	stat_ev_tim_pop("belady_time_belady");

	stat_ev_tim_push();
	/* belady was block-local, fix the global flow by adding reloads on the
	 * edges */
	irg_block_walk_graph(irg, fix_block_borders, NULL, NULL);
	stat_ev_tim_pop("belady_time_fix_borders");

	/* the block infos are not needed beyond this point */
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK);

	/* Insert spill/reload nodes into the graph and fix usages */
	be_insert_spills_reloads(senv);

	/* clean up */
	be_delete_spill_env(senv);
	be_end_uses(uses);
	be_free_loop_pressure(loop_ana);
	/* releases all worksets and block infos at once */
	obstack_free(&obst, NULL);
}
1007 
BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady)1008 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_spillbelady)
1009 void be_init_spillbelady(void)
1010 {
1011 	static be_spiller_t belady_spiller = {
1012 		be_spill_belady
1013 	};
1014 	lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
1015 	lc_opt_entry_t *belady_group = lc_opt_get_grp(be_grp, "belady");
1016 	lc_opt_add_table(belady_group, options);
1017 
1018 	be_register_spiller("belady", &belady_spiller);
1019 	FIRM_DBG_REGISTER(dbg, "firm.be.spill.belady");
1020 }
1021