1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19 
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  */
25 #include "config.h"
26 
27 #include <limits.h>
28 #include <stdbool.h>
29 #include <assert.h>
30 
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34 
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41 
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49 
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_init.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57 
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65 #include "irnodemap.h"
66 
67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
68 
69 /*------------------------------------------------------------------*/
70 /* Routines for dead node elimination / copying garbage collection  */
71 /* of the obstack.                                                  */
72 /*------------------------------------------------------------------*/
73 
74 /**
75  * Remember the new node in the old node by using a field all nodes have.
76  */
77 static void set_new_node(ir_node *node, ir_node *new_node)
78 {
79 	set_irn_link(node, new_node);
80 }
81 
82 /**
83  * Get this new node, before the old node is forgotten.
84  */
85 static inline ir_node *get_new_node(ir_node *old_node)
86 {
87 	assert(irn_visited(old_node));
88 	return (ir_node*) get_irn_link(old_node);
89 }
90 
91 /*--------------------------------------------------------------------*/
92 /*  Functionality for inlining                                         */
93 /*--------------------------------------------------------------------*/
94 
95 /**
96  * Copy node for inlining.  Updates attributes that change when
97  * inlining but not for dead node elimination.
98  *
99  * Copies the node by calling irn_copy_into_irg() and then updates the
100  * entity if it's a local one.  env must be the new graph into which the
101  * node is copied. The new entities must be in the link field of
102  * the entities.
103  */
104 static void copy_node_inline(ir_node *node, void *env)
105 {
106 	ir_graph *new_irg  = (ir_graph*) env;
107 	ir_node  *new_node = irn_copy_into_irg(node, new_irg);
108 
109 	set_new_node(node, new_node);
110 	if (is_Sel(node)) {
111 		ir_graph  *old_irg        = get_irn_irg(node);
112 		ir_type   *old_frame_type = get_irg_frame_type(old_irg);
113 		ir_entity *old_entity     = get_Sel_entity(node);
114 		assert(is_Sel(new_node));
115 		/* use copied entities from the new frame */
116 		if (get_entity_owner(old_entity) == old_frame_type) {
117 			ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
118 			assert(new_entity != NULL);
119 			set_Sel_entity(new_node, new_entity);
120 		}
121 	} else if (is_Block(new_node)) {
122 		new_node->attr.block.irg.irg = new_irg;
123 	}
124 }
125 
126 static void set_preds_inline(ir_node *node, void *env)
127 {
128 	ir_node *new_node;
129 
130 	irn_rewire_inputs(node);
131 
132 	/* move constants into start block */
133 	new_node = get_new_node(node);
134 	if (is_irn_constlike(new_node)) {
135 		ir_graph *new_irg     = (ir_graph *) env;
136 		ir_node  *start_block = get_irg_start_block(new_irg);
137 		set_nodes_block(new_node, start_block);
138 	}
139 }
140 
141 /**
142  * Walker: checks for nodes that prevent inlining (taken block addresses, value-parameter accesses, dynamic stack allocation).
143  */
144 static void find_addr(ir_node *node, void *env)
145 {
146 	bool *allow_inline = (bool*)env;
147 
148 	if (is_Block(node) && get_Block_entity(node)) {
149 		/*
150 		 * Currently we cannot correctly handle blocks whose address
151 		 * has been taken when inlining.
152 		 */
153 		*allow_inline = false;
154 	} else if (is_Sel(node)) {
155 		ir_graph *irg = current_ir_graph;
156 		if (get_Sel_ptr(node) == get_irg_frame(irg)) {
157 			/* access to frame */
158 			ir_entity *ent = get_Sel_entity(node);
159 			if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
160 				/* access to value_type */
161 				*allow_inline = false;
162 			}
163 			if (is_parameter_entity(ent)) {
164 				*allow_inline = false;
165 			}
166 		}
167 	} else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
168 		/* From GCC:
169 		 * Refuse to inline alloca call unless user explicitly forced so as this
170 		 * may change program's memory overhead drastically when the function
171 		 * using alloca is called in a loop.  In GCC, inlining into
172 		 * schedule_block in SPEC2000 caused it to require 2GB of RAM instead of 256MB.
173 		 *
174 		 * Unfortunately this is true for our implementation as well.
175 		 * Moreover, we cannot differentiate between alloca() and VLA yet, so
176 		 * this disables inlining of functions using VLA (which are completely
177 		 * safe).
178 		 *
179 		 * 2 Solutions:
180 		 * - add a flag to the Alloc node for "real" alloca() calls
181 		 * - add a new Stack-Restore node at the end of a function using
182 		 *   alloca()
183 		 */
184 		*allow_inline = false;
185 	}
186 }
187 
188 /**
189  * Check if we can inline a given call.
190  * Currently, we cannot inline in two cases:
191  * - calls with compound arguments or results
192  * - graphs that take the address of a parameter
193  *
194  * These conditions are checked here.
195  */
196 static bool can_inline(ir_node *call, ir_graph *called_graph)
197 {
198 	ir_entity          *called      = get_irg_entity(called_graph);
199 	ir_type            *called_type = get_entity_type(called);
200 	ir_type            *call_type   = get_Call_type(call);
201 	size_t              n_params    = get_method_n_params(called_type);
202 	size_t              n_arguments = get_method_n_params(call_type);
203 	size_t              n_res       = get_method_n_ress(called_type);
204 	mtp_additional_properties props = get_entity_additional_properties(called);
205 	size_t              i;
206 	bool                res;
207 
208 	if (props & mtp_property_noinline)
209 		return false;
210 
211 	if (n_arguments != n_params) {
212 		/* this is a bad feature of C: without a prototype, we can
213 		 * call a function with fewer parameters than needed. Currently
214 		 * we don't support this, although we could use Unknown then. */
215 		return false;
216 	}
217 	if (n_res != get_method_n_ress(call_type)) {
218 		return false;
219 	}
220 
221 	/* Argh, compiling C has some bad consequences:
222 	 * if argument and parameter types differ, it is implementation
223 	 * dependent what happens.  We support inlining if the bit size of
224 	 * the types matches AND the same arithmetic is used. */
225 	for (i = 0; i < n_params; ++i) {
226 		ir_type *param_tp = get_method_param_type(called_type, i);
227 		ir_type *arg_tp   = get_method_param_type(call_type, i);
228 
229 		if (param_tp != arg_tp) {
230 			ir_mode *pmode = get_type_mode(param_tp);
231 			ir_mode *amode = get_type_mode(arg_tp);
232 
233 			if (pmode == NULL || amode == NULL)
234 				return false;
235 			if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
236 				return false;
237 			if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
238 				return false;
239 			/* otherwise we can simply "reinterpret" the bits */
240 		}
241 	}
242 	for (i = 0; i < n_res; ++i) {
243 		ir_type *decl_res_tp = get_method_res_type(called_type, i);
244 		ir_type *used_res_tp = get_method_res_type(call_type, i);
245 
246 		if (decl_res_tp != used_res_tp) {
247 			ir_mode *decl_mode = get_type_mode(decl_res_tp);
248 			ir_mode *used_mode = get_type_mode(used_res_tp);
249 			if (decl_mode == NULL || used_mode == NULL)
250 				return false;
251 			if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
252 				return false;
253 			if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
254 				return false;
255 			/* otherwise we can "reinterpret" the bits */
256 		}
257 	}
258 
259 	/* check parameters for compound arguments */
260 	for (i = 0; i < n_params; ++i) {
261 		ir_type *p_type = get_method_param_type(call_type, i);
262 
263 		if (is_compound_type(p_type))
264 			return false;
265 	}
266 
267 	/* check results for compound arguments */
268 	for (i = 0; i < n_res; ++i) {
269 		ir_type *r_type = get_method_res_type(call_type, i);
270 
271 		if (is_compound_type(r_type))
272 			return false;
273 	}
274 
275 	res = true;
276 	irg_walk_graph(called_graph, find_addr, NULL, &res);
277 
278 	return res;
279 }
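
/*
 * Illustrative note on the checks above (example values only): passing an
 * int argument to an unsigned int parameter is accepted, because both have
 * the same bit size and integer arithmetic, so the bits can simply be
 * reinterpreted; passing a 32-bit float where a 32-bit int is expected is
 * rejected because the arithmetic differs, and any struct parameter or
 * result is rejected as a compound type.
 */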
280 
281 enum exc_mode {
282 	exc_handler,    /**< There is a handler. */
283 	exc_no_handler  /**< Exception handling not represented. */
284 };
285 
286 /**
287  * Copy all entities of the stack frame of one irg to the stack frame of
288  * another.  Sets the entity links of the old entities to the copies.
289  */
290 static void copy_frame_entities(ir_graph *from, ir_graph *to)
291 {
292 	ir_type *from_frame = get_irg_frame_type(from);
293 	ir_type *to_frame   = get_irg_frame_type(to);
294 	size_t   n_members  = get_class_n_members(from_frame);
295 	size_t   i;
296 	assert(from_frame != to_frame);
297 
298 	for (i = 0; i < n_members; ++i) {
299 		ir_entity *old_ent = get_class_member(from_frame, i);
300 		ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
301 		set_entity_link(old_ent, new_ent);
302 		assert (!is_parameter_entity(old_ent));
303 	}
304 }
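
/*
 * Sketch of the copying protocol that uses the entity links set up here
 * (shown only as an illustration; it mirrors the steps of inline_method()
 * below):
 *
 *   irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
 *   copy_frame_entities(called_graph, irg);
 *   irg_walk_core(get_irg_end(called_graph), copy_node_inline,
 *                 set_preds_inline, irg);
 *   irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
 */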
305 
306 /* Inlines a method at the given call site. */
307 int inline_method(ir_node *call, ir_graph *called_graph)
308 {
309 	/* we cannot inline some types of calls */
310 	if (! can_inline(call, called_graph))
311 		return 0;
312 
313 	/* We cannot inline a recursive call. The graph must be copied before
314 	 * the call to inline_method() using create_irg_copy(). */
315 	ir_graph *irg = get_irn_irg(call);
316 	if (called_graph == irg)
317 		return 0;
318 
319 	ir_entity *ent      = get_irg_entity(called_graph);
320 	ir_type   *mtp      = get_entity_type(ent);
321 	ir_type   *ctp      = get_Call_type(call);
322 	int        n_params = get_method_n_params(mtp);
323 
324 	ir_graph *rem = current_ir_graph;
325 	current_ir_graph = irg;
326 
327 	DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
328 
329 	/* optimizations can cause problems when allocating new nodes */
330 	int rem_opt = get_opt_optimize();
331 	set_optimize(0);
332 
333 	/* Handle graph state */
334 	assert(get_irg_pinned(irg) == op_pin_state_pinned);
335 	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
336 	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
337 	                   | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
338 	set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
339 	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
340 	edges_deactivate(irg);
341 
342 	/* here we know we WILL inline, so inform the statistics */
343 	hook_inline(call, called_graph);
344 
345 	/* -- Decide how to handle exception control flow: Is there a handler
346 	   for the Call node, or do we branch directly to End on an exception?
347 	   exc_handling:
348 	   exc_handler    There is a handler.
349 	   exc_no_handler Exception handling not represented in Firm. -- */
350 	ir_node *Xproj = NULL;
351 	for (ir_node *proj = (ir_node*)get_irn_link(call); proj != NULL;
352 		 proj = (ir_node*)get_irn_link(proj)) {
353 		long proj_nr = get_Proj_proj(proj);
354 		if (proj_nr == pn_Call_X_except) Xproj = proj;
355 	}
356 	enum exc_mode exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
357 
358 	/* create the argument tuple */
359 	ir_node **args_in = ALLOCAN(ir_node*, n_params);
360 
361 	ir_node *block = get_nodes_block(call);
362 	for (int i = n_params - 1; i >= 0; --i) {
363 		ir_node *arg      = get_Call_param(call, i);
364 		ir_type *param_tp = get_method_param_type(mtp, i);
365 		ir_mode *mode     = get_type_mode(param_tp);
366 
367 		if (mode != get_irn_mode(arg)) {
368 			arg = new_r_Conv(block, arg, mode);
369 		}
370 		args_in[i] = arg;
371 	}
372 
373 	/* Pre_call collects the inputs of the called procedure and later replaces the Start node of the called graph.
374 	 * Post_call is the old Call node and collects the results of the called
375 	 * graph. Both will end up being a tuple. */
376 	ir_node *post_bl = get_nodes_block(call);
377 	/* XxMxPxPxPxT of Start + parameter of Call */
378 	ir_node *in[pn_Start_max+1];
379 	in[pn_Start_M]              = get_Call_mem(call);
380 	in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
381 	in[pn_Start_P_frame_base]   = get_irg_frame(irg);
382 	in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
383 	ir_node *pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
384 	ir_node *post_call = call;
385 
386 	/* --
387 	   The new block gets the ins of the old block, pre_call and all its
388 	   predecessors and all Phi nodes. -- */
389 	part_block(pre_call);
390 
391 	/* increment visited flag for later walk */
392 	inc_irg_visited(called_graph);
393 
394 	/* link some nodes to nodes in the current graph so that, instead of
395 	 * copying, the linked nodes will be used.
396 	 * So the copier will use the created Tuple instead of copying the Start
397 	 * node, and similarly for singleton nodes like NoMem and Bad.
398 	 * Note: this prevents predecessors from being copied - only do it for
399 	 *       nodes without predecessors */
400 	ir_node *start_block = get_irg_start_block(called_graph);
401 	set_new_node(start_block, get_nodes_block(pre_call));
402 	mark_irn_visited(start_block);
403 
404 	ir_node *start = get_irg_start(called_graph);
405 	set_new_node(start, pre_call);
406 	mark_irn_visited(start);
407 
408 	ir_node *nomem = get_irg_no_mem(called_graph);
409 	set_new_node(nomem, get_irg_no_mem(irg));
410 	mark_irn_visited(nomem);
411 
412 	/* the entity link is used to link entities on the old stack frame to
413 	 * the new stack frame */
414 	irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
415 
416 	/* copy entities and nodes */
417 	assert(!irn_visited(get_irg_end(called_graph)));
418 	copy_frame_entities(called_graph, irg);
419 	irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
420 	              irg);
421 
422 	irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
423 
424 	/* -- Merge the end of the inlined procedure with the call site -- */
425 	/* We will turn the old Call node into a Tuple with the following
426 	   predecessors:
427 	   -1:  Block of Tuple.
428 	   0: Phi of all Memories of Return statements.
429 	   1: Jmp from new Block that merges the control flow from all exception
430 	   predecessors of the old end block.
431 	   2: Tuple of all arguments.
432 	   3: Phi of Exception memories.
433 	   In case the old Call directly branches to End on an exception we don't
434 	   need the block merging all exceptions nor the Phi of the exception
435 	   memories.
436 	*/
437 
438 	/* Precompute some values */
439 	ir_node *end_bl = get_new_node(get_irg_end_block(called_graph));
440 	ir_node *end    = get_new_node(get_irg_end(called_graph));
441 	int      arity  = get_Block_n_cfgpreds(end_bl); /* arity = n_exc + n_ret  */
442 	int      n_res  = get_method_n_ress(get_Call_type(call));
443 
444 	ir_node **res_pred = XMALLOCN(ir_node*, n_res);
445 	ir_node **cf_pred  = XMALLOCN(ir_node*, arity);
446 
447 	/* preserve the keepalives of the inlined graph */
448 	int irn_arity = get_irn_arity(end);
449 	for (int i = 0; i < irn_arity; i++) {
450 		ir_node *ka = get_End_keepalive(end, i);
451 		if (! is_Bad(ka))
452 			add_End_keepalive(get_irg_end(irg), ka);
453 	}
454 
455 	/* replace Return nodes by Jmp nodes */
456 	int n_ret = 0;
457 	for (int i = 0; i < arity; i++) {
458 		ir_node *ret = get_Block_cfgpred(end_bl, i);
459 		if (is_Return(ret)) {
460 			ir_node *block = get_nodes_block(ret);
461 			cf_pred[n_ret] = new_r_Jmp(block);
462 			n_ret++;
463 		}
464 	}
465 	set_irn_in(post_bl, n_ret, cf_pred);
466 
467 	/* build a Tuple for all results of the method.
468 	 * add Phi node if there was more than one Return. */
469 	turn_into_tuple(post_call, pn_Call_max+1);
470 	/* First the Memory-Phi */
471 	int n_mem_phi = 0;
472 	for (int i = 0; i < arity; i++) {
473 		ir_node *ret = get_Block_cfgpred(end_bl, i);
474 		if (is_Return(ret)) {
475 			cf_pred[n_mem_phi++] = get_Return_mem(ret);
476 		}
477 		/* memory output for some exceptions is directly connected to End */
478 		if (is_Call(ret)) {
479 			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
480 		} else if (is_fragile_op(ret)) {
481 			/* We rely on all cfops having the memory output at the same position. */
482 			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
483 		} else if (is_Raise(ret)) {
484 			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
485 		}
486 	}
487 	ir_node *phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
488 	set_Tuple_pred(call, pn_Call_M, phi);
489 	/* Conserve Phi-list for further inlinings -- but might be optimized */
490 	if (get_nodes_block(phi) == post_bl) {
491 		set_irn_link(phi, get_irn_link(post_bl));
492 		set_irn_link(post_bl, phi);
493 	}
494 	/* Now the real results */
495 	if (n_res > 0) {
496 		for (int j = 0; j < n_res; j++) {
497 			ir_type *res_type = get_method_res_type(ctp, j);
498 			ir_mode *res_mode = get_type_mode(res_type);
499 			int n_ret = 0;
500 			for (int i = 0; i < arity; i++) {
501 				ir_node *ret = get_Block_cfgpred(end_bl, i);
502 				if (is_Return(ret)) {
503 					ir_node *res = get_Return_res(ret, j);
504 					if (get_irn_mode(res) != res_mode) {
505 						ir_node *block = get_nodes_block(res);
506 						res = new_r_Conv(block, res, res_mode);
507 					}
508 					cf_pred[n_ret] = res;
509 					n_ret++;
510 				}
511 			}
512 			if (n_ret > 0) {
513 				phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
514 			} else {
515 				phi = new_r_Bad(irg, res_mode);
516 			}
517 			res_pred[j] = phi;
518 			/* Conserve Phi-list for further inlinings -- but might be optimized */
519 			if (get_nodes_block(phi) == post_bl) {
520 				set_Phi_next(phi, get_Block_phis(post_bl));
521 				set_Block_phis(post_bl, phi);
522 			}
523 		}
524 		ir_node *result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
525 		set_Tuple_pred(call, pn_Call_T_result, result_tuple);
526 	} else {
527 		set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
528 	}
529 	/* handle the regular call */
530 	set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
531 
532 	/* Finally the exception control flow.
533 	   We have two possible situations:
534 	   First if the Call branches to an exception handler:
535 	   We need to add a Phi node to
536 	   collect the memory containing the exception objects.  Further we need
537 	   to add another block to get a correct representation of this Phi.  To
538 	   this block we add a Jmp that resolves into the X output of the Call
539 	   when the Call is turned into a tuple.
540 	   Second: There is no exception edge. Just add all inlined exception
541 	   branches to the End node.
542 	 */
543 	if (exc_handling == exc_handler) {
544 		int n_exc = 0;
545 		for (int i = 0; i < arity; i++) {
546 			ir_node *ret = get_Block_cfgpred(end_bl, i);
547 			ir_node *irn = skip_Proj(ret);
548 			if (is_fragile_op(irn) || is_Raise(irn)) {
549 				cf_pred[n_exc] = ret;
550 				++n_exc;
551 			}
552 		}
553 		if (n_exc > 0) {
554 			if (n_exc == 1) {
555 				/* simple fix */
556 				set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
557 			} else {
558 				ir_node *block = new_r_Block(irg, n_exc, cf_pred);
559 				set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
560 			}
561 		} else {
562 			set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
563 		}
564 	} else {
565 		/* assert(exc_handling == exc_no_handler || there are no exceptions) */
566 		int n_exc = 0;
567 		for (int i = 0; i < arity; i++) {
568 			ir_node *ret = get_Block_cfgpred(end_bl, i);
569 			ir_node *irn = skip_Proj(ret);
570 
571 			if (is_fragile_op(irn) || is_Raise(irn)) {
572 				cf_pred[n_exc] = ret;
573 				n_exc++;
574 			}
575 		}
576 		ir_node  *main_end_bl       = get_irg_end_block(irg);
577 		int       main_end_bl_arity = get_irn_arity(main_end_bl);
578 		ir_node **end_preds         = XMALLOCN(ir_node*, n_exc+main_end_bl_arity);
579 
580 		for (int i = 0; i < main_end_bl_arity; ++i)
581 			end_preds[i] = get_irn_n(main_end_bl, i);
582 		for (int i = 0; i < n_exc; ++i)
583 			end_preds[main_end_bl_arity + i] = cf_pred[i];
584 		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
585 		set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
586 		free(end_preds);
587 	}
588 	free(res_pred);
589 	free(cf_pred);
590 
591 	/* -- Turn optimizations back on. -- */
592 	set_optimize(rem_opt);
593 	current_ir_graph = rem;
594 
595 	return 1;
596 }
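
/*
 * Usage sketch (illustration only, assuming call and callee were collected
 * beforehand; it mirrors inline_small_irgs() below):
 *
 *   ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *   collect_phiprojs(irg);
 *   if (inline_method(call, callee)) {
 *       ...  the Call is now a Tuple; Phi/Proj lists must be recomputed
 *            before the next inlining into this graph ...
 *   }
 *   ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */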
597 
598 /********************************************************************/
599 /* Apply inlining to small methods.                                 */
600 /********************************************************************/
601 
602 static struct obstack  temp_obst;
603 
604 /** Represents a possibly inlinable call in a graph. */
605 typedef struct call_entry {
606 	ir_node    *call;       /**< The Call node. */
607 	ir_graph   *callee;     /**< The callee IR-graph. */
608 	list_head  list;        /**< List head for linking the next one. */
609 	int        loop_depth;  /**< The loop depth of this call. */
610 	int        benefice;    /**< The calculated benefice of this call. */
611 	unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
612 	unsigned   all_const:1; /**< Set if this call has only constant parameters. */
613 } call_entry;
614 
615 /**
616  * environment for inlining small irgs
617  */
618 typedef struct inline_env_t {
619 	struct obstack obst;  /**< An obstack where call_entries are allocated on. */
620 	list_head      calls; /**< The call entry list. */
621 } inline_env_t;
622 
623 /**
624  * Returns the irg called from a Call node. If the irg is not
625  * known, NULL is returned.
626  *
627  * @param call  the call node
628  */
629 static ir_graph *get_call_called_irg(ir_node *call)
630 {
631 	ir_node *addr;
632 
633 	addr = get_Call_ptr(call);
634 	if (is_SymConst_addr_ent(addr)) {
635 		ir_entity *ent = get_SymConst_entity(addr);
636 		/* we don't know which function gets finally bound to a weak symbol */
637 		if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
638 			return NULL;
639 
640 		return get_entity_irg(ent);
641 	}
642 
643 	return NULL;
644 }
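
/*
 * Example (illustration only): for a direct call whose address operand is a
 * SymConst referencing the callee entity, that entity's graph is returned
 * (or NULL if the entity has no graph or is bound weakly); for an indirect
 * call through a function pointer the address is not a SymConst and NULL is
 * returned.
 */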
645 
646 /**
647  * Walker: Collect all calls to known graphs inside a graph.
648  */
649 static void collect_calls(ir_node *call, void *env)
650 {
651 	(void) env;
652 	if (is_Call(call)) {
653 		ir_graph *called_irg = get_call_called_irg(call);
654 
655 		if (called_irg != NULL) {
656 			/* The Call node calls a locally defined method.  Remember to inline. */
657 			inline_env_t *ienv  = (inline_env_t*)env;
658 			call_entry   *entry = OALLOC(&ienv->obst, call_entry);
659 			entry->call       = call;
660 			entry->callee     = called_irg;
661 			entry->loop_depth = 0;
662 			entry->benefice   = 0;
663 			entry->local_adr  = 0;
664 			entry->all_const  = 0;
665 
666 			list_add_tail(&entry->list, &ienv->calls);
667 		}
668 	}
669 }
670 
671 /**
672  * Inlines all small methods at call sites where the called address comes
673  * from a SymConst node that references the entity representing the called
674  * method.
675  * The size argument is a rough measure for the code size of the method:
676  * Methods where the obstack containing the firm graph is smaller than
677  * size are inlined.
678  */
679 void inline_small_irgs(ir_graph *irg, int size)
680 {
681 	ir_graph *rem = current_ir_graph;
682 	inline_env_t env;
683 
684 	current_ir_graph = irg;
685 	free_callee_info(irg);
686 
687 	/* Find Call nodes to inline.
688 	   (We cannot inline during a walk of the graph, as inlining the same
689 	   method several times changes the visited flag of the walked graph:
690 	   after the first inlining visited of the callee equals visited of
691 	   the caller.  With the next inlining both are increased.) */
692 	obstack_init(&env.obst);
693 	INIT_LIST_HEAD(&env.calls);
694 	irg_walk_graph(irg, NULL, collect_calls, &env);
695 
696 	if (! list_empty(&env.calls)) {
697 		/* There are calls to inline */
698 		ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
699 		collect_phiprojs(irg);
700 
701 		list_for_each_entry(call_entry, entry, &env.calls, list) {
702 			ir_graph  *callee = entry->callee;
703 			ir_entity *called = get_irg_entity(callee);
704 			mtp_additional_properties props
705 				= get_entity_additional_properties(called);
706 
707 			if (props & mtp_property_noinline)
708 				continue;
709 
710 			if ((props & mtp_property_always_inline) ||
711 			    _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
712 				inline_method(entry->call, callee);
713 			}
714 		}
715 		ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
716 	}
717 	obstack_free(&env.obst, NULL);
718 	current_ir_graph = rem;
719 }
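
/*
 * Usage sketch (illustration only; the threshold 50 is an arbitrary example
 * value, not a recommended setting):
 *
 *   for (size_t i = 0; i < get_irp_n_irgs(); ++i)
 *       inline_small_irgs(get_irp_irg(i), 50);
 */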
720 
721 typedef struct inline_small_irgs_pass_t {
722 	ir_graph_pass_t pass;
723 	int            size;
724 } inline_small_irgs_pass_t;
725 
726 /**
727  * Wrapper to run inline_small_irgs() as a pass.
728  */
729 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
730 {
731 	inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;
732 
733 	inline_small_irgs(irg, pass->size);
734 	return 0;
735 }
736 
737 /* create a pass for inline_small_irgs() */
738 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
739 {
740 	inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);
741 
742 	pass->size = size;
743 	return def_graph_pass_constructor(
744 		&pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
745 }
746 
747 /**
748  * Environment for inlining irgs.
749  */
750 typedef struct {
751 	list_head calls;             /**< List of all call nodes in this graph. */
752 	unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
753 	unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
754 	unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
755 	unsigned  n_nodes_orig;      /**< for statistics */
756 	unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
757 	unsigned  n_call_nodes_orig; /**< for statistics */
758 	unsigned  n_callers;         /**< Number of known graphs that call this graph. */
759 	unsigned  n_callers_orig;    /**< for statistics */
760 	unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
761 	unsigned  recursive:1;       /**< Set, if this function is self recursive. */
762 } inline_irg_env;
763 
764 /**
765  * Allocate a new environment for inlining.
766  */
767 static inline_irg_env *alloc_inline_irg_env(void)
768 {
769 	inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
770 	INIT_LIST_HEAD(&env->calls);
771 	env->local_weights     = NULL;
772 	env->n_nodes           = -2; /* do not count Start, End */
773 	env->n_blocks          = -2; /* do not count Start, End Block */
774 	env->n_nodes_orig      = -2; /* do not count Start, End */
775 	env->n_call_nodes      = 0;
776 	env->n_call_nodes_orig = 0;
777 	env->n_callers         = 0;
778 	env->n_callers_orig    = 0;
779 	env->got_inline        = 0;
780 	env->recursive         = 0;
781 	return env;
782 }
783 
784 typedef struct walker_env {
785 	inline_irg_env *x;     /**< the inline environment */
786 	char ignore_runtime;   /**< the ignore runtime flag */
787 	char ignore_callers;   /**< if set, do not update the caller statistics */
788 } wenv_t;
789 
790 /**
791  * post-walker: collect all calls in the inline-environment
792  * of a graph and sum some statistics.
793  */
794 static void collect_calls2(ir_node *call, void *ctx)
795 {
796 	wenv_t         *env = (wenv_t*)ctx;
797 	inline_irg_env *x = env->x;
798 	unsigned        code = get_irn_opcode(call);
799 	ir_graph       *callee;
800 	call_entry     *entry;
801 
802 	/* count meaningful nodes in irg */
803 	if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
804 		if (code != iro_Block) {
805 			++x->n_nodes;
806 			++x->n_nodes_orig;
807 		} else {
808 			++x->n_blocks;
809 		}
810 	}
811 
812 	if (code != iro_Call) return;
813 
814 	/* check, if it's a runtime call */
815 	if (env->ignore_runtime) {
816 		ir_node *symc = get_Call_ptr(call);
817 
818 		if (is_SymConst_addr_ent(symc)) {
819 			ir_entity *ent = get_SymConst_entity(symc);
820 
821 			if (get_entity_additional_properties(ent) & mtp_property_runtime)
822 				return;
823 		}
824 	}
825 
826 	/* collect all call nodes */
827 	++x->n_call_nodes;
828 	++x->n_call_nodes_orig;
829 
830 	callee = get_call_called_irg(call);
831 	if (callee != NULL) {
832 		if (! env->ignore_callers) {
833 			inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
834 			/* count all static callers */
835 			++callee_env->n_callers;
836 			++callee_env->n_callers_orig;
837 		}
838 		if (callee == current_ir_graph)
839 			x->recursive = 1;
840 
841 		/* link it in the list of possible inlinable entries */
842 		entry = OALLOC(&temp_obst, call_entry);
843 		entry->call       = call;
844 		entry->callee     = callee;
845 		entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
846 		entry->benefice   = 0;
847 		entry->local_adr  = 0;
848 		entry->all_const  = 0;
849 
850 		list_add_tail(&entry->list, &x->calls);
851 	}
852 }
853 
854 /**
855  * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
856  * hence this irg is a leaf.
857  */
858 inline static int is_leaf(ir_graph *irg)
859 {
860 	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
861 	return env->n_call_nodes == 0;
862 }
863 
864 /**
865  * Returns TRUE if the number of nodes in the callee is
866  * smaller than size in the irg's environment.
867  */
868 inline static int is_smaller(ir_graph *callee, unsigned size)
869 {
870 	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
871 	return env->n_nodes < size;
872 }
873 
874 /**
875  * Duplicate a call entry.
876  *
877  * @param entry     the original entry to duplicate
878  * @param new_call  the new call node
879  * @param loop_depth_delta
880  *                  delta value for the loop depth
881  */
882 static call_entry *duplicate_call_entry(const call_entry *entry,
883                                         ir_node *new_call, int loop_depth_delta)
884 {
885 	call_entry *nentry = OALLOC(&temp_obst, call_entry);
886 	nentry->call       = new_call;
887 	nentry->callee     = entry->callee;
888 	nentry->benefice   = entry->benefice;
889 	nentry->loop_depth = entry->loop_depth + loop_depth_delta;
890 	nentry->local_adr  = entry->local_adr;
891 	nentry->all_const  = entry->all_const;
892 
893 	return nentry;
894 }
895 
896 /**
897  * Append all call nodes of the source environment to the call list of the destination
898  * environment.
899  *
900  * @param dst         destination environment
901  * @param src         source environment
902  * @param loop_depth  the loop depth of the call that is replaced by the src list
903  */
904 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
905 {
906 	call_entry *nentry;
907 
908 	/* Note that the src list points to Call nodes in the inlined graph, but
909 	   we need Call nodes in our graph. Luckily the inliner leaves this information
910 	   in the link field. */
911 	list_for_each_entry(call_entry, entry, &src->calls, list) {
912 		nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
913 		list_add_tail(&nentry->list, &dst->calls);
914 	}
915 	dst->n_call_nodes += src->n_call_nodes;
916 	dst->n_nodes      += src->n_nodes;
917 }
918 
919 /*
920  * Inlines small leaf methods at call sites where the called address comes
921  * from a SymConst node that references the entity representing the called
922  * method.
923  * The size argument is a rough measure for the code size of the method:
924  * Methods where the obstack containing the firm graph is smaller than
925  * size are inlined.
926  */
927 void inline_leaf_functions(unsigned maxsize, unsigned leafsize,
928                            unsigned size, int ignore_runtime)
929 {
930 	inline_irg_env   *env;
931 	ir_graph         *irg;
932 	size_t           i, n_irgs;
933 	ir_graph         *rem;
934 	int              did_inline;
935 	wenv_t           wenv;
936 	pmap             *copied_graphs;
937 	pmap_entry       *pm_entry;
938 
939 	rem = current_ir_graph;
940 	obstack_init(&temp_obst);
941 
942 	/* a map for the copied graphs, used to inline recursive calls */
943 	copied_graphs = pmap_create();
944 
945 	/* extend all irgs by a temporary data structure for inlining. */
946 	n_irgs = get_irp_n_irgs();
947 	for (i = 0; i < n_irgs; ++i)
948 		set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
949 
950 	/* Pre-compute information in temporary data structure. */
951 	wenv.ignore_runtime = ignore_runtime;
952 	wenv.ignore_callers = 0;
953 	for (i = 0; i < n_irgs; ++i) {
954 		ir_graph *irg = get_irp_irg(i);
955 
956 		free_callee_info(irg);
957 
958 		assure_irg_properties(irg,
959 			IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
960 		wenv.x = (inline_irg_env*)get_irg_link(irg);
961 		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
962 		confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
963 	}
964 
965 	/* -- and now inline. -- */
966 
967 	/* Inline leafs recursively -- we might construct new leafs. */
968 	do {
969 		did_inline = 0;
970 
971 		for (i = 0; i < n_irgs; ++i) {
972 			int phiproj_computed = 0;
973 
974 			current_ir_graph = get_irp_irg(i);
975 			env              = (inline_irg_env*)get_irg_link(current_ir_graph);
976 
977 			ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
978 			list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
979 				if (env->n_nodes > maxsize)
980 					break;
981 
982 				ir_node   *call   = entry->call;
983 				ir_graph  *callee = entry->callee;
984 				ir_entity *called = get_irg_entity(callee);
985 				mtp_additional_properties props
986 					= get_entity_additional_properties(called);
987 				if (props & mtp_property_noinline)
988 					continue;
989 
990 				if (is_leaf(callee) && (
991 				    is_smaller(callee, leafsize) || (props & mtp_property_always_inline))) {
992 					if (!phiproj_computed) {
993 						phiproj_computed = 1;
994 						collect_phiprojs(current_ir_graph);
995 					}
996 					did_inline = inline_method(call, callee);
997 
998 					if (did_inline) {
999 						inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1000 
1001 						/* call was inlined, Phi/Projs for current graph must be recomputed */
1002 						phiproj_computed = 0;
1003 
1004 						/* Do some statistics */
1005 						env->got_inline = 1;
1006 						--env->n_call_nodes;
1007 						env->n_nodes += callee_env->n_nodes;
1008 						--callee_env->n_callers;
1009 
1010 						/* remove this call from the list */
1011 						list_del(&entry->list);
1012 						continue;
1013 					}
1014 				}
1015 			}
1016 			ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1017 		}
1018 	} while (did_inline);
1019 
1020 	/* inline other small functions. */
1021 	for (i = 0; i < n_irgs; ++i) {
1022 		int phiproj_computed = 0;
1023 
1024 		current_ir_graph = get_irp_irg(i);
1025 		env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1026 
1027 		ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1028 
1029 		/* note that the list of possible calls is updated during the process */
1030 		list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1031 			ir_node   *call   = entry->call;
1032 			ir_graph  *callee = entry->callee;
1033 			ir_entity *called = get_irg_entity(callee);
1034 
1035 			mtp_additional_properties props = get_entity_additional_properties(called);
1036 			if (props & mtp_property_noinline)
1037 				continue;
1038 
1039 			ir_graph *calleee = pmap_get(ir_graph, copied_graphs, callee);
1040 			if (calleee != NULL) {
1041 				/*
1042 				 * Remap callee if we have a copy.
1043 				 * FIXME: Should we do this only for recursive Calls ?
1044 				 */
1045 				callee = calleee;
1046 			}
1047 
1048 			if ((props & mtp_property_always_inline) ||
1049 			    (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1050 				if (current_ir_graph == callee) {
1051 					/*
1052 					 * Recursive call: we cannot directly inline because we cannot walk
1053 					 * the graph and change it. So we have to make a copy of the graph
1054 					 * first.
1055 					 */
1056 
1057 					inline_irg_env *callee_env;
1058 					ir_graph       *copy;
1059 
1060 					ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1061 
1062 					/*
1063 					 * No copy yet, create one.
1064 					 * Note that recursive methods are never leafs, so it is sufficient
1065 					 * to test this condition here.
1066 					 */
1067 					copy = create_irg_copy(callee);
1068 
1069 					/* create_irg_copy() destroys the Proj links, recompute them */
1070 					phiproj_computed = 0;
1071 
1072 					ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1073 
1074 					/* allocate new environment */
1075 					callee_env = alloc_inline_irg_env();
1076 					set_irg_link(copy, callee_env);
1077 
1078 					assure_irg_properties(copy,
1079 						IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
1080 					wenv.x              = callee_env;
1081 					wenv.ignore_callers = 1;
1082 					irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1083 
1084 					/*
1085 					 * Enter the entity of the original graph. This is needed
1086 					 * for inline_method(). However, note that ent->irg still points
1087 					 * to callee, NOT to copy.
1088 					 */
1089 					set_irg_entity(copy, get_irg_entity(callee));
1090 
1091 					pmap_insert(copied_graphs, callee, copy);
1092 					callee = copy;
1093 
1094 					/* we have only one caller: the original graph */
1095 					callee_env->n_callers      = 1;
1096 					callee_env->n_callers_orig = 1;
1097 				}
1098 				if (! phiproj_computed) {
1099 					phiproj_computed = 1;
1100 					collect_phiprojs(current_ir_graph);
1101 				}
1102 				did_inline = inline_method(call, callee);
1103 				if (did_inline) {
1104 					inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1105 
1106 					/* call was inlined, Phi/Projs for current graph must be recomputed */
1107 					phiproj_computed = 0;
1108 
1109 					/* callee was inlined. Append its call list. */
1110 					env->got_inline = 1;
1111 					--env->n_call_nodes;
1112 					append_call_list(env, callee_env, entry->loop_depth);
1113 					--callee_env->n_callers;
1114 
1115 					/* after we have inlined callee, all called methods inside callee
1116 					   are now called once more */
1117 					list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1118 						inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1119 						++penv->n_callers;
1120 					}
1121 
1122 					/* remove this call from the list */
1123 					list_del(&entry->list);
1124 					continue;
1125 				}
1126 			}
1127 		}
1128 		ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1129 	}
1130 
1131 	for (i = 0; i < n_irgs; ++i) {
1132 		irg = get_irp_irg(i);
1133 		env = (inline_irg_env*)get_irg_link(irg);
1134 
1135 		if (env->got_inline) {
1136 			optimize_graph_df(irg);
1137 			optimize_cf(irg);
1138 		}
1139 		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1140 			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1141 			env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1142 			env->n_callers_orig, env->n_callers,
1143 			get_entity_name(get_irg_entity(irg))));
1144 		}
1145 	}
1146 
1147 	/* kill the copied graphs: we don't need them anymore */
1148 	foreach_pmap(copied_graphs, pm_entry) {
1149 		ir_graph *copy = (ir_graph*)pm_entry->value;
1150 
1151 		/* reset the entity, otherwise it will be deleted in the next step ... */
1152 		set_irg_entity(copy, NULL);
1153 		free_ir_graph(copy);
1154 	}
1155 	pmap_destroy(copied_graphs);
1156 
1157 	obstack_free(&temp_obst, NULL);
1158 	current_ir_graph = rem;
1159 }
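
/*
 * Usage sketch (illustration only; all numbers are arbitrary example values):
 * inline leaf callees with fewer than 16 nodes and other callees with fewer
 * than 25 nodes, never grow a caller beyond 750 nodes, and do not skip
 * runtime calls:
 *
 *   inline_leaf_functions(750, 16, 25, 0);
 */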
1160 
1161 typedef struct inline_leaf_functions_pass_t {
1162 	ir_prog_pass_t pass;
1163 	unsigned       maxsize;
1164 	unsigned       leafsize;
1165 	unsigned       size;
1166 	int            ignore_runtime;
1167 } inline_leaf_functions_pass_t;
1168 
1169 /**
1170  * Wrapper to run inline_leaf_functions() as a ir_prog pass.
1171  */
1172 static int inline_leaf_functions_wrapper(ir_prog *irp, void *context)
1173 {
1174 	inline_leaf_functions_pass_t *pass = (inline_leaf_functions_pass_t*)context;
1175 
1176 	(void)irp;
1177 	inline_leaf_functions(
1178 		pass->maxsize, pass->leafsize,
1179 		pass->size, pass->ignore_runtime);
1180 	return 0;
1181 }
1182 
1183 /* create a pass for inline_leaf_functions() */
1184 ir_prog_pass_t *inline_leaf_functions_pass(
1185 	const char *name, unsigned maxsize, unsigned leafsize,
1186 	unsigned size, int ignore_runtime)
1187 {
1188 	inline_leaf_functions_pass_t *pass = XMALLOCZ(inline_leaf_functions_pass_t);
1189 
1190 	pass->maxsize        = maxsize;
1191 	pass->leafsize       = leafsize;
1192 	pass->size           = size;
1193 	pass->ignore_runtime = ignore_runtime;
1194 
1195 	return def_prog_pass_constructor(
1196 		&pass->pass,
1197 		name ? name : "inline_leaf_functions",
1198 		inline_leaf_functions_wrapper);
1199 }
1200 
1201 /**
1202  * Calculate the weight contributed by the uses of one argument that receives the address of a local variable.
1203  */
1204 static unsigned calc_method_local_weight(ir_node *arg)
1205 {
1206 	int      j;
1207 	unsigned v, weight = 0;
1208 
1209 	for (unsigned i = get_irn_n_outs(arg); i-- > 0; ) {
1210 		ir_node *succ = get_irn_out(arg, i);
1211 
1212 		switch (get_irn_opcode(succ)) {
1213 		case iro_Load:
1214 		case iro_Store:
1215 			/* Loads and Stores can be removed */
1216 			weight += 3;
1217 			break;
1218 		case iro_Sel:
1219 			/* check if all args are constant */
1220 			for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1221 				ir_node *idx = get_Sel_index(succ, j);
1222 				if (! is_Const(idx))
1223 					return 0;
1224 			}
1225 			/* Check users on this Sel. Note: if a 0 is returned here, there was
1226 			   some unsupported node. */
1227 			v = calc_method_local_weight(succ);
1228 			if (v == 0)
1229 				return 0;
1230 			/* we can kill one Sel with constant indexes, this is cheap */
1231 			weight += v + 1;
1232 			break;
1233 		case iro_Id:
1234 			/* when looking backward we might find Id nodes */
1235 			weight += calc_method_local_weight(succ);
1236 			break;
1237 		case iro_Tuple:
1238 			/* unoptimized tuple */
1239 			for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1240 				ir_node *pred = get_Tuple_pred(succ, j);
1241 				if (pred == arg) {
1242 					/* look for Proj(j) */
1243 					for (unsigned k = get_irn_n_outs(succ); k-- > 0; ) {
1244 						ir_node *succ_succ = get_irn_out(succ, k);
1245 						if (is_Proj(succ_succ)) {
1246 							if (get_Proj_proj(succ_succ) == j) {
1247 								/* found */
1248 								weight += calc_method_local_weight(succ_succ);
1249 							}
1250 						} else {
1251 							/* this should NOT happen */
1252 							return 0;
1253 						}
1254 					}
1255 				}
1256 			}
1257 			break;
1258 		default:
1259 			/* any other node: unsupported yet or bad. */
1260 			return 0;
1261 		}
1262 	}
1263 	return weight;
1264 }
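
/*
 * Worked example (illustration only): if a transmitted address is used by
 * one Load (+3), one Store (+3) and one Sel with constant indices whose only
 * user is another Load (weight 3, plus 1 for the removable Sel), the total
 * weight is 3 + 3 + 4 = 10.  Any unsupported user, e.g. a Call, makes the
 * whole weight collapse to 0.
 */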
1265 
1266 /**
1267  * Calculate the parameter weights for transmitting the address of a local variable.
1268  */
1269 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1270 {
1271 	ir_entity *ent = get_irg_entity(irg);
1272 	ir_type  *mtp;
1273 	size_t   nparams;
1274 	long     proj_nr;
1275 	ir_node  *irg_args, *arg;
1276 
1277 	mtp      = get_entity_type(ent);
1278 	nparams  = get_method_n_params(mtp);
1279 
1280 	/* allocate a new array. currently used as 'analysed' flag */
1281 	env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1282 
1283 	/* If the method has no parameters we have nothing to do. */
1284 	if (nparams <= 0)
1285 		return;
1286 
1287 	assure_irg_outs(irg);
1288 	irg_args = get_irg_args(irg);
1289 	for (unsigned i = get_irn_n_outs(irg_args); i-- > 0; ) {
1290 		arg     = get_irn_out(irg_args, i);
1291 		proj_nr = get_Proj_proj(arg);
1292 		env->local_weights[proj_nr] = calc_method_local_weight(arg);
1293 	}
1294 }
1295 
1296 /**
1297  * Calculate the benefice for transmitting a local variable address.
1298  * After inlining, the local variable might be transformed into an
1299  * SSA variable by scalar_replacement().
1300  */
1301 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1302 {
1303 	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1304 
1305 	if (env->local_weights == NULL)
1306 		analyze_irg_local_weights(env, callee);
1307 
1308 	if (pos < ARR_LEN(env->local_weights))
1309 		return env->local_weights[pos];
1310 	return 0;
1311 }
1312 
1313 /**
1314  * Calculate a benefice value for inlining the given call.
1315  *
1316  * @param call       the call node we have to inspect
1317  * @param callee     the called graph
1318  */
1319 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1320 {
1321 	ir_node   *call = entry->call;
1322 	ir_entity *ent  = get_irg_entity(callee);
1323 	ir_type   *callee_frame;
1324 	size_t    i, n_members, n_params;
1325 	ir_node   *frame_ptr;
1326 	ir_type   *mtp;
1327 	int       weight = 0;
1328 	int       all_const;
1329 	unsigned  cc, v;
1330 
1331 	inline_irg_env *callee_env;
1332 
1333 	mtp_additional_properties props = get_entity_additional_properties(ent);
1334 	if (props & mtp_property_noinline) {
1335 		DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1336 		    call, callee));
1337 		return entry->benefice = INT_MIN;
1338 	}
1339 
1340 	callee_frame = get_irg_frame_type(callee);
1341 	n_members = get_class_n_members(callee_frame);
1342 	for (i = 0; i < n_members; ++i) {
1343 		ir_entity *frame_ent = get_class_member(callee_frame, i);
1344 		if (is_parameter_entity(frame_ent)) {
1345 			// TODO inliner should handle parameter entities by inserting Store operations
1346 			DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
1347 			add_entity_additional_properties(ent, mtp_property_noinline);
1348 			return entry->benefice = INT_MIN;
1349 		}
1350 	}
1351 
1352 	if (props & mtp_property_noreturn) {
1353 		DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1354 		    call, callee));
1355 		return entry->benefice = INT_MIN;
1356 	}
1357 
1358 	/* costs for every passed parameter */
1359 	n_params = get_Call_n_params(call);
1360 	mtp      = get_entity_type(ent);
1361 	cc       = get_method_calling_convention(mtp);
1362 	if (cc & cc_reg_param) {
1363 		/* register parameter, smaller costs for register parameters */
1364 		size_t max_regs = cc & ~cc_bits;
1365 
1366 		if (max_regs < n_params)
1367 			weight += max_regs * 2 + (n_params - max_regs) * 5;
1368 		else
1369 			weight += n_params * 2;
1370 	} else {
1371 		/* parameters are passed on the stack */
1372 		weight += 5 * n_params;
1373 	}
1374 
1375 	/* constant parameters improve the benefice */
1376 	frame_ptr = get_irg_frame(current_ir_graph);
1377 	all_const = 1;
1378 	for (i = 0; i < n_params; ++i) {
1379 		ir_node *param = get_Call_param(call, i);
1380 
1381 		if (is_Const(param)) {
1382 			weight += get_method_param_weight(ent, i);
1383 		} else {
1384 			all_const = 0;
1385 			if (is_SymConst(param))
1386 				weight += get_method_param_weight(ent, i);
1387 			else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1388 				/*
1389 				 * An address of a local variable is transmitted. After
1390 				 * inlining, scalar_replacement might be able to remove the
1391 				 * local variable, so honor this.
1392 				 */
1393 				v = get_method_local_adress_weight(callee, i);
1394 				weight += v;
1395 				if (v > 0)
1396 					entry->local_adr = 1;
1397 			}
1398 		}
1399 	}
1400 	entry->all_const = all_const;
1401 
1402 	callee_env = (inline_irg_env*)get_irg_link(callee);
1403 	if (callee_env->n_callers == 1 &&
1404 	    callee != current_ir_graph &&
1405 	    !entity_is_externally_visible(ent)) {
1406 		weight += 700;
1407 	}
1408 
1409 	/* give a bonus for functions with one block */
1410 	if (callee_env->n_blocks == 1)
1411 		weight = weight * 3 / 2;
1412 
1413 	/* and one for small non-recursive functions: we want them to be inlined in almost every case */
1414 	if (callee_env->n_nodes < 30 && !callee_env->recursive)
1415 		weight += 2000;
1416 
1417 	/* and finally for leafs: they do not increase the register pressure
1418 	   because of callee-save registers */
1419 	if (callee_env->n_call_nodes == 0)
1420 		weight += 400;
1421 
1422 	/* it is important to inline inner loops first */
1423 	if (entry->loop_depth > 30)
1424 		weight += 30 * 1024;
1425 	else
1426 		weight += entry->loop_depth * 1024;
1427 
1428 	/*
1429 	 * All arguments constant is probably a good sign, give an extra bonus
1430 	 */
1431 	if (all_const)
1432 		weight += 1024;
1433 
1434 	return entry->benefice = weight;
1435 }
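
/*
 * Worked example (illustration only, ignoring the get_method_param_weight()
 * contributions): a call at loop depth 1 to a non-recursive leaf callee with
 * a single block and fewer than 30 nodes, passing two constant parameters in
 * registers, is rated (2 * 2) * 3 / 2 + 2000 + 400 + 1 * 1024 + 1024 = 4454,
 * while a callee marked mtp_property_noinline or mtp_property_noreturn is
 * always rated INT_MIN.
 */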
1436 
1437 typedef struct walk_env_t {
1438 	ir_graph **irgs;
1439 	size_t   last_irg;
1440 } walk_env_t;
1441 
1442 /**
1443  * Callgraph walker, collect all visited graphs.
1444  */
1445 static void callgraph_walker(ir_graph *irg, void *data)
1446 {
1447 	walk_env_t *env = (walk_env_t *)data;
1448 	env->irgs[env->last_irg++] = irg;
1449 }
1450 
1451 /**
1452  * Creates an inline order for all graphs.
1453  *
1454  * @return the list of graphs.
1455  */
1456 static ir_graph **create_irg_list(void)
1457 {
1458 	ir_entity  **free_methods;
1459 	size_t     n_irgs = get_irp_n_irgs();
1460 	walk_env_t env;
1461 
1462 	cgana(&free_methods);
1463 	xfree(free_methods);
1464 
1465 	compute_callgraph();
1466 
1467 	env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1468 	env.last_irg = 0;
1469 
1470 	callgraph_walk(NULL, callgraph_walker, &env);
1471 	assert(n_irgs == env.last_irg);
1472 
1473 	free_callgraph();
1474 
1475 	return env.irgs;
1476 }
1477 
1478 /**
1479  * Push a call onto the priority list if its benefice is big enough.
1480  *
1481  * @param pqueue   the priority queue of calls
1482  * @param call     the call entry
1483  * @param inline_threshold
1484  *                 the threshold value
1485  */
1486 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1487                             int inline_threshold)
1488 {
1489 	ir_graph *callee   = call->callee;
1490 	int       benefice = calc_inline_benefice(call, callee);
1491 
1492 	DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1493 	    get_irn_irg(call->call), call->call, callee, benefice));
1494 
1495 	ir_entity                *ent   = get_irg_entity(callee);
1496 	mtp_additional_properties props = get_entity_additional_properties(ent);
1497 	if (!(props & mtp_property_always_inline) && benefice < inline_threshold) {
1498 		return;
1499 	}
1500 
1501 	pqueue_put(pqueue, call, benefice);
1502 }
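
/*
 * Illustration only: with an inline_threshold of 0, a call rated 4454 by
 * calc_inline_benefice() is queued with that priority, whereas a call rated
 * below the threshold is dropped here unless its callee carries
 * mtp_property_always_inline.
 */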
1503 
1504 /**
1505  * Try to inline calls into a graph.
1506  *
1507  * @param irg      the graph into which we inline
1508  * @param maxsize  do NOT inline if the size of irg gets
1509  *                 bigger than this amount
1510  * @param inline_threshold
1511  *                 threshold value for inline decision
1512  * @param copied_graphs
1513  *                 map containing copies of recursive graphs
1514  */
1515 static void inline_into(ir_graph *irg, unsigned maxsize,
1516                         int inline_threshold, pmap *copied_graphs)
1517 {
1518 	int            phiproj_computed = 0;
1519 	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1520 	wenv_t         wenv;
1521 	pqueue_t       *pqueue;
1522 
1523 	if (env->n_call_nodes == 0)
1524 		return;
1525 
1526 	if (env->n_nodes > maxsize) {
1527 		DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1528 		return;
1529 	}
1530 
1531 	current_ir_graph = irg;
1532 	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1533 
1534 	/* put irgs into the pqueue */
1535 	pqueue = new_pqueue();
1536 
1537 	list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1538 		assert(is_Call(curr_call->call));
1539 		maybe_push_call(pqueue, curr_call, inline_threshold);
1540 	}
1541 
1542 	/* note that the list of possible calls is updated during the process */
1543 	while (!pqueue_empty(pqueue)) {
1544 		int                 did_inline;
1545 		call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1546 		ir_graph            *callee     = curr_call->callee;
1547 		ir_node             *call_node  = curr_call->call;
1548 		inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1549 		ir_entity           *ent        = get_irg_entity(callee);
1550 		mtp_additional_properties props
1551 			= get_entity_additional_properties(ent);
1552 		ir_graph            *calleee;
1553 		int                 loop_depth;
1554 
1555 		if (!(props & mtp_property_always_inline)
1556 		    && env->n_nodes + callee_env->n_nodes > maxsize) {
1557 			DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1558 						env->n_nodes, callee, callee_env->n_nodes));
1559 			continue;
1560 		}
1561 
1562 		calleee = pmap_get(ir_graph, copied_graphs, callee);
1563 		if (calleee != NULL) {
1564 			int benefice = curr_call->benefice;
1565 			/*
1566 			 * Reduce the weight for a recursive function IFF not all arguments are const.
1567 			 * Inlining recursive functions is rarely good.
1568 			 */
1569 			if (!curr_call->all_const)
1570 				benefice -= 2000;
1571 			if (benefice < inline_threshold)
1572 				continue;
1573 
1574 			/*
1575 			 * Remap callee if we have a copy.
1576 			 */
1577 			callee     = calleee;
1578 			callee_env = (inline_irg_env*)get_irg_link(callee);
1579 		}
1580 
1581 		if (current_ir_graph == callee) {
1582 			/*
1583 			 * Recursive call: we cannot directly inline because we cannot
1584 			 * walk the graph and change it. So we have to make a copy of
1585 			 * the graph first.
1586 			 */
1587 			int benefice = curr_call->benefice;
1588 			ir_graph *copy;
1589 
1590 			/*
1591 			 * Reduce the weight for recursive functions if not all arguments are constant:
1592 			 * inlining recursive functions is rarely profitable.
1593 			 */
1594 			if (!curr_call->all_const)
1595 				benefice -= 2000;
1596 			if (benefice < inline_threshold)
1597 				continue;
1598 
1599 			ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1600 
1601 			/*
1602 			 * No copy yet, create one.
1603 			 * Note that recursive methods are never leaves, so it is
1604 			 * sufficient to test this condition here.
1605 			 */
1606 			copy = create_irg_copy(callee);
1607 
1608 			/* create_irg_copy() destroys the Proj links, recompute them */
1609 			phiproj_computed = 0;
1610 
1611 			ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1612 
1613 			/* allocate a new environment */
1614 			callee_env = alloc_inline_irg_env();
1615 			set_irg_link(copy, callee_env);
1616 
1617 			assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
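			/* collect the Call nodes of the fresh copy so that they can be
			 * considered for further inlining as well */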
1618 			memset(&wenv, 0, sizeof(wenv));
1619 			wenv.x              = callee_env;
1620 			wenv.ignore_callers = 1;
1621 			irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1622 
1623 			/*
1624 			 * Enter the entity of the original graph. This is needed
1625 			 * for inline_method(). However, note that ent->irg still points
1626 			 * to callee, NOT to copy.
1627 			 */
1628 			set_irg_entity(copy, get_irg_entity(callee));
1629 
1630 			pmap_insert(copied_graphs, callee, copy);
1631 			callee = copy;
1632 
1633 			/* we have only one caller: the original graph */
1634 			callee_env->n_callers      = 1;
1635 			callee_env->n_callers_orig = 1;
1636 		}
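		/* Phi/Proj lists are collected lazily: (re)compute them only when an
		 * inlining is actually about to be performed in this graph. */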
1637 		if (! phiproj_computed) {
1638 			phiproj_computed = 1;
1639 			collect_phiprojs(current_ir_graph);
1640 		}
1641 		did_inline = inline_method(call_node, callee);
1642 		if (!did_inline)
1643 			continue;
1644 
1645 		/* call was inlined, Phi/Projs for current graph must be recomputed */
1646 		phiproj_computed = 0;
1647 
1648 		/* remove it from the caller list */
1649 		list_del(&curr_call->list);
1650 
1651 		/* callee was inlined. Append its call list. */
1652 		env->got_inline = 1;
1653 		--env->n_call_nodes;
1654 
1655 		/* we just generate a bunch of new calls */
1656 		loop_depth = curr_call->loop_depth;
1657 		list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1658 			inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1659 			ir_node        *new_call;
1660 			call_entry     *new_entry;
1661 
1662 			/* after we have inlined callee, all called methods inside
1663 			 * callee are now called once more */
1664 			++penv->n_callers;
1665 
1666 			/* Note that the src list points to Call nodes in the inlined graph,
1667 			 * but we need Call nodes in our graph. Luckily the inliner leaves
1668 			 * this information in the link field. */
1669 			new_call = (ir_node*)get_irn_link(centry->call);
1670 			if (get_irn_irg(new_call) != irg) {
1671 			/* centry->call has not been copied, which means it is dead.
1672 			 * This can happen during inlining if a const function that
1673 			 * cannot be inlined is only used as an unused argument of
1674 			 * another function which is inlined. */
1675 				continue;
1676 			}
1677 			assert(is_Call(new_call));
1678 
1679 			new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1680 			list_add_tail(&new_entry->list, &env->calls);
1681 			maybe_push_call(pqueue, new_entry, inline_threshold);
1682 		}
1683 
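		/* account for the inlined body: the caller inherits the callee's call
		 * and node counts, and the callee has one caller fewer. */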
1684 		env->n_call_nodes += callee_env->n_call_nodes;
1685 		env->n_nodes += callee_env->n_nodes;
1686 		--callee_env->n_callers;
1687 	}
1688 	ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1689 	del_pqueue(pqueue);
1690 }
1691 
1692 /*
1693  * Heuristic inliner. Calculates a benefice value for every call and inlines
1694  * those calls with a value higher than the threshold.
1695  */
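/*
 * Example call (hypothetical driver code; the concrete values are purely
 * illustrative): inline every call whose benefice is at least 0, let no
 * graph grow beyond roughly 750 nodes, and run no cleanup afterwards:
 *
 *     inline_functions(750, 0, NULL);
 */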
1696 void inline_functions(unsigned maxsize, int inline_threshold,
1697                       opt_ptr after_inline_opt)
1698 {
1699 	inline_irg_env   *env;
1700 	size_t           i, n_irgs;
1701 	ir_graph         *rem;
1702 	wenv_t           wenv;
1703 	pmap             *copied_graphs;
1704 	pmap_entry       *pm_entry;
1705 	ir_graph         **irgs;
1706 
1707 	rem = current_ir_graph;
1708 	obstack_init(&temp_obst);
1709 
1710 	irgs = create_irg_list();
1711 
1712 	/* a map for the copied graphs, used to inline recursive calls */
1713 	copied_graphs = pmap_create();
1714 
1715 	/* extend all irgs by a temporary data structure for inlining. */
1716 	n_irgs = get_irp_n_irgs();
1717 	for (i = 0; i < n_irgs; ++i)
1718 		set_irg_link(irgs[i], alloc_inline_irg_env());
1719 
1720 	/* Pre-compute information in temporary data structure. */
1721 	wenv.ignore_runtime = 0;
1722 	wenv.ignore_callers = 0;
1723 	for (i = 0; i < n_irgs; ++i) {
1724 		ir_graph *irg = irgs[i];
1725 
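		/* callee information (possible call targets) is not kept up to date by
		 * the inliner, so drop it up front to avoid stale data */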
1726 		free_callee_info(irg);
1727 
1728 		wenv.x = (inline_irg_env*)get_irg_link(irg);
1729 		assure_loopinfo(irg);
1730 		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1731 	}
1732 
1733 	/* -- and now inline. -- */
1734 	for (i = 0; i < n_irgs; ++i) {
1735 		ir_graph *irg = irgs[i];
1736 
1737 		inline_into(irg, maxsize, inline_threshold, copied_graphs);
1738 	}
1739 
1740 	for (i = 0; i < n_irgs; ++i) {
1741 		ir_graph *irg = irgs[i];
1742 
1743 		env = (inline_irg_env*)get_irg_link(irg);
1744 		if (env->got_inline && after_inline_opt != NULL) {
1745 			/* this irg got calls inlined: optimize it */
1746 			after_inline_opt(irg);
1747 		}
1748 		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1749 			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1750 			env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1751 			env->n_callers_orig, env->n_callers,
1752 			get_entity_name(get_irg_entity(irg))));
1753 		}
1754 	}
1755 
1756 	/* kill the copied graphs: we don't need them anymore */
1757 	foreach_pmap(copied_graphs, pm_entry) {
1758 		ir_graph *copy = (ir_graph*)pm_entry->value;
1759 
1760 		/* reset the entity, otherwise it will be deleted in the next step ... */
1761 		set_irg_entity(copy, NULL);
1762 		free_ir_graph(copy);
1763 	}
1764 	pmap_destroy(copied_graphs);
1765 
1766 	xfree(irgs);
1767 
1768 	obstack_free(&temp_obst, NULL);
1769 	current_ir_graph = rem;
1770 }
1771 
1772 typedef struct inline_functions_pass_t {
1773 	ir_prog_pass_t pass;
1774 	unsigned       maxsize;
1775 	int            inline_threshold;
1776 	opt_ptr        after_inline_opt;
1777 } inline_functions_pass_t;
1778 
1779 /**
1780  * Wrapper to run inline_functions() as an ir_prog pass.
1781  */
1782 static int inline_functions_wrapper(ir_prog *irp, void *context)
1783 {
1784 	inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1785 
1786 	(void)irp;
1787 	inline_functions(pass->maxsize, pass->inline_threshold,
1788 	                 pass->after_inline_opt);
1789 	return 0;
1790 }
1791 
1792 /* create an ir_prog pass for inline_functions */
1793 ir_prog_pass_t *inline_functions_pass(
1794 	  const char *name, unsigned maxsize, int inline_threshold,
1795 	  opt_ptr after_inline_opt)
1796 {
1797 	inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1798 
1799 	pass->maxsize          = maxsize;
1800 	pass->inline_threshold = inline_threshold;
1801 	pass->after_inline_opt = after_inline_opt;
1802 
1803 	return def_prog_pass_constructor(
1804 		&pass->pass, name ? name : "inline_functions",
1805 		inline_functions_wrapper);
1806 }
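
/*
 * Example (hypothetical; how the returned pass is scheduled depends on the
 * surrounding pass manager and is not shown here):
 *
 *     ir_prog_pass_t *inl = inline_functions_pass(NULL, 750, 0, NULL);
 */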
1807 
1808 void firm_init_inline(void)
1809 {
1810 	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1811 }
1812