/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   Dead node elimination and Procedure Inlining.
 * @author  Michael Beck, Goetz Lindenmaier
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irnodemap.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
	set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
	assert(irn_visited(old_node));
	return (ir_node*) get_irn_link(old_node);
}

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                        */
/*--------------------------------------------------------------------*/

/**
 * Copy a node for inlining.  Updates attributes that change when
 * inlining, but not those for dead node elimination.
 *
 * Copies the node by calling copy_node() and then updates the entity if
 * it's a local one.  env must be the new ir_graph into which the node is
 * copied.  The new entities must be in the link fields of the old
 * entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
	ir_graph *new_irg  = (ir_graph*) env;
	ir_node  *new_node = irn_copy_into_irg(node, new_irg);

	set_new_node(node, new_node);
	if (is_Sel(node)) {
		ir_graph  *old_irg        = get_irn_irg(node);
		ir_type   *old_frame_type = get_irg_frame_type(old_irg);
		ir_entity *old_entity     = get_Sel_entity(node);
		assert(is_Sel(new_node));
		/* use copied entities from the new frame */
		if (get_entity_owner(old_entity) == old_frame_type) {
			ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
			assert(new_entity != NULL);
			set_Sel_entity(new_node, new_entity);
		}
	} else if (is_Block(new_node)) {
		new_node->attr.block.irg.irg = new_irg;
	}
}
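
/* Illustrative precondition sketch: before the copy walk, every entity of
 * the old frame must carry its copy in its link field, exactly as
 * copy_frame_entities() below establishes:
 *
 *   ir_entity *new_ent = copy_entity_own(old_ent, new_frame);
 *   set_entity_link(old_ent, new_ent);
 */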

static void set_preds_inline(ir_node *node, void *env)
{
	ir_node *new_node;

	irn_rewire_inputs(node);

	/* move constants into start block */
	new_node = get_new_node(node);
	if (is_irn_constlike(new_node)) {
		ir_graph *new_irg     = (ir_graph *) env;
		ir_node  *start_block = get_irg_start_block(new_irg);
		set_nodes_block(new_node, start_block);
	}
}

/**
 * Walker: checks for nodes that prevent inlining, such as taken block
 * addresses, accesses to value parameters on the frame, and stack
 * allocation.
 */
static void find_addr(ir_node *node, void *env)
{
	bool *allow_inline = (bool*)env;

	if (is_Block(node) && get_Block_entity(node)) {
		/* Currently we cannot correctly handle blocks whose address was
		 * taken when inlining. */
		*allow_inline = false;
	} else if (is_Sel(node)) {
		ir_graph *irg = current_ir_graph;
		if (get_Sel_ptr(node) == get_irg_frame(irg)) {
			/* access to frame */
			ir_entity *ent = get_Sel_entity(node);
			if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
				/* access to value_type */
				*allow_inline = false;
			}
			if (is_parameter_entity(ent)) {
				*allow_inline = false;
			}
		}
	} else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
		/* From GCC:
		 * Refuse to inline an alloca call unless the user explicitly forced
		 * it, as this may change the program's memory overhead drastically
		 * when the function using alloca is called in a loop.  In the GCC
		 * present in SPEC2000, inlining into schedule_block caused it to
		 * require 2GB of RAM instead of 256MB.
		 *
		 * Sadly, this is true for our implementation as well.
		 * Moreover, we cannot differentiate between alloca() and VLA yet, so
		 * this disables inlining of functions using VLA (which are completely
		 * safe).
		 *
		 * Two solutions:
		 * - add a flag to the Alloc node for "real" alloca() calls
		 * - add a new Stack-Restore node at the end of a function using
		 *   alloca()
		 */
		*allow_inline = false;
	}
}

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments or results
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
	ir_entity *called      = get_irg_entity(called_graph);
	ir_type   *called_type = get_entity_type(called);
	ir_type   *call_type   = get_Call_type(call);
	size_t     n_params    = get_method_n_params(called_type);
	size_t     n_arguments = get_method_n_params(call_type);
	size_t     n_res       = get_method_n_ress(called_type);
	mtp_additional_properties props = get_entity_additional_properties(called);
	size_t     i;
	bool       res;

	if (props & mtp_property_noinline)
		return false;

	if (n_arguments != n_params) {
		/* this is a bad feature of C: without a prototype, we can
		 * call a function with fewer parameters than needed. Currently
		 * we don't support this, although we could use Unknown then. */
		return false;
	}
	if (n_res != get_method_n_ress(call_type)) {
		return false;
	}

	/* Argh, compiling C has some bad consequences:
	 * The call type and the type of the called entity can differ, and it
	 * is implementation-defined what happens in that case.
	 * We support inlining if the bit size of the types matches AND
	 * the same arithmetic is used. */
	for (i = 0; i < n_params; ++i) {
		ir_type *param_tp = get_method_param_type(called_type, i);
		ir_type *arg_tp   = get_method_param_type(call_type, i);

		if (param_tp != arg_tp) {
			ir_mode *pmode = get_type_mode(param_tp);
			ir_mode *amode = get_type_mode(arg_tp);

			if (pmode == NULL || amode == NULL)
				return false;
			if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
				return false;
			if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
				return false;
			/* otherwise we can simply "reinterpret" the bits */
		}
	}
	for (i = 0; i < n_res; ++i) {
		ir_type *decl_res_tp = get_method_res_type(called_type, i);
		ir_type *used_res_tp = get_method_res_type(call_type, i);

		if (decl_res_tp != used_res_tp) {
			ir_mode *decl_mode = get_type_mode(decl_res_tp);
			ir_mode *used_mode = get_type_mode(used_res_tp);
			if (decl_mode == NULL || used_mode == NULL)
				return false;
			if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
				return false;
			if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
				return false;
			/* otherwise we can "reinterpret" the bits */
		}
	}

	/* check parameters for compound arguments */
	for (i = 0; i < n_params; ++i) {
		ir_type *p_type = get_method_param_type(call_type, i);

		if (is_compound_type(p_type))
			return false;
	}

	/* check results for compound arguments */
	for (i = 0; i < n_res; ++i) {
		ir_type *r_type = get_method_res_type(call_type, i);

		if (is_compound_type(r_type))
			return false;
	}

	res = true;
	irg_walk_graph(called_graph, find_addr, NULL, &res);

	return res;
}
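
/* Hypothetical C-level examples of calls rejected by can_inline():
 *
 *   struct big r = f(x);  // compound result (or argument) type
 *
 *   int g();              // unprototyped declaration: calling g(1, 2)
 *   g(1, 2);              // when g is defined with a different
 *                         // parameter count is rejected as well
 */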

enum exc_mode {
	exc_handler,    /**< There is a handler. */
	exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
	ir_type *from_frame = get_irg_frame_type(from);
	ir_type *to_frame   = get_irg_frame_type(to);
	size_t   n_members  = get_class_n_members(from_frame);
	size_t   i;
	assert(from_frame != to_frame);

	for (i = 0; i < n_members; ++i) {
		ir_entity *old_ent = get_class_member(from_frame, i);
		ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
		set_entity_link(old_ent, new_ent);
		assert(!is_parameter_entity(old_ent));
	}
}

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
	/* we cannot inline some types of calls */
	if (! can_inline(call, called_graph))
		return 0;

	/* We cannot inline a recursive call. The graph must be copied before
	 * the call to inline_method() using create_irg_copy(). */
	ir_graph *irg = get_irn_irg(call);
	if (called_graph == irg)
		return 0;

	ir_entity *ent      = get_irg_entity(called_graph);
	ir_type   *mtp      = get_entity_type(ent);
	ir_type   *ctp      = get_Call_type(call);
	int        n_params = get_method_n_params(mtp);

	ir_graph *rem = current_ir_graph;
	current_ir_graph = irg;

	DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

	/* optimizations can cause problems when allocating new nodes */
	int rem_opt = get_opt_optimize();
	set_optimize(0);

	/* Handle graph state */
	assert(get_irg_pinned(irg) == op_pin_state_pinned);
	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
	                   | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
	set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
	edges_deactivate(irg);

	/* here we know we WILL inline, so inform the statistics */
	hook_inline(call, called_graph);

	/* -- Decide how to handle exception control flow: Is there a handler
	   for the Call node, or do we branch directly to End on an exception?
	   exc_handling:
	   exc_handler    There is a handler.
	   exc_no_handler Exception handling is not represented in Firm. -- */
	ir_node *Xproj = NULL;
	for (ir_node *proj = (ir_node*)get_irn_link(call); proj != NULL;
	     proj = (ir_node*)get_irn_link(proj)) {
		long proj_nr = get_Proj_proj(proj);
		if (proj_nr == pn_Call_X_except) Xproj = proj;
	}
	enum exc_mode exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;

	/* create the argument tuple */
	ir_node **args_in = ALLOCAN(ir_node*, n_params);

	ir_node *block = get_nodes_block(call);
	for (int i = n_params - 1; i >= 0; --i) {
		ir_node *arg      = get_Call_param(call, i);
		ir_type *param_tp = get_method_param_type(mtp, i);
		ir_mode *mode     = get_type_mode(param_tp);

		if (mode != get_irn_mode(arg)) {
			arg = new_r_Conv(block, arg, mode);
		}
		args_in[i] = arg;
	}

	/* Pre_call is a Tuple that collects the inputs entering the inlined
	 * procedure and later replaces the Start node of the called graph.
	 * Post_call is the old Call node and collects the results of the called
	 * graph. Both will end up being a tuple. */
	ir_node *post_bl = get_nodes_block(call);
	/* XxMxPxPxPxT of Start + parameter of Call */
	ir_node *in[pn_Start_max+1];
	in[pn_Start_M]              = get_Call_mem(call);
	in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
	in[pn_Start_P_frame_base]   = get_irg_frame(irg);
	in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
	ir_node *pre_call  = new_r_Tuple(post_bl, pn_Start_max+1, in);
	ir_node *post_call = call;

	/* --
	   The new block gets the ins of the old block, pre_call and all its
	   predecessors and all Phi nodes. -- */
	part_block(pre_call);

	/* increment visited flag for later walk */
	inc_irg_visited(called_graph);

	/* Link some nodes to nodes in the current graph, so that instead of
	 * being copied the linked nodes will get used.
	 * So the copier will use the created Tuple instead of copying the Start
	 * node, similarly for singleton nodes like NoMem and Bad.
	 * Note: this will prohibit predecessors from being copied - only do it
	 * for nodes without predecessors. */
	ir_node *start_block = get_irg_start_block(called_graph);
	set_new_node(start_block, get_nodes_block(pre_call));
	mark_irn_visited(start_block);

	ir_node *start = get_irg_start(called_graph);
	set_new_node(start, pre_call);
	mark_irn_visited(start);

	ir_node *nomem = get_irg_no_mem(called_graph);
	set_new_node(nomem, get_irg_no_mem(irg));
	mark_irn_visited(nomem);

	/* the entity link is used to link entities on the old stack frame to
	 * the new stack frame */
	irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* copy entities and nodes */
	assert(!irn_visited(get_irg_end(called_graph)));
	copy_frame_entities(called_graph, irg);
	irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
	              irg);

	irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* -- Merge the end of the inlined procedure with the call site -- */
	/* We will turn the old Call node into a Tuple with the following
	   predecessors:
	   -1: Block of Tuple.
	   0: Phi of all Memories of Return statements.
	   1: Jmp from new Block that merges the control flow from all exception
	      predecessors of the old end block.
	   2: Tuple of all results.
	   3: Phi of Exception memories.
	   In case the old Call directly branches to End on an exception we don't
	   need the block merging all exceptions nor the Phi of the exception
	   memories.
	*/
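
/* Illustrative sketch of the rewiring (not executable): a user of the
 * call's results such as
 *
 *     res = Proj(Proj(call, pn_Call_T_result), 0)
 *
 * keeps its shape; after turn_into_tuple() below, `call` is a Tuple whose
 * pn_Call_T_result predecessor is the result tuple, so the Projs fold away
 * and `res` resolves to the corresponding Return value (or a Phi of them).
 */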

	/* Precompute some values */
	ir_node *end_bl = get_new_node(get_irg_end_block(called_graph));
	ir_node *end    = get_new_node(get_irg_end(called_graph));
	int      arity  = get_Block_n_cfgpreds(end_bl); /* arity = n_exc + n_ret  */
	int      n_res  = get_method_n_ress(get_Call_type(call));

	ir_node **res_pred = XMALLOCN(ir_node*, n_res);
	ir_node **cf_pred  = XMALLOCN(ir_node*, arity);

	/* preserve the keepalives of the inlined graph */
	int irn_arity = get_irn_arity(end);
	for (int i = 0; i < irn_arity; i++) {
		ir_node *ka = get_End_keepalive(end, i);
		if (! is_Bad(ka))
			add_End_keepalive(get_irg_end(irg), ka);
	}

	/* replace Return nodes by Jmp nodes */
	int n_ret = 0;
	for (int i = 0; i < arity; i++) {
		ir_node *ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			ir_node *block = get_nodes_block(ret);
			cf_pred[n_ret] = new_r_Jmp(block);
			n_ret++;
		}
	}
	set_irn_in(post_bl, n_ret, cf_pred);

	/* build a Tuple for all results of the method.
	 * Add Phi nodes if there was more than one Return. */
	turn_into_tuple(post_call, pn_Call_max+1);
	/* First the Memory-Phi */
	int n_mem_phi = 0;
	for (int i = 0; i < arity; i++) {
		ir_node *ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_mem_phi++] = get_Return_mem(ret);
		}
		/* memory output for some exceptions is directly connected to End */
		if (is_Call(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
		} else if (is_fragile_op(ret)) {
			/* We rely on all cfops having the memory output at the same
			 * position. */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
		} else if (is_Raise(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
		}
	}
	ir_node *phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
	set_Tuple_pred(call, pn_Call_M, phi);
	/* Conserve Phi-list for further inlinings -- but might be optimized */
	if (get_nodes_block(phi) == post_bl) {
		set_irn_link(phi, get_irn_link(post_bl));
		set_irn_link(post_bl, phi);
	}
	/* Now the real results */
	if (n_res > 0) {
		for (int j = 0; j < n_res; j++) {
			ir_type *res_type = get_method_res_type(ctp, j);
			ir_mode *res_mode = get_type_mode(res_type);
			int n_ret = 0;
			for (int i = 0; i < arity; i++) {
				ir_node *ret = get_Block_cfgpred(end_bl, i);
				if (is_Return(ret)) {
					ir_node *res = get_Return_res(ret, j);
					if (get_irn_mode(res) != res_mode) {
						ir_node *block = get_nodes_block(res);
						res = new_r_Conv(block, res, res_mode);
					}
					cf_pred[n_ret] = res;
					n_ret++;
				}
			}
			if (n_ret > 0) {
				phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
			} else {
				phi = new_r_Bad(irg, res_mode);
			}
			res_pred[j] = phi;
			/* Conserve Phi-list for further inlinings -- but might be optimized */
			if (get_nodes_block(phi) == post_bl) {
				set_Phi_next(phi, get_Block_phis(post_bl));
				set_Block_phis(post_bl, phi);
			}
		}
		ir_node *result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
		set_Tuple_pred(call, pn_Call_T_result, result_tuple);
	} else {
		set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
	}
	/* handle the regular call */
	set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

	/* Finally the exception control flow.
	   We have two possible situations:
	   First if the Call branches to an exception handler:
	   We need to add a Phi node to
	   collect the memory containing the exception objects.  Further we need
	   to add another block to get a correct representation of this Phi.  To
	   this block we add a Jmp that resolves into the X output of the Call
	   when the Call is turned into a tuple.
	   Second: There is no exception edge. Just add all inlined exception
	   branches to the End node.
	 */
	if (exc_handling == exc_handler) {
		int n_exc = 0;
		for (int i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);
			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		if (n_exc > 0) {
			if (n_exc == 1) {
				/* simple fix */
				set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
			} else {
				ir_node *block = new_r_Block(irg, n_exc, cf_pred);
				set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
			}
		} else {
			set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
		}
	} else {
		/* exc_handling == exc_no_handler: branch to End on exceptions */
		int n_exc = 0;
		for (int i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);

			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				n_exc++;
			}
		}
		ir_node  *main_end_bl       = get_irg_end_block(irg);
		int       main_end_bl_arity = get_irn_arity(main_end_bl);
		ir_node **end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

		for (int i = 0; i < main_end_bl_arity; ++i)
			end_preds[i] = get_irn_n(main_end_bl, i);
		for (int i = 0; i < n_exc; ++i)
			end_preds[main_end_bl_arity + i] = cf_pred[i];
		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
		set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
		free(end_preds);
	}
	free(res_pred);
	free(cf_pred);

	/* -- Turn the optimizations back on. -- */
	set_optimize(rem_opt);
	current_ir_graph = rem;

	return 1;
}
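
/* Minimal usage sketch (illustrative; mirrors what inline_small_irgs()
 * below does): the caller must collect the Phi/Proj lists first.
 *
 *   ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *   collect_phiprojs(irg);
 *   int inlined = inline_method(call, callee_irg);
 *   ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */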

/********************************************************************/
/*  Apply inlining to small methods.                                */
/********************************************************************/

static struct obstack temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
	ir_node   *call;        /**< The Call node. */
	ir_graph  *callee;      /**< The callee IR-graph. */
	list_head list;         /**< List head for linking the next one. */
	int       loop_depth;   /**< The loop depth of this call. */
	int       benefice;     /**< The calculated benefice of this call. */
	unsigned  local_adr:1;  /**< Set if this call gets an address of a local variable. */
	unsigned  all_const:1;  /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * Environment for inlining small irgs.
 */
typedef struct inline_env_t {
	struct obstack obst;  /**< An obstack where call_entries are allocated on. */
	list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
	ir_node *addr;

	addr = get_Call_ptr(call);
	if (is_SymConst_addr_ent(addr)) {
		ir_entity *ent = get_SymConst_entity(addr);
		/* we don't know which function gets finally bound to a weak symbol */
		if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
			return NULL;

		return get_entity_irg(ent);
	}

	return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
	if (is_Call(call)) {
		ir_graph *called_irg = get_call_called_irg(call);

		if (called_irg != NULL) {
			/* The Call node calls a locally defined method. Remember to inline. */
			inline_env_t *ienv  = (inline_env_t*)env;
			call_entry   *entry = OALLOC(&ienv->obst, call_entry);
			entry->call       = call;
			entry->callee     = called_irg;
			entry->loop_depth = 0;
			entry->benefice   = 0;
			entry->local_adr  = 0;
			entry->all_const  = 0;

			list_add_tail(&entry->list, &ienv->calls);
		}
	}
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
	ir_graph *rem = current_ir_graph;
	inline_env_t env;

	current_ir_graph = irg;
	free_callee_info(irg);

	/* Find Call nodes to inline.
	   (We cannot inline during a walk of the graph, as inlining the same
	   method several times changes the visited flag of the walked graph:
	   after the first inlining, visited of the callee equals visited of
	   the caller.  With the next inlining both are increased.) */
	obstack_init(&env.obst);
	INIT_LIST_HEAD(&env.calls);
	irg_walk_graph(irg, NULL, collect_calls, &env);

	if (! list_empty(&env.calls)) {
		/* There are calls to inline */
		ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		collect_phiprojs(irg);

		list_for_each_entry(call_entry, entry, &env.calls, list) {
			ir_graph  *callee = entry->callee;
			ir_entity *called = get_irg_entity(callee);
			mtp_additional_properties props
				= get_entity_additional_properties(called);

			if (props & mtp_property_noinline)
				continue;

			if ((props & mtp_property_always_inline) ||
			    _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
				inline_method(entry->call, callee);
			}
		}
		ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}
	obstack_free(&env.obst, NULL);
	current_ir_graph = rem;
}
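
/* Usage sketch (the threshold is an arbitrary example value; it is measured
 * against the obstack memory of the callee, as described above):
 *
 *   inline_small_irgs(irg, 2048);
 */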

typedef struct inline_small_irgs_pass_t {
	ir_graph_pass_t pass;
	int             size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
	inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

	inline_small_irgs(irg, pass->size);
	return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
	inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

	pass->size = size;
	return def_graph_pass_constructor(
		&pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
	list_head calls;             /**< List of all call nodes in this graph. */
	unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
	unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
	unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
	unsigned  n_nodes_orig;      /**< for statistics */
	unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
	unsigned  n_call_nodes_orig; /**< for statistics */
	unsigned  n_callers;         /**< Number of known graphs that call this graph. */
	unsigned  n_callers_orig;    /**< for statistics */
	unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
	unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
	inline_irg_env *env = OALLOC(&temp_obst, inline_irg_env);
	INIT_LIST_HEAD(&env->calls);
	env->local_weights     = NULL;
	env->n_nodes           = -2; /* do not count Start, End */
	env->n_blocks          = -2; /* do not count Start, End Block */
	env->n_nodes_orig      = -2; /* do not count Start, End */
	env->n_call_nodes      = 0;
	env->n_call_nodes_orig = 0;
	env->n_callers         = 0;
	env->n_callers_orig    = 0;
	env->got_inline        = 0;
	env->recursive         = 0;
	return env;
}

typedef struct walker_env {
	inline_irg_env *x;             /**< the inline environment */
	char           ignore_runtime; /**< the ignore runtime flag */
	char           ignore_callers; /**< if set, do NOT update the callers data */
} wenv_t;

/**
 * Post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
	wenv_t         *env  = (wenv_t*)ctx;
	inline_irg_env *x    = env->x;
	unsigned        code = get_irn_opcode(call);
	ir_graph       *callee;
	call_entry     *entry;

	/* count meaningful nodes in irg */
	if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
		if (code != iro_Block) {
			++x->n_nodes;
			++x->n_nodes_orig;
		} else {
			++x->n_blocks;
		}
	}

	if (code != iro_Call) return;

	/* check if it's a runtime call */
	if (env->ignore_runtime) {
		ir_node *symc = get_Call_ptr(call);

		if (is_SymConst_addr_ent(symc)) {
			ir_entity *ent = get_SymConst_entity(symc);

			if (get_entity_additional_properties(ent) & mtp_property_runtime)
				return;
		}
	}

	/* collect all call nodes */
	++x->n_call_nodes;
	++x->n_call_nodes_orig;

	callee = get_call_called_irg(call);
	if (callee != NULL) {
		if (! env->ignore_callers) {
			inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
			/* count all static callers */
			++callee_env->n_callers;
			++callee_env->n_callers_orig;
		}
		if (callee == current_ir_graph)
			x->recursive = 1;

		/* link it in the list of possibly inlinable entries */
		entry = OALLOC(&temp_obst, call_entry);
		entry->call       = call;
		entry->callee     = callee;
		entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
		entry->benefice   = 0;
		entry->local_adr  = 0;
		entry->all_const  = 0;

		list_add_tail(&entry->list, &x->calls);
	}
}

/**
 * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
 * hence this irg is a leaf.
 */
inline static int is_leaf(ir_graph *irg)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
	return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
	return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
	call_entry *nentry = OALLOC(&temp_obst, call_entry);
	nentry->call       = new_call;
	nentry->callee     = entry->callee;
	nentry->benefice   = entry->benefice;
	nentry->loop_depth = entry->loop_depth + loop_depth_delta;
	nentry->local_adr  = entry->local_adr;
	nentry->all_const  = entry->all_const;

	return nentry;
}

/**
 * Append all call nodes of the source environment to the list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
	call_entry *nentry;

	/* Note that the src list points to Call nodes in the inlined graph, but
	   we need Call nodes in our graph. Luckily the inliner leaves this
	   information in the link field. */
	list_for_each_entry(call_entry, entry, &src->calls, list) {
		nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
		list_add_tail(&nentry->list, &dst->calls);
	}
	dst->n_call_nodes += src->n_call_nodes;
	dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The sizes are rough measures for the code size of a method: methods with
 * fewer nodes than size (respectively leafsize for leaf methods) are
 * inlined, as long as the caller does not grow beyond maxsize nodes.
 */
void inline_leaf_functions(unsigned maxsize, unsigned leafsize,
                           unsigned size, int ignore_runtime)
{
	inline_irg_env *env;
	ir_graph       *irg;
	size_t         i, n_irgs;
	ir_graph       *rem;
	int            did_inline;
	wenv_t         wenv;
	pmap           *copied_graphs;
	pmap_entry     *pm_entry;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = ignore_runtime;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = get_irp_irg(i);

		free_callee_info(irg);

		assure_irg_properties(irg,
			IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
		wenv.x = (inline_irg_env*)get_irg_link(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
		confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
	}

	/* -- and now inline. -- */

	/* Inline leaves recursively -- we might construct new leaves. */
	do {
		did_inline = 0;

		for (i = 0; i < n_irgs; ++i) {
			int phiproj_computed = 0;

			current_ir_graph = get_irp_irg(i);
			env = (inline_irg_env*)get_irg_link(current_ir_graph);

			ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
			list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
				if (env->n_nodes > maxsize)
					break;

				ir_node   *call   = entry->call;
				ir_graph  *callee = entry->callee;
				ir_entity *called = get_irg_entity(callee);
				mtp_additional_properties props
					= get_entity_additional_properties(called);
				if (props & mtp_property_noinline)
					continue;

				if (is_leaf(callee) &&
				    (is_smaller(callee, leafsize) || (props & mtp_property_always_inline))) {
					if (!phiproj_computed) {
						phiproj_computed = 1;
						collect_phiprojs(current_ir_graph);
					}
					did_inline = inline_method(call, callee);

					if (did_inline) {
						inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

						/* call was inlined, Phi/Projs for current graph must be recomputed */
						phiproj_computed = 0;

						/* Do some statistics */
						env->got_inline = 1;
						--env->n_call_nodes;
						env->n_nodes += callee_env->n_nodes;
						--callee_env->n_callers;

						/* remove this call from the list */
						list_del(&entry->list);
						continue;
					}
				}
			}
			ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		}
	} while (did_inline);

	/* inline other small functions. */
	for (i = 0; i < n_irgs; ++i) {
		int phiproj_computed = 0;

		current_ir_graph = get_irp_irg(i);
		env = (inline_irg_env*)get_irg_link(current_ir_graph);

		ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

		/* note that the list of possible calls is updated during the process */
		list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
			ir_node   *call   = entry->call;
			ir_graph  *callee = entry->callee;
			ir_entity *called = get_irg_entity(callee);

			mtp_additional_properties props = get_entity_additional_properties(called);
			if (props & mtp_property_noinline)
				continue;

			ir_graph *callee_copy = pmap_get(ir_graph, copied_graphs, callee);
			if (callee_copy != NULL) {
				/*
				 * Remap callee if we have a copy.
				 * FIXME: Should we do this only for recursive Calls ?
				 */
				callee = callee_copy;
			}

			if ((props & mtp_property_always_inline) ||
			    (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
				if (current_ir_graph == callee) {
					/*
					 * Recursive call: we cannot directly inline because we cannot walk
					 * the graph and change it. So we have to make a copy of the graph
					 * first.
					 */

					inline_irg_env *callee_env;
					ir_graph       *copy;

					ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/*
					 * No copy yet, create one.
					 * Note that recursive methods are never leaves, so it is sufficient
					 * to test this condition here.
					 */
					copy = create_irg_copy(callee);

					/* create_irg_copy() destroys the Proj links, recompute them */
					phiproj_computed = 0;

					ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/* allocate a new environment */
					callee_env = alloc_inline_irg_env();
					set_irg_link(copy, callee_env);

					assure_irg_properties(copy,
						IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
					wenv.x              = callee_env;
					wenv.ignore_callers = 1;
					irg_walk_graph(copy, NULL, collect_calls2, &wenv);

					/*
					 * Enter the entity of the original graph. This is needed
					 * for inline_method(). However, note that ent->irg still points
					 * to callee, NOT to copy.
					 */
					set_irg_entity(copy, get_irg_entity(callee));

					pmap_insert(copied_graphs, callee, copy);
					callee = copy;

					/* we have only one caller: the original graph */
					callee_env->n_callers      = 1;
					callee_env->n_callers_orig = 1;
				}
				if (! phiproj_computed) {
					phiproj_computed = 1;
					collect_phiprojs(current_ir_graph);
				}
				did_inline = inline_method(call, callee);
				if (did_inline) {
					inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

					/* call was inlined, Phi/Projs for current graph must be recomputed */
					phiproj_computed = 0;

					/* callee was inlined. Append its call list. */
					env->got_inline = 1;
					--env->n_call_nodes;
					append_call_list(env, callee_env, entry->loop_depth);
					--callee_env->n_callers;

					/* after we have inlined callee, all called methods inside callee
					   are now called once more */
					list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
						inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
						++penv->n_callers;
					}

					/* remove this call from the list */
					list_del(&entry->list);
					continue;
				}
			}
		}
		ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}

	for (i = 0; i < n_irgs; ++i) {
		irg = get_irp_irg(i);
		env = (inline_irg_env*)get_irg_link(irg);

		if (env->got_inline) {
			optimize_graph_df(irg);
			optimize_cf(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			env->n_callers_orig, env->n_callers,
			get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = (ir_graph*)pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step ... */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}
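
/* Usage sketch: all thresholds are illustrative node counts, not tuned
 * values. maxsize bounds caller growth, leafsize applies to leaf callees,
 * size to all other callees:
 *
 *   inline_leaf_functions(750, 16, 25, 0);
 */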

typedef struct inline_leaf_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	unsigned       leafsize;
	unsigned       size;
	int            ignore_runtime;
} inline_leaf_functions_pass_t;

/**
 * Wrapper to run inline_leaf_functions() as an ir_prog pass.
 */
static int inline_leaf_functions_wrapper(ir_prog *irp, void *context)
{
	inline_leaf_functions_pass_t *pass = (inline_leaf_functions_pass_t*)context;

	(void)irp;
	inline_leaf_functions(
		pass->maxsize, pass->leafsize,
		pass->size, pass->ignore_runtime);
	return 0;
}

/* create a pass for inline_leaf_functions() */
ir_prog_pass_t *inline_leaf_functions_pass(
		const char *name, unsigned maxsize, unsigned leafsize,
		unsigned size, int ignore_runtime)
{
	inline_leaf_functions_pass_t *pass = XMALLOCZ(inline_leaf_functions_pass_t);

	pass->maxsize        = maxsize;
	pass->leafsize       = leafsize;
	pass->size           = size;
	pass->ignore_runtime = ignore_runtime;

	return def_prog_pass_constructor(
		&pass->pass,
		name ? name : "inline_leaf_functions",
		inline_leaf_functions_wrapper);
}

/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static unsigned calc_method_local_weight(ir_node *arg)
{
	int      j;
	unsigned v, weight = 0;

	for (unsigned i = get_irn_n_outs(arg); i-- > 0; ) {
		ir_node *succ = get_irn_out(arg, i);

		switch (get_irn_opcode(succ)) {
		case iro_Load:
		case iro_Store:
			/* Loads and Stores can be removed */
			weight += 3;
			break;
		case iro_Sel:
			/* check if all args are constant */
			for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
				ir_node *idx = get_Sel_index(succ, j);
				if (! is_Const(idx))
					return 0;
			}
			/* Check users on this Sel. Note: if a 0 is returned here, there was
			   some unsupported node. */
			v = calc_method_local_weight(succ);
			if (v == 0)
				return 0;
			/* we can kill one Sel with constant indexes, this is cheap */
			weight += v + 1;
			break;
		case iro_Id:
			/* when looking backward we might find Id nodes */
			weight += calc_method_local_weight(succ);
			break;
		case iro_Tuple:
			/* unoptimized tuple */
			for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
				ir_node *pred = get_Tuple_pred(succ, j);
				if (pred == arg) {
					/* look for Proj(j) */
					for (unsigned k = get_irn_n_outs(succ); k-- > 0; ) {
						ir_node *succ_succ = get_irn_out(succ, k);
						if (is_Proj(succ_succ)) {
							if (get_Proj_proj(succ_succ) == j) {
								/* found */
								weight += calc_method_local_weight(succ_succ);
							}
						} else {
							/* this should NOT happen */
							return 0;
						}
					}
				}
			}
			break;
		default:
			/* any other node: unsupported yet or bad. */
			return 0;
		}
	}
	return weight;
}
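
/* Worked example of the weights above (illustrative): an address argument
 * whose only users are two Loads and one Store yields 3 + 3 + 3 = 9; if it
 * instead feeds a constant-indexed Sel whose own users weigh v, it yields
 * v + 1.  Any unsupported user makes the whole weight collapse to 0.
 */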

/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
{
	ir_entity *ent = get_irg_entity(irg);
	ir_type   *mtp;
	size_t    nparams;
	long      proj_nr;
	ir_node   *irg_args, *arg;

	mtp     = get_entity_type(ent);
	nparams = get_method_n_params(mtp);

	/* allocate a new array. Currently used as an 'analysed' flag. */
	env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);

	/* If the method has no parameters, we have nothing to do. */
	if (nparams <= 0)
		return;

	assure_irg_outs(irg);
	irg_args = get_irg_args(irg);
	for (unsigned i = get_irn_n_outs(irg_args); i-- > 0; ) {
		arg     = get_irn_out(irg_args, i);
		proj_nr = get_Proj_proj(arg);
		env->local_weights[proj_nr] = calc_method_local_weight(arg);
	}
}

/**
 * Calculate the benefice for transmitting a local variable address.
 * After inlining, the local variable might be transformed into an
 * SSA variable by scalar_replacement().
 */
static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);

	if (env->local_weights == NULL)
		analyze_irg_local_weights(env, callee);

	if (pos < ARR_LEN(env->local_weights))
		return env->local_weights[pos];
	return 0;
}
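
/* Hypothetical C-level example of the pattern rewarded here:
 *
 *   int x;          // local variable in the caller
 *   callee(&x);     // its address is transmitted as a parameter
 *
 * After inlining, scalar_replacement() may turn x into an SSA value, so
 * such calls receive the extra weight computed above.
 */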

/**
 * Calculate a benefice value for inlining the given call.
 *
 * @param entry   the call entry to inspect
 * @param callee  the called graph
 */
static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
{
	ir_node   *call = entry->call;
	ir_entity *ent  = get_irg_entity(callee);
	ir_type   *callee_frame;
	size_t    i, n_members, n_params;
	ir_node   *frame_ptr;
	ir_type   *mtp;
	int       weight = 0;
	int       all_const;
	unsigned  cc, v;

	inline_irg_env *callee_env;

	mtp_additional_properties props = get_entity_additional_properties(ent);
	if (props & mtp_property_noinline) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	callee_frame = get_irg_frame_type(callee);
	n_members = get_class_n_members(callee_frame);
	for (i = 0; i < n_members; ++i) {
		ir_entity *frame_ent = get_class_member(callee_frame, i);
		if (is_parameter_entity(frame_ent)) {
			// TODO inliner should handle parameter entities by inserting Store operations
			DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
			add_entity_additional_properties(ent, mtp_property_noinline);
			return entry->benefice = INT_MIN;
		}
	}

	if (props & mtp_property_noreturn) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	/* costs for every passed parameter */
	n_params = get_Call_n_params(call);
	mtp      = get_entity_type(ent);
	cc       = get_method_calling_convention(mtp);
	if (cc & cc_reg_param) {
		/* register parameter, smaller costs for register parameters */
		size_t max_regs = cc & ~cc_bits;

		if (max_regs < n_params)
			weight += max_regs * 2 + (n_params - max_regs) * 5;
		else
			weight += n_params * 2;
	} else {
		/* parameters are passed on the stack */
		weight += 5 * n_params;
	}

	/* constant parameters improve the benefice */
	frame_ptr = get_irg_frame(current_ir_graph);
	all_const = 1;
	for (i = 0; i < n_params; ++i) {
		ir_node *param = get_Call_param(call, i);

		if (is_Const(param)) {
			weight += get_method_param_weight(ent, i);
		} else {
			all_const = 0;
			if (is_SymConst(param))
				weight += get_method_param_weight(ent, i);
			else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
				/*
				 * An address of a local variable is transmitted. After
				 * inlining, scalar_replacement might be able to remove the
				 * local variable, so honor this.
				 */
				v = get_method_local_adress_weight(callee, i);
				weight += v;
				if (v > 0)
					entry->local_adr = 1;
			}
		}
	}
	entry->all_const = all_const;

	callee_env = (inline_irg_env*)get_irg_link(callee);
	if (callee_env->n_callers == 1 &&
	    callee != current_ir_graph &&
	    !entity_is_externally_visible(ent)) {
		weight += 700;
	}

	/* give a bonus for functions with one block */
	if (callee_env->n_blocks == 1)
		weight = weight * 3 / 2;

	/* and one for small non-recursive functions: we want them to be inlined in mostly every case */
	if (callee_env->n_nodes < 30 && !callee_env->recursive)
		weight += 2000;

	/* and finally for leaves: they do not increase the register pressure
	   because of callee-save registers */
	if (callee_env->n_call_nodes == 0)
		weight += 400;

	/* it's important to inline inner loops first */
	if (entry->loop_depth > 30)
		weight += 30 * 1024;
	else
		weight += entry->loop_depth * 1024;

	/*
	 * All arguments constant is probably a good sign, give an extra bonus
	 */
	if (all_const)
		weight += 1024;

	return entry->benefice = weight;
}
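
/* Worked example (illustrative; ignores the get_method_param_weight()
 * contributions): a 25-node, single-block, non-recursive leaf callee with
 * one non-constant stack parameter, a single caller and not externally
 * visible, called at loop depth 1, collects
 * (5 + 700) * 3 / 2 = 1057 (integer division), then
 * 1057 + 2000 + 400 + 1024 = 4481.
 */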

typedef struct walk_env_t {
	ir_graph **irgs;
	size_t   last_irg;
} walk_env_t;

/**
 * Callgraph walker, collect all visited graphs.
 */
static void callgraph_walker(ir_graph *irg, void *data)
{
	walk_env_t *env = (walk_env_t *)data;
	env->irgs[env->last_irg++] = irg;
}

/**
 * Creates an inline order for all graphs.
 *
 * @return the list of graphs.
 */
static ir_graph **create_irg_list(void)
{
	ir_entity **free_methods;
	size_t    n_irgs = get_irp_n_irgs();
	walk_env_t env;

	cgana(&free_methods);
	xfree(free_methods);

	compute_callgraph();

	env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
	env.last_irg = 0;

	callgraph_walk(NULL, callgraph_walker, &env);
	assert(n_irgs == env.last_irg);

	free_callgraph();

	return env.irgs;
}

/**
 * Push a call onto the priority list if its benefice is big enough.
 *
 * @param pqueue  the priority queue of calls
 * @param call    the call entry
 * @param inline_threshold
 *                the threshold value
 */
static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
                            int inline_threshold)
{
	ir_graph *callee   = call->callee;
	int       benefice = calc_inline_benefice(call, callee);

	DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
	    get_irn_irg(call->call), call->call, callee, benefice));

	ir_entity *ent = get_irg_entity(callee);
	mtp_additional_properties props = get_entity_additional_properties(ent);
	if (!(props & mtp_property_always_inline) && benefice < inline_threshold) {
		return;
	}

	pqueue_put(pqueue, call, benefice);
}

/**
 * Try to inline calls into a graph.
 *
 * @param irg      the graph into which we inline
 * @param maxsize  do NOT inline if the size of irg gets
 *                 bigger than this amount
 * @param inline_threshold
 *                 threshold value for inline decision
 * @param copied_graphs
 *                 map containing copies of recursive graphs
 */
static void inline_into(ir_graph *irg, unsigned maxsize,
                        int inline_threshold, pmap *copied_graphs)
{
	int            phiproj_computed = 0;
	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
	wenv_t         wenv;
	pqueue_t       *pqueue;

	if (env->n_call_nodes == 0)
		return;

	if (env->n_nodes > maxsize) {
		DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
		return;
	}

	current_ir_graph = irg;
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

	/* put all calls into the pqueue */
	pqueue = new_pqueue();

	list_for_each_entry(call_entry, curr_call, &env->calls, list) {
		assert(is_Call(curr_call->call));
		maybe_push_call(pqueue, curr_call, inline_threshold);
	}

	/* note that the list of possible calls is updated during the process */
	while (!pqueue_empty(pqueue)) {
		int            did_inline;
		call_entry     *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
		ir_graph       *callee     = curr_call->callee;
		ir_node        *call_node  = curr_call->call;
		inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
		ir_entity      *ent        = get_irg_entity(callee);
		mtp_additional_properties props
			= get_entity_additional_properties(ent);
		ir_graph       *callee_copy;
		int            loop_depth;

		if (!(props & mtp_property_always_inline)
		    && env->n_nodes + callee_env->n_nodes > maxsize) {
			DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
			    env->n_nodes, callee, callee_env->n_nodes));
			continue;
		}

		callee_copy = pmap_get(ir_graph, copied_graphs, callee);
		if (callee_copy != NULL) {
			int benefice = curr_call->benefice;
			/*
			 * Reduce the weight for recursive functions IFF not all arguments
			 * are const; inlining recursive functions is rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			/*
			 * Remap callee if we have a copy.
			 */
			callee     = callee_copy;
			callee_env = (inline_irg_env*)get_irg_link(callee);
		}

		if (current_ir_graph == callee) {
			/*
			 * Recursive call: we cannot directly inline because we cannot
			 * walk the graph and change it. So we have to make a copy of
			 * the graph first.
			 */
			int      benefice = curr_call->benefice;
			ir_graph *copy;

			/*
			 * Reduce the weight for recursive functions IFF not all arguments
			 * are const; inlining recursive functions is rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/*
			 * No copy yet, create one.
			 * Note that recursive methods are never leaves, so it is
			 * sufficient to test this condition here.
			 */
			copy = create_irg_copy(callee);

			/* create_irg_copy() destroys the Proj links, recompute them */
			phiproj_computed = 0;

			ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/* allocate a new environment */
			callee_env = alloc_inline_irg_env();
			set_irg_link(copy, callee_env);

			assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
			memset(&wenv, 0, sizeof(wenv));
			wenv.x              = callee_env;
			wenv.ignore_callers = 1;
			irg_walk_graph(copy, NULL, collect_calls2, &wenv);

			/*
			 * Enter the entity of the original graph. This is needed
			 * for inline_method(). However, note that ent->irg still points
			 * to callee, NOT to copy.
			 */
			set_irg_entity(copy, get_irg_entity(callee));

			pmap_insert(copied_graphs, callee, copy);
			callee = copy;

			/* we have only one caller: the original graph */
			callee_env->n_callers      = 1;
			callee_env->n_callers_orig = 1;
		}
		if (! phiproj_computed) {
			phiproj_computed = 1;
			collect_phiprojs(current_ir_graph);
		}
		did_inline = inline_method(call_node, callee);
		if (!did_inline)
			continue;

		/* call was inlined, Phi/Projs for current graph must be recomputed */
		phiproj_computed = 0;

		/* remove it from the caller list */
		list_del(&curr_call->list);

		/* callee was inlined. Append its call list. */
		env->got_inline = 1;
		--env->n_call_nodes;

		/* we just generated a bunch of new calls */
		loop_depth = curr_call->loop_depth;
		list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
			inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
			ir_node        *new_call;
			call_entry     *new_entry;

			/* after we have inlined callee, all called methods inside
			 * callee are now called once more */
			++penv->n_callers;

			/* Note that the src list points to Call nodes in the inlined graph,
			 * but we need Call nodes in our graph. Luckily the inliner leaves
			 * this information in the link field. */
			new_call = (ir_node*)get_irn_link(centry->call);
			if (get_irn_irg(new_call) != irg) {
				/* centry->call has not been copied, which means it is dead.
				 * This might happen during inlining, if a const function,
				 * which cannot be inlined is only used as an unused argument
				 * of another function, which is inlined. */
				continue;
			}
			assert(is_Call(new_call));

			new_entry = duplicate_call_entry(centry, new_call, loop_depth);
			list_add_tail(&new_entry->list, &env->calls);
			maybe_push_call(pqueue, new_entry, inline_threshold);
		}

		env->n_call_nodes += callee_env->n_call_nodes;
		env->n_nodes += callee_env->n_nodes;
		--callee_env->n_callers;
	}
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	del_pqueue(pqueue);
}

/*
 * Heuristic inliner. Calculates a benefice value for every call and inlines
 * those calls with a value higher than the threshold.
 */
void inline_functions(unsigned maxsize, int inline_threshold,
                      opt_ptr after_inline_opt)
{
	inline_irg_env *env;
	size_t         i, n_irgs;
	ir_graph       *rem;
	wenv_t         wenv;
	pmap           *copied_graphs;
	pmap_entry     *pm_entry;
	ir_graph       **irgs;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	irgs = create_irg_list();

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(irgs[i], alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = 0;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		free_callee_info(irg);

		wenv.x = (inline_irg_env*)get_irg_link(irg);
		assure_loopinfo(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}

	/* -- and now inline. -- */
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		inline_into(irg, maxsize, inline_threshold, copied_graphs);
	}

	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		env = (inline_irg_env*)get_irg_link(irg);
		if (env->got_inline && after_inline_opt != NULL) {
			/* this irg got calls inlined: optimize it */
			after_inline_opt(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			env->n_callers_orig, env->n_callers,
			get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = (ir_graph*)pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step ... */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	xfree(irgs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}
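
/* Usage sketch (illustrative values; after_inline_opt may be any opt_ptr
 * callback, e.g. optimize_graph_df as used by inline_leaf_functions()
 * above):
 *
 *   inline_functions(750, 0, optimize_graph_df);
 */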

typedef struct inline_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	int            inline_threshold;
	opt_ptr        after_inline_opt;
} inline_functions_pass_t;

/**
 * Wrapper to run inline_functions() as an ir_prog pass.
 */
static int inline_functions_wrapper(ir_prog *irp, void *context)
{
	inline_functions_pass_t *pass = (inline_functions_pass_t*)context;

	(void)irp;
	inline_functions(pass->maxsize, pass->inline_threshold,
	                 pass->after_inline_opt);
	return 0;
}

/* create an ir_prog pass for inline_functions */
ir_prog_pass_t *inline_functions_pass(
		const char *name, unsigned maxsize, int inline_threshold,
		opt_ptr after_inline_opt)
{
	inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);

	pass->maxsize          = maxsize;
	pass->inline_threshold = inline_threshold;
	pass->after_inline_opt = after_inline_opt;

	return def_prog_pass_constructor(
		&pass->pass, name ? name : "inline_functions",
		inline_functions_wrapper);
}

void firm_init_inline(void)
{
	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
}