/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   Control flow optimizations.
 * @author  Goetz Lindenmaier, Michael Beck, Sebastian Hack
 *
 * Removes Bad control flow predecessors and empty blocks.  A block is empty
 * if it contains only a Jmp node.  Blocks can only be removed if they are not
 * needed for the semantics of Phi nodes.  Further, we NEVER remove labeled
 * blocks (even if we could move the label).
 */
#include "config.h"

#include "iroptimize.h"

#include <assert.h>
#include <stdbool.h>

#include "xmalloc.h"
#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "ircons.h"
#include "iropt_t.h"
#include "irgwalk.h"
#include "irgmod.h"
#include "irdump.h"
#include "irverify.h"
#include "iredges.h"

#include "array_t.h"

#include "irouts.h"
#include "irbackedge_t.h"

#include "irflag_t.h"
#include "firmstat.h"
#include "irpass.h"
#include "irnodehashmap.h"
#include "irtools.h"

#include "iropt_dbg.h"

/** An environment for merge_blocks and collect_nodes. */
typedef struct merge_env {
	bool changed;    /**< Set if the graph was changed. */
	bool phis_moved; /**< Set if Phi nodes were moved. */
} merge_env;

/** Set or reset the removable property of a block. */
static void set_Block_removable(ir_node *block, bool removable)
{
	set_Block_mark(block, removable);
}

/** Check if a block has the removable property set. */
static bool is_Block_removable(const ir_node *block)
{
	return get_Block_mark(block);
}

/** Checks if a given Cond node is a switch Cond. */
static bool is_switch_Cond(const ir_node *cond)
{
	ir_node *sel = get_Cond_selector(cond);
	return get_irn_mode(sel) != mode_b;
}

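/* Illustrative sketch (not part of this pass): a boolean Cond is built from a
 * mode_b selector and has exactly two X projections, roughly as below,
 * assuming the usual ircons constructors and the pn_Cond_* projection
 * numbers:
 *
 *     ir_node *cond  = new_r_Cond(block, sel);                  // sel: mode_b
 *     ir_node *projT = new_r_Proj(cond, mode_X, pn_Cond_true);  // then edge
 *     ir_node *projF = new_r_Proj(cond, mode_X, pn_Cond_false); // else edge
 *
 * A switch Cond selects over an integer mode instead, which is why
 * is_switch_Cond() only needs to compare the selector's mode against mode_b.
 */
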
/** Walker: clear link fields and mark all blocks as removable. */
static void clear_link_and_mark_blocks_removable(ir_node *node, void *ctx)
{
	(void) ctx;
	set_irn_link(node, NULL);
	if (is_Block(node)) {
		set_Block_removable(node, true);
		set_Block_phis(node, NULL);
	} else if (is_Phi(node)) {
		set_Phi_next(node, NULL);
	}
}

/**
 * Collects all Phi nodes into the Phi list of their block.
 * Marks a block as non-removable if it contains a node other
 * than Jmp (and Proj).
 * Links all Proj nodes to their predecessors.
 */
static void collect_nodes(ir_node *n, void *ctx)
{
	(void) ctx;
	if (is_Phi(n)) {
		/* Collect Phi nodes to compact ins along with the block's ins. */
		ir_node *block = get_nodes_block(n);
		add_Block_phi(block, n);
	} else if (is_Block(n)) {
		if (get_Block_entity(n) != NULL) {
			/* a block with a jump label attached cannot be removed. */
			set_Block_removable(n, false);
		}
	} else if (is_Bad(n) || is_Jmp(n)) {
		/* ignore these */
		return;
	} else {
		/* Check for non-empty block. */
		ir_node *block = get_nodes_block(n);

		set_Block_removable(block, false);

		if (is_Proj(n)) {
			/* link Proj nodes */
			ir_node *pred = get_Proj_pred(n);
			set_irn_link(n, get_irn_link(pred));
			set_irn_link(pred, n);
		}
	}
}

/** Returns true if pred is a predecessor of block b. */
static bool is_pred_of(const ir_node *pred, const ir_node *b)
{
	int i;

	for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
		ir_node *b_pred = get_Block_cfgpred_block(b, i);
		if (b_pred == pred)
			return true;
	}
	return false;
}

/** Test whether we can optimize away the pred block at pos of b.
 *
 * @param b    A block node.
 * @param pos  The position of the predecessor block to judge.
 *
 * @returns    The number of predecessors b gains from this position:
 *             1 if the predecessor block must be kept, otherwise the
 *             arity of the (removable) predecessor block.
 *
 * The test is rather tricky.
 *
 * The situation is something like the following:
 * @verbatim
 *        if-block
 *         /   \
 *     then-b  else-b
 *         \   /
 *           b
 * @endverbatim
 *
 * b merges the control flow of an if-then-else.  We may not remove
 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
 * node in b, even if both are empty.  The destruction of this Phi
 * requires that a copy is added before the merge.  We have to
 * keep one of the case blocks to place the copies in.
 *
 * To perform the test for pos, we must regard predecessors before pos
 * as already removed.
 **/
static unsigned test_whether_dispensable(const ir_node *b, int pos)
{
	ir_node *pred  = get_Block_cfgpred(b, pos);
	ir_node *predb = get_nodes_block(pred);

	if (is_Bad(pred) || !is_Block_removable(predb))
		return 1;

	/* can't remove self-loops */
	if (predb == b)
		goto non_dispensable;
	if (is_unknown_jump(pred))
		goto non_dispensable;

	/* Seems to be empty. At least we detected this in collect_nodes. */
	if (get_Block_phis(b) != NULL) {
		int n_cfgpreds = get_Block_n_cfgpreds(b);
		int i;
		/* there are Phi nodes */

		/* b's pred blocks and pred's pred blocks must be pairwise disjunct.
		 * Handle all pred blocks with preds < pos as if they were already
		 * removed. */
		for (i = 0; i < pos; i++) {
			ir_node *other_pred  = get_Block_cfgpred(b, i);
			ir_node *other_predb = get_nodes_block(other_pred);
			if (is_Bad(other_pred))
				continue;
			if (is_Block_removable(other_predb)
			    && !Block_block_visited(other_predb)) {
				int j;
				for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
					ir_node *other_predpred
						= get_Block_cfgpred_block(other_predb, j);
					if (is_pred_of(other_predpred, predb))
						goto non_dispensable;
				}
			} else if (is_pred_of(other_predb, predb)) {
				goto non_dispensable;
			}
		}
		for (i = pos + 1; i < n_cfgpreds; i++) {
			ir_node *other_predb = get_Block_cfgpred_block(b, i);
			if (is_pred_of(other_predb, predb))
				goto non_dispensable;
		}
	}
	/* we will not dispense already visited blocks */
	if (Block_block_visited(predb))
		return 1;
	/* if we get here, the block is dispensable; count useful preds */
	return get_irn_arity(predb);

non_dispensable:
	set_Block_removable(predb, false);
	return 1;
}

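/* Worked example for the test above (illustration only): given the diamond
 *
 *        if-block
 *         /   \
 *     then-b  else-b      both empty, b contains a Phi
 *         \   /
 *           b
 *
 * test_whether_dispensable(b, 0) finds then-b removable and returns its
 * arity.  For pos 1, the loop over the earlier predecessors treats then-b as
 * already merged and detects that then-b's predecessor (if-block) is also a
 * predecessor of else-b's block neighbourhood, so else-b is marked
 * non-removable and 1 is returned: one case block survives to hold the
 * copies needed to destruct the Phi in b.
 */
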
/**
 * This method merges blocks.  A block can be merged if it has exactly
 * one predecessor, that predecessor ends in an unconditional jump to
 * this block, and the block does not contain any Phi nodes.
 */
static void merge_blocks(ir_node *b, void *env)
{
	(void) env;

	if (get_Block_n_cfgpreds(b) == 1) {
		ir_node *pred = get_Block_cfgpred(b, 0);
		if (is_Jmp(pred)) {
			ir_node *pred_block = get_nodes_block(pred);
			if (get_Block_phis(b) == NULL) {
				exchange(b, pred_block);
			}
		}
	}
}

/**
 * This method removes empty blocks.  A block is empty if it only contains Phi
 * and Jmp nodes.
 *
 * We first adapt Phi nodes, then Block nodes, as we need the old ins
 * of the Block to adapt the Phi nodes.  We do this by computing new
 * in arrays, and then replacing the old ones.  So far we compute new in arrays
 * for all nodes, not regarding whether there is a possibility for optimization.
 *
 * For each predecessor p of a Block b there are three cases:
 *  - The predecessor p is a Bad node: just skip it.  The in array of b shrinks
 *    by one.
 *  - The predecessor p is empty.  Remove p.  All predecessors of p are now
 *    predecessors of b.
 *  - The predecessor p is a block containing useful code.  Just keep p as is.
 *
 * For Phi nodes f we have to check the conditions at the Block of f.
 * For cases 1 and 3 we proceed as for Blocks.  For case 2 we can have two
 * cases:
 *  -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
 *       In this case we proceed as for blocks.  We remove pred_f.  All
 *       predecessors of pred_f now are predecessors of f.
 *  -2b: The old predecessor of f is NOT in the block removed.  It might be a
 *       Phi too.  We have to replicate f for each predecessor of the removed
 *       block.  Or, in other words, the removed predecessor block has exactly
 *       one predecessor.
 *
 * Furthermore, there is a special case for self-referencing blocks:
 * @verbatim
 *
 *    then_b   else_b                    then_b  else_b
 *       \      /                           \      /
 *        \    /                            |     /
 *        pred_b                            |    /
 *         |   ____                         |   /  ____
 *         |  |    |                        |  |  |    |
 *         |  |    |  === optimized to ===> \  |  |    |
 *        loop_b   |                        loop_b     |
 *         |  |    |                         |  |      |
 *         |  |____|                         |  |______|
 *         |                                 |
 * @endverbatim
 *
 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
 * Phi in loop_b that has the ins of the Phi in pred_b and a self-referencing
 * backedge.
 */
static void optimize_blocks(ir_node *b, void *ctx)
{
	int i, j, k, n, max_preds, n_preds, p_preds = -1;
	ir_node *phi;
	ir_node *next;
	ir_node **in;
	merge_env *env = (merge_env*)ctx;

	if (get_Block_dom_depth(b) < 0) {
		/* ignore unreachable blocks */
		return;
	}

	/* Count the number of predecessors this block will have after merging
	   with those pred blocks that are empty. */
	max_preds = 0;
	for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
		max_preds += test_whether_dispensable(b, i);
	}
	in = XMALLOCN(ir_node*, max_preds);

	/*- Fix the Phi nodes of the current block -*/
	for (phi = get_Block_phis(b); phi != NULL; phi = next) {
		next = get_Phi_next(phi);

		/* Find the new predecessors for the Phi */
		p_preds = 0;
		for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
			ir_graph *irg = get_irn_irg(b);
			ir_node *predx = get_Block_cfgpred(b, i);
			ir_node *pred;

			/* case Phi 1: maintain Bads, as somebody else is responsible to
			 * remove them */
			if (is_Bad(predx)) {
				in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
				continue;
			}

			pred = get_nodes_block(predx);

			/* case Phi 2: it's an empty block and not yet visited. */
			if (is_Block_removable(pred) && !Block_block_visited(pred)) {
				ir_node *phi_pred = get_Phi_pred(phi, i);

				for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
					ir_node *pred_pred = get_Block_cfgpred(pred, j);

					if (is_Bad(pred_pred)) {
						in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
						continue;
					}

					if (get_nodes_block(phi_pred) == pred) {
						/* case Phi 2a: */
						assert(is_Phi(phi_pred));  /* Block is empty!! */

						in[p_preds++] = get_Phi_pred(phi_pred, j);
					} else {
						/* case Phi 2b: */
						in[p_preds++] = phi_pred;
					}
				}
			} else {
				/* case Phi 3: */
				in[p_preds++] = get_Phi_pred(phi, i);
			}
		}
		assert(p_preds == max_preds);

		/* Fix the node */
		if (p_preds == 1) {
			exchange(phi, in[0]);
		} else {
			set_irn_in(phi, p_preds, in);
		}
		env->changed = true;
	}

	/*- This happens only on a merge between a loop backedge and the single
	   loop entry.  Moreover, it is only needed if predb is the direct
	   dominator of b, else there can be no uses of predb's Phis ... -*/
	for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
		ir_node *pred  = get_Block_cfgpred(b, k);
		ir_node *predb = get_nodes_block(pred);
		if (is_Bad(pred))
			continue;

		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			ir_node *next_phi;

			/* we found a predecessor block at position k that will be removed */
			for (phi = get_Block_phis(predb); phi != NULL; phi = next_phi) {
				int q_preds = 0;
				next_phi = get_Phi_next(phi);

				if (get_Block_idom(b) != predb) {
					/* predb is not the dominator.  There can't be uses of
					 * predb's Phi nodes, kill them. */
					ir_graph *irg  = get_irn_irg(b);
					ir_mode  *mode = get_irn_mode(phi);
					exchange(phi, new_r_Bad(irg, mode));
				} else {
					/* predb is the direct dominator of b.  There might be uses
					 * of predb's Phi nodes in further blocks, so move this
					 * Phi from the predecessor into the block b */
					set_nodes_block(phi, b);
					set_Phi_next(phi, get_Block_phis(b));
					set_Block_phis(b, phi);
					env->phis_moved = true;

					/* first, copy all 0..k-1 predecessors */
					for (i = 0; i < k; i++) {
						ir_node *predx = get_Block_cfgpred(b, i);
						ir_node *pred_block;

						if (is_Bad(predx)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
							continue;
						}
						pred_block = get_nodes_block(predx);
						if (is_Block_removable(pred_block)
						    && !Block_block_visited(pred_block)) {
							int n_cfgpreds = get_Block_n_cfgpreds(pred_block);
							/* It's an empty block and not yet visited. */
							for (j = 0; j < n_cfgpreds; j++) {
								if (!is_Bad(get_Block_cfgpred(pred_block, j))) {
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* now we are at k, copy the phi predecessors */
					pred = get_nodes_block(get_Block_cfgpred(b, k));
					for (i = 0; i < get_Phi_n_preds(phi); i++) {
						in[q_preds++] = get_Phi_pred(phi, i);
					}

					/* and now all the rest */
					for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
						pred = get_Block_cfgpred_block(b, i);

						if (is_Bad(pred)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
						} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
							/* It's an empty block and not yet visited. */
							for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
								if (!is_Bad(get_Block_cfgpred(pred, j))) {
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* Fix the node */
					if (q_preds == 1)
						exchange(phi, in[0]);
					else
						set_irn_in(phi, q_preds, in);
					env->changed = true;

					assert(q_preds <= max_preds);
					// assert(p_preds == q_preds && "Wrong Phi Fix");
				}
			}
		}
	}

	/*- Fix the block -*/
	n_preds = 0;
	for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
		ir_node  *pred  = get_Block_cfgpred(b, i);
		ir_node  *predb = get_nodes_block(pred);
		ir_graph *irg   = get_irn_irg(pred);

		/* case 1: Bad predecessor */
		if (is_Bad(pred)) {
			in[n_preds++] = new_r_Bad(irg, mode_X);
			continue;
		}
		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			/* case 2: it's an empty block and not yet visited. */
			for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
				ir_node *predpred = get_Block_cfgpred(predb, j);

				if (is_Bad(predpred)) {
					in[n_preds++] = new_r_Bad(irg, mode_X);
					continue;
				}

				in[n_preds++] = predpred;
			}
			/* Remove block+jump, as it might be kept alive. */
			exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
			exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
		} else {
			/* case 3: */
			in[n_preds++] = pred;
		}
	}
	assert(n_preds == max_preds);

	set_irn_in(b, n_preds, in);
	env->changed = true;

	/* see if the Phi fix was correct */
	assert(get_Block_phis(b) == NULL || p_preds == -1 || (n_preds == p_preds));
	xfree(in);
}

/**
 * Optimize a boolean Cond whose true and false projections jump to the
 * same block into a Jmp.  The block must not contain any Phi nodes.
 *
 *        Cond
 *       /    \
 *  projA      projB   =>   Jmp     Bad
 *       \    /               \     /
 *       block                 block
 */
static bool optimize_pred_cond(ir_node *block, int i, int j)
{
	ir_node *projA, *projB, *cond, *pred_block, *jmp, *bad;
	assert(i != j);

	projA = get_Block_cfgpred(block, i);
	if (!is_Proj(projA)) return false;
	projB = get_Block_cfgpred(block, j);
	if (!is_Proj(projB)) return false;
	cond  = get_Proj_pred(projA);
	if (!is_Cond(cond))  return false;

	if (cond != get_Proj_pred(projB)) return false;
	if (is_switch_Cond(cond)) return false;

	/* cond should actually be a Jmp */
	pred_block = get_nodes_block(cond);
	jmp = new_r_Jmp(pred_block);
	bad = new_r_Bad(get_irn_irg(block), mode_X);

	assert(projA != projB);
	exchange(projA, jmp);
	exchange(projB, bad);
	return true;
}

typedef enum block_flags_t {
	BF_HAS_OPERATIONS         = 1 << 0,
	BF_HAS_PHIS               = 1 << 1,
	BF_IS_UNKNOWN_JUMP_TARGET = 1 << 2,
} block_flags_t;

static bool get_block_flag(const ir_nodehashmap_t *infos, const ir_node *block,
                           int flag)
{
	return PTR_TO_INT(ir_nodehashmap_get(void, infos, block)) & flag;
}

static void set_block_flag(ir_nodehashmap_t *infos, ir_node *block,
                           block_flags_t flag)
{
	int data = PTR_TO_INT(ir_nodehashmap_get(void, infos, block));
	data |= flag;
	ir_nodehashmap_insert(infos, block, INT_TO_PTR(data));
}

static void clear_block_flag(ir_nodehashmap_t *infos, const ir_node *block)
{
	ir_nodehashmap_remove(infos, block);
}

static bool has_operations(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_HAS_OPERATIONS);
}

static void set_has_operations(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_HAS_OPERATIONS);
}

static bool has_phis(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_HAS_PHIS);
}

static void set_has_phis(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_HAS_PHIS);
}

static bool is_unknown_jump_target(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_IS_UNKNOWN_JUMP_TARGET);
}

static void set_is_unknown_jump_target(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_IS_UNKNOWN_JUMP_TARGET);
}

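/* How the flag map above behaves (explanatory sketch, not part of the pass):
 * the per-block flags are packed into the hashmap's value pointer via
 * PTR_TO_INT/INT_TO_PTR, so a block without an entry simply reads as "all
 * flags clear".  A typical query then looks like:
 *
 *     if (has_phis(&infos, block) || has_operations(&infos, block))
 *         return;  // block is not empty, nothing to skip here
 */
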
/**
 * Pre-Walker: fill the block info map.
 */
static void compute_block_info(ir_node *n, void *x)
{
	ir_nodehashmap_t *infos = (ir_nodehashmap_t*)x;

	if (is_Block(n)) {
		int i, max = get_Block_n_cfgpreds(n);
		for (i = 0; i < max; i++) {
			ir_node *pred = get_Block_cfgpred(n, i);
			if (is_unknown_jump(pred)) {
				set_is_unknown_jump_target(infos, n);
			}
		}
	} else if (is_Phi(n)) {
		ir_node *block = get_nodes_block(n);
		set_has_phis(infos, block);
	} else if (is_Jmp(n) || is_Cond(n) || is_Proj(n)) {
		/* ignore */
	} else {
		ir_node *block = get_nodes_block(n);
		set_has_operations(infos, block);
	}
}

static void clear_block_info(ir_node *block, void *x)
{
	ir_nodehashmap_t *infos = (ir_nodehashmap_t*)x;
	clear_block_flag(infos, block);
}

typedef struct skip_env {
	bool             changed;
	ir_nodehashmap_t block_infos;
} skip_env;

/**
 * Post-Block-walker: Optimize useless ifs (boolean Cond nodes
 * with the same true/false target) away.
 */
static void optimize_ifs(ir_node *block, void *x)
{
	skip_env *env = (skip_env*)x;
	int i, j;
	int n_preds = get_Block_n_cfgpreds(block);

	if (has_phis(&env->block_infos, block))
		return;

	/* optimize Cond predecessors (might produce Bad predecessors) */
	for (i = 0; i < n_preds; ++i) {
		for (j = i+1; j < n_preds; ++j) {
			optimize_pred_cond(block, i, j);
		}
	}
}

/**
 * Pre-Block walker: remove empty blocks (which only contain a Jmp)
 * that are control flow predecessors of the current block.
 */
static void remove_empty_blocks(ir_node *block, void *x)
{
	skip_env *env = (skip_env*)x;
	int i;
	int n_preds = get_Block_n_cfgpreds(block);

	for (i = 0; i < n_preds; ++i) {
		ir_node *jmp, *jmp_block;
		int n_jpreds = 0;

		jmp = get_Block_cfgpred(block, i);
		if (!is_Jmp(jmp))
			continue;
		jmp_block = get_nodes_block(jmp);
		if (jmp_block == block)
			continue; /* this infinite loop cannot be optimized any further */
		if (is_unknown_jump_target(&env->block_infos, jmp_block))
			continue; /* unknown jump target must not be optimized */
		if (has_phis(&env->block_infos, jmp_block))
			continue; /* this block contains Phis and is not skipped */
		if (Block_block_visited(jmp_block)) {
			continue;
			/* otherwise we could break the walker
			 * if block was reached via
			 *     KeepAlive edge -> jmp_block -> A ---> block,
			 * because the walker cannot handle Id nodes.
			 *
			 *    A     B
			 *     \   /
			 *   jmp_block
			 *     /   \
			 *  block   End
			 */
		}
		/* jmp_block is an empty block and can be optimized! */

		n_jpreds = get_Block_n_cfgpreds(jmp_block);
		/*
		 * If the jmp block has only one predecessor this is straightforward.
		 * However, if there are more predecessors, we only handle this
		 * if block has no Phis.
		 */
		if (n_jpreds == 1) {
			ir_node *pred       = get_Block_cfgpred(jmp_block, 0);
			ir_node *pred_block = get_nodes_block(pred);
			if (has_operations(&env->block_infos, jmp_block)) {
				if (get_irg_start_block(get_irn_irg(pred_block)) == pred_block)
					continue; /* must not merge operations into start block */
				if (!is_Jmp(pred))
					continue; /* must not create partially dead code, especially when it is mode_M */
			}

			/* skip jmp block by rerouting its predecessor to block
			 *
			 *      A              A
			 *      |              |
			 *  jmp_block   =>     |
			 *      |              |
			 *    block          block
			 */
			exchange(jmp, pred);

			/* cleanup: jmp_block might have a Keep edge! */
			exchange(jmp_block, pred_block);
			env->changed = true;
		} else if (!has_phis(&env->block_infos, block) &&
		           !has_operations(&env->block_infos, jmp_block)) {
			/* all predecessors can skip the jmp block, so block gets some new
			 * predecessors
			 *
			 *   A     B                    A  B
			 *    \   /                     |  |
			 *  jmp_block  C   =>   Bad  C  |  |
			 *       \   /            \  |  | /
			 *       block              block
			 */
			ir_node **ins = ALLOCAN(ir_node*, n_preds+n_jpreds);
			int j;
			/* first copy the old predecessors, because the outer loop (i)
			 * still walks over them */
			for (j = 0; j < n_preds; ++j) {
				ins[j] = get_Block_cfgpred(block, j);
			}
			/* now append the new predecessors */
			for (j = 0; j < n_jpreds; ++j) {
				ir_node *pred = get_Block_cfgpred(jmp_block, j);
				ins[n_preds+j] = pred;
			}
			set_irn_in(block, n_preds+n_jpreds, ins);
			/* convert the jmp_block to Bad */
			ir_graph *irg = get_irn_irg(block);
			exchange(jmp_block, new_r_Bad(irg, mode_BB));
			exchange(jmp, new_r_Bad(irg, mode_X));
			/* let the outer loop walk over the new predecessors as well */
			n_preds += n_jpreds;
			env->changed = true;
			// TODO What if jmp_block had a KeepAlive edge?
		} else {
			/* This would involve Phis ... */
		}
	}
}

/*
 * All cfg optimizations that do not touch Phi nodes.
 *
 * Note that this might create critical edges.
 */
static void cfgopt_ignoring_phis(ir_graph *irg)
{
	skip_env env;

	env.changed = true;
	ir_nodehashmap_init(&env.block_infos);

	while (env.changed) {
		irg_walk_graph(irg, compute_block_info, NULL, &env.block_infos);
		env.changed = false;

		/* Remove blocks that only consist of a Jmp */
		irg_block_walk_graph(irg, remove_empty_blocks, NULL, &env);

		/* Optimize Cond->Jmp, where then- and else-block are the same. */
		irg_block_walk_graph(irg, NULL, optimize_ifs, &env);

		if (env.changed) {
			confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
			/* clear block info, because it must be recomputed */
			irg_block_walk_graph(irg, clear_block_info, NULL, &env.block_infos);
			/* Removing blocks and Conds might enable more optimizations */
			continue;
		} else {
			confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
			break;
		}
	}

	ir_nodehashmap_destroy(&env.block_infos);
}

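/* Note on the driver loop above: it iterates to a fixed point because the
 * two walkers can expose new opportunities for each other (removing an empty
 * block can make a Cond's two projections target the same block, and vice
 * versa).  After a change, only IR_GRAPH_PROPERTIES_NONE is confirmed, i.e.
 * no analysis info is claimed to survive, and the block-info map is cleared
 * and recomputed on the next round.
 */
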
/* Optimizations of the control flow that also require changes of Phi nodes. */
void optimize_cf(ir_graph *irg)
{
	int i, j, n;
	ir_node **in = NULL;
	ir_node *end = get_irg_end(irg);
	ir_node *new_end;
	merge_env env;

	env.changed    = false;
	env.phis_moved = false;

	/* if the graph is not pinned, we cannot determine empty blocks */
	assert(get_irg_pinned(irg) != op_pin_state_floats &&
	       "Control flow optimization needs a pinned graph");

	assure_irg_properties(irg, IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE);

	/* First the "simple" optimizations, which do not touch Phis */
	cfgopt_ignoring_phis(irg);

	/* we use the mark flag to mark removable blocks */
	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                     | IR_RESOURCE_PHI_LIST);

	/*
	 * This pass collects all Phi nodes in a link list in the block
	 * nodes.  Further it performs simple control flow optimizations.
	 * Finally it marks all blocks that do not contain useful
	 * computations, i.e., these blocks might be removed.
	 */
	irg_walk(end, clear_link_and_mark_blocks_removable, collect_nodes, NULL);

	/* state after collect_nodes:
	 * 1. removable blocks are now marked as such
	 * 2. Phi lists are up to date
	 */

	/* Optimize the standard code.
	 * It walks only over block nodes and adapts these and the Phi nodes in
	 * these blocks, which it finds in a linked list computed before.
	 */
	assure_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
	irg_block_walk_graph(irg, optimize_blocks, merge_blocks, &env);

	new_end = optimize_in_place(end);
	if (new_end != end) {
		set_irg_end(irg, new_end);
		end = new_end;
	}
	remove_End_Bads_and_doublets(end);

	ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                  | IR_RESOURCE_PHI_LIST);

	if (env.phis_moved) {
		/* Bad: when we moved Phis, we might produce dead Phi nodes
		   that are kept alive.
		   Some other phases cannot cope with this, so kill them.
		 */
		n = get_End_n_keepalives(end);
		if (n > 0) {
			NEW_ARR_A(ir_node *, in, n);
			assure_irg_outs(irg);

			for (i = j = 0; i < n; ++i) {
				ir_node *ka = get_End_keepalive(end, i);

				if (is_Phi(ka)) {
					int k;

					for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
						ir_node *user = get_irn_out(ka, k);

						if (user != ka && user != end) {
							/* Is it a real user or just a self loop? */
							break;
						}
					}
					if (k >= 0)
						in[j++] = ka;
				} else
					in[j++] = ka;
			}
			if (j != n) {
				set_End_keepalives(end, j, in);
				env.changed = true;
			}
		}
	}

	confirm_irg_properties(irg,
		env.changed ? IR_GRAPH_PROPERTIES_NONE : IR_GRAPH_PROPERTIES_ALL);
}

/* Creates an ir_graph pass for optimize_cf. */
ir_graph_pass_t *optimize_cf_pass(const char *name)
{
	return def_graph_pass(name ? name : "optimize_cf", optimize_cf);
}
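
/* Usage sketch (illustration only, assuming an already constructed
 * ir_graph *irg):
 *
 *     optimize_cf(irg);                                // run directly
 *
 * or, when driving optimizations via the pass infrastructure declared in
 * irpass.h:
 *
 *     ir_graph_pass_t *pass = optimize_cf_pass(NULL);  // default name
 */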