/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   Control flow optimizations.
 * @author  Goetz Lindenmaier, Michael Beck, Sebastian Hack
 *
 * Removes Bad control flow predecessors and empty blocks.  A block is empty
 * if it contains only a Jmp node. Blocks can only be removed if they are not
 * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
 * blocks (even if we could move the label).
 */
#include "config.h"

#include "iroptimize.h"

#include <assert.h>
#include <stdbool.h>

#include "xmalloc.h"
#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "ircons.h"
#include "iropt_t.h"
#include "irgwalk.h"
#include "irgmod.h"
#include "irdump.h"
#include "irverify.h"
#include "iredges.h"

#include "array_t.h"

#include "irouts.h"
#include "irbackedge_t.h"

#include "irflag_t.h"
#include "firmstat.h"
#include "irpass.h"
#include "irnodehashmap.h"
#include "irtools.h"

#include "iropt_dbg.h"

/** An environment for merge_blocks and collect nodes. */
typedef struct merge_env {
	bool      changed;      /**< Set if the graph was changed. */
	bool      phis_moved;   /**< Set if Phi nodes were moved. */
} merge_env;

/** Set or reset the removable property of a block. */
static void set_Block_removable(ir_node *block, bool removable)
{
	set_Block_mark(block, removable);
}

/** Check if a block has the removable property set. */
static bool is_Block_removable(const ir_node *block)
{
	return get_Block_mark(block);
}

/** Checks if a given Cond node is a switch Cond. */
static bool is_switch_Cond(const ir_node *cond)
{
	ir_node *sel = get_Cond_selector(cond);
	return get_irn_mode(sel) != mode_b;
}
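
/* For illustration (a sketch, not used by the code below): a boolean Cond
 * selects between exactly two X projections on a mode_b value,
 *
 *     ir_node *t = new_r_Proj(cond, mode_X, pn_Cond_true);
 *     ir_node *f = new_r_Proj(cond, mode_X, pn_Cond_false);
 *
 * while a switch Cond has a selector of some integer mode.  The mode test
 * above is all this file relies on to tell the two apart. */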

/** Walker: clear link fields and mark all blocks as removable. */
static void clear_link_and_mark_blocks_removable(ir_node *node, void *ctx)
{
	(void) ctx;
	set_irn_link(node, NULL);
	if (is_Block(node)) {
		set_Block_removable(node, true);
		set_Block_phis(node, NULL);
	} else if (is_Phi(node)) {
		set_Phi_next(node, NULL);
	}
}

/**
 * Collects all Phi nodes in the link list of their Block.
 * Marks all blocks non-removable if they contain a node other
 * than Jmp (and Proj).
 * Links all Proj nodes to their predecessors.
 */
static void collect_nodes(ir_node *n, void *ctx)
{
	(void) ctx;
	if (is_Phi(n)) {
		/* Collect Phi nodes to compact ins along with block's ins. */
		ir_node *block = get_nodes_block(n);
		add_Block_phi(block, n);
	} else if (is_Block(n)) {
		if (get_Block_entity(n) != NULL) {
			/* a block with a jump label attached cannot be removed. */
			set_Block_removable(n, false);
		}
	} else if (is_Bad(n) || is_Jmp(n)) {
		/* ignore these */
		return;
	} else {
		/* Check for non-empty block. */
		ir_node *block = get_nodes_block(n);

		set_Block_removable(block, false);

		if (is_Proj(n)) {
			/* link Proj nodes */
			ir_node *pred = get_Proj_pred(n);
			set_irn_link(n, get_irn_link(pred));
			set_irn_link(pred, n);
		}
	}
}
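
/* The Proj links set up above thread a singly linked list through the link
 * fields, with the list head stored in the predecessor.  A sketch of how
 * such a list can be walked (illustrative only):
 *
 *     for (ir_node *proj = (ir_node*)get_irn_link(pred); proj != NULL;
 *          proj = (ir_node*)get_irn_link(proj)) {
 *         ... visit proj ...
 *     }
 */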

/** Returns true if pred is a predecessor of block b. */
static bool is_pred_of(const ir_node *pred, const ir_node *b)
{
	int i;

	for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
		ir_node *b_pred = get_Block_cfgpred_block(b, i);
		if (b_pred == pred)
			return true;
	}
	return false;
}

/** Test whether we can optimize away the pred block at position pos of b.
 *
 *  @param  b    A block node.
 *  @param  pos  The position of the predecessor block to judge about.
 *
 *  @returns     The number of predecessors b gains at this position:
 *               1 if the predecessor block must be kept, otherwise the
 *               number of the dispensable block's own predecessors.
 *
 *  The test is rather tricky.
 *
 *  The situation is something like the following:
 *  @verbatim
 *                 if-block
 *                  /   \
 *              then-b  else-b
 *                  \   /
 *                    b
 *  @endverbatim
 *
 *  b merges the control flow of an if-then-else.  We may not remove
 *  the 'then' _and_ the 'else' block of an 'if' if there is a Phi
 *  node in b, even if both are empty.  The destruction of this Phi
 *  requires that a copy is added before the merge.  We have to
 *  keep one of the case blocks to place the copies in.
 *
 *  To perform the test for pos, we must regard predecessors before pos
 *  as already removed.
 */
static unsigned test_whether_dispensable(const ir_node *b, int pos)
{
	ir_node *pred  = get_Block_cfgpred(b, pos);
	ir_node *predb = get_nodes_block(pred);

	if (is_Bad(pred) || !is_Block_removable(predb))
		return 1;

	/* can't remove self-loops */
	if (predb == b)
		goto non_dispensable;
	if (is_unknown_jump(pred))
		goto non_dispensable;

	/* Seems to be empty. At least we detected this in collect_nodes. */
	if (get_Block_phis(b) != NULL) {
		int n_cfgpreds = get_Block_n_cfgpreds(b);
		int i;
		/* there are Phi nodes */

		/* b's pred blocks and pred's pred blocks must be pairwise disjoint.
		 * Handle all pred blocks with preds < pos as if they were already
		 * removed. */
		for (i = 0; i < pos; i++) {
			ir_node *other_pred  = get_Block_cfgpred(b, i);
			ir_node *other_predb = get_nodes_block(other_pred);
			if (is_Bad(other_pred))
				continue;
			if (is_Block_removable(other_predb)
			    && !Block_block_visited(other_predb)) {
				int j;
				for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
					ir_node *other_predpred
						= get_Block_cfgpred_block(other_predb, j);
					if (is_pred_of(other_predpred, predb))
						goto non_dispensable;
				}
			} else if (is_pred_of(other_predb, predb)) {
				goto non_dispensable;
			}
		}
		for (i = pos+1; i < n_cfgpreds; i++) {
			ir_node *other_predb = get_Block_cfgpred_block(b, i);
			if (is_pred_of(other_predb, predb))
				goto non_dispensable;
		}
	}
	/* we will not dispense with already visited blocks */
	if (Block_block_visited(predb))
		return 1;
	/* if we get here, the block is dispensable; count useful preds */
	return get_irn_arity(predb);

non_dispensable:
	set_Block_removable(predb, false);
	return 1;
}
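
/* A concrete instance of the situation sketched above (illustrative):
 *
 *     x = c ? 1 : 2;
 *
 * yields an if-block with two empty case blocks and a Phi(1, 2) in the join
 * block b.  If both case blocks were removed, the copies created when that
 * Phi is destroyed later would have no block to live in, so one of the two
 * is marked non-removable here. */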

/**
 * This method merges blocks. A block can be merged if it has exactly
 * one predecessor, that predecessor ends in an unconditional jump to
 * this block, and the block does not contain any Phis.
 */
static void merge_blocks(ir_node *b, void *env)
{
	(void) env;

	if (get_Block_n_cfgpreds(b) == 1) {
		ir_node *pred = get_Block_cfgpred(b, 0);
		if (is_Jmp(pred)) {
			ir_node *pred_block = get_nodes_block(pred);
			if (get_Block_phis(b) == NULL) {
				exchange(b, pred_block);
			}
		}
	}
}
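
/* Sketch of the transformation performed above (b must be Phi-free):
 *
 *   pred_block               pred_block
 *       |
 *      Jmp          =>    (all users of b now point
 *       |                  to pred_block; b is gone)
 *       b
 */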

/**
 * This method removes empty blocks.  A block is empty if it only contains Phi
 * and Jmp nodes.
 *
 * We first adapt Phi nodes, then Block nodes, as we need the old ins
 * of the Block to adapt the Phi nodes.  We do this by computing new
 * in arrays, and then replacing the old ones.  So far we compute new in
 * arrays for all nodes, regardless of whether there is a possibility for
 * optimization.
 *
 * For each predecessor p of a Block b there are three cases:
 *  - The predecessor p is a Bad node: just skip it. The in array of b shrinks
 *    by one.
 *  - The predecessor p is empty. Remove p. All predecessors of p are now
 *    predecessors of b.
 *  - The predecessor p is a block containing useful code. Just keep p as is.
 *
 * For Phi nodes f we have to check the conditions at the Block of f.
 * For cases 1 and 3 we proceed as for Blocks.  For case 2 we can have two
 * cases:
 *  -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
 *       In this case we proceed as for blocks. We remove pred_f.  All
 *       predecessors of pred_f now are predecessors of f.
 *  -2b: The old predecessor of f is NOT in the removed block. It might be a
 *       Phi, too. We have to replicate its operand for each predecessor of
 *       the removed block. Or, in other words, the removed predecessor block
 *       has exactly one predecessor.
 *
 * Further there is a special case for self-referencing blocks:
 * @verbatim
 *
 *    then_b     else_b                              then_b  else_b
 *       \      /                                      \      /
 *        \    /                                        |    /
 *        pred_b                                        |   /
 *         |   ____                                     |  /  ____
 *         |  |    |                                    |  | |    |
 *         |  |    |       === optimized to ===>        \  | |    |
 *        loop_b   |                                     loop_b   |
 *         |  |    |                                      |  |    |
 *         |  |____|                                      |  |____|
 *         |                                              |
 * @endverbatim
 *
 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
 * Phi in loop_b that has the ins of the Phi in pred_b and a self-referencing
 * backedge.
 */
static void optimize_blocks(ir_node *b, void *ctx)
{
	int i, j, k, n, max_preds, n_preds, p_preds = -1;
	ir_node *phi;
	ir_node *next;
	ir_node **in;
	merge_env *env = (merge_env*)ctx;

	if (get_Block_dom_depth(b) < 0) {
		/* ignore unreachable blocks */
		return;
	}

	/* Count the number of predecessors this block has if it is merged with
	   pred blocks that are empty. */
	max_preds = 0;
	for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
		max_preds += test_whether_dispensable(b, i);
	}
	in = XMALLOCN(ir_node*, max_preds);

	/*- Fix the Phi nodes of the current block -*/
	for (phi = get_Block_phis(b); phi != NULL; phi = next) {
		next = get_Phi_next(phi);

		/* Find the new predecessors for the Phi */
		p_preds = 0;
		for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
			ir_graph *irg = get_irn_irg(b);
			ir_node *predx = get_Block_cfgpred(b, i);
			ir_node *pred;

			/* case Phi 1: maintain Bads, as somebody else is responsible
			 * for removing them */
			if (is_Bad(predx)) {
				in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
				continue;
			}

			pred = get_nodes_block(predx);

			/* case Phi 2: It's an empty block and not yet visited. */
			if (is_Block_removable(pred) && !Block_block_visited(pred)) {
				ir_node *phi_pred = get_Phi_pred(phi, i);

				for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
					ir_node *pred_pred = get_Block_cfgpred(pred, j);

					if (is_Bad(pred_pred)) {
						in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
						continue;
					}

					if (get_nodes_block(phi_pred) == pred) {
						/* case Phi 2a: */
						assert(is_Phi(phi_pred));  /* Block is empty!! */

						in[p_preds++] = get_Phi_pred(phi_pred, j);
					} else {
						/* case Phi 2b: */
						in[p_preds++] = phi_pred;
					}
				}
			} else {
				/* case Phi 3: */
				in[p_preds++] = get_Phi_pred(phi, i);
			}
		}
		assert(p_preds == max_preds);

		/* Fix the node */
		if (p_preds == 1) {
			exchange(phi, in[0]);
		} else {
			set_irn_in(phi, p_preds, in);
		}
		env->changed = true;
	}

	/*- This happens only on a merge between a loop backedge and a single
	    loop entry.  Moreover, it is only needed if predb is the direct
	    dominator of b, else there can be no uses of predb's Phis ... -*/
	for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
		ir_node *pred  = get_Block_cfgpred(b, k);
		ir_node *predb = get_nodes_block(pred);
		if (is_Bad(pred))
			continue;

		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			ir_node *next_phi;

			/* we found a predecessor block at position k that will be removed */
			for (phi = get_Block_phis(predb); phi != NULL; phi = next_phi) {
				int q_preds = 0;
				next_phi = get_Phi_next(phi);

				if (get_Block_idom(b) != predb) {
					/* predb is not the dominator. There can't be uses of
					 * predb's Phi nodes, kill them. */
					ir_graph *irg  = get_irn_irg(b);
					ir_mode  *mode = get_irn_mode(phi);
					exchange(phi, new_r_Bad(irg, mode));
				} else {
					/* predb is the direct dominator of b. There might be uses
					 * of predb's Phi nodes in later blocks, so move this Phi
					 * from the predecessor into block b */
					set_nodes_block(phi, b);
					set_Phi_next(phi, get_Block_phis(b));
					set_Block_phis(b, phi);
					env->phis_moved = true;

					/* first, copy all predecessors 0..k-1 */
					for (i = 0; i < k; i++) {
						ir_node *predx = get_Block_cfgpred(b, i);
						ir_node *pred_block;

						if (is_Bad(predx)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
							continue;
						}
						pred_block = get_nodes_block(predx);
						if (is_Block_removable(pred_block)
						           && !Block_block_visited(pred_block)) {
							int n_cfgpreds = get_Block_n_cfgpreds(pred_block);
							/* It's an empty block and not yet visited. */
							for (j = 0; j < n_cfgpreds; j++) {
								if (!is_Bad(get_Block_cfgpred(pred_block, j))) {
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* now we are at k, copy the phi predecessors */
					pred = get_nodes_block(get_Block_cfgpred(b, k));
					for (i = 0; i < get_Phi_n_preds(phi); i++) {
						in[q_preds++] = get_Phi_pred(phi, i);
					}

					/* and now all the rest */
					for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
						pred = get_Block_cfgpred_block(b, i);

						if (is_Bad(pred)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
						} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
							/* It's an empty block and not yet visited. */
							for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
								if (!is_Bad(get_Block_cfgpred(pred, j))) {
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* Fix the node */
					if (q_preds == 1)
						exchange(phi, in[0]);
					else
						set_irn_in(phi, q_preds, in);
					env->changed = true;

					assert(q_preds <= max_preds);
					// assert(p_preds == q_preds && "Wrong Phi Fix");
				}
			}
		}
	}

	/*- Fix the block -*/
	n_preds = 0;
	for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
		ir_node *pred  = get_Block_cfgpred(b, i);
		ir_node *predb = get_nodes_block(pred);
		ir_graph *irg  = get_irn_irg(pred);

		/* case 1: Bad predecessor */
		if (is_Bad(pred)) {
			in[n_preds++] = new_r_Bad(irg, mode_X);
			continue;
		}
		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			/* case 2: It's an empty block and not yet visited. */
			for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
				ir_node *predpred = get_Block_cfgpred(predb, j);

				if (is_Bad(predpred)) {
					in[n_preds++] = new_r_Bad(irg, mode_X);
					continue;
				}

				in[n_preds++] = predpred;
			}
			/* Remove block+jump as it might be kept alive. */
			exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
			exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
		} else {
			/* case 3: */
			in[n_preds++] = pred;
		}
	}
	assert(n_preds == max_preds);

	set_irn_in(b, n_preds, in);
	env->changed = true;

	/* see if the Phi fix was correct */
	assert(get_Block_phis(b) == NULL || p_preds == -1 || (n_preds == p_preds));
	xfree(in);
}

/**
 * Optimize boolean Conds where the true and the false projection jump to
 * the same block, turning the Cond into a Jmp.
 * The block must contain no Phi nodes.
 *
 *        Cond
 *       /    \
 *  projA      projB   =>   Jmp     Bad
 *       \    /                \   /
 *       block                 block
 */
static bool optimize_pred_cond(ir_node *block, int i, int j)
{
	ir_node *projA, *projB, *cond, *pred_block, *jmp, *bad;
	assert(i != j);

	projA = get_Block_cfgpred(block, i);
	if (!is_Proj(projA)) return false;
	projB = get_Block_cfgpred(block, j);
	if (!is_Proj(projB)) return false;
	cond  = get_Proj_pred(projA);
	if (!is_Cond(cond))  return false;

	if (cond != get_Proj_pred(projB)) return false;
	if (is_switch_Cond(cond)) return false;

	/* cond should actually be a Jmp */
	pred_block = get_nodes_block(cond);
	jmp = new_r_Jmp(pred_block);
	bad = new_r_Bad(get_irn_irg(block), mode_X);

	assert(projA != projB);
	exchange(projA, jmp);
	exchange(projB, bad);
	return true;
}

typedef enum block_flags_t {
	BF_HAS_OPERATIONS         = 1 << 0,
	BF_HAS_PHIS               = 1 << 1,
	BF_IS_UNKNOWN_JUMP_TARGET = 1 << 2,
} block_flags_t;

static bool get_block_flag(const ir_nodehashmap_t *infos, const ir_node *block,
                           int flag)
{
	return PTR_TO_INT(ir_nodehashmap_get(void, infos, block)) & flag;
}

static void set_block_flag(ir_nodehashmap_t *infos, ir_node *block,
                           block_flags_t flag)
{
	int data = PTR_TO_INT(ir_nodehashmap_get(void, infos, block));
	data |= flag;
	ir_nodehashmap_insert(infos, block, INT_TO_PTR(data));
}
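
/* Note: the flags are packed directly into the hashmap's value pointer via
 * PTR_TO_INT/INT_TO_PTR, so a block without an entry reads as 0 (no flags
 * set).  A small usage sketch (illustrative only):
 *
 *     set_block_flag(infos, block, BF_HAS_PHIS);
 *     bool phis = get_block_flag(infos, block, BF_HAS_PHIS);  // -> true
 */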

static void clear_block_flag(ir_nodehashmap_t *infos, const ir_node *block)
{
	ir_nodehashmap_remove(infos, block);
}

static bool has_operations(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_HAS_OPERATIONS);
}

static void set_has_operations(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_HAS_OPERATIONS);
}

static bool has_phis(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_HAS_PHIS);
}

static void set_has_phis(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_HAS_PHIS);
}

static bool is_unknown_jump_target(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_IS_UNKNOWN_JUMP_TARGET);
}

static void set_is_unknown_jump_target(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_IS_UNKNOWN_JUMP_TARGET);
}

/**
 * Pre-Walker: fill in the block info.
 */
static void compute_block_info(ir_node *n, void *x)
{
	ir_nodehashmap_t *infos = (ir_nodehashmap_t*)x;

	if (is_Block(n)) {
		int i, max = get_Block_n_cfgpreds(n);
		for (i = 0; i < max; i++) {
			ir_node *pred = get_Block_cfgpred(n, i);
			if (is_unknown_jump(pred)) {
				set_is_unknown_jump_target(infos, n);
			}
		}
	} else if (is_Phi(n)) {
		ir_node *block = get_nodes_block(n);
		set_has_phis(infos, block);
	} else if (is_Jmp(n) || is_Cond(n) || is_Proj(n)) {
		/* ignore */
	} else {
		ir_node *block = get_nodes_block(n);
		set_has_operations(infos, block);
	}
}

static void clear_block_info(ir_node *block, void *x)
{
	ir_nodehashmap_t *infos = (ir_nodehashmap_t*)x;
	clear_block_flag(infos, block);
}

typedef struct skip_env {
	bool             changed;
	ir_nodehashmap_t block_infos;
} skip_env;

/**
 * Post-Block-walker: optimize useless ifs (boolean Cond nodes
 * with the same true/false target) away.
 */
static void optimize_ifs(ir_node *block, void *x)
{
	skip_env *env = (skip_env*)x;
	int i, j;
	int n_preds = get_Block_n_cfgpreds(block);

	if (has_phis(&env->block_infos, block))
		return;

	/* optimize Cond predecessors (might produce Bad predecessors) */
	for (i = 0; i < n_preds; ++i) {
		for (j = i+1; j < n_preds; ++j) {
			optimize_pred_cond(block, i, j);
		}
	}
}
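
/* A source-level instance of the pattern handled above (illustrative):
 *
 *     if (c) { } else { }
 *
 * Both branches are empty, so after earlier cleanups the true and the false
 * projection of the Cond end up on the same block, and optimize_pred_cond()
 * replaces the Cond by a Jmp plus a Bad predecessor. */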

/**
 * Pre-Block walker: remove empty blocks (that only contain a Jmp)
 * which are control flow predecessors of the current block.
 */
static void remove_empty_blocks(ir_node *block, void *x)
{
	skip_env *env = (skip_env*)x;
	int i;
	int n_preds = get_Block_n_cfgpreds(block);

	for (i = 0; i < n_preds; ++i) {
		ir_node *jmp, *jmp_block;
		int n_jpreds = 0;

		jmp = get_Block_cfgpred(block, i);
		if (!is_Jmp(jmp))
			continue;
		jmp_block = get_nodes_block(jmp);
		if (jmp_block == block)
			continue; /* this infinite loop cannot be optimized any further */
		if (is_unknown_jump_target(&env->block_infos, jmp_block))
			continue; /* unknown jump target must not be optimized */
		if (has_phis(&env->block_infos, jmp_block))
			continue; /* this block contains Phis and is not skipped */
		if (Block_block_visited(jmp_block)) {
			continue;
			/* otherwise we could break the walker,
			 * if block was reached via
			 *     KeepAlive edge -> jmp_block -> A ---> block,
			 * because the walker cannot handle Id nodes.
			 *
			 *   A      B
			 *    \    /
			 *   jmp_block
			 *    /    \
			 * block    End
			 */
		}

		/* jmp_block is an empty block and can be optimized! */

		n_jpreds = get_Block_n_cfgpreds(jmp_block);
		/* If the jmp block has only one predecessor this is straightforward.
		 * However, if there are more predecessors, we only handle this case
		 * if block has no Phis. */
		if (n_jpreds == 1) {
			ir_node *pred        = get_Block_cfgpred(jmp_block, 0);
			ir_node *pred_block  = get_nodes_block(pred);
			if (has_operations(&env->block_infos, jmp_block)) {
				if (get_irg_start_block(get_irn_irg(pred_block)) == pred_block)
					continue; /* must not merge operations into the start block */
				if (!is_Jmp(pred))
					continue; /* must not create partially dead code, especially when it is mode_M */
			}

			/* skip jmp_block by rerouting its predecessor to block
			 *
			 *     A              A
			 *     |              |
			 *  jmp_block   =>    |
			 *     |              |
			 *   block          block
			 */
			exchange(jmp, pred);

			/* cleanup: jmp_block might have a Keep edge! */
			exchange(jmp_block, pred_block);
			env->changed = true;
		} else if (!has_phis(&env->block_infos, block) &&
		           !has_operations(&env->block_infos, jmp_block))
		{
			/* all predecessors can skip the jmp block, so block gets some new
			 * predecessors
			 *
			 *  A     B                 A  B
			 *   \   /                  |  |
			 * jmp_block  C  =>  Bad  C |  |
			 *      \    /          \ | | /
			 *      block            block
			 */
			ir_node **ins = ALLOCAN(ir_node*, n_preds+n_jpreds);
			int j;
			/* first copy the old predecessors, because the outer loop (i)
			 * still walks over them */
			for (j = 0; j < n_preds; ++j) {
				ins[j] = get_Block_cfgpred(block, j);
			}
			/* now append the new predecessors */
			for (j = 0; j < n_jpreds; ++j) {
				ir_node *pred = get_Block_cfgpred(jmp_block, j);
				ins[n_preds+j] = pred;
			}
			set_irn_in(block, n_preds+n_jpreds, ins);
			/* convert the jmp_block to Bad */
			ir_graph *irg = get_irn_irg(block);
			exchange(jmp_block, new_r_Bad(irg, mode_BB));
			exchange(jmp, new_r_Bad(irg, mode_X));
			/* let the outer loop walk over the new predecessors as well */
			n_preds += n_jpreds;
			env->changed = true;
			// TODO What if jmp_block had a KeepAlive edge?
		} else {
			/* This would involve Phis ... */
		}
	}
}

/*
 * All CFG optimizations that do not touch Phi nodes.
 *
 * Note that this might create critical edges.
 */
static void cfgopt_ignoring_phis(ir_graph *irg)
{
	skip_env env;

	env.changed = true;
	ir_nodehashmap_init(&env.block_infos);

	while (env.changed) {
		irg_walk_graph(irg, compute_block_info, NULL, &env.block_infos);
		env.changed = false;

		/* Remove blocks that only consist of a Jmp */
		irg_block_walk_graph(irg, remove_empty_blocks, NULL, &env);

		/* Optimize Cond->Jmp, where then- and else-block are the same. */
		irg_block_walk_graph(irg, NULL, optimize_ifs, &env);

		if (env.changed) {
			confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
			/* clear the block info, because it must be recomputed */
			irg_block_walk_graph(irg, clear_block_info, NULL, &env.block_infos);
			/* Removing blocks and Conds might enable more optimizations */
			continue;
		} else {
			confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
			break;
		}
	}

	ir_nodehashmap_destroy(&env.block_infos);
}
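
/* Illustration of the note above (a sketch): skipping an empty block
 * between a two-way Cond and a join block creates a critical edge, i.e. an
 * edge from a block with several successors to a block with several
 * predecessors:
 *
 *      Cond                  Cond
 *     /    \                /    \
 *  empty    B     =>       |      B
 *     \    /                \    /
 *      join                  join
 *
 * Passes that require the absence of critical edges have to re-establish
 * that property afterwards (e.g. with remove_critical_cf_edges()). */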

/* Optimizations of the control flow that also require changes of Phi nodes. */
void optimize_cf(ir_graph *irg)
{
	int i, j, n;
	ir_node **in = NULL;
	ir_node *end = get_irg_end(irg);
	ir_node *new_end;
	merge_env env;

	env.changed    = false;
	env.phis_moved = false;

	/* if the graph is not pinned, we cannot determine empty blocks */
	assert(get_irg_pinned(irg) != op_pin_state_floats &&
	       "Control flow optimization needs a pinned graph");

	assure_irg_properties(irg, IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE);

	/* First the "simple" optimizations, which do not touch Phis */
	cfgopt_ignoring_phis(irg);

	/* we use the mark flag to mark removable blocks */
	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                     | IR_RESOURCE_PHI_LIST);

	/*
	 * This pass collects all Phi nodes in a link list in the block
	 * nodes.  Further it performs simple control flow optimizations.
	 * Finally it marks all blocks that do not contain useful
	 * computations, i.e., these blocks might be removed.
	 */
	irg_walk(end, clear_link_and_mark_blocks_removable, collect_nodes, NULL);

	/* Invariants established by collect_nodes:
	 * 1. removable blocks are marked as such
	 * 2. the Phi lists are up to date
	 */

	/* Optimize the standard code.
	 * It walks only over block nodes and adapts these and the Phi nodes in
	 * these blocks, which it finds in a linked list computed before.
	 */
	assure_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
	irg_block_walk_graph(irg, optimize_blocks, merge_blocks, &env);

	new_end = optimize_in_place(end);
	if (new_end != end) {
		set_irg_end(irg, new_end);
		end = new_end;
	}
	remove_End_Bads_and_doublets(end);

	ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                  | IR_RESOURCE_PHI_LIST);

	if (env.phis_moved) {
		/* Bad: when we moved Phis, we might have produced dead Phi nodes
		   that are kept alive.  Some other phases cannot cope with this,
		   so kill them.
		 */
		n = get_End_n_keepalives(end);
		if (n > 0) {
			NEW_ARR_A(ir_node *, in, n);
			assure_irg_outs(irg);

			for (i = j = 0; i < n; ++i) {
				ir_node *ka = get_End_keepalive(end, i);

				if (is_Phi(ka)) {
					int k;

					for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
						ir_node *user = get_irn_out(ka, k);

						if (user != ka && user != end) {
							/* Is it a real user or just a self-loop? */
							break;
						}
					}
					if (k >= 0)
						in[j++] = ka;
				} else
					in[j++] = ka;
			}
			if (j != n) {
				set_End_keepalives(end, j, in);
				env.changed = true;
			}
		}
	}

	confirm_irg_properties(irg,
		env.changed ? IR_GRAPH_PROPERTIES_NONE : IR_GRAPH_PROPERTIES_ALL);
}

/* Creates an ir_graph pass for optimize_cf. */
ir_graph_pass_t *optimize_cf_pass(const char *name)
{
	return def_graph_pass(name ? name : "optimize_cf", optimize_cf);
}
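
/* Minimal usage sketch (caller code, not part of this file):
 *
 *     optimize_cf(irg);                              // run directly, or
 *     ir_graph_pass_t *p = optimize_cf_pass(NULL);   // wrap as a pass
 */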