1 /* Tail merging for gimple.
2    Copyright (C) 2011-2016 Free Software Foundation, Inc.
3    Contributed by Tom de Vries (tom@codesourcery.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Pass overview.
22 
23 
24    MOTIVATIONAL EXAMPLE
25 
26    gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
27 
28    hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
29    {
30      struct FILED.1638 * fpD.2605;
31      charD.1 fileNameD.2604[1000];
32      intD.0 D.3915;
33      const charD.1 * restrict outputFileName.0D.3914;
34 
35      # BLOCK 2 freq:10000
36      # PRED: ENTRY [100.0%]  (fallthru,exec)
37      # PT = nonlocal { D.3926 } (restr)
38      outputFileName.0D.3914_3
39        = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40      # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43      sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44      # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47      D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48      if (D.3915_4 == 0)
49        goto <bb 3>;
50      else
51        goto <bb 4>;
52      # SUCC: 3 [10.0%]  (true,exec) 4 [90.0%]  (false,exec)
53 
54      # BLOCK 3 freq:1000
55      # PRED: 2 [10.0%]  (true,exec)
56      # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59      freeD.898 (ctxD.2601_5(D));
60      goto <bb 7>;
61      # SUCC: 7 [100.0%]  (fallthru,exec)
62 
63      # BLOCK 4 freq:9000
64      # PRED: 2 [90.0%]  (false,exec)
65      # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66      # PT = nonlocal escaped
67      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69      fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70      if (fpD.2605_8 == 0B)
71        goto <bb 5>;
72      else
73        goto <bb 6>;
74      # SUCC: 5 [1.9%]  (true,exec) 6 [98.1%]  (false,exec)
75 
76      # BLOCK 5 freq:173
77      # PRED: 4 [1.9%]  (true,exec)
78      # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81      freeD.898 (ctxD.2601_5(D));
82      goto <bb 7>;
83      # SUCC: 7 [100.0%]  (fallthru,exec)
84 
85      # BLOCK 6 freq:8827
86      # PRED: 4 [98.1%]  (false,exec)
87      # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88      # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89      # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90      fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91      # SUCC: 7 [100.0%]  (fallthru,exec)
92 
93      # BLOCK 7 freq:10000
94      # PRED: 3 [100.0%]  (fallthru,exec) 5 [100.0%]  (fallthru,exec)
95 	     6 [100.0%]  (fallthru,exec)
96      # PT = nonlocal null
97 
98      # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99      # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 			    .MEMD.3923_18(6)>
101      # VUSE <.MEMD.3923_11>
102      return ctxD.2601_1;
103      # SUCC: EXIT [100.0%]
104    }
105 
106    bb 3 and bb 5 can be merged.  The blocks have different predecessors, but the
107    same successors, and the same operations.
108 
109 
110    CONTEXT
111 
112    A technique called tail merging (or cross jumping) can fix the example
113    above.  For a block, we look for common code at the end (the tail) of the
114    predecessor blocks, and insert jumps from one block to the other.
115    The example is a special case for tail merging, in that 2 whole blocks
116    can be merged, rather than just the end parts of it.
117    We currently only focus on whole block merging, so in that sense
118    calling this pass tail merge is a bit of a misnomer.
119 
120    We distinguish 2 kinds of situations in which blocks can be merged:
121    - same operations, same predecessors.  The successor edges coming from one
122      block are redirected to come from the other block.
123    - same operations, same successors.  The predecessor edges entering one block
124      are redirected to enter the other block.  Note that this operation might
125      involve introducing phi operations.
126 
127    For efficient implementation, we would like to value numbers the blocks, and
128    have a comparison operator that tells us whether the blocks are equal.
129    Besides being runtime efficient, block value numbering should also abstract
130    from irrelevant differences in order of operations, much like normal value
131    numbering abstracts from irrelevant order of operations.
132 
133    For the first situation (same_operations, same predecessors), normal value
134    numbering fits well.  We can calculate a block value number based on the
135    value numbers of the defs and vdefs.
136 
137    For the second situation (same operations, same successors), this approach
138    doesn't work so well.  We can illustrate this using the example.  The calls
139    to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140    remain different in value numbering, since they represent different memory
141    states.  So the resulting vdefs of the frees will be different in value
142    numbering, so the block value numbers will be different.
143 
144    The reason why we call the blocks equal is not because they define the same
145    values, but because uses in the blocks use (possibly different) defs in the
146    same way.  To be able to detect this efficiently, we need to do some kind of
147    reverse value numbering, meaning number the uses rather than the defs, and
148    calculate a block value number based on the value number of the uses.
149    Ideally, a block comparison operator will also indicate which phis are needed
150    to merge the blocks.
151 
152    For the moment, we don't do block value numbering, but we do insn-by-insn
153    matching, using scc value numbers to match operations with results, and
154    structural comparison otherwise, while ignoring vop mismatches.
155 
156 
157    IMPLEMENTATION
158 
159    1. The pass first determines all groups of blocks with the same successor
160       blocks.
161    2. Within each group, it tries to determine clusters of equal basic blocks.
162    3. The clusters are applied.
163    4. The same successor groups are updated.
164    5. This process is repeated from 2 onwards, until no more changes.
165 
166 
167    LIMITATIONS/TODO
168 
169    - block only
170    - handles only 'same operations, same successors'.
171      It handles same predecessors as a special subcase though.
172    - does not implement the reverse value numbering and block value numbering.
173    - improve memory allocation: use garbage collected memory, obstacks,
174      allocpools where appropriate.
175    - no insertion of gimple_reg phis,  We only introduce vop-phis.
176    - handle blocks with gimple_reg phi_nodes.
177 
178 
179    PASS PLACEMENT
180    This 'pass' is not a stand-alone gimple pass, but runs as part of
181    pass_pre, in order to share the value numbering.
182 
183 
184    SWITCHES
185 
186    - ftree-tail-merge.  On at -O2.  We may have to enable it only at -Os.  */
187 
188 #include "config.h"
189 #include "system.h"
190 #include "coretypes.h"
191 #include "backend.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "cfghooks.h"
195 #include "tree-pass.h"
196 #include "ssa.h"
197 #include "fold-const.h"
198 #include "trans-mem.h"
199 #include "cfganal.h"
200 #include "cfgcleanup.h"
201 #include "gimple-iterator.h"
202 #include "tree-cfg.h"
203 #include "tree-into-ssa.h"
204 #include "params.h"
205 #include "tree-ssa-sccvn.h"
206 #include "cfgloop.h"
207 #include "tree-eh.h"
208 
209 /* Describes a group of bbs with the same successors.  The successor bbs are
210    cached in succs, and the successor edge flags are cached in succ_flags.
211    If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
212    it's marked in inverse.
213    Additionally, the hash value for the struct is cached in hashval, and
214    in_worklist indicates whether it's currently part of worklist.  */
215 
216 struct same_succ : pointer_hash <same_succ>
217 {
218   /* The bbs that have the same successor bbs.  */
219   bitmap bbs;
220   /* The successor bbs.  */
221   bitmap succs;
222   /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
223      bb.  */
224   bitmap inverse;
225   /* The edge flags for each of the successor bbs.  */
226   vec<int> succ_flags;
227   /* Indicates whether the struct is currently in the worklist.  */
228   bool in_worklist;
229   /* The hash value of the struct.  */
230   hashval_t hashval;
231 
232   /* hash_table support.  */
233   static inline hashval_t hash (const same_succ *);
234   static int equal (const same_succ *, const same_succ *);
235   static void remove (same_succ *);
236 };
237 
238 /* hash routine for hash_table support, returns hashval of E.  */
239 
240 inline hashval_t
hash(const same_succ * e)241 same_succ::hash (const same_succ *e)
242 {
243   return e->hashval;
244 }
245 
246 /* A group of bbs where 1 bb from bbs can replace the other bbs.  */
247 
248 struct bb_cluster
249 {
250   /* The bbs in the cluster.  */
251   bitmap bbs;
252   /* The preds of the bbs in the cluster.  */
253   bitmap preds;
254   /* Index in all_clusters vector.  */
255   int index;
256   /* The bb to replace the cluster with.  */
257   basic_block rep_bb;
258 };
259 
260 /* Per bb-info.  */
261 
262 struct aux_bb_info
263 {
264   /* The number of non-debug statements in the bb.  */
265   int size;
266   /* The same_succ that this bb is a member of.  */
267   same_succ *bb_same_succ;
268   /* The cluster that this bb is a member of.  */
269   bb_cluster *cluster;
270   /* The vop state at the exit of a bb.  This is shortlived data, used to
271      communicate data between update_block_by and update_vuses.  */
272   tree vop_at_exit;
273   /* The bb that either contains or is dominated by the dependencies of the
274      bb.  */
275   basic_block dep_bb;
276 };
277 
278 /* Macros to access the fields of struct aux_bb_info.  */
279 
280 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
281 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
282 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
283 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
284 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
285 
286 /* Returns true if the only effect a statement STMT has, is to define locally
287    used SSA_NAMEs.  */
288 
289 static bool
stmt_local_def(gimple * stmt)290 stmt_local_def (gimple *stmt)
291 {
292   basic_block bb, def_bb;
293   imm_use_iterator iter;
294   use_operand_p use_p;
295   tree val;
296   def_operand_p def_p;
297 
298   if (gimple_vdef (stmt) != NULL_TREE
299       || gimple_has_side_effects (stmt)
300       || gimple_could_trap_p_1 (stmt, false, false)
301       || gimple_vuse (stmt) != NULL_TREE)
302     return false;
303 
304   def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
305   if (def_p == NULL)
306     return false;
307 
308   val = DEF_FROM_PTR (def_p);
309   if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
310     return false;
311 
312   def_bb = gimple_bb (stmt);
313 
314   FOR_EACH_IMM_USE_FAST (use_p, iter, val)
315     {
316       if (is_gimple_debug (USE_STMT (use_p)))
317 	continue;
318       bb = gimple_bb (USE_STMT (use_p));
319       if (bb == def_bb)
320 	continue;
321 
322       if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
323 	  && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
324 	continue;
325 
326       return false;
327     }
328 
329   return true;
330 }
331 
332 /* Let GSI skip forwards over local defs.  */
333 
334 static void
gsi_advance_fw_nondebug_nonlocal(gimple_stmt_iterator * gsi)335 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
336 {
337   gimple *stmt;
338 
339   while (true)
340     {
341       if (gsi_end_p (*gsi))
342 	return;
343       stmt = gsi_stmt (*gsi);
344       if (!stmt_local_def (stmt))
345 	return;
346       gsi_next_nondebug (gsi);
347     }
348 }
349 
350 /* VAL1 and VAL2 are either:
351    - uses in BB1 and BB2, or
352    - phi alternatives for BB1 and BB2.
353    Return true if the uses have the same gvn value.  */
354 
355 static bool
gvn_uses_equal(tree val1,tree val2)356 gvn_uses_equal (tree val1, tree val2)
357 {
358   gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
359 
360   if (val1 == val2)
361     return true;
362 
363   if (vn_valueize (val1) != vn_valueize (val2))
364     return false;
365 
366   return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
367 	  && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
368 }
369 
370 /* Prints E to FILE.  */
371 
372 static void
same_succ_print(FILE * file,const same_succ * e)373 same_succ_print (FILE *file, const same_succ *e)
374 {
375   unsigned int i;
376   bitmap_print (file, e->bbs, "bbs:", "\n");
377   bitmap_print (file, e->succs, "succs:", "\n");
378   bitmap_print (file, e->inverse, "inverse:", "\n");
379   fprintf (file, "flags:");
380   for (i = 0; i < e->succ_flags.length (); ++i)
381     fprintf (file, " %x", e->succ_flags[i]);
382   fprintf (file, "\n");
383 }
384 
385 /* Prints same_succ VE to VFILE.  */
386 
387 inline int
ssa_same_succ_print_traverse(same_succ ** pe,FILE * file)388 ssa_same_succ_print_traverse (same_succ **pe, FILE *file)
389 {
390   const same_succ *e = *pe;
391   same_succ_print (file, e);
392   return 1;
393 }
394 
395 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB.  */
396 
397 static void
update_dep_bb(basic_block use_bb,tree val)398 update_dep_bb (basic_block use_bb, tree val)
399 {
400   basic_block dep_bb;
401 
402   /* Not a dep.  */
403   if (TREE_CODE (val) != SSA_NAME)
404     return;
405 
406   /* Skip use of global def.  */
407   if (SSA_NAME_IS_DEFAULT_DEF (val))
408     return;
409 
410   /* Skip use of local def.  */
411   dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
412   if (dep_bb == use_bb)
413     return;
414 
415   if (BB_DEP_BB (use_bb) == NULL
416       || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
417     BB_DEP_BB (use_bb) = dep_bb;
418 }
419 
420 /* Update BB_DEP_BB, given the dependencies in STMT.  */
421 
422 static void
stmt_update_dep_bb(gimple * stmt)423 stmt_update_dep_bb (gimple *stmt)
424 {
425   ssa_op_iter iter;
426   use_operand_p use;
427 
428   FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
429     update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
430 }
431 
432 /* Calculates hash value for same_succ VE.  */
433 
434 static hashval_t
same_succ_hash(const same_succ * e)435 same_succ_hash (const same_succ *e)
436 {
437   inchash::hash hstate (bitmap_hash (e->succs));
438   int flags;
439   unsigned int i;
440   unsigned int first = bitmap_first_set_bit (e->bbs);
441   basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
442   int size = 0;
443   gimple *stmt;
444   tree arg;
445   unsigned int s;
446   bitmap_iterator bs;
447 
448   for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
449        !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
450     {
451       stmt = gsi_stmt (gsi);
452       stmt_update_dep_bb (stmt);
453       if (stmt_local_def (stmt))
454 	continue;
455       size++;
456 
457       hstate.add_int (gimple_code (stmt));
458       if (is_gimple_assign (stmt))
459 	hstate.add_int (gimple_assign_rhs_code (stmt));
460       if (!is_gimple_call (stmt))
461 	continue;
462       if (gimple_call_internal_p (stmt))
463 	hstate.add_int (gimple_call_internal_fn (stmt));
464       else
465 	{
466 	  inchash::add_expr (gimple_call_fn (stmt), hstate);
467 	  if (gimple_call_chain (stmt))
468 	    inchash::add_expr (gimple_call_chain (stmt), hstate);
469 	}
470       for (i = 0; i < gimple_call_num_args (stmt); i++)
471 	{
472 	  arg = gimple_call_arg (stmt, i);
473 	  arg = vn_valueize (arg);
474 	  inchash::add_expr (arg, hstate);
475 	}
476     }
477 
478   hstate.add_int (size);
479   BB_SIZE (bb) = size;
480 
481   for (i = 0; i < e->succ_flags.length (); ++i)
482     {
483       flags = e->succ_flags[i];
484       flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
485       hstate.add_int (flags);
486     }
487 
488   EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
489     {
490       int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
491       for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
492 	   !gsi_end_p (gsi);
493 	   gsi_next (&gsi))
494 	{
495 	  gphi *phi = gsi.phi ();
496 	  tree lhs = gimple_phi_result (phi);
497 	  tree val = gimple_phi_arg_def (phi, n);
498 
499 	  if (virtual_operand_p (lhs))
500 	    continue;
501 	  update_dep_bb (bb, val);
502 	}
503     }
504 
505   return hstate.end ();
506 }
507 
508 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
509    are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
510    the other edge flags.  */
511 
512 static bool
inverse_flags(const same_succ * e1,const same_succ * e2)513 inverse_flags (const same_succ *e1, const same_succ *e2)
514 {
515   int f1a, f1b, f2a, f2b;
516   int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
517 
518   if (e1->succ_flags.length () != 2)
519     return false;
520 
521   f1a = e1->succ_flags[0];
522   f1b = e1->succ_flags[1];
523   f2a = e2->succ_flags[0];
524   f2b = e2->succ_flags[1];
525 
526   if (f1a == f2a && f1b == f2b)
527     return false;
528 
529   return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
530 }
531 
532 /* Compares SAME_SUCCs E1 and E2.  */
533 
534 int
equal(const same_succ * e1,const same_succ * e2)535 same_succ::equal (const same_succ *e1, const same_succ *e2)
536 {
537   unsigned int i, first1, first2;
538   gimple_stmt_iterator gsi1, gsi2;
539   gimple *s1, *s2;
540   basic_block bb1, bb2;
541 
542   if (e1 == e2)
543     return 1;
544 
545   if (e1->hashval != e2->hashval)
546     return 0;
547 
548   if (e1->succ_flags.length () != e2->succ_flags.length ())
549     return 0;
550 
551   if (!bitmap_equal_p (e1->succs, e2->succs))
552     return 0;
553 
554   if (!inverse_flags (e1, e2))
555     {
556       for (i = 0; i < e1->succ_flags.length (); ++i)
557 	if (e1->succ_flags[i] != e2->succ_flags[i])
558 	  return 0;
559     }
560 
561   first1 = bitmap_first_set_bit (e1->bbs);
562   first2 = bitmap_first_set_bit (e2->bbs);
563 
564   bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
565   bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);
566 
567   if (BB_SIZE (bb1) != BB_SIZE (bb2))
568     return 0;
569 
570   gsi1 = gsi_start_nondebug_bb (bb1);
571   gsi2 = gsi_start_nondebug_bb (bb2);
572   gsi_advance_fw_nondebug_nonlocal (&gsi1);
573   gsi_advance_fw_nondebug_nonlocal (&gsi2);
574   while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
575     {
576       s1 = gsi_stmt (gsi1);
577       s2 = gsi_stmt (gsi2);
578       if (gimple_code (s1) != gimple_code (s2))
579 	return 0;
580       if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
581 	return 0;
582       gsi_next_nondebug (&gsi1);
583       gsi_next_nondebug (&gsi2);
584       gsi_advance_fw_nondebug_nonlocal (&gsi1);
585       gsi_advance_fw_nondebug_nonlocal (&gsi2);
586     }
587 
588   return 1;
589 }
590 
591 /* Alloc and init a new SAME_SUCC.  */
592 
593 static same_succ *
same_succ_alloc(void)594 same_succ_alloc (void)
595 {
596   same_succ *same = XNEW (struct same_succ);
597 
598   same->bbs = BITMAP_ALLOC (NULL);
599   same->succs = BITMAP_ALLOC (NULL);
600   same->inverse = BITMAP_ALLOC (NULL);
601   same->succ_flags.create (10);
602   same->in_worklist = false;
603 
604   return same;
605 }
606 
607 /* Delete same_succ E.  */
608 
609 void
remove(same_succ * e)610 same_succ::remove (same_succ *e)
611 {
612   BITMAP_FREE (e->bbs);
613   BITMAP_FREE (e->succs);
614   BITMAP_FREE (e->inverse);
615   e->succ_flags.release ();
616 
617   XDELETE (e);
618 }
619 
620 /* Reset same_succ SAME.  */
621 
622 static void
same_succ_reset(same_succ * same)623 same_succ_reset (same_succ *same)
624 {
625   bitmap_clear (same->bbs);
626   bitmap_clear (same->succs);
627   bitmap_clear (same->inverse);
628   same->succ_flags.truncate (0);
629 }
630 
631 static hash_table<same_succ> *same_succ_htab;
632 
633 /* Array that is used to store the edge flags for a successor.  */
634 
635 static int *same_succ_edge_flags;
636 
637 /* Bitmap that is used to mark bbs that are recently deleted.  */
638 
639 static bitmap deleted_bbs;
640 
641 /* Bitmap that is used to mark predecessors of bbs that are
642    deleted.  */
643 
644 static bitmap deleted_bb_preds;
645 
646 /* Prints same_succ_htab to stderr.  */
647 
648 extern void debug_same_succ (void);
649 DEBUG_FUNCTION void
debug_same_succ(void)650 debug_same_succ ( void)
651 {
652   same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
653 }
654 
655 
656 /* Vector of bbs to process.  */
657 
658 static vec<same_succ *> worklist;
659 
660 /* Prints worklist to FILE.  */
661 
662 static void
print_worklist(FILE * file)663 print_worklist (FILE *file)
664 {
665   unsigned int i;
666   for (i = 0; i < worklist.length (); ++i)
667     same_succ_print (file, worklist[i]);
668 }
669 
670 /* Adds SAME to worklist.  */
671 
672 static void
add_to_worklist(same_succ * same)673 add_to_worklist (same_succ *same)
674 {
675   if (same->in_worklist)
676     return;
677 
678   if (bitmap_count_bits (same->bbs) < 2)
679     return;
680 
681   same->in_worklist = true;
682   worklist.safe_push (same);
683 }
684 
685 /* Add BB to same_succ_htab.  */
686 
687 static void
find_same_succ_bb(basic_block bb,same_succ ** same_p)688 find_same_succ_bb (basic_block bb, same_succ **same_p)
689 {
690   unsigned int j;
691   bitmap_iterator bj;
692   same_succ *same = *same_p;
693   same_succ **slot;
694   edge_iterator ei;
695   edge e;
696 
697   if (bb == NULL
698       /* Be conservative with loop structure.  It's not evident that this test
699 	 is sufficient.  Before tail-merge, we've just called
700 	 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
701 	 set, so there's no guarantee that the loop->latch value is still valid.
702 	 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the
703 	 start of pre, we've kept that property intact throughout pre, and are
704 	 keeping it throughout tail-merge using this test.  */
705       || bb->loop_father->latch == bb)
706     return;
707   bitmap_set_bit (same->bbs, bb->index);
708   FOR_EACH_EDGE (e, ei, bb->succs)
709     {
710       int index = e->dest->index;
711       bitmap_set_bit (same->succs, index);
712       same_succ_edge_flags[index] = e->flags;
713     }
714   EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
715     same->succ_flags.safe_push (same_succ_edge_flags[j]);
716 
717   same->hashval = same_succ_hash (same);
718 
719   slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
720   if (*slot == NULL)
721     {
722       *slot = same;
723       BB_SAME_SUCC (bb) = same;
724       add_to_worklist (same);
725       *same_p = NULL;
726     }
727   else
728     {
729       bitmap_set_bit ((*slot)->bbs, bb->index);
730       BB_SAME_SUCC (bb) = *slot;
731       add_to_worklist (*slot);
732       if (inverse_flags (same, *slot))
733 	bitmap_set_bit ((*slot)->inverse, bb->index);
734       same_succ_reset (same);
735     }
736 }
737 
738 /* Find bbs with same successors.  */
739 
740 static void
find_same_succ(void)741 find_same_succ (void)
742 {
743   same_succ *same = same_succ_alloc ();
744   basic_block bb;
745 
746   FOR_EACH_BB_FN (bb, cfun)
747     {
748       find_same_succ_bb (bb, &same);
749       if (same == NULL)
750 	same = same_succ_alloc ();
751     }
752 
753   same_succ::remove (same);
754 }
755 
756 /* Initializes worklist administration.  */
757 
758 static void
init_worklist(void)759 init_worklist (void)
760 {
761   alloc_aux_for_blocks (sizeof (struct aux_bb_info));
762   same_succ_htab = new hash_table<same_succ> (n_basic_blocks_for_fn (cfun));
763   same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
764   deleted_bbs = BITMAP_ALLOC (NULL);
765   deleted_bb_preds = BITMAP_ALLOC (NULL);
766   worklist.create (n_basic_blocks_for_fn (cfun));
767   find_same_succ ();
768 
769   if (dump_file && (dump_flags & TDF_DETAILS))
770     {
771       fprintf (dump_file, "initial worklist:\n");
772       print_worklist (dump_file);
773     }
774 }
775 
776 /* Deletes worklist administration.  */
777 
778 static void
delete_worklist(void)779 delete_worklist (void)
780 {
781   free_aux_for_blocks ();
782   delete same_succ_htab;
783   same_succ_htab = NULL;
784   XDELETEVEC (same_succ_edge_flags);
785   same_succ_edge_flags = NULL;
786   BITMAP_FREE (deleted_bbs);
787   BITMAP_FREE (deleted_bb_preds);
788   worklist.release ();
789 }
790 
791 /* Mark BB as deleted, and mark its predecessors.  */
792 
793 static void
mark_basic_block_deleted(basic_block bb)794 mark_basic_block_deleted (basic_block bb)
795 {
796   edge e;
797   edge_iterator ei;
798 
799   bitmap_set_bit (deleted_bbs, bb->index);
800 
801   FOR_EACH_EDGE (e, ei, bb->preds)
802     bitmap_set_bit (deleted_bb_preds, e->src->index);
803 }
804 
805 /* Removes BB from its corresponding same_succ.  */
806 
807 static void
same_succ_flush_bb(basic_block bb)808 same_succ_flush_bb (basic_block bb)
809 {
810   same_succ *same = BB_SAME_SUCC (bb);
811   if (! same)
812     return;
813 
814   BB_SAME_SUCC (bb) = NULL;
815   if (bitmap_single_bit_set_p (same->bbs))
816     same_succ_htab->remove_elt_with_hash (same, same->hashval);
817   else
818     bitmap_clear_bit (same->bbs, bb->index);
819 }
820 
821 /* Removes all bbs in BBS from their corresponding same_succ.  */
822 
823 static void
same_succ_flush_bbs(bitmap bbs)824 same_succ_flush_bbs (bitmap bbs)
825 {
826   unsigned int i;
827   bitmap_iterator bi;
828 
829   EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
830     same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
831 }
832 
833 /* Release the last vdef in BB, either normal or phi result.  */
834 
835 static void
release_last_vdef(basic_block bb)836 release_last_vdef (basic_block bb)
837 {
838   for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
839        gsi_prev_nondebug (&i))
840     {
841       gimple *stmt = gsi_stmt (i);
842       if (gimple_vdef (stmt) == NULL_TREE)
843 	continue;
844 
845       mark_virtual_operand_for_renaming (gimple_vdef (stmt));
846       return;
847     }
848 
849   for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
850        gsi_next (&i))
851     {
852       gphi *phi = i.phi ();
853       tree res = gimple_phi_result (phi);
854 
855       if (!virtual_operand_p (res))
856 	continue;
857 
858       mark_virtual_phi_result_for_renaming (phi);
859       return;
860     }
861 }
862 
863 /* For deleted_bb_preds, find bbs with same successors.  */
864 
865 static void
update_worklist(void)866 update_worklist (void)
867 {
868   unsigned int i;
869   bitmap_iterator bi;
870   basic_block bb;
871   same_succ *same;
872 
873   bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
874   bitmap_clear (deleted_bbs);
875 
876   bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
877   same_succ_flush_bbs (deleted_bb_preds);
878 
879   same = same_succ_alloc ();
880   EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
881     {
882       bb = BASIC_BLOCK_FOR_FN (cfun, i);
883       gcc_assert (bb != NULL);
884       find_same_succ_bb (bb, &same);
885       if (same == NULL)
886 	same = same_succ_alloc ();
887     }
888   same_succ::remove (same);
889   bitmap_clear (deleted_bb_preds);
890 }
891 
892 /* Prints cluster C to FILE.  */
893 
894 static void
print_cluster(FILE * file,bb_cluster * c)895 print_cluster (FILE *file, bb_cluster *c)
896 {
897   if (c == NULL)
898     return;
899   bitmap_print (file, c->bbs, "bbs:", "\n");
900   bitmap_print (file, c->preds, "preds:", "\n");
901 }
902 
903 /* Prints cluster C to stderr.  */
904 
905 extern void debug_cluster (bb_cluster *);
906 DEBUG_FUNCTION void
debug_cluster(bb_cluster * c)907 debug_cluster (bb_cluster *c)
908 {
909   print_cluster (stderr, c);
910 }
911 
912 /* Update C->rep_bb, given that BB is added to the cluster.  */
913 
914 static void
update_rep_bb(bb_cluster * c,basic_block bb)915 update_rep_bb (bb_cluster *c, basic_block bb)
916 {
917   /* Initial.  */
918   if (c->rep_bb == NULL)
919     {
920       c->rep_bb = bb;
921       return;
922     }
923 
924   /* Current needs no deps, keep it.  */
925   if (BB_DEP_BB (c->rep_bb) == NULL)
926     return;
927 
928   /* Bb needs no deps, change rep_bb.  */
929   if (BB_DEP_BB (bb) == NULL)
930     {
931       c->rep_bb = bb;
932       return;
933     }
934 
935   /* Bb needs last deps earlier than current, change rep_bb.  A potential
936      problem with this, is that the first deps might also be earlier, which
937      would mean we prefer longer lifetimes for the deps.  To be able to check
938      for this, we would have to trace BB_FIRST_DEP_BB as well, besides
939      BB_DEP_BB, which is really BB_LAST_DEP_BB.
940      The benefit of choosing the bb with last deps earlier, is that it can
941      potentially be used as replacement for more bbs.  */
942   if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
943     c->rep_bb = bb;
944 }
945 
946 /* Add BB to cluster C.  Sets BB in C->bbs, and preds of BB in C->preds.  */
947 
948 static void
add_bb_to_cluster(bb_cluster * c,basic_block bb)949 add_bb_to_cluster (bb_cluster *c, basic_block bb)
950 {
951   edge e;
952   edge_iterator ei;
953 
954   bitmap_set_bit (c->bbs, bb->index);
955 
956   FOR_EACH_EDGE (e, ei, bb->preds)
957     bitmap_set_bit (c->preds, e->src->index);
958 
959   update_rep_bb (c, bb);
960 }
961 
962 /* Allocate and init new cluster.  */
963 
964 static bb_cluster *
new_cluster(void)965 new_cluster (void)
966 {
967   bb_cluster *c;
968   c = XCNEW (bb_cluster);
969   c->bbs = BITMAP_ALLOC (NULL);
970   c->preds = BITMAP_ALLOC (NULL);
971   c->rep_bb = NULL;
972   return c;
973 }
974 
975 /* Delete clusters.  */
976 
977 static void
delete_cluster(bb_cluster * c)978 delete_cluster (bb_cluster *c)
979 {
980   if (c == NULL)
981     return;
982   BITMAP_FREE (c->bbs);
983   BITMAP_FREE (c->preds);
984   XDELETE (c);
985 }
986 
987 
988 /* Array that contains all clusters.  */
989 
990 static vec<bb_cluster *> all_clusters;
991 
992 /* Allocate all cluster vectors.  */
993 
994 static void
alloc_cluster_vectors(void)995 alloc_cluster_vectors (void)
996 {
997   all_clusters.create (n_basic_blocks_for_fn (cfun));
998 }
999 
1000 /* Reset all cluster vectors.  */
1001 
1002 static void
reset_cluster_vectors(void)1003 reset_cluster_vectors (void)
1004 {
1005   unsigned int i;
1006   basic_block bb;
1007   for (i = 0; i < all_clusters.length (); ++i)
1008     delete_cluster (all_clusters[i]);
1009   all_clusters.truncate (0);
1010   FOR_EACH_BB_FN (bb, cfun)
1011     BB_CLUSTER (bb) = NULL;
1012 }
1013 
1014 /* Delete all cluster vectors.  */
1015 
1016 static void
delete_cluster_vectors(void)1017 delete_cluster_vectors (void)
1018 {
1019   unsigned int i;
1020   for (i = 0; i < all_clusters.length (); ++i)
1021     delete_cluster (all_clusters[i]);
1022   all_clusters.release ();
1023 }
1024 
1025 /* Merge cluster C2 into C1.  */
1026 
1027 static void
merge_clusters(bb_cluster * c1,bb_cluster * c2)1028 merge_clusters (bb_cluster *c1, bb_cluster *c2)
1029 {
1030   bitmap_ior_into (c1->bbs, c2->bbs);
1031   bitmap_ior_into (c1->preds, c2->preds);
1032 }
1033 
1034 /* Register equivalence of BB1 and BB2 (members of cluster C).  Store c in
1035    all_clusters, or merge c with existing cluster.  */
1036 
1037 static void
set_cluster(basic_block bb1,basic_block bb2)1038 set_cluster (basic_block bb1, basic_block bb2)
1039 {
1040   basic_block merge_bb, other_bb;
1041   bb_cluster *merge, *old, *c;
1042 
1043   if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1044     {
1045       c = new_cluster ();
1046       add_bb_to_cluster (c, bb1);
1047       add_bb_to_cluster (c, bb2);
1048       BB_CLUSTER (bb1) = c;
1049       BB_CLUSTER (bb2) = c;
1050       c->index = all_clusters.length ();
1051       all_clusters.safe_push (c);
1052     }
1053   else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1054     {
1055       merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1056       other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1057       merge = BB_CLUSTER (merge_bb);
1058       add_bb_to_cluster (merge, other_bb);
1059       BB_CLUSTER (other_bb) = merge;
1060     }
1061   else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1062     {
1063       unsigned int i;
1064       bitmap_iterator bi;
1065 
1066       old = BB_CLUSTER (bb2);
1067       merge = BB_CLUSTER (bb1);
1068       merge_clusters (merge, old);
1069       EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1070 	BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
1071       all_clusters[old->index] = NULL;
1072       update_rep_bb (merge, old->rep_bb);
1073       delete_cluster (old);
1074     }
1075   else
1076     gcc_unreachable ();
1077 }
1078 
1079 /* Return true if gimple operands T1 and T2 have the same value.  */
1080 
1081 static bool
gimple_operand_equal_value_p(tree t1,tree t2)1082 gimple_operand_equal_value_p (tree t1, tree t2)
1083 {
1084   if (t1 == t2)
1085     return true;
1086 
1087   if (t1 == NULL_TREE
1088       || t2 == NULL_TREE)
1089     return false;
1090 
1091   if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
1092     return true;
1093 
1094   return gvn_uses_equal (t1, t2);
1095 }
1096 
1097 /* Return true if gimple statements S1 and S2 are equal.  Gimple_bb (s1) and
1098    gimple_bb (s2) are members of SAME_SUCC.  */
1099 
1100 static bool
gimple_equal_p(same_succ * same_succ,gimple * s1,gimple * s2)1101 gimple_equal_p (same_succ *same_succ, gimple *s1, gimple *s2)
1102 {
1103   unsigned int i;
1104   tree lhs1, lhs2;
1105   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1106   tree t1, t2;
1107   bool inv_cond;
1108   enum tree_code code1, code2;
1109 
1110   if (gimple_code (s1) != gimple_code (s2))
1111     return false;
1112 
1113   switch (gimple_code (s1))
1114     {
1115     case GIMPLE_CALL:
1116       if (!gimple_call_same_target_p (s1, s2))
1117 	return false;
1118 
1119       t1 = gimple_call_chain (s1);
1120       t2 = gimple_call_chain (s2);
1121       if (!gimple_operand_equal_value_p (t1, t2))
1122 	return false;
1123 
1124       if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1125 	return false;
1126 
1127       for (i = 0; i < gimple_call_num_args (s1); ++i)
1128 	{
1129 	  t1 = gimple_call_arg (s1, i);
1130 	  t2 = gimple_call_arg (s2, i);
1131 	  if (!gimple_operand_equal_value_p (t1, t2))
1132 	    return false;
1133 	}
1134 
1135       lhs1 = gimple_get_lhs (s1);
1136       lhs2 = gimple_get_lhs (s2);
1137       if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1138 	return true;
1139       if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1140 	return false;
1141       if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1142 	return vn_valueize (lhs1) == vn_valueize (lhs2);
1143       return operand_equal_p (lhs1, lhs2, 0);
1144 
1145     case GIMPLE_ASSIGN:
1146       lhs1 = gimple_get_lhs (s1);
1147       lhs2 = gimple_get_lhs (s2);
1148       if (TREE_CODE (lhs1) != SSA_NAME
1149 	  && TREE_CODE (lhs2) != SSA_NAME)
1150 	return (operand_equal_p (lhs1, lhs2, 0)
1151 		&& gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
1152 						 gimple_assign_rhs1 (s2)));
1153       else if (TREE_CODE (lhs1) == SSA_NAME
1154 	       && TREE_CODE (lhs2) == SSA_NAME)
1155 	return operand_equal_p (gimple_assign_rhs1 (s1),
1156 				gimple_assign_rhs1 (s2), 0);
1157       return false;
1158 
1159     case GIMPLE_COND:
1160       t1 = gimple_cond_lhs (s1);
1161       t2 = gimple_cond_lhs (s2);
1162       if (!gimple_operand_equal_value_p (t1, t2))
1163 	return false;
1164 
1165       t1 = gimple_cond_rhs (s1);
1166       t2 = gimple_cond_rhs (s2);
1167       if (!gimple_operand_equal_value_p (t1, t2))
1168 	return false;
1169 
1170       code1 = gimple_expr_code (s1);
1171       code2 = gimple_expr_code (s2);
1172       inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1173 		  != bitmap_bit_p (same_succ->inverse, bb2->index));
1174       if (inv_cond)
1175 	{
1176 	  bool honor_nans = HONOR_NANS (t1);
1177 	  code2 = invert_tree_comparison (code2, honor_nans);
1178 	}
1179       return code1 == code2;
1180 
1181     default:
1182       return false;
1183     }
1184 }
1185 
1186 /* Let GSI skip backwards over local defs.  Return the earliest vuse in VUSE.
1187    Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1188    processed statements.  */
1189 
1190 static void
gsi_advance_bw_nondebug_nonlocal(gimple_stmt_iterator * gsi,tree * vuse,bool * vuse_escaped)1191 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1192 				  bool *vuse_escaped)
1193 {
1194   gimple *stmt;
1195   tree lvuse;
1196 
1197   while (true)
1198     {
1199       if (gsi_end_p (*gsi))
1200 	return;
1201       stmt = gsi_stmt (*gsi);
1202 
1203       lvuse = gimple_vuse (stmt);
1204       if (lvuse != NULL_TREE)
1205 	{
1206 	  *vuse = lvuse;
1207 	  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1208 	    *vuse_escaped = true;
1209 	}
1210 
1211       if (!stmt_local_def (stmt))
1212 	return;
1213       gsi_prev_nondebug (gsi);
1214     }
1215 }
1216 
1217 /* Return true if equal (in the sense of gimple_equal_p) statements STMT1 and
1218    STMT2 are allowed to be merged.  */
1219 
1220 static bool
merge_stmts_p(gimple * stmt1,gimple * stmt2)1221 merge_stmts_p (gimple *stmt1, gimple *stmt2)
1222 {
1223   /* What could be better than this here is to blacklist the bb
1224      containing the stmt, when encountering the stmt f.i. in
1225      same_succ_hash.  */
1226   if (is_tm_ending (stmt1))
1227     return false;
1228 
1229   /* Verify EH landing pads.  */
1230   if (lookup_stmt_eh_lp_fn (cfun, stmt1) != lookup_stmt_eh_lp_fn (cfun, stmt2))
1231     return false;
1232 
1233   if (is_gimple_call (stmt1)
1234       && gimple_call_internal_p (stmt1))
1235     switch (gimple_call_internal_fn (stmt1))
1236       {
1237       case IFN_UBSAN_NULL:
1238       case IFN_UBSAN_BOUNDS:
1239       case IFN_UBSAN_VPTR:
1240       case IFN_UBSAN_CHECK_ADD:
1241       case IFN_UBSAN_CHECK_SUB:
1242       case IFN_UBSAN_CHECK_MUL:
1243       case IFN_UBSAN_OBJECT_SIZE:
1244       case IFN_ASAN_CHECK:
1245 	/* For these internal functions, gimple_location is an implicit
1246 	   parameter, which will be used explicitly after expansion.
1247 	   Merging these statements may cause confusing line numbers in
1248 	   sanitizer messages.  */
1249 	return gimple_location (stmt1) == gimple_location (stmt2);
1250       default:
1251 	break;
1252       }
1253 
1254   return true;
1255 }
1256 
1257 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If so,
1258    clusters them.  */
1259 
1260 static void
find_duplicate(same_succ * same_succ,basic_block bb1,basic_block bb2)1261 find_duplicate (same_succ *same_succ, basic_block bb1, basic_block bb2)
1262 {
1263   gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1264   gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1265   tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1266   bool vuse_escaped = false;
1267 
1268   gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1269   gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1270 
1271   while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1272     {
1273       gimple *stmt1 = gsi_stmt (gsi1);
1274       gimple *stmt2 = gsi_stmt (gsi2);
1275 
1276       if (!gimple_equal_p (same_succ, stmt1, stmt2))
1277 	return;
1278 
1279       if (!merge_stmts_p (stmt1, stmt2))
1280 	return;
1281 
1282       gsi_prev_nondebug (&gsi1);
1283       gsi_prev_nondebug (&gsi2);
1284       gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1285       gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
1286     }
1287 
1288   if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1289     return;
1290 
1291   /* If the incoming vuses are not the same, and the vuse escaped into an
1292      SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1293      which potentially means the semantics of one of the blocks will be changed.
1294      TODO: make this check more precise.  */
1295   if (vuse_escaped && vuse1 != vuse2)
1296     return;
1297 
1298   if (dump_file)
1299     fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1300 	     bb1->index, bb2->index);
1301 
1302   set_cluster (bb1, bb2);
1303 }
1304 
1305 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1306    E2 are equal.  */
1307 
1308 static bool
same_phi_alternatives_1(basic_block dest,edge e1,edge e2)1309 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1310 {
1311   int n1 = e1->dest_idx, n2 = e2->dest_idx;
1312   gphi_iterator gsi;
1313 
1314   for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1315     {
1316       gphi *phi = gsi.phi ();
1317       tree lhs = gimple_phi_result (phi);
1318       tree val1 = gimple_phi_arg_def (phi, n1);
1319       tree val2 = gimple_phi_arg_def (phi, n2);
1320 
1321       if (virtual_operand_p (lhs))
1322 	continue;
1323 
1324       if (operand_equal_for_phi_arg_p (val1, val2))
1325 	continue;
1326       if (gvn_uses_equal (val1, val2))
1327 	continue;
1328 
1329       return false;
1330     }
1331 
1332   return true;
1333 }
1334 
1335 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1336    phi alternatives for BB1 and BB2 are equal.  */
1337 
1338 static bool
same_phi_alternatives(same_succ * same_succ,basic_block bb1,basic_block bb2)1339 same_phi_alternatives (same_succ *same_succ, basic_block bb1, basic_block bb2)
1340 {
1341   unsigned int s;
1342   bitmap_iterator bs;
1343   edge e1, e2;
1344   basic_block succ;
1345 
1346   EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1347     {
1348       succ = BASIC_BLOCK_FOR_FN (cfun, s);
1349       e1 = find_edge (bb1, succ);
1350       e2 = find_edge (bb2, succ);
1351       if (e1->flags & EDGE_COMPLEX
1352 	  || e2->flags & EDGE_COMPLEX)
1353 	return false;
1354 
1355       /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1356 	 the same value.  */
1357       if (!same_phi_alternatives_1 (succ, e1, e2))
1358 	return false;
1359     }
1360 
1361   return true;
1362 }
1363 
1364 /* Return true if BB has non-vop phis.  */
1365 
1366 static bool
bb_has_non_vop_phi(basic_block bb)1367 bb_has_non_vop_phi (basic_block bb)
1368 {
1369   gimple_seq phis = phi_nodes (bb);
1370   gimple *phi;
1371 
1372   if (phis == NULL)
1373     return false;
1374 
1375   if (!gimple_seq_singleton_p (phis))
1376     return true;
1377 
1378   phi = gimple_seq_first_stmt (phis);
1379   return !virtual_operand_p (gimple_phi_result (phi));
1380 }
1381 
1382 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1383    invariant that uses in FROM are dominates by their defs.  */
1384 
1385 static bool
deps_ok_for_redirect_from_bb_to_bb(basic_block from,basic_block to)1386 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1387 {
1388   basic_block cd, dep_bb = BB_DEP_BB (to);
1389   edge_iterator ei;
1390   edge e;
1391   bitmap from_preds = BITMAP_ALLOC (NULL);
1392 
1393   if (dep_bb == NULL)
1394     return true;
1395 
1396   FOR_EACH_EDGE (e, ei, from->preds)
1397     bitmap_set_bit (from_preds, e->src->index);
1398   cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1399   BITMAP_FREE (from_preds);
1400 
1401   return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1402 }
1403 
1404 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1405    replacement bb) and vice versa maintains the invariant that uses in the
1406    replacement are dominates by their defs.  */
1407 
1408 static bool
deps_ok_for_redirect(basic_block bb1,basic_block bb2)1409 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1410 {
1411   if (BB_CLUSTER (bb1) != NULL)
1412     bb1 = BB_CLUSTER (bb1)->rep_bb;
1413 
1414   if (BB_CLUSTER (bb2) != NULL)
1415     bb2 = BB_CLUSTER (bb2)->rep_bb;
1416 
1417   return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1418 	  && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1419 }
1420 
1421 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  */
1422 
1423 static void
find_clusters_1(same_succ * same_succ)1424 find_clusters_1 (same_succ *same_succ)
1425 {
1426   basic_block bb1, bb2;
1427   unsigned int i, j;
1428   bitmap_iterator bi, bj;
1429   int nr_comparisons;
1430   int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
1431 
1432   EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1433     {
1434       bb1 = BASIC_BLOCK_FOR_FN (cfun, i);
1435 
1436       /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
1437 	 phi-nodes in bb1 and bb2, with the same alternatives for the same
1438 	 preds.  */
1439       if (bb_has_non_vop_phi (bb1) || bb_has_abnormal_pred (bb1))
1440 	continue;
1441 
1442       nr_comparisons = 0;
1443       EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1444 	{
1445 	  bb2 = BASIC_BLOCK_FOR_FN (cfun, j);
1446 
1447 	  if (bb_has_non_vop_phi (bb2) || bb_has_abnormal_pred (bb2))
1448 	    continue;
1449 
1450 	  if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1451 	    continue;
1452 
1453 	  /* Limit quadratic behavior.  */
1454 	  nr_comparisons++;
1455 	  if (nr_comparisons > max_comparisons)
1456 	    break;
1457 
1458 	  /* This is a conservative dependency check.  We could test more
1459 	     precise for allowed replacement direction.  */
1460 	  if (!deps_ok_for_redirect (bb1, bb2))
1461 	    continue;
1462 
1463 	  if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1464 	    continue;
1465 
1466 	  find_duplicate (same_succ, bb1, bb2);
1467 	}
1468     }
1469 }
1470 
1471 /* Find clusters of bbs which can be merged.  */
1472 
1473 static void
find_clusters(void)1474 find_clusters (void)
1475 {
1476   same_succ *same;
1477 
1478   while (!worklist.is_empty ())
1479     {
1480       same = worklist.pop ();
1481       same->in_worklist = false;
1482       if (dump_file && (dump_flags & TDF_DETAILS))
1483 	{
1484 	  fprintf (dump_file, "processing worklist entry\n");
1485 	  same_succ_print (dump_file, same);
1486 	}
1487       find_clusters_1 (same);
1488     }
1489 }
1490 
1491 /* Returns the vop phi of BB, if any.  */
1492 
1493 static gphi *
vop_phi(basic_block bb)1494 vop_phi (basic_block bb)
1495 {
1496   gphi *stmt;
1497   gphi_iterator gsi;
1498   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1499     {
1500       stmt = gsi.phi ();
1501       if (! virtual_operand_p (gimple_phi_result (stmt)))
1502 	continue;
1503       return stmt;
1504     }
1505   return NULL;
1506 }
1507 
1508 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed.  */
1509 
1510 static void
replace_block_by(basic_block bb1,basic_block bb2)1511 replace_block_by (basic_block bb1, basic_block bb2)
1512 {
1513   edge pred_edge;
1514   edge e1, e2;
1515   edge_iterator ei;
1516   unsigned int i;
1517   gphi *bb2_phi;
1518 
1519   bb2_phi = vop_phi (bb2);
1520 
1521   /* Mark the basic block as deleted.  */
1522   mark_basic_block_deleted (bb1);
1523 
1524   /* Redirect the incoming edges of bb1 to bb2.  */
1525   for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1526     {
1527       pred_edge = EDGE_PRED (bb1, i - 1);
1528       pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1529       gcc_assert (pred_edge != NULL);
1530 
1531       if (bb2_phi == NULL)
1532 	continue;
1533 
1534       /* The phi might have run out of capacity when the redirect added an
1535 	 argument, which means it could have been replaced.  Refresh it.  */
1536       bb2_phi = vop_phi (bb2);
1537 
1538       add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1539 		   pred_edge, UNKNOWN_LOCATION);
1540     }
1541 
1542   bb2->frequency += bb1->frequency;
1543   if (bb2->frequency > BB_FREQ_MAX)
1544     bb2->frequency = BB_FREQ_MAX;
1545 
1546   bb2->count += bb1->count;
1547 
1548   /* Merge the outgoing edge counts from bb1 onto bb2.  */
1549   gcov_type out_sum = 0;
1550   FOR_EACH_EDGE (e1, ei, bb1->succs)
1551     {
1552       e2 = find_edge (bb2, e1->dest);
1553       gcc_assert (e2);
1554       e2->count += e1->count;
1555       out_sum += e2->count;
1556     }
1557   /* Recompute the edge probabilities from the new merged edge count.
1558      Use the sum of the new merged edge counts computed above instead
1559      of bb2's merged count, in case there are profile count insanities
1560      making the bb count inconsistent with the edge weights.  */
1561   FOR_EACH_EDGE (e2, ei, bb2->succs)
1562     {
1563       e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
1564     }
1565 
1566   /* Clear range info from all stmts in BB2 -- this transformation
1567      could make them out of date.  */
1568   reset_flow_sensitive_info_in_bb (bb2);
1569 
1570   /* Do updates that use bb1, before deleting bb1.  */
1571   release_last_vdef (bb1);
1572   same_succ_flush_bb (bb1);
1573 
1574   delete_basic_block (bb1);
1575 }
1576 
1577 /* Bbs for which update_debug_stmt need to be called.  */
1578 
1579 static bitmap update_bbs;
1580 
1581 /* For each cluster in all_clusters, merge all cluster->bbs.  Returns
1582    number of bbs removed.  */
1583 
1584 static int
apply_clusters(void)1585 apply_clusters (void)
1586 {
1587   basic_block bb1, bb2;
1588   bb_cluster *c;
1589   unsigned int i, j;
1590   bitmap_iterator bj;
1591   int nr_bbs_removed = 0;
1592 
1593   for (i = 0; i < all_clusters.length (); ++i)
1594     {
1595       c = all_clusters[i];
1596       if (c == NULL)
1597 	continue;
1598 
1599       bb2 = c->rep_bb;
1600       bitmap_set_bit (update_bbs, bb2->index);
1601 
1602       bitmap_clear_bit (c->bbs, bb2->index);
1603       EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1604 	{
1605 	  bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
1606 	  bitmap_clear_bit (update_bbs, bb1->index);
1607 
1608 	  replace_block_by (bb1, bb2);
1609 	  nr_bbs_removed++;
1610 	}
1611     }
1612 
1613   return nr_bbs_removed;
1614 }
1615 
1616 /* Resets debug statement STMT if it has uses that are not dominated by their
1617    defs.  */
1618 
1619 static void
update_debug_stmt(gimple * stmt)1620 update_debug_stmt (gimple *stmt)
1621 {
1622   use_operand_p use_p;
1623   ssa_op_iter oi;
1624   basic_block bbuse;
1625 
1626   if (!gimple_debug_bind_p (stmt))
1627     return;
1628 
1629   bbuse = gimple_bb (stmt);
1630   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1631     {
1632       tree name = USE_FROM_PTR (use_p);
1633       gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1634       basic_block bbdef = gimple_bb (def_stmt);
1635       if (bbdef == NULL || bbuse == bbdef
1636 	  || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1637 	continue;
1638 
1639       gimple_debug_bind_reset_value (stmt);
1640       update_stmt (stmt);
1641       break;
1642     }
1643 }
1644 
1645 /* Resets all debug statements that have uses that are not
1646    dominated by their defs.  */
1647 
1648 static void
update_debug_stmts(void)1649 update_debug_stmts (void)
1650 {
1651   basic_block bb;
1652   bitmap_iterator bi;
1653   unsigned int i;
1654 
1655   EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1656     {
1657       gimple *stmt;
1658       gimple_stmt_iterator gsi;
1659 
1660       bb = BASIC_BLOCK_FOR_FN (cfun, i);
1661       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1662 	{
1663 	  stmt = gsi_stmt (gsi);
1664 	  if (!is_gimple_debug (stmt))
1665 	    continue;
1666 	  update_debug_stmt (stmt);
1667 	}
1668     }
1669 }
1670 
1671 /* Runs tail merge optimization.  */
1672 
1673 unsigned int
tail_merge_optimize(unsigned int todo)1674 tail_merge_optimize (unsigned int todo)
1675 {
1676   int nr_bbs_removed_total = 0;
1677   int nr_bbs_removed;
1678   bool loop_entered = false;
1679   int iteration_nr = 0;
1680   int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
1681 
1682   if (!flag_tree_tail_merge
1683       || max_iterations == 0)
1684     return 0;
1685 
1686   timevar_push (TV_TREE_TAIL_MERGE);
1687 
1688   if (!dom_info_available_p (CDI_DOMINATORS))
1689     {
1690       /* PRE can leave us with unreachable blocks, remove them now.  */
1691       delete_unreachable_blocks ();
1692       calculate_dominance_info (CDI_DOMINATORS);
1693     }
1694   init_worklist ();
1695 
1696   while (!worklist.is_empty ())
1697     {
1698       if (!loop_entered)
1699 	{
1700 	  loop_entered = true;
1701 	  alloc_cluster_vectors ();
1702 	  update_bbs = BITMAP_ALLOC (NULL);
1703 	}
1704       else
1705 	reset_cluster_vectors ();
1706 
1707       iteration_nr++;
1708       if (dump_file && (dump_flags & TDF_DETAILS))
1709 	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1710 
1711       find_clusters ();
1712       gcc_assert (worklist.is_empty ());
1713       if (all_clusters.is_empty ())
1714 	break;
1715 
1716       nr_bbs_removed = apply_clusters ();
1717       nr_bbs_removed_total += nr_bbs_removed;
1718       if (nr_bbs_removed == 0)
1719 	break;
1720 
1721       free_dominance_info (CDI_DOMINATORS);
1722 
1723       if (iteration_nr == max_iterations)
1724 	break;
1725 
1726       calculate_dominance_info (CDI_DOMINATORS);
1727       update_worklist ();
1728     }
1729 
1730   if (dump_file && (dump_flags & TDF_DETAILS))
1731     fprintf (dump_file, "htab collision / search: %f\n",
1732 	     same_succ_htab->collisions ());
1733 
1734   if (nr_bbs_removed_total > 0)
1735     {
1736       if (MAY_HAVE_DEBUG_STMTS)
1737 	{
1738 	  calculate_dominance_info (CDI_DOMINATORS);
1739 	  update_debug_stmts ();
1740 	}
1741 
1742       if (dump_file && (dump_flags & TDF_DETAILS))
1743 	{
1744 	  fprintf (dump_file, "Before TODOs.\n");
1745 	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
1746 	}
1747 
1748       mark_virtual_operands_for_renaming (cfun);
1749     }
1750 
1751   delete_worklist ();
1752   if (loop_entered)
1753     {
1754       delete_cluster_vectors ();
1755       BITMAP_FREE (update_bbs);
1756     }
1757 
1758   timevar_pop (TV_TREE_TAIL_MERGE);
1759 
1760   return todo;
1761 }
1762