1 /* If-conversion for vectorizer.
2    Copyright (C) 2004-2018 Free Software Foundation, Inc.
3    Contributed by Devang Patel <dpatel@apple.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass implements a tree level if-conversion of loops.  Its
22    initial goal is to help the vectorizer to vectorize loops with
23    conditions.
24 
25    A short description of if-conversion:
26 
27      o Decide if a loop is if-convertible or not.
28      o Walk all loop basic blocks in breadth first order (BFS order).
29        o Remove conditional statements (at the end of basic block)
30          and propagate condition into destination basic blocks'
31 	 predicate list.
32        o Replace modify expression with conditional modify expression
33          using current basic block's condition.
34      o Merge all basic blocks
35        o Replace phi nodes with conditional modify expr
36        o Merge all basic blocks into header
37 
38      Sample transformation:
39 
40      INPUT
41      -----
42 
43      # i_23 = PHI <0(0), i_18(10)>;
44      <L0>:;
45      j_15 = A[i_23];
46      if (j_15 > 41) goto <L1>; else goto <L17>;
47 
48      <L17>:;
49      goto <bb 3> (<L3>);
50 
51      <L1>:;
52 
53      # iftmp.2_4 = PHI <0(8), 42(2)>;
54      <L3>:;
55      A[i_23] = iftmp.2_4;
56      i_18 = i_23 + 1;
57      if (i_18 <= 15) goto <L19>; else goto <L18>;
58 
59      <L19>:;
60      goto <bb 1> (<L0>);
61 
62      <L18>:;
63 
64      OUTPUT
65      ------
66 
67      # i_23 = PHI <0(0), i_18(10)>;
68      <L0>:;
69      j_15 = A[i_23];
70 
71      <L3>:;
72      iftmp.2_4 = j_15 > 41 ? 42 : 0;
73      A[i_23] = iftmp.2_4;
74      i_18 = i_23 + 1;
75      if (i_18 <= 15) goto <L19>; else goto <L18>;
76 
77      <L19>:;
78      goto <bb 1> (<L0>);
79 
80      <L18>:;
81 */
82 
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop-ivopts.h"
112 #include "tree-ssa-address.h"
113 #include "dbgcnt.h"
114 #include "tree-hash-traits.h"
115 #include "varasm.h"
116 #include "builtins.h"
117 #include "params.h"
118 #include "cfganal.h"
119 
/* Only handle PHIs with no more than MAX_PHI_ARG_NUM arguments unless
   we are asked to by the simd pragma.  */
122 #define MAX_PHI_ARG_NUM \
123   ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS))
124 
125 /* Indicate if new load/store that needs to be predicated is introduced
126    during if conversion.  */
127 static bool any_pred_load_store;
128 
129 /* Indicate if there are any complicated PHIs that need to be handled in
130    if-conversion.  Complicated PHI has more than two arguments and can't
131    be degenerated to two arguments PHI.  See more information in comment
132    before phi_convertible_by_degenerating_args.  */
133 static bool any_complicated_phi;
134 
/* Hash traits for struct innermost_loop_behavior, used as the key of
   the innermost_DR_map below.  Entries hash and compare structurally
   on the base address, offset, init and step trees.  It depends on
   the user to free the memory.  */

struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  /* Combine the hashes of E's base_address, offset, init and step.  */
  static inline hashval_t hash (const value_type &);
  /* Return true when the two behaviors describe the same access.  */
  static inline bool equal (const value_type &,
			    const compare_type &);
};
144 
145 inline hashval_t
hash(const value_type & e)146 innermost_loop_behavior_hash::hash (const value_type &e)
147 {
148   hashval_t hash;
149 
150   hash = iterative_hash_expr (e->base_address, 0);
151   hash = iterative_hash_expr (e->offset, hash);
152   hash = iterative_hash_expr (e->init, hash);
153   return iterative_hash_expr (e->step, hash);
154 }
155 
156 inline bool
equal(const value_type & e1,const compare_type & e2)157 innermost_loop_behavior_hash::equal (const value_type &e1,
158 				     const compare_type &e2)
159 {
160   if ((e1->base_address && !e2->base_address)
161       || (!e1->base_address && e2->base_address)
162       || (!e1->offset && e2->offset)
163       || (e1->offset && !e2->offset)
164       || (!e1->init && e2->init)
165       || (e1->init && !e2->init)
166       || (!e1->step && e2->step)
167       || (e1->step && !e2->step))
168     return false;
169 
170   if (e1->base_address && e2->base_address
171       && !operand_equal_p (e1->base_address, e2->base_address, 0))
172     return false;
173   if (e1->offset && e2->offset
174       && !operand_equal_p (e1->offset, e2->offset, 0))
175     return false;
176   if (e1->init && e2->init
177       && !operand_equal_p (e1->init, e2->init, 0))
178     return false;
179   if (e1->step && e2->step
180       && !operand_equal_p (e1->step, e2->step, 0))
181     return false;
182 
183   return true;
184 }
185 
186 /* List of basic blocks in if-conversion-suitable order.  */
187 static basic_block *ifc_bbs;
188 
189 /* Hash table to store <DR's innermost loop behavior, DR> pairs.  */
190 static hash_map<innermost_loop_behavior_hash,
191 		data_reference_p> *innermost_DR_map;
192 
193 /* Hash table to store <base reference, DR> pairs.  */
194 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
195 
/* Structure used to predicate basic blocks.  This is attached to the
   ->aux field of the BBs in the loop to be if-converted.  A predicate
   of boolean_true_node means the block is executed unconditionally
   (see init_bb_predicate / is_true_predicate).  */
struct bb_predicate {

  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  */
  gimple_seq predicate_gimplified_stmts;
};
208 
209 /* Returns true when the basic block BB has a predicate.  */
210 
211 static inline bool
bb_has_predicate(basic_block bb)212 bb_has_predicate (basic_block bb)
213 {
214   return bb->aux != NULL;
215 }
216 
217 /* Returns the gimplified predicate for basic block BB.  */
218 
219 static inline tree
bb_predicate(basic_block bb)220 bb_predicate (basic_block bb)
221 {
222   return ((struct bb_predicate *) bb->aux)->predicate;
223 }
224 
225 /* Sets the gimplified predicate COND for basic block BB.  */
226 
227 static inline void
set_bb_predicate(basic_block bb,tree cond)228 set_bb_predicate (basic_block bb, tree cond)
229 {
230   gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
231 	       && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
232 	      || is_gimple_condexpr (cond));
233   ((struct bb_predicate *) bb->aux)->predicate = cond;
234 }
235 
236 /* Returns the sequence of statements of the gimplification of the
237    predicate for basic block BB.  */
238 
239 static inline gimple_seq
bb_predicate_gimplified_stmts(basic_block bb)240 bb_predicate_gimplified_stmts (basic_block bb)
241 {
242   return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
243 }
244 
245 /* Sets the sequence of statements STMTS of the gimplification of the
246    predicate for basic block BB.  */
247 
248 static inline void
set_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)249 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
250 {
251   ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
252 }
253 
254 /* Adds the sequence of statements STMTS to the sequence of statements
255    of the predicate for basic block BB.  */
256 
257 static inline void
add_bb_predicate_gimplified_stmts(basic_block bb,gimple_seq stmts)258 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
259 {
260   /* We might have updated some stmts in STMTS via force_gimple_operand
261      calling fold_stmt and that producing multiple stmts.  Delink immediate
262      uses so update_ssa after loop versioning doesn't get confused for
263      the not yet inserted predicates.
264      ???  This should go away once we reliably avoid updating stmts
265      not in any BB.  */
266   for (gimple_stmt_iterator gsi = gsi_start (stmts);
267        !gsi_end_p (gsi); gsi_next (&gsi))
268     {
269       gimple *stmt = gsi_stmt (gsi);
270       delink_stmt_imm_use (stmt);
271       gimple_set_modified (stmt, true);
272     }
273   gimple_seq_add_seq_without_update
274     (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
275 }
276 
277 /* Initializes to TRUE the predicate of basic block BB.  */
278 
279 static inline void
init_bb_predicate(basic_block bb)280 init_bb_predicate (basic_block bb)
281 {
282   bb->aux = XNEW (struct bb_predicate);
283   set_bb_predicate_gimplified_stmts (bb, NULL);
284   set_bb_predicate (bb, boolean_true_node);
285 }
286 
287 /* Release the SSA_NAMEs associated with the predicate of basic block BB.  */
288 
289 static inline void
release_bb_predicate(basic_block bb)290 release_bb_predicate (basic_block bb)
291 {
292   gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
293   if (stmts)
294     {
295       /* Ensure that these stmts haven't yet been added to a bb.  */
296       if (flag_checking)
297 	for (gimple_stmt_iterator i = gsi_start (stmts);
298 	     !gsi_end_p (i); gsi_next (&i))
299 	  gcc_assert (! gimple_bb (gsi_stmt (i)));
300 
301       /* Discard them.  */
302       gimple_seq_discard (stmts);
303       set_bb_predicate_gimplified_stmts (bb, NULL);
304     }
305 }
306 
307 /* Free the predicate of basic block BB.  */
308 
309 static inline void
free_bb_predicate(basic_block bb)310 free_bb_predicate (basic_block bb)
311 {
312   if (!bb_has_predicate (bb))
313     return;
314 
315   release_bb_predicate (bb);
316   free (bb->aux);
317   bb->aux = NULL;
318 }
319 
320 /* Reinitialize predicate of BB with the true predicate.  */
321 
322 static inline void
reset_bb_predicate(basic_block bb)323 reset_bb_predicate (basic_block bb)
324 {
325   if (!bb_has_predicate (bb))
326     init_bb_predicate (bb);
327   else
328     {
329       release_bb_predicate (bb);
330       set_bb_predicate (bb, boolean_true_node);
331     }
332 }
333 
334 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
335    the expression EXPR.  Inserts the statement created for this
336    computation before GSI and leaves the iterator GSI at the same
337    statement.  */
338 
339 static tree
ifc_temp_var(tree type,tree expr,gimple_stmt_iterator * gsi)340 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
341 {
342   tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
343   gimple *stmt = gimple_build_assign (new_name, expr);
344   gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
345   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
346   return new_name;
347 }
348 
349 /* Return true when COND is a false predicate.  */
350 
351 static inline bool
is_false_predicate(tree cond)352 is_false_predicate (tree cond)
353 {
354   return (cond != NULL_TREE
355 	  && (cond == boolean_false_node
356 	      || integer_zerop (cond)));
357 }
358 
359 /* Return true when COND is a true predicate.  */
360 
361 static inline bool
is_true_predicate(tree cond)362 is_true_predicate (tree cond)
363 {
364   return (cond == NULL_TREE
365 	  || cond == boolean_true_node
366 	  || integer_onep (cond));
367 }
368 
369 /* Returns true when BB has a predicate that is not trivial: true or
370    NULL_TREE.  */
371 
372 static inline bool
is_predicated(basic_block bb)373 is_predicated (basic_block bb)
374 {
375   return !is_true_predicate (bb_predicate (bb));
376 }
377 
378 /* Parses the predicate COND and returns its comparison code and
379    operands OP0 and OP1.  */
380 
381 static enum tree_code
parse_predicate(tree cond,tree * op0,tree * op1)382 parse_predicate (tree cond, tree *op0, tree *op1)
383 {
384   gimple *s;
385 
386   if (TREE_CODE (cond) == SSA_NAME
387       && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
388     {
389       if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
390 	{
391 	  *op0 = gimple_assign_rhs1 (s);
392 	  *op1 = gimple_assign_rhs2 (s);
393 	  return gimple_assign_rhs_code (s);
394 	}
395 
396       else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
397 	{
398 	  tree op = gimple_assign_rhs1 (s);
399 	  tree type = TREE_TYPE (op);
400 	  enum tree_code code = parse_predicate (op, op0, op1);
401 
402 	  return code == ERROR_MARK ? ERROR_MARK
403 	    : invert_tree_comparison (code, HONOR_NANS (type));
404 	}
405 
406       return ERROR_MARK;
407     }
408 
409   if (COMPARISON_CLASS_P (cond))
410     {
411       *op0 = TREE_OPERAND (cond, 0);
412       *op1 = TREE_OPERAND (cond, 1);
413       return TREE_CODE (cond);
414     }
415 
416   return ERROR_MARK;
417 }
418 
419 /* Returns the fold of predicate C1 OR C2 at location LOC.  */
420 
421 static tree
fold_or_predicates(location_t loc,tree c1,tree c2)422 fold_or_predicates (location_t loc, tree c1, tree c2)
423 {
424   tree op1a, op1b, op2a, op2b;
425   enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
426   enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
427 
428   if (code1 != ERROR_MARK && code2 != ERROR_MARK)
429     {
430       tree t = maybe_fold_or_comparisons (code1, op1a, op1b,
431 					  code2, op2a, op2b);
432       if (t)
433 	return t;
434     }
435 
436   return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
437 }
438 
439 /* Returns either a COND_EXPR or the folded expression if the folded
440    expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
441    a constant or a SSA_NAME. */
442 
443 static tree
fold_build_cond_expr(tree type,tree cond,tree rhs,tree lhs)444 fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
445 {
446   tree rhs1, lhs1, cond_expr;
447 
448   /* If COND is comparison r != 0 and r has boolean type, convert COND
449      to SSA_NAME to accept by vect bool pattern.  */
450   if (TREE_CODE (cond) == NE_EXPR)
451     {
452       tree op0 = TREE_OPERAND (cond, 0);
453       tree op1 = TREE_OPERAND (cond, 1);
454       if (TREE_CODE (op0) == SSA_NAME
455 	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
456 	  && (integer_zerop (op1)))
457 	cond = op0;
458     }
459   cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);
460 
461   if (cond_expr == NULL_TREE)
462     return build3 (COND_EXPR, type, cond, rhs, lhs);
463 
464   STRIP_USELESS_TYPE_CONVERSION (cond_expr);
465 
466   if (is_gimple_val (cond_expr))
467     return cond_expr;
468 
469   if (TREE_CODE (cond_expr) == ABS_EXPR)
470     {
471       rhs1 = TREE_OPERAND (cond_expr, 1);
472       STRIP_USELESS_TYPE_CONVERSION (rhs1);
473       if (is_gimple_val (rhs1))
474 	return build1 (ABS_EXPR, type, rhs1);
475     }
476 
477   if (TREE_CODE (cond_expr) == MIN_EXPR
478       || TREE_CODE (cond_expr) == MAX_EXPR)
479     {
480       lhs1 = TREE_OPERAND (cond_expr, 0);
481       STRIP_USELESS_TYPE_CONVERSION (lhs1);
482       rhs1 = TREE_OPERAND (cond_expr, 1);
483       STRIP_USELESS_TYPE_CONVERSION (rhs1);
484       if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
485 	return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
486     }
487   return build3 (COND_EXPR, type, cond, rhs, lhs);
488 }
489 
/* Add condition NC to the predicate list of basic block BB.  LOOP is
   the loop to be if-converted.  Use predicate of cd-equivalent block
   for join bb if it exists: we call basic blocks bb1 and bb2
   cd-equivalent if they are executed under the same condition.  */

static inline void
add_to_predicate_list (struct loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  /* A true condition carries no information.  */
  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
	set_bb_predicate (bb, bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
		 dom_bb->index, bb->index);
      return;
    }

  /* OR the new condition NC into BB's existing predicate, if any.  */
  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      /* If the disjunction folded to true, BB executes on every
	 iteration; drop its predicate entirely.  */
      if (is_true_predicate (bc))
	{
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  /* Gimplify the predicate when needed, recording the produced
     statements for later insertion
     (see add_bb_predicate_gimplified_stmts).  */
  if (!is_gimple_condexpr (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}
555 
556 /* Add the condition COND to the previous condition PREV_COND, and add
557    this to the predicate list of the destination of edge E.  LOOP is
558    the loop to be if-converted.  */
559 
560 static void
add_to_dst_predicate_list(struct loop * loop,edge e,tree prev_cond,tree cond)561 add_to_dst_predicate_list (struct loop *loop, edge e,
562 			   tree prev_cond, tree cond)
563 {
564   if (!flow_bb_inside_loop_p (loop, e->dest))
565     return;
566 
567   if (!is_true_predicate (prev_cond))
568     cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
569 			prev_cond, cond);
570 
571   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
572     add_to_predicate_list (loop, e->dest, cond);
573 }
574 
575 /* Return true if one of the successor edges of BB exits LOOP.  */
576 
577 static bool
bb_with_exit_edge_p(struct loop * loop,basic_block bb)578 bb_with_exit_edge_p (struct loop *loop, basic_block bb)
579 {
580   edge e;
581   edge_iterator ei;
582 
583   FOR_EACH_EDGE (e, ei, bb->succs)
584     if (loop_exit_edge_p (loop, e))
585       return true;
586 
587   return false;
588 }
589 
590 /* Given PHI which has more than two arguments, this function checks if
591    it's if-convertible by degenerating its arguments.  Specifically, if
592    below two conditions are satisfied:
593 
594      1) Number of PHI arguments with different values equals to 2 and one
595 	argument has the only occurrence.
596      2) The edge corresponding to the unique argument isn't critical edge.
597 
598    Such PHI can be handled as PHIs have only two arguments.  For example,
599    below PHI:
600 
601      res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
602 
603    can be transformed into:
604 
605      res = (predicate of e3) ? A_2 : A_1;
606 
607    Return TRUE if it is the case, FALSE otherwise.  */
608 
609 static bool
phi_convertible_by_degenerating_args(gphi * phi)610 phi_convertible_by_degenerating_args (gphi *phi)
611 {
612   edge e;
613   tree arg, t1 = NULL, t2 = NULL;
614   unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
615   unsigned int num_args = gimple_phi_num_args (phi);
616 
617   gcc_assert (num_args > 2);
618 
619   for (i = 0; i < num_args; i++)
620     {
621       arg = gimple_phi_arg_def (phi, i);
622       if (t1 == NULL || operand_equal_p (t1, arg, 0))
623 	{
624 	  n1++;
625 	  i1 = i;
626 	  t1 = arg;
627 	}
628       else if (t2 == NULL || operand_equal_p (t2, arg, 0))
629 	{
630 	  n2++;
631 	  i2 = i;
632 	  t2 = arg;
633 	}
634       else
635 	return false;
636     }
637 
638   if (n1 != 1 && n2 != 1)
639     return false;
640 
641   /* Check if the edge corresponding to the unique arg is critical.  */
642   e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
643   if (EDGE_COUNT (e->src->succs) > 1)
644     return false;
645 
646   return true;
647 }
648 
649 /* Return true when PHI is if-convertible.  PHI is part of loop LOOP
650    and it belongs to basic block BB.  Note at this point, it is sure
651    that PHI is if-convertible.  This function updates global variable
652    ANY_COMPLICATED_PHI if PHI is complicated.  */
653 
654 static bool
if_convertible_phi_p(struct loop * loop,basic_block bb,gphi * phi)655 if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi)
656 {
657   if (dump_file && (dump_flags & TDF_DETAILS))
658     {
659       fprintf (dump_file, "-------------------------\n");
660       print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
661     }
662 
663   if (bb != loop->header
664       && gimple_phi_num_args (phi) > 2
665       && !phi_convertible_by_degenerating_args (phi))
666     any_complicated_phi = true;
667 
668   return true;
669 }
670 
/* Records the status of a data reference.  This struct is attached to
   each DR->aux field.  */

struct ifc_dr {
  /* True when this reference is read or written on every iteration of
     the loop (its rw_predicate folded to true).  */
  bool rw_unconditionally;
  /* True when this reference is written on every iteration of the
     loop (its w_predicate folded to true).  */
  bool w_unconditionally;
  /* True when the base object of this reference is written at least
     once unconditionally (its base_w_predicate folded to true).  */
  bool written_at_least_once;

  /* OR of the predicates of all blocks that read or write this DR.  */
  tree rw_predicate;
  /* OR of the predicates of all blocks that write this DR.  */
  tree w_predicate;
  /* OR of the predicates of all blocks that write the base object.  */
  tree base_w_predicate;
};

/* Accessors for the ifc_dr attached to a data reference's aux field.  */
#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
688 
/* Iterates over DR's and stores refs, DR and base refs, DR pairs in
   HASH tables.  While storing them in HASH table, it checks if the
   reference is unconditionally read or written and stores that as a flag
   information.  For base reference it checks if it is written at least
   once unconditionally and stores it as flag information along with DR.
   In other words for every data reference A in STMT there exist other
   accesses to a data reference with the same base with predicates that
   add up (OR-up) to the true predicate: this ensures that the data
   reference A is touched (read or written) on every iteration of the
   if-converted loop.  */
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{

  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  /* Predicate of the basic block containing the reference.  */
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  /* The first DR seen for a given innermost behavior becomes the
     "master" DR whose ifc_dr flags accumulate all matching accesses.  */
  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      /* OR this block's predicate into the write predicate; when the
	 disjunction becomes true, the ref is written unconditionally.  */
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  /* Likewise for the combined read/write predicate.  */
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  if (DR_IS_WRITE (a))
    {
      /* Also track writes per base object, keyed by DR_BASE_OBJECT.  */
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}
739 
/* Return TRUE if can prove the index IDX of an array reference REF is
   within array bound.  Return false otherwise.  DTA is the loop
   containing the reference, passed through by for_each_index.  */

static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  bool overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  struct loop *loop = (struct loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays at the end of the structure, we are not guaranteed that they
     do not really extend over their declared size.  However, for arrays of
     size greater than one, this is unlikely to be intended.  */
  if (array_at_struct_end_p (ref))
    return false;

  /* Analyze the index as an affine evolution INIT + i * STEP in LOOP.  */
  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  /* Only constant initial values and steps can be handled.  */
  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  /* Bound the number of in-range iterations by the loop's maximum
     iteration count.  */
  if (!max_loop_iterations (loop, &niter))
    return false;

  /* DELTA is the distance from the initial index to the bound the
     evolution moves towards; WI_STEP is the positive step value.  */
  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
810 
811 /* Return TRUE if ref is a within bound array reference.  */
812 
813 static bool
ref_within_array_bound(gimple * stmt,tree ref)814 ref_within_array_bound (gimple *stmt, tree ref)
815 {
816   struct loop *loop = loop_containing_stmt (stmt);
817 
818   gcc_assert (loop != NULL);
819   return for_each_index (&ref, idx_within_array_bound, loop);
820 }
821 
822 
823 /* Given a memory reference expression T, return TRUE if base object
824    it refers to is writable.  The base object of a memory reference
825    is the main object being referenced, which is returned by function
826    get_base_address.  */
827 
828 static bool
base_object_writable(tree ref)829 base_object_writable (tree ref)
830 {
831   tree base_tree = get_base_address (ref);
832 
833   return (base_tree
834 	  && DECL_P (base_tree)
835 	  && decl_binds_to_current_def_p (base_tree)
836 	  && !TREE_READONLY (base_tree));
837 }
838 
/* Return true when the memory references of STMT won't trap in the
   if-converted code.  There are two things that we have to check for:

   - writes to memory occur to writable memory: if-conversion of
   memory writes transforms the conditional memory writes into
   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
   be executed at all in the original code, it may be a readonly
   memory.  To check that A is not const-qualified, we check that
   there exists at least an unconditional write to A in the current
   function.

   - reads or writes to memory are valid memory accesses for every
   iteration.  To check that the memory accesses are correctly formed
   and that we are allowed to read and write in these locations, we
   check that the memory accesses to be if-converted occur at every
   iteration unconditionally.

   Returns true for the memory reference in STMT, same memory reference
   is read or written unconditionally at least once and the base memory
   reference is written unconditionally once.  This is to check reference
   will not write fault.  Also returns true if the memory reference is
   unconditionally read once then we are conditionally writing to memory
   which is defined as read and write and is bound to the definition
   we are seeing.  */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  data_reference_p *master_dr, *base_master_dr;
  /* STMT's data reference was recorded at position gimple_uid - 1.  */
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
              || DR_INIT (a) || DR_STEP (a));

  /* Look up the master DRs that accumulated the read/write predicates
     (see hash_memrefs_baserefs_and_store_DRs_read_written_info).  */
  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even a is conditional access, we can treat it as an unconditional
     one if it's an array reference and all its index are within array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* an unconditional write won't trap if the base is written
         to unconditionally.  */
      if (base_master_dr
	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
      /* or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
	return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
    }

  return false;
}
915 
/* Return true if STMT could be converted into a masked load or store
   (conditional load or store based on a mask computed from bb predicate).  */

static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
  tree lhs, ref;
  machine_mode mode;
  basic_block bb = gimple_bb (stmt);
  bool is_load;

  /* Masked load/store is only useful when this loop will be
     vectorized, and only for plain non-volatile single assignments.  */
  if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
      || bb->loop_father->dont_vectorize
      || !gimple_assign_single_p (stmt)
      || gimple_has_volatile_ops (stmt))
    return false;

  /* Check whether this is a load or store.  */
  lhs = gimple_assign_lhs (stmt);
  if (gimple_store_p (stmt))
    {
      if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
	return false;
      is_load = false;
      ref = lhs;
    }
  else if (gimple_assign_load_p (stmt))
    {
      is_load = true;
      ref = gimple_assign_rhs1 (stmt);
    }
  else
    return false;

  /* The reference must be addressable to become a masked memory
     operation.  */
  if (may_be_nonaddressable_p (ref))
    return false;

  /* Mask should be integer mode of the same size as the load/store
     mode.  */
  mode = TYPE_MODE (TREE_TYPE (lhs));
  if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
    return false;

  /* Finally, the target must support masked load/store for MODE.  */
  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
    return true;

  return false;
}
964 
/* Return true when STMT is if-convertible.

   GIMPLE_ASSIGN statement is not if-convertible if,
   - it is not movable,
   - it could trap,
   - LHS is not var decl.  */

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  /* Predication only produces register assignments; aggregate
     LHS types cannot be handled.  */
  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constrains might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  /* A potentially trapping statement is acceptable only when it can
     be emitted as a masked load/store; GF_PLF_2 marks that decision
     for the later transformation phase.  */
  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      if (ifcvt_can_use_mask_load_store (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  any_pred_load_store = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "tree could trap...\n");
      return false;
    }

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    any_pred_load_store = true;

  return true;
}
1027 
1028 /* Return true when STMT is if-convertible.
1029 
1030    A statement is if-convertible if:
1031    - it is an if-convertible GIMPLE_ASSIGN,
1032    - it is a GIMPLE_LABEL or a GIMPLE_COND,
1033    - it is builtins call.  */
1034 
1035 static bool
if_convertible_stmt_p(gimple * stmt,vec<data_reference_p> refs)1036 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1037 {
1038   switch (gimple_code (stmt))
1039     {
1040     case GIMPLE_LABEL:
1041     case GIMPLE_DEBUG:
1042     case GIMPLE_COND:
1043       return true;
1044 
1045     case GIMPLE_ASSIGN:
1046       return if_convertible_gimple_assign_stmt_p (stmt, refs);
1047 
1048     case GIMPLE_CALL:
1049       {
1050 	tree fndecl = gimple_call_fndecl (stmt);
1051 	if (fndecl)
1052 	  {
1053 	    int flags = gimple_call_flags (stmt);
1054 	    if ((flags & ECF_CONST)
1055 		&& !(flags & ECF_LOOPING_CONST_OR_PURE)
1056 		/* We can only vectorize some builtins at the moment,
1057 		   so restrict if-conversion to those.  */
1058 		&& DECL_BUILT_IN (fndecl))
1059 	      return true;
1060 	  }
1061 	return false;
1062       }
1063 
1064     default:
1065       /* Don't know what to do with 'em so don't do anything.  */
1066       if (dump_file && (dump_flags & TDF_DETAILS))
1067 	{
1068 	  fprintf (dump_file, "don't know what to do\n");
1069 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1070 	}
1071       return false;
1072     }
1073 
1074   return true;
1075 }
1076 
/* Assumes that BB has more than 1 predecessor.
   Returns false if at least one predecessor is not on a critical edge
   and true otherwise.  (Since BB has multiple predecessors, an edge
   into BB is critical exactly when its source has multiple
   successors.)  */

static inline bool
all_preds_critical_p (basic_block bb)
{
  edge e;
  edge_iterator ei;

  /* An edge whose source has a single successor cannot be critical.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (EDGE_COUNT (e->src->succs) == 1)
      return false;
  return true;
}
1092 
/* Returns true if at least one predecessor edge of BB is critical,
   i.e. comes from a block with more than one successor.  */
static inline bool
has_pred_critical_p (basic_block bb)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->preds)
    if (EDGE_COUNT (e->src->succs) > 1)
      return true;
  return false;
}
1105 
/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  EXIT_BB is NULL until the caller has seen the exit
   block in the BFS walk.  */

static bool
if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  /* At most one condition per block is supported.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* Once the exit block has been seen, only an empty latch that the
     exit block dominates may follow it.  */
  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "latch is not dominated by exit_block\n");
	    return false;
	  }
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}
1164 
1165 /* Return true when all predecessor blocks of BB are visited.  The
1166    VISITED bitmap keeps track of the visited blocks.  */
1167 
1168 static bool
pred_blocks_visited_p(basic_block bb,bitmap * visited)1169 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1170 {
1171   edge e;
1172   edge_iterator ei;
1173   FOR_EACH_EDGE (e, ei, bb->preds)
1174     if (!bitmap_bit_p (*visited, e->src->index))
1175       return false;
1176 
1177   return true;
1178 }
1179 
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.  Returns NULL when the loop
   contains an irreducible region.  */

static basic_block *
get_loop_body_in_if_conv_order (const struct loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      /* Irreducible regions cannot be if-converted: clean up and
	 give up.  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  /* Select BB only once every predecessor has been selected;
	     the header is the loop entry and is always ready.  */
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      /* Blocks whose predecessors were not yet selected are deferred;
	 rescan the BFS list until all blocks have been placed.  */
      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);
  return blocks;
}
1238 
/* Compute the predicates for all the basic blocks in LOOP.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;
      gimple *stmt;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      stmt = last_stmt (bb);
      if (stmt && gimple_code (stmt) == GIMPLE_COND)
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  /* Rebuild the comparison of the GIMPLE_COND as a tree
	     expression so it can be combined into predicates.  */
	  tree c = build2_loc (loc, gimple_cond_code (stmt),
				    boolean_type_node,
				    gimple_cond_lhs (stmt),
				    gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
1330 
1331 /* Build region by adding loop pre-header and post-header blocks.  */
1332 
1333 static vec<basic_block>
build_region(struct loop * loop)1334 build_region (struct loop *loop)
1335 {
1336   vec<basic_block> region = vNULL;
1337   basic_block exit_bb = NULL;
1338 
1339   gcc_assert (ifc_bbs);
1340   /* The first element is loop pre-header.  */
1341   region.safe_push (loop_preheader_edge (loop)->src);
1342 
1343   for (unsigned int i = 0; i < loop->num_nodes; i++)
1344     {
1345       basic_block bb = ifc_bbs[i];
1346       region.safe_push (bb);
1347       /* Find loop postheader.  */
1348       edge e;
1349       edge_iterator ei;
1350       FOR_EACH_EDGE (e, ei, bb->succs)
1351 	if (loop_exit_edge_p (loop, e))
1352 	  {
1353 	      exit_bb = e->dest;
1354 	      break;
1355 	  }
1356     }
1357   /* The last element is loop post-header.  */
1358   gcc_assert (exit_bb);
1359   region.safe_push (exit_bb);
1360   return region;
1361 }
1362 
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.  */

static bool
if_convertible_loop_p_1 (struct loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
    return false;

  calculate_dominance_info (CDI_DOMINATORS);

  /* Allow statements that can be handled during if-conversion.  */
  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Irreducible loop\n");
      return false;
    }

  /* Check CFG-level convertibility of each block and locate the
     block carrying the loop exit edge.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  /* Clear stmt uids (a non-zero uid will later link a stmt to a data
     reference, see below) and reject statement kinds we cannot
     convert.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    return false;
	  }
    }

  data_reference_p dr;

  innermost_DR_map
	  = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  /* Initialize the per-DR ifc_dr bookkeeping and link each stmt to
     its first data reference via the stmt uid (uid == index + 1).  */
  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
         memory reference, we synthesize its innermost loop behavior
         for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
          || TREE_CODE (ref) == IMAGPART_EXPR
          || TREE_CODE (ref) == REALPART_EXPR
          || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
        {
          while (TREE_CODE (ref) == COMPONENT_REF
	         || TREE_CODE (ref) == IMAGPART_EXPR
	         || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
        }
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}
1498 
/* Return true when LOOP is if-convertible.
   LOOP is if-convertible if:
   - it is innermost,
   - it has two or more basic blocks,
   - it has only one exit,
   - loop header is not the exit edge,
   - if its basic blocks and phi nodes are if convertible.  */

static bool
if_convertible_loop_p (struct loop *loop)
{
  edge e;
  edge_iterator ei;
  bool res = false;
  vec<data_reference_p> refs;

  /* Handle only innermost loop.  */
  if (!loop || loop->inner)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "not innermost loop\n");
      return false;
    }

  /* If only one block, no need for if-conversion.  */
  if (loop->num_nodes <= 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "less than 2 basic blocks\n");
      return false;
    }

  /* More than one loop exit is too much to handle.  */
  if (!single_exit (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "multiple exits\n");
      return false;
    }

  /* If one of the loop header's edge is an exit edge then do not
     apply if-conversion.  */
  FOR_EACH_EDGE (e, ei, loop->header->succs)
    if (loop_exit_edge_p (loop, e))
      return false;

  refs.create (5);
  res = if_convertible_loop_p_1 (loop, &refs);

  /* Release the per-DR ifc_dr records allocated by
     if_convertible_loop_p_1 (stored in dr->aux).  */
  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (i, &dr); i++)
    free (dr->aux);

  free_data_refs (refs);

  /* The DR hash maps are only needed during the analysis above.  */
  delete innermost_DR_map;
  innermost_DR_map = NULL;

  delete baseref_DR_map;
  baseref_DR_map = NULL;

  return res;
}
1563 
/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searching:
      loop-header:
	reduc_1 = PHI <..., reduc_2>
      ...
	if (...)
	  reduc_3 = ...
	reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are correspondent PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended)
{
  tree lhs, r_op1, r_op2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  struct loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  /* Identify which argument is defined by the loop-header PHI
     (reduc_1 above) and which by the reduction stmt (reduc_3).  */
  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  /* The header PHI must be fed by this PHI's result over the latch
     edge, closing the reduction cycle.  */
  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  /* The reduction stmt must live in a conditionally-executed block.  */
  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  /* Only plus/minus reductions are recognized.  */
  reduction_op = gimple_assign_rhs_code (stmt);
  if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_op2 == PHI_RESULT (header_phi)
      && reduction_op == PLUS_EXPR)
    std::swap (r_op1, r_op2);
  else if (r_op1 != PHI_RESULT (header_phi))
    return false;

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}
1669 
1670 /* Converts conditional scalar reduction into unconditional form, e.g.
1671      bb_4
1672        if (_5 != 0) goto bb_5 else goto bb_6
1673      end_bb_4
1674      bb_5
1675        res_6 = res_13 + 1;
1676      end_bb_5
1677      bb_6
1678        # res_2 = PHI <res_13(4), res_6(5)>
1679      end_bb_6
1680 
1681    will be converted into sequence
1682     _ifc__1 = _5 != 0 ? 1 : 0;
1683     res_2 = res_13 + _ifc__1;
1684   Argument SWAP tells that arguments of conditional expression should be
1685   swapped.
1686   Returns rhs of resulting PHI assignment.  */
1687 
1688 static tree
convert_scalar_cond_reduction(gimple * reduc,gimple_stmt_iterator * gsi,tree cond,tree op0,tree op1,bool swap)1689 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1690 			       tree cond, tree op0, tree op1, bool swap)
1691 {
1692   gimple_stmt_iterator stmt_it;
1693   gimple *new_assign;
1694   tree rhs;
1695   tree rhs1 = gimple_assign_rhs1 (reduc);
1696   tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1697   tree c;
1698   tree zero = build_zero_cst (TREE_TYPE (rhs1));
1699 
1700   if (dump_file && (dump_flags & TDF_DETAILS))
1701     {
1702       fprintf (dump_file, "Found cond scalar reduction.\n");
1703       print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
1704     }
1705 
1706   /* Build cond expression using COND and constant operand
1707      of reduction rhs.  */
1708   c = fold_build_cond_expr (TREE_TYPE (rhs1),
1709 			    unshare_expr (cond),
1710 			    swap ? zero : op1,
1711 			    swap ? op1 : zero);
1712 
1713   /* Create assignment stmt and insert it at GSI.  */
1714   new_assign = gimple_build_assign (tmp, c);
1715   gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
1716   /* Build rhs for unconditional increment/decrement.  */
1717   rhs = fold_build2 (gimple_assign_rhs_code (reduc),
1718 		     TREE_TYPE (rhs1), op0, tmp);
1719 
1720   /* Delete original reduction stmt.  */
1721   stmt_it = gsi_for_stmt (reduc);
1722   gsi_remove (&stmt_it, true);
1723   release_defs (reduc);
1724   return rhs;
1725 }
1726 
/* Produce the condition for all occurrences of ARG in PHI node: the OR
   of the predicates of the predecessor blocks whose argument indexes
   are listed in OCCUR.  Gimplified sub-expressions are inserted at
   GSI.  */

static tree
gen_phi_arg_condition (gphi *phi, vec<int> *occur,
		       gimple_stmt_iterator *gsi)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  len = occur->length ();
  gcc_assert (len > 0);
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, (*occur)[i]);
      c = bb_predicate (e->src);
      /* A "true" predicate absorbs all others: the result is true.  */
      if (is_true_predicate (c))
	{
	  cond = c;
	  break;
	}
      /* Gimplify the predicate into a valid cond-expr operand.  */
      c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
				      is_gimple_condexpr, NULL_TREE,
				      true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
	{
	  /* Must build OR expression.  */
	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
	  cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					     is_gimple_condexpr, NULL_TREE,
					     true, GSI_SAME_STMT);
	}
      else
	cond = c;
    }
  gcc_assert (cond != NULL_TREE);
  return cond;
}
1767 
/* Local valueization callback that follows all-use SSA edges.
   NOTE(review): returning VAL unchanged appears to signal the caller
   (a fold_stmt valueization hook) that every SSA name may be looked
   through — confirm against the fold_stmt valueize contract.  */

static tree
ifcvt_follow_ssa_use_edges (tree val)
{
  return val;
}
1775 
1776 /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
1777    This routine can handle PHI nodes with more than two arguments.
1778 
1779    For example,
1780      S1: A = PHI <x1(1), x2(5)>
1781    is converted into,
1782      S2: A = cond ? x1 : x2;
1783 
1784    The generated code is inserted at GSI that points to the top of
1785    basic block's statement list.
1786    If PHI node has more than two arguments a chain of conditional
1787    expression is produced.  */
1788 
1789 
1790 static void
predicate_scalar_phi(gphi * phi,gimple_stmt_iterator * gsi)1791 predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
1792 {
1793   gimple *new_stmt = NULL, *reduc;
1794   tree rhs, res, arg0, arg1, op0, op1, scev;
1795   tree cond;
1796   unsigned int index0;
1797   unsigned int max, args_len;
1798   edge e;
1799   basic_block bb;
1800   unsigned int i;
1801 
1802   res = gimple_phi_result (phi);
1803   if (virtual_operand_p (res))
1804     return;
1805 
1806   if ((rhs = degenerate_phi_result (phi))
1807       || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
1808 					    res))
1809 	  && !chrec_contains_undetermined (scev)
1810 	  && scev != res
1811 	  && (rhs = gimple_phi_arg_def (phi, 0))))
1812     {
1813       if (dump_file && (dump_flags & TDF_DETAILS))
1814 	{
1815 	  fprintf (dump_file, "Degenerate phi!\n");
1816 	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
1817 	}
1818       new_stmt = gimple_build_assign (res, rhs);
1819       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1820       update_stmt (new_stmt);
1821       return;
1822     }
1823 
1824   bb = gimple_bb (phi);
1825   if (EDGE_COUNT (bb->preds) == 2)
1826     {
1827       /* Predicate ordinary PHI node with 2 arguments.  */
1828       edge first_edge, second_edge;
1829       basic_block true_bb;
1830       first_edge = EDGE_PRED (bb, 0);
1831       second_edge = EDGE_PRED (bb, 1);
1832       cond = bb_predicate (first_edge->src);
1833       if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1834 	std::swap (first_edge, second_edge);
1835       if (EDGE_COUNT (first_edge->src->succs) > 1)
1836 	{
1837 	  cond = bb_predicate (second_edge->src);
1838 	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1839 	    cond = TREE_OPERAND (cond, 0);
1840 	  else
1841 	    first_edge = second_edge;
1842 	}
1843       else
1844 	cond = bb_predicate (first_edge->src);
1845       /* Gimplify the condition to a valid cond-expr conditonal operand.  */
1846       cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1847 					 is_gimple_condexpr, NULL_TREE,
1848 					 true, GSI_SAME_STMT);
1849       true_bb = first_edge->src;
1850       if (EDGE_PRED (bb, 1)->src == true_bb)
1851 	{
1852 	  arg0 = gimple_phi_arg_def (phi, 1);
1853 	  arg1 = gimple_phi_arg_def (phi, 0);
1854 	}
1855       else
1856 	{
1857 	  arg0 = gimple_phi_arg_def (phi, 0);
1858 	  arg1 = gimple_phi_arg_def (phi, 1);
1859 	}
1860       if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
1861 				    &op0, &op1, false))
1862 	/* Convert reduction stmt into vectorizable form.  */
1863 	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1864 					     true_bb != gimple_bb (reduc));
1865       else
1866 	/* Build new RHS using selected condition and arguments.  */
1867 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1868 				    arg0, arg1);
1869       new_stmt = gimple_build_assign (res, rhs);
1870       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1871       gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
1872       if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
1873 	{
1874 	  new_stmt = gsi_stmt (new_gsi);
1875 	  update_stmt (new_stmt);
1876 	}
1877 
1878       if (dump_file && (dump_flags & TDF_DETAILS))
1879 	{
1880 	  fprintf (dump_file, "new phi replacement stmt\n");
1881 	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1882 	}
1883       return;
1884     }
1885 
1886   /* Create hashmap for PHI node which contain vector of argument indexes
1887      having the same value.  */
1888   bool swap = false;
1889   hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
1890   unsigned int num_args = gimple_phi_num_args (phi);
1891   int max_ind = -1;
1892   /* Vector of different PHI argument values.  */
1893   auto_vec<tree> args (num_args);
1894 
1895   /* Compute phi_arg_map.  */
1896   for (i = 0; i < num_args; i++)
1897     {
1898       tree arg;
1899 
1900       arg = gimple_phi_arg_def (phi, i);
1901       if (!phi_arg_map.get (arg))
1902 	args.quick_push (arg);
1903       phi_arg_map.get_or_insert (arg).safe_push (i);
1904     }
1905 
1906   /* Determine element with max number of occurrences.  */
1907   max_ind = -1;
1908   max = 1;
1909   args_len = args.length ();
1910   for (i = 0; i < args_len; i++)
1911     {
1912       unsigned int len;
1913       if ((len = phi_arg_map.get (args[i])->length ()) > max)
1914 	{
1915 	  max_ind = (int) i;
1916 	  max = len;
1917 	}
1918     }
1919 
1920   /* Put element with max number of occurences to the end of ARGS.  */
1921   if (max_ind != -1 && max_ind +1 != (int) args_len)
1922     std::swap (args[args_len - 1], args[max_ind]);
1923 
1924   /* Handle one special case when number of arguments with different values
1925      is equal 2 and one argument has the only occurrence.  Such PHI can be
1926      handled as if would have only 2 arguments.  */
1927   if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
1928     {
1929       vec<int> *indexes;
1930       indexes = phi_arg_map.get (args[0]);
1931       index0 = (*indexes)[0];
1932       arg0 = args[0];
1933       arg1 = args[1];
1934       e = gimple_phi_arg_edge (phi, index0);
1935       cond = bb_predicate (e->src);
1936       if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
1937 	{
1938 	  swap = true;
1939 	  cond = TREE_OPERAND (cond, 0);
1940 	}
1941       /* Gimplify the condition to a valid cond-expr conditonal operand.  */
1942       cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
1943 					 is_gimple_condexpr, NULL_TREE,
1944 					 true, GSI_SAME_STMT);
1945       if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
1946 				      &op0, &op1, true)))
1947 	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
1948 				    swap? arg1 : arg0,
1949 				    swap? arg0 : arg1);
1950       else
1951 	/* Convert reduction stmt into vectorizable form.  */
1952 	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
1953 					     swap);
1954       new_stmt = gimple_build_assign (res, rhs);
1955       gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1956       update_stmt (new_stmt);
1957     }
1958   else
1959     {
1960       /* Common case.  */
1961       vec<int> *indexes;
1962       tree type = TREE_TYPE (gimple_phi_result (phi));
1963       tree lhs;
1964       arg1 = args[1];
1965       for (i = 0; i < args_len; i++)
1966 	{
1967 	  arg0 = args[i];
1968 	  indexes = phi_arg_map.get (args[i]);
1969 	  if (i != args_len - 1)
1970 	    lhs = make_temp_ssa_name (type, NULL, "_ifc_");
1971 	  else
1972 	    lhs = res;
1973 	  cond = gen_phi_arg_condition (phi, indexes, gsi);
1974 	  rhs = fold_build_cond_expr (type, unshare_expr (cond),
1975 				      arg0, arg1);
1976 	  new_stmt = gimple_build_assign (lhs, rhs);
1977 	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
1978 	  update_stmt (new_stmt);
1979 	  arg1 = lhs;
1980 	}
1981     }
1982 
1983   if (dump_file && (dump_flags & TDF_DETAILS))
1984     {
1985       fprintf (dump_file, "new extended phi replacement stmt\n");
1986       print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
1987     }
1988 }
1989 
1990 /* Replaces in LOOP all the scalar phi nodes other than those in the
1991    LOOP->header block with conditional modify expressions.  */
1992 
1993 static void
predicate_all_scalar_phis(struct loop * loop)1994 predicate_all_scalar_phis (struct loop *loop)
1995 {
1996   basic_block bb;
1997   unsigned int orig_loop_num_nodes = loop->num_nodes;
1998   unsigned int i;
1999 
2000   for (i = 1; i < orig_loop_num_nodes; i++)
2001     {
2002       gphi *phi;
2003       gimple_stmt_iterator gsi;
2004       gphi_iterator phi_gsi;
2005       bb = ifc_bbs[i];
2006 
2007       if (bb == loop->header)
2008 	continue;
2009 
2010       phi_gsi = gsi_start_phis (bb);
2011       if (gsi_end_p (phi_gsi))
2012 	continue;
2013 
2014       gsi = gsi_after_labels (bb);
2015       while (!gsi_end_p (phi_gsi))
2016 	{
2017 	  phi = phi_gsi.phi ();
2018 	  if (virtual_operand_p (gimple_phi_result (phi)))
2019 	    gsi_next (&phi_gsi);
2020 	  else
2021 	    {
2022 	      predicate_scalar_phi (phi, &gsi);
2023 	      remove_phi_node (&phi_gsi, false);
2024 	    }
2025 	}
2026     }
2027 }
2028 
2029 /* Insert in each basic block of LOOP the statements produced by the
2030    gimplification of the predicates.  */
2031 
2032 static void
insert_gimplified_predicates(loop_p loop)2033 insert_gimplified_predicates (loop_p loop)
2034 {
2035   unsigned int i;
2036 
2037   for (i = 0; i < loop->num_nodes; i++)
2038     {
2039       basic_block bb = ifc_bbs[i];
2040       gimple_seq stmts;
2041       if (!is_predicated (bb))
2042 	gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2043       if (!is_predicated (bb))
2044 	{
2045 	  /* Do not insert statements for a basic block that is not
2046 	     predicated.  Also make sure that the predicate of the
2047 	     basic block is set to true.  */
2048 	  reset_bb_predicate (bb);
2049 	  continue;
2050 	}
2051 
2052       stmts = bb_predicate_gimplified_stmts (bb);
2053       if (stmts)
2054 	{
2055 	  if (any_pred_load_store)
2056 	    {
2057 	      /* Insert the predicate of the BB just after the label,
2058 		 as the if-conversion of memory writes will use this
2059 		 predicate.  */
2060 	      gimple_stmt_iterator gsi = gsi_after_labels (bb);
2061 	      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2062 	    }
2063 	  else
2064 	    {
2065 	      /* Insert the predicate of the BB at the end of the BB
2066 		 as this would reduce the register pressure: the only
2067 		 use of this predicate will be in successor BBs.  */
2068 	      gimple_stmt_iterator gsi = gsi_last_bb (bb);
2069 
2070 	      if (gsi_end_p (gsi)
2071 		  || stmt_ends_bb_p (gsi_stmt (gsi)))
2072 		gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2073 	      else
2074 		gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2075 	    }
2076 
2077 	  /* Once the sequence is code generated, set it to NULL.  */
2078 	  set_bb_predicate_gimplified_stmts (bb, NULL);
2079 	}
2080     }
2081 }
2082 
2083 /* Helper function for predicate_mem_writes. Returns index of existent
2084    mask if it was created for given SIZE and -1 otherwise.  */
2085 
2086 static int
mask_exists(int size,vec<int> vec)2087 mask_exists (int size, vec<int> vec)
2088 {
2089   unsigned int ix;
2090   int v;
2091   FOR_EACH_VEC_ELT (vec, ix, v)
2092     if (v == size)
2093       return (int) ix;
2094   return -1;
2095 }
2096 
2097 /* Predicate each write to memory in LOOP.
2098 
2099    This function transforms control flow constructs containing memory
2100    writes of the form:
2101 
2102    | for (i = 0; i < N; i++)
2103    |   if (cond)
2104    |     A[i] = expr;
2105 
2106    into the following form that does not contain control flow:
2107 
2108    | for (i = 0; i < N; i++)
2109    |   A[i] = cond ? expr : A[i];
2110 
2111    The original CFG looks like this:
2112 
2113    | bb_0
2114    |   i = 0
2115    | end_bb_0
2116    |
2117    | bb_1
2118    |   if (i < N) goto bb_5 else goto bb_2
2119    | end_bb_1
2120    |
2121    | bb_2
2122    |   cond = some_computation;
2123    |   if (cond) goto bb_3 else goto bb_4
2124    | end_bb_2
2125    |
2126    | bb_3
2127    |   A[i] = expr;
2128    |   goto bb_4
2129    | end_bb_3
2130    |
2131    | bb_4
2132    |   goto bb_1
2133    | end_bb_4
2134 
2135    insert_gimplified_predicates inserts the computation of the COND
2136    expression at the beginning of the destination basic block:
2137 
2138    | bb_0
2139    |   i = 0
2140    | end_bb_0
2141    |
2142    | bb_1
2143    |   if (i < N) goto bb_5 else goto bb_2
2144    | end_bb_1
2145    |
2146    | bb_2
2147    |   cond = some_computation;
2148    |   if (cond) goto bb_3 else goto bb_4
2149    | end_bb_2
2150    |
2151    | bb_3
2152    |   cond = some_computation;
2153    |   A[i] = expr;
2154    |   goto bb_4
2155    | end_bb_3
2156    |
2157    | bb_4
2158    |   goto bb_1
2159    | end_bb_4
2160 
2161    predicate_mem_writes is then predicating the memory write as follows:
2162 
2163    | bb_0
2164    |   i = 0
2165    | end_bb_0
2166    |
2167    | bb_1
2168    |   if (i < N) goto bb_5 else goto bb_2
2169    | end_bb_1
2170    |
2171    | bb_2
2172    |   if (cond) goto bb_3 else goto bb_4
2173    | end_bb_2
2174    |
2175    | bb_3
2176    |   cond = some_computation;
2177    |   A[i] = cond ? expr : A[i];
2178    |   goto bb_4
2179    | end_bb_3
2180    |
2181    | bb_4
2182    |   goto bb_1
2183    | end_bb_4
2184 
2185    and finally combine_blocks removes the basic block boundaries making
2186    the loop vectorizable:
2187 
2188    | bb_0
2189    |   i = 0
2190    |   if (i < N) goto bb_5 else goto bb_1
2191    | end_bb_0
2192    |
2193    | bb_1
2194    |   cond = some_computation;
2195    |   A[i] = cond ? expr : A[i];
2196    |   if (i < N) goto bb_5 else goto bb_4
2197    | end_bb_1
2198    |
2199    | bb_4
2200    |   goto bb_1
2201    | end_bb_4
2202 */
2203 
static void
predicate_mem_writes (loop_p loop)
{
  unsigned int i, orig_loop_num_nodes = loop->num_nodes;
  auto_vec<int, 1> vect_sizes;
  auto_vec<tree, 1> vect_masks;

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      basic_block bb = ifc_bbs[i];
      tree cond = bb_predicate (bb);
      bool swap;
      gimple *stmt;
      int index;

      /* Nothing to predicate in an unconditionally executed block.  */
      if (is_true_predicate (cond))
	continue;

      /* Strip a negated predicate down to the underlying condition;
	 SWAP records that the COND_EXPR arms (or the mask) must be
	 inverted below.  */
      swap = false;
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}

      /* The mask cache is per basic block; reset it.  */
      vect_sizes.truncate (0);
      vect_masks.truncate (0);

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	{
	  if (!gimple_assign_single_p (stmt = gsi_stmt (gsi)))
	    ;
	  else if (is_false_predicate (cond)
		   && gimple_vdef (stmt))
	    {
	      /* A store guarded by a false predicate can never execute:
		 remove it and its virtual definition.  */
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&gsi, true);
	      release_defs (stmt);
	      continue;
	    }
	  else if (gimple_plf (stmt, GF_PLF_2))
	    {
	      /* GF_PLF_2 marks accesses to be turned into masked
		 load/store internal-function calls.  */
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree ref, addr, ptr, mask;
	      gcall *new_stmt;
	      gimple_seq stmts = NULL;
	      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
	      /* We checked before setting GF_PLF_2 that an equivalent
		 integer mode exists.  */
	      int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
	      /* For a load the memory reference is the RHS; for a store
		 it is the LHS.  */
	      ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
	      mark_addressable (ref);
	      addr = force_gimple_operand_gsi (&gsi, build_fold_addr_expr (ref),
					       true, NULL_TREE, true,
					       GSI_SAME_STMT);
	      if (!vect_sizes.is_empty ()
		  && (index = mask_exists (bitsize, vect_sizes)) != -1)
		/* Use created mask.  */
		mask = vect_masks[index];
	      else
		{
		  /* Materialize the predicate as a boolean mask.  */
		  if (COMPARISON_CLASS_P (cond))
		    mask = gimple_build (&stmts, TREE_CODE (cond),
					 boolean_type_node,
					 TREE_OPERAND (cond, 0),
					 TREE_OPERAND (cond, 1));
		  else
		    mask = cond;

		  if (swap)
		    {
		      /* The predicate was negated: invert the mask by
			 XORing with true.  */
		      tree true_val
			= constant_boolean_node (true, TREE_TYPE (mask));
		      mask = gimple_build (&stmts, BIT_XOR_EXPR,
					   TREE_TYPE (mask), mask, true_val);
		    }
		  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);

		  /* Save mask and its size for further use.  */
		  vect_sizes.safe_push (bitsize);
		  vect_masks.safe_push (mask);
		}
	      /* PTR carries the alias pointer type and the object
		 alignment of the reference.  */
	      ptr = build_int_cst (reference_alias_ptr_type (ref),
				   get_object_alignment (ref));
	      /* Copy points-to info if possible.  */
	      if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
		copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
			       ref);
	      if (TREE_CODE (lhs) == SSA_NAME)
		{
		  /* Load: lhs = .MASK_LOAD (addr, ptr, mask).  */
		  new_stmt
		    = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
						  ptr, mask);
		  gimple_call_set_lhs (new_stmt, lhs);
		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		}
	      else
		{
		  /* Store: .MASK_STORE (addr, ptr, mask, rhs); keep the
		     virtual operands of the replaced statement.  */
		  new_stmt
		    = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
						  mask, rhs);
		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		  gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		  SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
		}
	      gimple_call_set_nothrow (new_stmt, true);

	      gsi_replace (&gsi, new_stmt, true);
	    }
	  else if (gimple_vdef (stmt))
	    {
	      /* Any other store: rewrite "A[i] = rhs" into
		 "A[i] = cond ? rhs : A[i]".  */
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree type = TREE_TYPE (lhs);

	      lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
	      rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
	      if (swap)
		std::swap (lhs, rhs);
	      cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
						 is_gimple_condexpr, NULL_TREE,
						 true, GSI_SAME_STMT);
	      rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
	      gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
	      update_stmt (stmt);
	    }
	  gsi_next (&gsi);
	}
    }
}
2336 
2337 /* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
2338    other than the exit and latch of the LOOP.  Also resets the
2339    GIMPLE_DEBUG information.  */
2340 
2341 static void
remove_conditions_and_labels(loop_p loop)2342 remove_conditions_and_labels (loop_p loop)
2343 {
2344   gimple_stmt_iterator gsi;
2345   unsigned int i;
2346 
2347   for (i = 0; i < loop->num_nodes; i++)
2348     {
2349       basic_block bb = ifc_bbs[i];
2350 
2351       if (bb_with_exit_edge_p (loop, bb)
2352         || bb == loop->latch)
2353       continue;
2354 
2355       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
2356 	switch (gimple_code (gsi_stmt (gsi)))
2357 	  {
2358 	  case GIMPLE_COND:
2359 	  case GIMPLE_LABEL:
2360 	    gsi_remove (&gsi, true);
2361 	    break;
2362 
2363 	  case GIMPLE_DEBUG:
2364 	    /* ??? Should there be conditional GIMPLE_DEBUG_BINDs?  */
2365 	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
2366 	      {
2367 		gimple_debug_bind_reset_value (gsi_stmt (gsi));
2368 		update_stmt (gsi_stmt (gsi));
2369 	      }
2370 	    gsi_next (&gsi);
2371 	    break;
2372 
2373 	  default:
2374 	    gsi_next (&gsi);
2375 	  }
2376     }
2377 }
2378 
2379 /* Combine all the basic blocks from LOOP into one or two super basic
2380    blocks.  Replace PHI nodes with conditional modify expressions.  */
2381 
static void
combine_blocks (struct loop *loop)
{
  basic_block bb, exit_bb, merge_target_bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;
  edge e;
  edge_iterator ei;

  /* First predicate the loop body: drop control flow statements,
     materialize the gimplified predicates, and rewrite PHIs (and, if
     needed, memory writes) into predicated form.  */
  remove_conditions_and_labels (loop);
  insert_gimplified_predicates (loop);
  predicate_all_scalar_phis (loop);

  if (any_pred_load_store)
    predicate_mem_writes (loop);

  /* Merge basic blocks: first remove all the edges in the loop,
     except for those from the exit block.  */
  exit_bb = NULL;
  bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
  for (i = 0; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];
      /* Remember which blocks were conditionally executed; their stmts
	 lose flow-sensitive info further below.  */
      predicated[i] = !is_true_predicate (bb_predicate (bb));
      free_bb_predicate (bb);
      if (bb_with_exit_edge_p (loop, bb))
	{
	  /* There is exactly one block with an exit edge.  */
	  gcc_assert (exit_bb == NULL);
	  exit_bb = bb;
	}
    }
  gcc_assert (exit_bb != loop->latch);

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];

      /* Remove all incoming edges except those from the exit block;
	 ei_safe_edge re-reads the edge list after each removal.  */
      for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
	{
	  if (e->src == exit_bb)
	    ei_next (&ei);
	  else
	    remove_edge (e);
	}
    }

  if (exit_bb != NULL)
    {
      if (exit_bb != loop->header)
	{
	  /* Connect this node to loop header.  */
	  make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
	  set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
	}

      /* Redirect non-exit edges to loop->latch.  */
      FOR_EACH_EDGE (e, ei, exit_bb->succs)
	{
	  if (!loop_exit_edge_p (loop, e))
	    redirect_edge_and_branch (e, loop->latch);
	}
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
    }
  else
    {
      /* If the loop does not have an exit, reconnect header and latch.  */
      make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
    }

  merge_target_bb = loop->header;

  /* Get at the virtual def valid for uses starting at the first block
     we merge into the header.  Without a virtual PHI the loop has the
     same virtual use on all stmts.  */
  gphi *vphi = get_virtual_phi (loop->header);
  tree last_vdef = NULL_TREE;
  if (vphi)
    {
      last_vdef = gimple_phi_result (vphi);
      /* Advance LAST_VDEF past any stores already in the header.  */
      for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
	   ! gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_vdef (gsi_stmt (gsi)))
	  last_vdef = gimple_vdef (gsi_stmt (gsi));
    }
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      gimple_stmt_iterator last;

      bb = ifc_bbs[i];

      if (bb == exit_bb || bb == loop->latch)
	continue;

      /* We release virtual PHIs late because we have to propagate them
         out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (bb);
      if (vphi)
	{
	  /* Replace all uses of the virtual PHI result with the running
	     virtual definition, then remove the PHI.  */
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      /* Make stmts member of loop->header and clear range info from all stmts
	 in BB which is now no longer executed conditional on a predicate we
	 could have derived it from.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_bb (stmt, merge_target_bb);
	  /* Update virtual operands.  */
	  if (last_vdef)
	    {
	      use_operand_p use_p = ssa_vuse_operand (stmt);
	      if (use_p
		  && USE_FROM_PTR (use_p) != last_vdef)
		SET_USE (use_p, last_vdef);
	      if (gimple_vdef (stmt))
		last_vdef = gimple_vdef (stmt);
	    }
	  if (predicated[i])
	    {
	      /* The stmt was conditionally executed before merging, so
		 its flow-sensitive SSA info is no longer valid.  */
	      ssa_op_iter i;
	      tree op;
	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
		reset_flow_sensitive_info (op);
	    }
	}

      /* Update stmt list.  */
      last = gsi_last_bb (merge_target_bb);
      gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
      set_bb_seq (bb, NULL);

      delete_basic_block (bb);
    }

  /* If possible, merge loop header to the block with the exit edge.
     This reduces the number of basic blocks to two, to please the
     vectorizer that handles only loops with two nodes.  */
  if (exit_bb
      && exit_bb != loop->header)
    {
      /* We release virtual PHIs late because we have to propagate them
         out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (exit_bb);
      if (vphi)
	{
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      if (can_merge_blocks_p (loop->header, exit_bb))
	merge_blocks (loop->header, exit_bb);
    }

  free (ifc_bbs);
  ifc_bbs = NULL;
  free (predicated);
}
2561 
2562 /* Version LOOP before if-converting it; the original loop
2563    will be if-converted, the new copy of the loop will not,
2564    and the LOOP_VECTORIZED internal call will be guarding which
2565    loop to execute.  The vectorizer pass will fold this
2566    internal call into either true or false.
2567 
2568    Note that this function intentionally invalidates profile.  Both edges
2569    out of LOOP_VECTORIZED must have 100% probability so the profile remains
2570    consistent after the condition is folded in the vectorizer.  */
2571 
static struct loop *
version_loop_for_if_conversion (struct loop *loop)
{
  basic_block cond_bb;
  tree cond = make_ssa_name (boolean_type_node);
  struct loop *new_loop;
  gimple *g;
  gimple_stmt_iterator gsi;
  unsigned int save_length;

  /* Build COND = IFN_LOOP_VECTORIZED (loop->num, 0); the second
     argument is replaced by the copy's number once it is known.  */
  g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
				  build_int_cst (integer_type_node, loop->num),
				  integer_zero_node);
  gimple_call_set_lhs (g, cond);

  /* Save BB->aux around loop_version as that uses the same field.  */
  save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
  void **saved_preds = XALLOCAVEC (void *, save_length);
  for (unsigned i = 0; i < save_length; i++)
    saved_preds[i] = ifc_bbs[i]->aux;

  initialize_original_copy_tables ();
  /* At this point we invalidate profile consistency until
     IFN_LOOP_VECTORIZED is re-merged in the vectorizer.  */
  new_loop = loop_version (loop, cond, &cond_bb,
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (), true);
  free_original_copy_tables ();

  /* Restore the saved BB->aux values.  */
  for (unsigned i = 0; i < save_length; i++)
    ifc_bbs[i]->aux = saved_preds[i];

  if (new_loop == NULL)
    return NULL;

  /* The copy is the non-vectorized fallback; never vectorize it.  */
  new_loop->dont_vectorize = true;
  new_loop->force_vectorize = false;
  /* Now that the copy's number is known, complete and insert the
     LOOP_VECTORIZED call into the versioning condition block.  */
  gsi = gsi_last_bb (cond_bb);
  gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  update_ssa (TODO_update_ssa);
  return new_loop;
}
2617 
2618 /* Return true when LOOP satisfies the follow conditions that will
2619    allow it to be recognized by the vectorizer for outer-loop
2620    vectorization:
2621     - The loop is not the root node of the loop tree.
2622     - The loop has exactly one inner loop.
2623     - The loop has a single exit.
2624     - The loop header has a single successor, which is the inner
2625       loop header.
2626     - Each of the inner and outer loop latches have a single
2627       predecessor.
2628     - The loop exit block has a single predecessor, which is the
2629       inner loop's exit block.  */
2630 
2631 static bool
versionable_outer_loop_p(struct loop * loop)2632 versionable_outer_loop_p (struct loop *loop)
2633 {
2634   if (!loop_outer (loop)
2635       || loop->dont_vectorize
2636       || !loop->inner
2637       || loop->inner->next
2638       || !single_exit (loop)
2639       || !single_succ_p (loop->header)
2640       || single_succ (loop->header) != loop->inner->header
2641       || !single_pred_p (loop->latch)
2642       || !single_pred_p (loop->inner->latch))
2643     return false;
2644 
2645   basic_block outer_exit = single_pred (loop->latch);
2646   basic_block inner_exit = single_pred (loop->inner->latch);
2647 
2648   if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2649     return false;
2650 
2651   if (dump_file)
2652     fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2653 
2654   return true;
2655 }
2656 
2657 /* Performs splitting of critical edges.  Skip splitting and return false
2658    if LOOP will not be converted because:
2659 
2660      - LOOP is not well formed.
2661      - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
2662 
2663    Last restriction is valid only if AGGRESSIVE_IF_CONV is false.  */
2664 
static bool
ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
{
  basic_block *body;
  basic_block bb;
  unsigned int num = loop->num_nodes;
  unsigned int i;
  gimple *stmt;
  edge e;
  edge_iterator ei;
  auto_vec<edge> critical_edges;

  /* Loop is not well formed.  */
  if (num <= 2 || loop->inner || !single_exit (loop))
    return false;

  body = get_loop_body (loop);
  for (i = 0; i < num; i++)
    {
      bb = body[i];
      /* In non-aggressive mode, refuse PHIs with too many arguments
	 (AGGRESSIVE_IF_CONV lifts this limit).  */
      if (!aggressive_if_conv
	  && phi_nodes (bb)
	  && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "BB %d has complicated PHI with more than %u args.\n",
		     bb->index, MAX_PHI_ARG_NUM);

	  free (body);
	  return false;
	}
      if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
	continue;

      stmt = last_stmt (bb);
      /* Skip basic blocks not ending with conditional branch.  */
      if (!stmt || gimple_code (stmt) != GIMPLE_COND)
	continue;

      /* Collect critical edges staying inside the loop for splitting.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
	  critical_edges.safe_push (e);
    }
  free (body);

  while (critical_edges.length () > 0)
    {
      e = critical_edges.pop ();
      /* Don't split if bb can be predicated along non-critical edge.  */
      if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
	split_edge (e);
    }

  return true;
}
2721 
2722 /* Delete redundant statements produced by predication which prevents
2723    loop vectorization.  */
2724 
static void
ifcvt_local_dce (basic_block bb)
{
  gimple *stmt;
  gimple *stmt1;
  gimple *phi;
  gimple_stmt_iterator gsi;
  auto_vec<gimple *> worklist;
  enum gimple_code code;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* The GF_PLF_2 statement flag serves as the "live" mark.  Seed the
     worklist with statements that are inherently live, then propagate
     liveness backwards through SSA uses within BB.  */
  worklist.create (64);
  /* Consider all phi as live statements.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      gimple_set_plf (phi, GF_PLF_2, true);
      worklist.safe_push (phi);
    }
  /* Consider load/store statements, CALL and COND as live.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      if (gimple_store_p (stmt)
	  || gimple_assign_load_p (stmt)
	  || is_gimple_debug (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      code = gimple_code (stmt);
      if (code == GIMPLE_COND || code == GIMPLE_CALL)
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      /* Everything else starts out dead.  */
      gimple_set_plf (stmt, GF_PLF_2, false);

      if (code == GIMPLE_ASSIGN)
	{
	  /* An assignment whose result is used outside BB is live.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	    {
	      stmt1 = USE_STMT (use_p);
	      if (gimple_bb (stmt1) != bb)
		{
		  gimple_set_plf (stmt, GF_PLF_2, true);
		  worklist.safe_push (stmt);
		  break;
		}
	    }
	}
    }
  /* Propagate liveness through arguments of live stmt.  */
  while (worklist.length () > 0)
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      tree use;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	{
	  use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) != SSA_NAME)
	    continue;
	  stmt1 = SSA_NAME_DEF_STMT (use);
	  /* Defs from outside BB are not removal candidates here, and
	     already-live stmts need not be revisited.  */
	  if (gimple_bb (stmt1) != bb
	      || gimple_plf (stmt1, GF_PLF_2))
	    continue;
	  gimple_set_plf (stmt1, GF_PLF_2, true);
	  worklist.safe_push (stmt1);
	}
    }
  /* Delete dead statements.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi))
    {
      stmt = gsi_stmt (gsi);
      if (gimple_plf (stmt, GF_PLF_2))
	{
	  gsi_next (&gsi);
	  continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      /* Removal advances the iterator.  */
      gsi_remove (&gsi, true);
      release_defs (stmt);
    }
}
2821 
2822 /* If-convert LOOP when it is legal.  For the moment this pass has no
2823    profitability analysis.  Returns non-zero todo flags when something
2824    changed.  */
2825 
unsigned int
tree_if_conversion (struct loop *loop)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  struct loop *rloop;

 again:
  /* Reset per-loop global state; the pass may iterate (see RLOOP at
     the bottom).  */
  rloop = NULL;
  ifc_bbs = NULL;
  any_pred_load_store = false;
  any_complicated_phi = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      struct loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
	aggressive_if_conv = true;
    }

  if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  if (!if_convertible_loop_p (loop)
      || !dbg_cnt (if_conversion_tree))
    goto cleanup;

  /* Predicated memory accesses or complicated PHIs are only handled
     when the loop may be versioned for vectorization.  */
  if ((any_pred_load_store || any_complicated_phi)
      && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
	  || loop->dont_vectorize))
    goto cleanup;

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (any_pred_load_store
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      struct loop *vloop
	= (versionable_outer_loop_p (loop_outer (loop))
	   ? loop_outer (loop) : loop);
      struct loop *nloop = version_loop_for_if_conversion (vloop);
      if (nloop == NULL)
	goto cleanup;
      if (vloop != loop)
	{
	  /* If versionable_outer_loop_p decided to version the
	     outer loop, version also the inner loop of the non-vectorized
	     loop copy.  So we transform:
	      loop1
		loop2
	     into:
	      if (LOOP_VECTORIZED (1, 3))
		{
		  loop1
		    loop2
		}
	      else
		loop3 (copy of loop1)
		  if (LOOP_VECTORIZED (4, 5))
		    loop4 (copy of loop2)
		  else
		    loop5 (copy of loop4)  */
	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
	  rloop = nloop->inner;
	}
    }

  /* Now all statements are if-convertible.  Combine all the basic
     blocks into one huge basic block doing the if-conversion
     on-the-fly.  */
  combine_blocks (loop);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop->header);

  todo |= TODO_cleanup_cfg;

 cleanup:
  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
	free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  /* If an inner-loop copy was created that itself needs if-conversion,
     restart the whole pass on it.  */
  if (rloop != NULL)
    {
      loop = rloop;
      goto again;
    }

  return todo;
}
2930 
2931 /* Tree if-conversion pass management.  */
2932 
2933 namespace {
2934 
/* Static metadata registering the "ifcvt" pass with the pass manager:
   a GIMPLE pass requiring CFG and SSA form, providing/destroying no
   properties and requesting no extra TODO actions.  */
const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2947 
/* Pass wrapper: gate () decides whether if-conversion should run for a
   function, execute () applies tree_if_conversion to each candidate
   loop.  */
class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *);

}; // class pass_if_conversion
2960 
2961 bool
gate(function * fun)2962 pass_if_conversion::gate (function *fun)
2963 {
2964   return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
2965 	   && flag_tree_loop_if_convert != 0)
2966 	  || flag_tree_loop_if_convert == 1);
2967 }
2968 
/* Main entry point of the pass: run if-conversion on every candidate
   loop in FUN and return the accumulated todo flags.  */

unsigned int
pass_if_conversion::execute (function *fun)
{
  struct loop *loop;
  unsigned todo = 0;

  /* Loop 0 is the pseudo-loop for the whole function body; bail out
     early when there are no real loops.  */
  if (number_of_loops (fun) <= 1)
    return 0;

  /* Convert a loop when if-conversion was requested explicitly, or
     when vectorization could use the result and the loop was not
     marked as not-to-vectorize.  */
  FOR_EACH_LOOP (loop, 0)
    if (flag_tree_loop_if_convert == 1
	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
	    && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop);

  /* If anything changed, cached niter estimates and SCEV results may
     be stale; drop them.  */
  if (todo)
    {
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  /* The transformation uses bb->aux as scratch space; verify it was
     cleared everywhere before returning.  */
  if (flag_checking)
    {
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
	gcc_assert (!bb->aux);
    }

  return todo;
}
2999 
3000 } // anon namespace
3001 
3002 gimple_opt_pass *
make_pass_if_conversion(gcc::context * ctxt)3003 make_pass_if_conversion (gcc::context *ctxt)
3004 {
3005   return new pass_if_conversion (ctxt);
3006 }
3007