1 /* Loop autoparallelization.
2    Copyright (C) 2006-2020 Free Software Foundation, Inc.
3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4    Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "fold-const.h"
34 #include "gimplify.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
37 #include "gimple-walk.h"
38 #include "stor-layout.h"
39 #include "tree-nested.h"
40 #include "tree-cfg.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-into-ssa.h"
46 #include "cfgloop.h"
47 #include "tree-scalar-evolution.h"
48 #include "langhooks.h"
49 #include "tree-vectorizer.h"
50 #include "tree-hasher.h"
51 #include "tree-parloops.h"
52 #include "omp-general.h"
53 #include "omp-low.h"
54 #include "tree-ssa.h"
55 #include "tree-ssa-alias.h"
56 #include "tree-eh.h"
57 #include "gomp-constants.h"
58 #include "tree-dfa.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 
62 /* This pass tries to distribute iterations of loops into several threads.
63    The implementation is straightforward -- for each loop we test whether its
64    iterations are independent, and if it is the case (and some additional
65    conditions regarding profitability and correctness are satisfied), we
66    add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
67    machinery do its job.
68 
69    The most of the complexity is in bringing the code into shape expected
70    by the omp expanders:
71    -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
72       variable and that the exit test is at the start of the loop body
73    -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
74       variables by accesses through pointers, and breaking up ssa chains
75       by storing the values incoming to the parallelized loop to a structure
76       passed to the new function as an argument (something similar is done
77       in omp gimplification, unfortunately only a small part of the code
78       can be shared).
79 
80    TODO:
81    -- if there are several parallelizable loops in a function, it may be
82       possible to generate the threads just once (using synchronization to
83       ensure that cross-loop dependences are obeyed).
84    -- handling of common reduction patterns for outer loops.
85 
86    More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
87 /*
88   Reduction handling:
89   currently we use code inspired by vect_force_simple_reduction to detect
90   reduction patterns.
91   The code transformation will be introduced by an example.
92 
93 
94 parloop
95 {
96   int sum=1;
97 
98   for (i = 0; i < N; i++)
99    {
100     x[i] = i + 3;
101     sum+=x[i];
102    }
103 }
104 
105 gimple-like code:
106 header_bb:
107 
108   # sum_29 = PHI <sum_11(5), 1(3)>
109   # i_28 = PHI <i_12(5), 0(3)>
110   D.1795_8 = i_28 + 3;
111   x[i_28] = D.1795_8;
112   sum_11 = D.1795_8 + sum_29;
113   i_12 = i_28 + 1;
114   if (N_6(D) > i_12)
115     goto header_bb;
116 
117 
118 exit_bb:
119 
120   # sum_21 = PHI <sum_11(4)>
121   printf (&"%d"[0], sum_21);
122 
123 
124 after reduction transformation (only relevant parts):
125 
126 parloop
127 {
128 
129 ....
130 
131 
132   # Storing the initial value given by the user.  #
133 
134   .paral_data_store.32.sum.27 = 1;
135 
136   #pragma omp parallel num_threads(4)
137 
138   #pragma omp for schedule(static)
139 
140   # The neutral element corresponding to the particular
141   reduction's operation, e.g. 0 for PLUS_EXPR,
142   1 for MULT_EXPR, etc. replaces the user's initial value.  #
143 
144   # sum.27_29 = PHI <sum.27_11, 0>
145 
146   sum.27_11 = D.1827_8 + sum.27_29;
147 
148   GIMPLE_OMP_CONTINUE
149 
150   # Adding this reduction phi is done at create_phi_for_local_result() #
151   # sum.27_56 = PHI <sum.27_11, 0>
152   GIMPLE_OMP_RETURN
153 
154   # Creating the atomic operation is done at
155   create_call_for_reduction_1()  #
156 
157   #pragma omp atomic_load
158   D.1839_59 = *&.paral_data_load.33_51->reduction.23;
159   D.1840_60 = sum.27_56 + D.1839_59;
160   #pragma omp atomic_store (D.1840_60);
161 
162   GIMPLE_OMP_RETURN
163 
164  # collecting the result after the join of the threads is done at
165   create_loads_for_reductions().
166   The value computed by the threads is loaded from the
167   shared struct.  #
168 
169 
170   .paral_data_load.33_52 = &.paral_data_store.32;
171   sum_37 =  .paral_data_load.33_52->sum.27;
172   sum_43 = D.1795_41 + sum_37;
173 
174   exit bb:
175   # sum_21 = PHI <sum_43, sum_26>
176   printf (&"%d"[0], sum_21);
177 
178 ...
179 
180 }
181 
182 */
183 
184 /* Error reporting helper for parloops_is_simple_reduction below.  GIMPLE
185    statement STMT is printed with a message MSG. */
186 
187 static void
report_ploop_op(dump_flags_t msg_type,gimple * stmt,const char * msg)188 report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
189 {
190   dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
191 }
192 
193 /* DEF_STMT_INFO occurs in a loop that contains a potential reduction
194    operation.  Return true if the results of DEF_STMT_INFO are something
195    that can be accumulated by such a reduction.  */
196 
197 static bool
parloops_valid_reduction_input_p(stmt_vec_info def_stmt_info)198 parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
199 {
200   return (is_gimple_assign (def_stmt_info->stmt)
201 	  || is_gimple_call (def_stmt_info->stmt)
202 	  || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
203 	  || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
204 	      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
205 	      && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
206 }
207 
208 /* Detect SLP reduction of the form:
209 
210    #a1 = phi <a5, a0>
211    a2 = operation (a1)
212    a3 = operation (a2)
213    a4 = operation (a3)
214    a5 = operation (a4)
215 
216    #a = phi <a5>
217 
218    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
219    FIRST_STMT is the first reduction stmt in the chain
220    (a2 = operation (a1)).
221 
222    Return TRUE if a reduction chain was detected.  */
223 
224 static bool
parloops_is_slp_reduction(loop_vec_info loop_info,gimple * phi,gimple * first_stmt)225 parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
226 			   gimple *first_stmt)
227 {
228   class loop *loop = (gimple_bb (phi))->loop_father;
229   class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
230   enum tree_code code;
231   gimple *loop_use_stmt = NULL;
232   stmt_vec_info use_stmt_info;
233   tree lhs;
234   imm_use_iterator imm_iter;
235   use_operand_p use_p;
236   int nloop_uses, size = 0, n_out_of_loop_uses;
237   bool found = false;
238 
239   if (loop != vect_loop)
240     return false;
241 
242   auto_vec<stmt_vec_info, 8> reduc_chain;
243   lhs = PHI_RESULT (phi);
244   code = gimple_assign_rhs_code (first_stmt);
245   while (1)
246     {
247       nloop_uses = 0;
248       n_out_of_loop_uses = 0;
249       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
250         {
251 	  gimple *use_stmt = USE_STMT (use_p);
252 	  if (is_gimple_debug (use_stmt))
253 	    continue;
254 
255           /* Check if we got back to the reduction phi.  */
256 	  if (use_stmt == phi)
257             {
258 	      loop_use_stmt = use_stmt;
259               found = true;
260               break;
261             }
262 
263           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
264             {
265 	      loop_use_stmt = use_stmt;
266 	      nloop_uses++;
267             }
268            else
269              n_out_of_loop_uses++;
270 
271            /* There are can be either a single use in the loop or two uses in
272               phi nodes.  */
273            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
274              return false;
275         }
276 
277       if (found)
278         break;
279 
280       /* We reached a statement with no loop uses.  */
281       if (nloop_uses == 0)
282 	return false;
283 
284       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
285       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
286         return false;
287 
288       if (!is_gimple_assign (loop_use_stmt)
289 	  || code != gimple_assign_rhs_code (loop_use_stmt)
290 	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
291         return false;
292 
293       /* Insert USE_STMT into reduction chain.  */
294       use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
295       reduc_chain.safe_push (use_stmt_info);
296 
297       lhs = gimple_assign_lhs (loop_use_stmt);
298       size++;
299    }
300 
301   if (!found || loop_use_stmt != phi || size < 2)
302     return false;
303 
304   /* Swap the operands, if needed, to make the reduction operand be the second
305      operand.  */
306   lhs = PHI_RESULT (phi);
307   for (unsigned i = 0; i < reduc_chain.length (); ++i)
308     {
309       gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
310       if (gimple_assign_rhs2 (next_stmt) == lhs)
311 	{
312 	  tree op = gimple_assign_rhs1 (next_stmt);
313 	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
314 
315 	  /* Check that the other def is either defined in the loop
316 	     ("vect_internal_def"), or it's an induction (defined by a
317 	     loop-header phi-node).  */
318 	  if (def_stmt_info
319 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
320 	      && parloops_valid_reduction_input_p (def_stmt_info))
321 	    {
322 	      lhs = gimple_assign_lhs (next_stmt);
323 	      continue;
324 	    }
325 
326 	  return false;
327 	}
328       else
329 	{
330           tree op = gimple_assign_rhs2 (next_stmt);
331 	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
332 
333           /* Check that the other def is either defined in the loop
334             ("vect_internal_def"), or it's an induction (defined by a
335             loop-header phi-node).  */
336 	  if (def_stmt_info
337 	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
338 	      && parloops_valid_reduction_input_p (def_stmt_info))
339 	    {
340 	      if (dump_enabled_p ())
341 		dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
342 				 next_stmt);
343 
344 	      swap_ssa_operands (next_stmt,
345 				 gimple_assign_rhs1_ptr (next_stmt),
346                                  gimple_assign_rhs2_ptr (next_stmt));
347 	      update_stmt (next_stmt);
348 	    }
349 	  else
350 	    return false;
351         }
352 
353       lhs = gimple_assign_lhs (next_stmt);
354     }
355 
356   /* Build up the actual chain.  */
357   for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
358     {
359       REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
360       REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
361     }
362   REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
363   REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
364 
365   /* Save the chain for further analysis in SLP detection.  */
366   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
367   REDUC_GROUP_SIZE (reduc_chain[0]) = size;
368 
369   return true;
370 }
371 
372 /* Return true if we need an in-order reduction for operation CODE
373    on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
374    overflow must wrap.  */
375 
376 static bool
parloops_needs_fold_left_reduction_p(tree type,tree_code code,bool need_wrapping_integral_overflow)377 parloops_needs_fold_left_reduction_p (tree type, tree_code code,
378 				      bool need_wrapping_integral_overflow)
379 {
380   /* CHECKME: check for !flag_finite_math_only too?  */
381   if (SCALAR_FLOAT_TYPE_P (type))
382     switch (code)
383       {
384       case MIN_EXPR:
385       case MAX_EXPR:
386 	return false;
387 
388       default:
389 	return !flag_associative_math;
390       }
391 
392   if (INTEGRAL_TYPE_P (type))
393     {
394       if (!operation_no_trapping_overflow (type, code))
395 	return true;
396       if (need_wrapping_integral_overflow
397 	  && !TYPE_OVERFLOW_WRAPS (type)
398 	  && operation_can_overflow (code))
399 	return true;
400       return false;
401     }
402 
403   if (SAT_FIXED_POINT_TYPE_P (type))
404     return true;
405 
406   return false;
407 }
408 
409 
410 /* Function parloops_is_simple_reduction
411 
412    (1) Detect a cross-iteration def-use cycle that represents a simple
413    reduction computation.  We look for the following pattern:
414 
415    loop_header:
416      a1 = phi < a0, a2 >
417      a3 = ...
418      a2 = operation (a3, a1)
419 
420    or
421 
422    a3 = ...
423    loop_header:
424      a1 = phi < a0, a2 >
425      a2 = operation (a3, a1)
426 
427    such that:
428    1. operation is commutative and associative and it is safe to
429       change the order of the computation
430    2. no uses for a2 in the loop (a2 is used out of the loop)
431    3. no uses of a1 in the loop besides the reduction operation
432    4. no uses of a1 outside the loop.
433 
434    Conditions 1,4 are tested here.
435    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
436 
437    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
438    nested cycles.
439 
440    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
441    reductions:
442 
443      a1 = phi < a0, a2 >
444      inner loop (def of a3)
445      a2 = phi < a3 >
446 
447    (4) Detect condition expressions, ie:
448      for (int i = 0; i < N; i++)
449        if (a[i] < val)
450 	ret_val = a[i];
451 
452 */
453 
454 static stmt_vec_info
parloops_is_simple_reduction(loop_vec_info loop_info,stmt_vec_info phi_info,bool * double_reduc,bool need_wrapping_integral_overflow,enum vect_reduction_type * v_reduc_type)455 parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
456 			  bool *double_reduc,
457 			  bool need_wrapping_integral_overflow,
458 			  enum vect_reduction_type *v_reduc_type)
459 {
460   gphi *phi = as_a <gphi *> (phi_info->stmt);
461   class loop *loop = (gimple_bb (phi))->loop_father;
462   class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
463   bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
464   gimple *phi_use_stmt = NULL;
465   enum tree_code orig_code, code;
466   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
467   tree type;
468   tree name;
469   imm_use_iterator imm_iter;
470   use_operand_p use_p;
471   bool phi_def;
472 
473   *double_reduc = false;
474   *v_reduc_type = TREE_CODE_REDUCTION;
475 
476   tree phi_name = PHI_RESULT (phi);
477   /* ???  If there are no uses of the PHI result the inner loop reduction
478      won't be detected as possibly double-reduction by vectorizable_reduction
479      because that tries to walk the PHI arg from the preheader edge which
480      can be constant.  See PR60382.  */
481   if (has_zero_uses (phi_name))
482     return NULL;
483   unsigned nphi_def_loop_uses = 0;
484   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
485     {
486       gimple *use_stmt = USE_STMT (use_p);
487       if (is_gimple_debug (use_stmt))
488 	continue;
489 
490       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
491         {
492           if (dump_enabled_p ())
493 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
494 			     "intermediate value used outside loop.\n");
495 
496           return NULL;
497         }
498 
499       nphi_def_loop_uses++;
500       phi_use_stmt = use_stmt;
501     }
502 
503   edge latch_e = loop_latch_edge (loop);
504   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
505   if (TREE_CODE (loop_arg) != SSA_NAME)
506     {
507       if (dump_enabled_p ())
508 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
509 			 "reduction: not ssa_name: %T\n", loop_arg);
510       return NULL;
511     }
512 
513   stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
514   if (!def_stmt_info
515       || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
516     return NULL;
517 
518   if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
519     {
520       name = gimple_assign_lhs (def_stmt);
521       phi_def = false;
522     }
523   else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
524     {
525       name = PHI_RESULT (def_stmt);
526       phi_def = true;
527     }
528   else
529     {
530       if (dump_enabled_p ())
531 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
532 			 "reduction: unhandled reduction operation: %G",
533 			 def_stmt_info->stmt);
534       return NULL;
535     }
536 
537   unsigned nlatch_def_loop_uses = 0;
538   auto_vec<gphi *, 3> lcphis;
539   bool inner_loop_of_double_reduc = false;
540   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
541     {
542       gimple *use_stmt = USE_STMT (use_p);
543       if (is_gimple_debug (use_stmt))
544 	continue;
545       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
546 	nlatch_def_loop_uses++;
547       else
548 	{
549 	  /* We can have more than one loop-closed PHI.  */
550 	  lcphis.safe_push (as_a <gphi *> (use_stmt));
551 	  if (nested_in_vect_loop
552 	      && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
553 		  == vect_double_reduction_def))
554 	    inner_loop_of_double_reduc = true;
555 	}
556     }
557 
558   /* If this isn't a nested cycle or if the nested cycle reduction value
559      is used ouside of the inner loop we cannot handle uses of the reduction
560      value.  */
561   if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
562       && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
563     {
564       if (dump_enabled_p ())
565 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
566 			 "reduction used in loop.\n");
567       return NULL;
568     }
569 
570   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
571      defined in the inner loop.  */
572   if (phi_def)
573     {
574       gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
575       op1 = PHI_ARG_DEF (def_stmt, 0);
576 
577       if (gimple_phi_num_args (def_stmt) != 1
578           || TREE_CODE (op1) != SSA_NAME)
579         {
580           if (dump_enabled_p ())
581 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
582 			     "unsupported phi node definition.\n");
583 
584           return NULL;
585         }
586 
587       gimple *def1 = SSA_NAME_DEF_STMT (op1);
588       if (gimple_bb (def1)
589 	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
590           && loop->inner
591           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
592           && is_gimple_assign (def1)
593 	  && is_a <gphi *> (phi_use_stmt)
594 	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
595         {
596           if (dump_enabled_p ())
597             report_ploop_op (MSG_NOTE, def_stmt,
598 			     "detected double reduction: ");
599 
600           *double_reduc = true;
601 	  return def_stmt_info;
602         }
603 
604       return NULL;
605     }
606 
607   /* If we are vectorizing an inner reduction we are executing that
608      in the original order only in case we are not dealing with a
609      double reduction.  */
610   bool check_reduction = true;
611   if (flow_loop_nested_p (vect_loop, loop))
612     {
613       gphi *lcphi;
614       unsigned i;
615       check_reduction = false;
616       FOR_EACH_VEC_ELT (lcphis, i, lcphi)
617 	FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
618 	  {
619 	    gimple *use_stmt = USE_STMT (use_p);
620 	    if (is_gimple_debug (use_stmt))
621 	      continue;
622 	    if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
623 	      check_reduction = true;
624 	  }
625     }
626 
627   gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
628   code = orig_code = gimple_assign_rhs_code (def_stmt);
629 
630   if (nested_in_vect_loop && !check_reduction)
631     {
632       /* FIXME: Even for non-reductions code generation is funneled
633 	 through vectorizable_reduction for the stmt defining the
634 	 PHI latch value.  So we have to artificially restrict ourselves
635 	 for the supported operations.  */
636       switch (get_gimple_rhs_class (code))
637 	{
638 	case GIMPLE_BINARY_RHS:
639 	case GIMPLE_TERNARY_RHS:
640 	  break;
641 	default:
642 	  /* Not supported by vectorizable_reduction.  */
643 	  if (dump_enabled_p ())
644 	    report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
645 			     "nested cycle: not handled operation: ");
646 	  return NULL;
647 	}
648       if (dump_enabled_p ())
649 	report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
650       return def_stmt_info;
651     }
652 
653   /* We can handle "res -= x[i]", which is non-associative by
654      simply rewriting this into "res += -x[i]".  Avoid changing
655      gimple instruction for the first simple tests and only do this
656      if we're allowed to change code at all.  */
657   if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
658     code = PLUS_EXPR;
659 
660   if (code == COND_EXPR)
661     {
662       if (! nested_in_vect_loop)
663 	*v_reduc_type = COND_REDUCTION;
664 
665       op3 = gimple_assign_rhs1 (def_stmt);
666       if (COMPARISON_CLASS_P (op3))
667         {
668           op4 = TREE_OPERAND (op3, 1);
669           op3 = TREE_OPERAND (op3, 0);
670         }
671       if (op3 == phi_name || op4 == phi_name)
672 	{
673 	  if (dump_enabled_p ())
674 	    report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
675 			     "reduction: condition depends on previous"
676 			     " iteration: ");
677 	  return NULL;
678 	}
679 
680       op1 = gimple_assign_rhs2 (def_stmt);
681       op2 = gimple_assign_rhs3 (def_stmt);
682     }
683   else if (!commutative_tree_code (code) || !associative_tree_code (code))
684     {
685       if (dump_enabled_p ())
686 	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
687 			 "reduction: not commutative/associative: ");
688       return NULL;
689     }
690   else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
691     {
692       op1 = gimple_assign_rhs1 (def_stmt);
693       op2 = gimple_assign_rhs2 (def_stmt);
694     }
695   else
696     {
697       if (dump_enabled_p ())
698 	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
699 			 "reduction: not handled operation: ");
700       return NULL;
701     }
702 
703   if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
704     {
705       if (dump_enabled_p ())
706 	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
707 			 "reduction: both uses not ssa_names: ");
708 
709       return NULL;
710     }
711 
712   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
713   if ((TREE_CODE (op1) == SSA_NAME
714        && !types_compatible_p (type,TREE_TYPE (op1)))
715       || (TREE_CODE (op2) == SSA_NAME
716           && !types_compatible_p (type, TREE_TYPE (op2)))
717       || (op3 && TREE_CODE (op3) == SSA_NAME
718           && !types_compatible_p (type, TREE_TYPE (op3)))
719       || (op4 && TREE_CODE (op4) == SSA_NAME
720           && !types_compatible_p (type, TREE_TYPE (op4))))
721     {
722       if (dump_enabled_p ())
723         {
724           dump_printf_loc (MSG_NOTE, vect_location,
725 			   "reduction: multiple types: operation type: "
726 			   "%T, operands types: %T,%T",
727 			   type,  TREE_TYPE (op1), TREE_TYPE (op2));
728           if (op3)
729 	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
730 
731           if (op4)
732 	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
733           dump_printf (MSG_NOTE, "\n");
734         }
735 
736       return NULL;
737     }
738 
739   /* Check whether it's ok to change the order of the computation.
740      Generally, when vectorizing a reduction we change the order of the
741      computation.  This may change the behavior of the program in some
742      cases, so we need to check that this is ok.  One exception is when
743      vectorizing an outer-loop: the inner-loop is executed sequentially,
744      and therefore vectorizing reductions in the inner-loop during
745      outer-loop vectorization is safe.  */
746   if (check_reduction
747       && *v_reduc_type == TREE_CODE_REDUCTION
748       && parloops_needs_fold_left_reduction_p (type, code,
749 					       need_wrapping_integral_overflow))
750     *v_reduc_type = FOLD_LEFT_REDUCTION;
751 
752   /* Reduction is safe. We're dealing with one of the following:
753      1) integer arithmetic and no trapv
754      2) floating point arithmetic, and special flags permit this optimization
755      3) nested cycle (i.e., outer loop vectorization).  */
756   stmt_vec_info def1_info = loop_info->lookup_def (op1);
757   stmt_vec_info def2_info = loop_info->lookup_def (op2);
758   if (code != COND_EXPR && !def1_info && !def2_info)
759     {
760       if (dump_enabled_p ())
761 	report_ploop_op (MSG_NOTE, def_stmt,
762 			 "reduction: no defs for operands: ");
763       return NULL;
764     }
765 
766   /* Check that one def is the reduction def, defined by PHI,
767      the other def is either defined in the loop ("vect_internal_def"),
768      or it's an induction (defined by a loop-header phi-node).  */
769 
770   if (def2_info
771       && def2_info->stmt == phi
772       && (code == COND_EXPR
773 	  || !def1_info
774 	  || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
775 	  || parloops_valid_reduction_input_p (def1_info)))
776     {
777       if (dump_enabled_p ())
778 	report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
779       return def_stmt_info;
780     }
781 
782   if (def1_info
783       && def1_info->stmt == phi
784       && (code == COND_EXPR
785 	  || !def2_info
786 	  || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
787 	  || parloops_valid_reduction_input_p (def2_info)))
788     {
789       if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
790 	{
791 	  /* Check if we can swap operands (just for simplicity - so that
792 	     the rest of the code can assume that the reduction variable
793 	     is always the last (second) argument).  */
794 	  if (code == COND_EXPR)
795 	    {
796 	      /* Swap cond_expr by inverting the condition.  */
797 	      tree cond_expr = gimple_assign_rhs1 (def_stmt);
798 	      enum tree_code invert_code = ERROR_MARK;
799 	      enum tree_code cond_code = TREE_CODE (cond_expr);
800 
801 	      if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
802 		{
803 		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
804 		  invert_code = invert_tree_comparison (cond_code, honor_nans);
805 		}
806 	      if (invert_code != ERROR_MARK)
807 		{
808 		  TREE_SET_CODE (cond_expr, invert_code);
809 		  swap_ssa_operands (def_stmt,
810 				     gimple_assign_rhs2_ptr (def_stmt),
811 				     gimple_assign_rhs3_ptr (def_stmt));
812 		}
813 	      else
814 		{
815 		  if (dump_enabled_p ())
816 		    report_ploop_op (MSG_NOTE, def_stmt,
817 				     "detected reduction: cannot swap operands "
818 				     "for cond_expr");
819 		  return NULL;
820 		}
821 	    }
822 	  else
823 	    swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
824 			       gimple_assign_rhs2_ptr (def_stmt));
825 
826 	  if (dump_enabled_p ())
827 	    report_ploop_op (MSG_NOTE, def_stmt,
828 			     "detected reduction: need to swap operands: ");
829         }
830       else
831         {
832           if (dump_enabled_p ())
833             report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
834         }
835 
836       return def_stmt_info;
837     }
838 
839   /* Try to find SLP reduction chain.  */
840   if (! nested_in_vect_loop
841       && code != COND_EXPR
842       && orig_code != MINUS_EXPR
843       && parloops_is_slp_reduction (loop_info, phi, def_stmt))
844     {
845       if (dump_enabled_p ())
846         report_ploop_op (MSG_NOTE, def_stmt,
847 			 "reduction: detected reduction chain: ");
848 
849       return def_stmt_info;
850     }
851 
852   /* Look for the expression computing loop_arg from loop PHI result.  */
853   if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
854     return def_stmt_info;
855 
856   if (dump_enabled_p ())
857     {
858       report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
859 		       "reduction: unknown pattern: ");
860     }
861 
862   return NULL;
863 }
864 
865 /* Wrapper around vect_is_simple_reduction, which will modify code
866    in-place if it enables detection of more reductions.  Arguments
867    as there.  */
868 
869 stmt_vec_info
parloops_force_simple_reduction(loop_vec_info loop_info,stmt_vec_info phi_info,bool * double_reduc,bool need_wrapping_integral_overflow)870 parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
871 			     bool *double_reduc,
872 			     bool need_wrapping_integral_overflow)
873 {
874   enum vect_reduction_type v_reduc_type;
875   stmt_vec_info def_info
876     = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
877 				need_wrapping_integral_overflow,
878 				&v_reduc_type);
879   if (def_info)
880     {
881       STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
882       STMT_VINFO_REDUC_DEF (phi_info) = def_info;
883       STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
884       STMT_VINFO_REDUC_DEF (def_info) = phi_info;
885     }
886   return def_info;
887 }
888 
889 /* Minimal number of iterations of a loop that should be executed in each
890    thread.  */
891 #define MIN_PER_THREAD param_parloops_min_per_thread
892 
893 /* Element of the hashtable, representing a
894    reduction in the current loop.  */
895 struct reduction_info
896 {
897   gimple *reduc_stmt;		/* reduction statement.  */
898   gimple *reduc_phi;		/* The phi node defining the reduction.  */
899   enum tree_code reduction_code;/* code for the reduction operation.  */
900   unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
901 				   result.  */
902   gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
903 				   of the reduction variable when existing the loop. */
904   tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
905   tree field;			/*  the name of the field in the parloop data structure intended for reduction.  */
906   tree reduc_addr;		/* The address of the reduction variable for
907 				   openacc reductions.  */
908   tree init;			/* reduction initialization value.  */
909   gphi *new_phi;		/* (helper field) Newly created phi node whose result
910 				   will be passed to the atomic operation.  Represents
911 				   the local result each thread computed for the reduction
912 				   operation.  */
913 };
914 
915 /* Reduction info hashtable helpers.  */
916 
917 struct reduction_hasher : free_ptr_hash <reduction_info>
918 {
919   static inline hashval_t hash (const reduction_info *);
920   static inline bool equal (const reduction_info *, const reduction_info *);
921 };
922 
923 /* Equality and hash functions for hashtab code.  */
924 
925 inline bool
equal(const reduction_info * a,const reduction_info * b)926 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
927 {
928   return (a->reduc_phi == b->reduc_phi);
929 }
930 
931 inline hashval_t
hash(const reduction_info * a)932 reduction_hasher::hash (const reduction_info *a)
933 {
934   return a->reduc_version;
935 }
936 
937 typedef hash_table<reduction_hasher> reduction_info_table_type;
938 
939 
940 static struct reduction_info *
reduction_phi(reduction_info_table_type * reduction_list,gimple * phi)941 reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
942 {
943   struct reduction_info tmpred, *red;
944 
945   if (reduction_list->is_empty () || phi == NULL)
946     return NULL;
947 
948   if (gimple_uid (phi) == (unsigned int)-1
949       || gimple_uid (phi) == 0)
950     return NULL;
951 
952   tmpred.reduc_phi = phi;
953   tmpred.reduc_version = gimple_uid (phi);
954   red = reduction_list->find (&tmpred);
955   gcc_assert (red == NULL || red->reduc_phi == phi);
956 
957   return red;
958 }
959 
960 /* Element of hashtable of names to copy.  */
961 
962 struct name_to_copy_elt
963 {
964   unsigned version;	/* The version of the name to copy.  */
965   tree new_name;	/* The new name used in the copy.  */
966   tree field;		/* The field of the structure used to pass the
967 			   value.  */
968 };
969 
970 /* Name copies hashtable helpers.  */
971 
972 struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
973 {
974   static inline hashval_t hash (const name_to_copy_elt *);
975   static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
976 };
977 
978 /* Equality and hash functions for hashtab code.  */
979 
980 inline bool
equal(const name_to_copy_elt * a,const name_to_copy_elt * b)981 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
982 {
983   return a->version == b->version;
984 }
985 
986 inline hashval_t
hash(const name_to_copy_elt * a)987 name_to_copy_hasher::hash (const name_to_copy_elt *a)
988 {
989   return (hashval_t) a->version;
990 }
991 
992 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
993 
994 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
995    matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
996    represents the denominator for every element in the matrix.  */
997 typedef struct lambda_trans_matrix_s
998 {
999   lambda_matrix matrix;
1000   int rowsize;
1001   int colsize;
1002   int denominator;
1003 } *lambda_trans_matrix;
1004 #define LTM_MATRIX(T) ((T)->matrix)
1005 #define LTM_ROWSIZE(T) ((T)->rowsize)
1006 #define LTM_COLSIZE(T) ((T)->colsize)
1007 #define LTM_DENOMINATOR(T) ((T)->denominator)
1008 
1009 /* Allocate a new transformation matrix.  */
1010 
1011 static lambda_trans_matrix
lambda_trans_matrix_new(int colsize,int rowsize,struct obstack * lambda_obstack)1012 lambda_trans_matrix_new (int colsize, int rowsize,
1013 			 struct obstack * lambda_obstack)
1014 {
1015   lambda_trans_matrix ret;
1016 
1017   ret = (lambda_trans_matrix)
1018     obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
1019   LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
1020   LTM_ROWSIZE (ret) = rowsize;
1021   LTM_COLSIZE (ret) = colsize;
1022   LTM_DENOMINATOR (ret) = 1;
1023   return ret;
1024 }
1025 
1026 /* Multiply a vector VEC by a matrix MAT.
1027    MAT is an M*N matrix, and VEC is a vector with length N.  The result
1028    is stored in DEST which must be a vector of length M.  */
1029 
1030 static void
lambda_matrix_vector_mult(lambda_matrix matrix,int m,int n,lambda_vector vec,lambda_vector dest)1031 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
1032 			   lambda_vector vec, lambda_vector dest)
1033 {
1034   int i, j;
1035 
1036   lambda_vector_clear (dest, m);
1037   for (i = 0; i < m; i++)
1038     for (j = 0; j < n; j++)
1039       dest[i] += matrix[i][j] * vec[j];
1040 }
1041 
1042 /* Return true if TRANS is a legal transformation matrix that respects
1043    the dependence vectors in DISTS and DIRS.  The conservative answer
1044    is false.
1045 
1046    "Wolfe proves that a unimodular transformation represented by the
1047    matrix T is legal when applied to a loop nest with a set of
1048    lexicographically non-negative distance vectors RDG if and only if
1049    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
1050    i.e.: if and only if it transforms the lexicographically positive
1051    distance vectors to lexicographically positive vectors.  Note that
1052    a unimodular matrix must transform the zero vector (and only it) to
1053    the zero vector." S.Muchnick.  */
1054 
1055 static bool
lambda_transform_legal_p(lambda_trans_matrix trans,int nb_loops,vec<ddr_p> dependence_relations)1056 lambda_transform_legal_p (lambda_trans_matrix trans,
1057 			  int nb_loops,
1058 			  vec<ddr_p> dependence_relations)
1059 {
1060   unsigned int i, j;
1061   lambda_vector distres;
1062   struct data_dependence_relation *ddr;
1063 
1064   gcc_assert (LTM_COLSIZE (trans) == nb_loops
1065 	      && LTM_ROWSIZE (trans) == nb_loops);
1066 
1067   /* When there are no dependences, the transformation is correct.  */
1068   if (dependence_relations.length () == 0)
1069     return true;
1070 
1071   ddr = dependence_relations[0];
1072   if (ddr == NULL)
1073     return true;
1074 
1075   /* When there is an unknown relation in the dependence_relations, we
1076      know that it is no worth looking at this loop nest: give up.  */
1077   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1078     return false;
1079 
1080   distres = lambda_vector_new (nb_loops);
1081 
1082   /* For each distance vector in the dependence graph.  */
1083   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
1084     {
1085       /* Don't care about relations for which we know that there is no
1086 	 dependence, nor about read-read (aka. output-dependences):
1087 	 these data accesses can happen in any order.  */
1088       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
1089 	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
1090 	continue;
1091 
1092       /* Conservatively answer: "this transformation is not valid".  */
1093       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1094 	return false;
1095 
1096       /* If the dependence could not be captured by a distance vector,
1097 	 conservatively answer that the transform is not valid.  */
1098       if (DDR_NUM_DIST_VECTS (ddr) == 0)
1099 	return false;
1100 
1101       /* Compute trans.dist_vect */
1102       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
1103 	{
1104 	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
1105 				     DDR_DIST_VECT (ddr, j), distres);
1106 
1107 	  if (!lambda_vector_lexico_pos (distres, nb_loops))
1108 	    return false;
1109 	}
1110     }
1111   return true;
1112 }
1113 
1114 /* Data dependency analysis. Returns true if the iterations of LOOP
1115    are independent on each other (that is, if we can execute them
1116    in parallel).  */
1117 
1118 static bool
loop_parallel_p(class loop * loop,struct obstack * parloop_obstack)1119 loop_parallel_p (class loop *loop, struct obstack * parloop_obstack)
1120 {
1121   vec<ddr_p> dependence_relations;
1122   vec<data_reference_p> datarefs;
1123   lambda_trans_matrix trans;
1124   bool ret = false;
1125 
1126   if (dump_file && (dump_flags & TDF_DETAILS))
1127   {
1128     fprintf (dump_file, "Considering loop %d\n", loop->num);
1129     if (!loop->inner)
1130       fprintf (dump_file, "loop is innermost\n");
1131     else
1132       fprintf (dump_file, "loop NOT innermost\n");
1133    }
1134 
1135   /* Check for problems with dependences.  If the loop can be reversed,
1136      the iterations are independent.  */
1137   auto_vec<loop_p, 3> loop_nest;
1138   datarefs.create (10);
1139   dependence_relations.create (100);
1140   if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
1141 					   &dependence_relations))
1142     {
1143       if (dump_file && (dump_flags & TDF_DETAILS))
1144 	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
1145       ret = false;
1146       goto end;
1147     }
1148   if (dump_file && (dump_flags & TDF_DETAILS))
1149     dump_data_dependence_relations (dump_file, dependence_relations);
1150 
1151   trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
1152   LTM_MATRIX (trans)[0][0] = -1;
1153 
1154   if (lambda_transform_legal_p (trans, 1, dependence_relations))
1155     {
1156       ret = true;
1157       if (dump_file && (dump_flags & TDF_DETAILS))
1158 	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
1159     }
1160   else if (dump_file && (dump_flags & TDF_DETAILS))
1161     fprintf (dump_file,
1162 	     "  FAILED: data dependencies exist across iterations\n");
1163 
1164  end:
1165   free_dependence_relations (dependence_relations);
1166   free_data_refs (datarefs);
1167 
1168   return ret;
1169 }
1170 
1171 /* Return true when LOOP contains basic blocks marked with the
1172    BB_IRREDUCIBLE_LOOP flag.  */
1173 
1174 static inline bool
loop_has_blocks_with_irreducible_flag(class loop * loop)1175 loop_has_blocks_with_irreducible_flag (class loop *loop)
1176 {
1177   unsigned i;
1178   basic_block *bbs = get_loop_body_in_dom_order (loop);
1179   bool res = true;
1180 
1181   for (i = 0; i < loop->num_nodes; i++)
1182     if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
1183       goto end;
1184 
1185   res = false;
1186  end:
1187   free (bbs);
1188   return res;
1189 }
1190 
1191 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
1192    The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
1193    to their addresses that can be reused.  The address of OBJ is known to
1194    be invariant in the whole function.  Other needed statements are placed
1195    right before GSI.  */
1196 
1197 static tree
take_address_of(tree obj,tree type,edge entry,int_tree_htab_type * decl_address,gimple_stmt_iterator * gsi)1198 take_address_of (tree obj, tree type, edge entry,
1199 		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
1200 {
1201   int uid;
1202   tree *var_p, name, addr;
1203   gassign *stmt;
1204   gimple_seq stmts;
1205 
1206   /* Since the address of OBJ is invariant, the trees may be shared.
1207      Avoid rewriting unrelated parts of the code.  */
1208   obj = unshare_expr (obj);
1209   for (var_p = &obj;
1210        handled_component_p (*var_p);
1211        var_p = &TREE_OPERAND (*var_p, 0))
1212     continue;
1213 
1214   /* Canonicalize the access to base on a MEM_REF.  */
1215   if (DECL_P (*var_p))
1216     *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
1217 
1218   /* Assign a canonical SSA name to the address of the base decl used
1219      in the address and share it for all accesses and addresses based
1220      on it.  */
1221   uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
1222   int_tree_map elt;
1223   elt.uid = uid;
1224   int_tree_map *slot = decl_address->find_slot (elt, INSERT);
1225   if (!slot->to)
1226     {
1227       if (gsi == NULL)
1228 	return NULL;
1229       addr = TREE_OPERAND (*var_p, 0);
1230       const char *obj_name
1231 	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
1232       if (obj_name)
1233 	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
1234       else
1235 	name = make_ssa_name (TREE_TYPE (addr));
1236       stmt = gimple_build_assign (name, addr);
1237       gsi_insert_on_edge_immediate (entry, stmt);
1238 
1239       slot->uid = uid;
1240       slot->to = name;
1241     }
1242   else
1243     name = slot->to;
1244 
1245   /* Express the address in terms of the canonical SSA name.  */
1246   TREE_OPERAND (*var_p, 0) = name;
1247   if (gsi == NULL)
1248     return build_fold_addr_expr_with_type (obj, type);
1249 
1250   name = force_gimple_operand (build_addr (obj),
1251 			       &stmts, true, NULL_TREE);
1252   if (!gimple_seq_empty_p (stmts))
1253     gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1254 
1255   if (!useless_type_conversion_p (type, TREE_TYPE (name)))
1256     {
1257       name = force_gimple_operand (fold_convert (type, name), &stmts, true,
1258 				   NULL_TREE);
1259       if (!gimple_seq_empty_p (stmts))
1260 	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
1261     }
1262 
1263   return name;
1264 }
1265 
1266 static tree
reduc_stmt_res(gimple * stmt)1267 reduc_stmt_res (gimple *stmt)
1268 {
1269   return (gimple_code (stmt) == GIMPLE_PHI
1270 	  ? gimple_phi_result (stmt)
1271 	  : gimple_assign_lhs (stmt));
1272 }
1273 
1274 /* Callback for htab_traverse.  Create the initialization statement
1275    for reduction described in SLOT, and place it at the preheader of
1276    the loop described in DATA.  */
1277 
1278 int
initialize_reductions(reduction_info ** slot,class loop * loop)1279 initialize_reductions (reduction_info **slot, class loop *loop)
1280 {
1281   tree init;
1282   tree type, arg;
1283   edge e;
1284 
1285   struct reduction_info *const reduc = *slot;
1286 
1287   /* Create initialization in preheader:
1288      reduction_variable = initialization value of reduction.  */
1289 
1290   /* In the phi node at the header, replace the argument coming
1291      from the preheader with the reduction initialization value.  */
1292 
1293   /* Initialize the reduction.  */
1294   type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1295   init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
1296 				reduc->reduction_code, type);
1297   reduc->init = init;
1298 
1299   /* Replace the argument representing the initialization value
1300      with the initialization value for the reduction (neutral
1301      element for the particular operation, e.g. 0 for PLUS_EXPR,
1302      1 for MULT_EXPR, etc).
1303      Keep the old value in a new variable "reduction_initial",
1304      that will be taken in consideration after the parallel
1305      computing is done.  */
1306 
1307   e = loop_preheader_edge (loop);
1308   arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
1309   /* Create new variable to hold the initial value.  */
1310 
1311   SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
1312 	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
1313   reduc->initial_value = arg;
1314   return 1;
1315 }
1316 
1317 struct elv_data
1318 {
1319   struct walk_stmt_info info;
1320   edge entry;
1321   int_tree_htab_type *decl_address;
1322   gimple_stmt_iterator *gsi;
1323   bool changed;
1324   bool reset;
1325 };
1326 
1327 /* Eliminates references to local variables in *TP out of the single
1328    entry single exit region starting at DTA->ENTRY.
1329    DECL_ADDRESS contains addresses of the references that had their
1330    address taken already.  If the expression is changed, CHANGED is
1331    set to true.  Callback for walk_tree.  */
1332 
1333 static tree
eliminate_local_variables_1(tree * tp,int * walk_subtrees,void * data)1334 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
1335 {
1336   struct elv_data *const dta = (struct elv_data *) data;
1337   tree t = *tp, var, addr, addr_type, type, obj;
1338 
1339   if (DECL_P (t))
1340     {
1341       *walk_subtrees = 0;
1342 
1343       if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
1344 	return NULL_TREE;
1345 
1346       type = TREE_TYPE (t);
1347       addr_type = build_pointer_type (type);
1348       addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
1349 			      dta->gsi);
1350       if (dta->gsi == NULL && addr == NULL_TREE)
1351 	{
1352 	  dta->reset = true;
1353 	  return NULL_TREE;
1354 	}
1355 
1356       *tp = build_simple_mem_ref (addr);
1357 
1358       dta->changed = true;
1359       return NULL_TREE;
1360     }
1361 
1362   if (TREE_CODE (t) == ADDR_EXPR)
1363     {
1364       /* ADDR_EXPR may appear in two contexts:
1365 	 -- as a gimple operand, when the address taken is a function invariant
1366 	 -- as gimple rhs, when the resulting address in not a function
1367 	    invariant
1368 	 We do not need to do anything special in the latter case (the base of
1369 	 the memory reference whose address is taken may be replaced in the
1370 	 DECL_P case).  The former case is more complicated, as we need to
1371 	 ensure that the new address is still a gimple operand.  Thus, it
1372 	 is not sufficient to replace just the base of the memory reference --
1373 	 we need to move the whole computation of the address out of the
1374 	 loop.  */
1375       if (!is_gimple_val (t))
1376 	return NULL_TREE;
1377 
1378       *walk_subtrees = 0;
1379       obj = TREE_OPERAND (t, 0);
1380       var = get_base_address (obj);
1381       if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
1382 	return NULL_TREE;
1383 
1384       addr_type = TREE_TYPE (t);
1385       addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
1386 			      dta->gsi);
1387       if (dta->gsi == NULL && addr == NULL_TREE)
1388 	{
1389 	  dta->reset = true;
1390 	  return NULL_TREE;
1391 	}
1392       *tp = addr;
1393 
1394       dta->changed = true;
1395       return NULL_TREE;
1396     }
1397 
1398   if (!EXPR_P (t))
1399     *walk_subtrees = 0;
1400 
1401   return NULL_TREE;
1402 }
1403 
1404 /* Moves the references to local variables in STMT at *GSI out of the single
1405    entry single exit region starting at ENTRY.  DECL_ADDRESS contains
1406    addresses of the references that had their address taken
1407    already.  */
1408 
1409 static void
eliminate_local_variables_stmt(edge entry,gimple_stmt_iterator * gsi,int_tree_htab_type * decl_address)1410 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
1411 				int_tree_htab_type *decl_address)
1412 {
1413   struct elv_data dta;
1414   gimple *stmt = gsi_stmt (*gsi);
1415 
1416   memset (&dta.info, '\0', sizeof (dta.info));
1417   dta.entry = entry;
1418   dta.decl_address = decl_address;
1419   dta.changed = false;
1420   dta.reset = false;
1421 
1422   if (gimple_debug_bind_p (stmt))
1423     {
1424       dta.gsi = NULL;
1425       walk_tree (gimple_debug_bind_get_value_ptr (stmt),
1426 		 eliminate_local_variables_1, &dta.info, NULL);
1427       if (dta.reset)
1428 	{
1429 	  gimple_debug_bind_reset_value (stmt);
1430 	  dta.changed = true;
1431 	}
1432     }
1433   else if (gimple_clobber_p (stmt))
1434     {
1435       unlink_stmt_vdef (stmt);
1436       stmt = gimple_build_nop ();
1437       gsi_replace (gsi, stmt, false);
1438       dta.changed = true;
1439     }
1440   else
1441     {
1442       dta.gsi = gsi;
1443       walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
1444     }
1445 
1446   if (dta.changed)
1447     update_stmt (stmt);
1448 }
1449 
1450 /* Eliminates the references to local variables from the single entry
1451    single exit region between the ENTRY and EXIT edges.
1452 
1453    This includes:
1454    1) Taking address of a local variable -- these are moved out of the
1455    region (and temporary variable is created to hold the address if
1456    necessary).
1457 
1458    2) Dereferencing a local variable -- these are replaced with indirect
1459    references.  */
1460 
1461 static void
eliminate_local_variables(edge entry,edge exit)1462 eliminate_local_variables (edge entry, edge exit)
1463 {
1464   basic_block bb;
1465   auto_vec<basic_block, 3> body;
1466   unsigned i;
1467   gimple_stmt_iterator gsi;
1468   bool has_debug_stmt = false;
1469   int_tree_htab_type decl_address (10);
1470   basic_block entry_bb = entry->src;
1471   basic_block exit_bb = exit->dest;
1472 
1473   gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1474 
1475   FOR_EACH_VEC_ELT (body, i, bb)
1476     if (bb != entry_bb && bb != exit_bb)
1477       {
1478         for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1479 	  if (is_gimple_debug (gsi_stmt (gsi)))
1480 	    {
1481 	      if (gimple_debug_bind_p (gsi_stmt (gsi)))
1482 	        has_debug_stmt = true;
1483 	    }
1484 	  else
1485 	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
1486       }
1487 
1488   if (has_debug_stmt)
1489     FOR_EACH_VEC_ELT (body, i, bb)
1490       if (bb != entry_bb && bb != exit_bb)
1491 	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1492 	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
1493 	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
1494 }
1495 
1496 /* Returns true if expression EXPR is not defined between ENTRY and
1497    EXIT, i.e. if all its operands are defined outside of the region.  */
1498 
1499 static bool
expr_invariant_in_region_p(edge entry,edge exit,tree expr)1500 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
1501 {
1502   basic_block entry_bb = entry->src;
1503   basic_block exit_bb = exit->dest;
1504   basic_block def_bb;
1505 
1506   if (is_gimple_min_invariant (expr))
1507     return true;
1508 
1509   if (TREE_CODE (expr) == SSA_NAME)
1510     {
1511       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1512       if (def_bb
1513 	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
1514 	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
1515 	return false;
1516 
1517       return true;
1518     }
1519 
1520   return false;
1521 }
1522 
1523 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
1524    The copies are stored to NAME_COPIES, if NAME was already duplicated,
1525    its duplicate stored in NAME_COPIES is returned.
1526 
1527    Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
1528    duplicated, storing the copies in DECL_COPIES.  */
1529 
1530 static tree
separate_decls_in_region_name(tree name,name_to_copy_table_type * name_copies,int_tree_htab_type * decl_copies,bool copy_name_p)1531 separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
1532 			       int_tree_htab_type *decl_copies,
1533 			       bool copy_name_p)
1534 {
1535   tree copy, var, var_copy;
1536   unsigned idx, uid, nuid;
1537   struct int_tree_map ielt;
1538   struct name_to_copy_elt elt, *nelt;
1539   name_to_copy_elt **slot;
1540   int_tree_map *dslot;
1541 
1542   if (TREE_CODE (name) != SSA_NAME)
1543     return name;
1544 
1545   idx = SSA_NAME_VERSION (name);
1546   elt.version = idx;
1547   slot = name_copies->find_slot_with_hash (&elt, idx,
1548 					   copy_name_p ? INSERT : NO_INSERT);
1549   if (slot && *slot)
1550     return (*slot)->new_name;
1551 
1552   if (copy_name_p)
1553     {
1554       copy = duplicate_ssa_name (name, NULL);
1555       nelt = XNEW (struct name_to_copy_elt);
1556       nelt->version = idx;
1557       nelt->new_name = copy;
1558       nelt->field = NULL_TREE;
1559       *slot = nelt;
1560     }
1561   else
1562     {
1563       gcc_assert (!slot);
1564       copy = name;
1565     }
1566 
1567   var = SSA_NAME_VAR (name);
1568   if (!var)
1569     return copy;
1570 
1571   uid = DECL_UID (var);
1572   ielt.uid = uid;
1573   dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
1574   if (!dslot->to)
1575     {
1576       var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
1577       DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
1578       dslot->uid = uid;
1579       dslot->to = var_copy;
1580 
1581       /* Ensure that when we meet this decl next time, we won't duplicate
1582          it again.  */
1583       nuid = DECL_UID (var_copy);
1584       ielt.uid = nuid;
1585       dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
1586       gcc_assert (!dslot->to);
1587       dslot->uid = nuid;
1588       dslot->to = var_copy;
1589     }
1590   else
1591     var_copy = dslot->to;
1592 
1593   replace_ssa_name_symbol (copy, var_copy);
1594   return copy;
1595 }
1596 
1597 /* Finds the ssa names used in STMT that are defined outside the
1598    region between ENTRY and EXIT and replaces such ssa names with
1599    their duplicates.  The duplicates are stored to NAME_COPIES.  Base
1600    decls of all ssa names used in STMT (including those defined in
1601    LOOP) are replaced with the new temporary variables; the
1602    replacement decls are stored in DECL_COPIES.  */
1603 
1604 static void
separate_decls_in_region_stmt(edge entry,edge exit,gimple * stmt,name_to_copy_table_type * name_copies,int_tree_htab_type * decl_copies)1605 separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
1606 			       name_to_copy_table_type *name_copies,
1607 			       int_tree_htab_type *decl_copies)
1608 {
1609   use_operand_p use;
1610   def_operand_p def;
1611   ssa_op_iter oi;
1612   tree name, copy;
1613   bool copy_name_p;
1614 
1615   FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
1616   {
1617     name = DEF_FROM_PTR (def);
1618     gcc_assert (TREE_CODE (name) == SSA_NAME);
1619     copy = separate_decls_in_region_name (name, name_copies, decl_copies,
1620 					  false);
1621     gcc_assert (copy == name);
1622   }
1623 
1624   FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
1625   {
1626     name = USE_FROM_PTR (use);
1627     if (TREE_CODE (name) != SSA_NAME)
1628       continue;
1629 
1630     copy_name_p = expr_invariant_in_region_p (entry, exit, name);
1631     copy = separate_decls_in_region_name (name, name_copies, decl_copies,
1632 					  copy_name_p);
1633     SET_USE (use, copy);
1634   }
1635 }
1636 
1637 /* Finds the ssa names used in STMT that are defined outside the
1638    region between ENTRY and EXIT and replaces such ssa names with
1639    their duplicates.  The duplicates are stored to NAME_COPIES.  Base
1640    decls of all ssa names used in STMT (including those defined in
1641    LOOP) are replaced with the new temporary variables; the
1642    replacement decls are stored in DECL_COPIES.  */
1643 
1644 static bool
separate_decls_in_region_debug(gimple * stmt,name_to_copy_table_type * name_copies,int_tree_htab_type * decl_copies)1645 separate_decls_in_region_debug (gimple *stmt,
1646 				name_to_copy_table_type *name_copies,
1647 				int_tree_htab_type *decl_copies)
1648 {
1649   use_operand_p use;
1650   ssa_op_iter oi;
1651   tree var, name;
1652   struct int_tree_map ielt;
1653   struct name_to_copy_elt elt;
1654   name_to_copy_elt **slot;
1655   int_tree_map *dslot;
1656 
1657   if (gimple_debug_bind_p (stmt))
1658     var = gimple_debug_bind_get_var (stmt);
1659   else if (gimple_debug_source_bind_p (stmt))
1660     var = gimple_debug_source_bind_get_var (stmt);
1661   else
1662     return true;
1663   if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
1664     return true;
1665   gcc_assert (DECL_P (var) && SSA_VAR_P (var));
1666   ielt.uid = DECL_UID (var);
1667   dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
1668   if (!dslot)
1669     return true;
1670   if (gimple_debug_bind_p (stmt))
1671     gimple_debug_bind_set_var (stmt, dslot->to);
1672   else if (gimple_debug_source_bind_p (stmt))
1673     gimple_debug_source_bind_set_var (stmt, dslot->to);
1674 
1675   FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
1676   {
1677     name = USE_FROM_PTR (use);
1678     if (TREE_CODE (name) != SSA_NAME)
1679       continue;
1680 
1681     elt.version = SSA_NAME_VERSION (name);
1682     slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
1683     if (!slot)
1684       {
1685 	gimple_debug_bind_reset_value (stmt);
1686 	update_stmt (stmt);
1687 	break;
1688       }
1689 
1690     SET_USE (use, (*slot)->new_name);
1691   }
1692 
1693   return false;
1694 }
1695 
1696 /* Callback for htab_traverse.  Adds a field corresponding to the reduction
1697    specified in SLOT. The type is passed in DATA.  */
1698 
1699 int
add_field_for_reduction(reduction_info ** slot,tree type)1700 add_field_for_reduction (reduction_info **slot, tree type)
1701 {
1702 
1703   struct reduction_info *const red = *slot;
1704   tree var = reduc_stmt_res (red->reduc_stmt);
1705   tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1706 			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
1707 
1708   insert_field_into_struct (type, field);
1709 
1710   red->field = field;
1711 
1712   return 1;
1713 }
1714 
1715 /* Callback for htab_traverse.  Adds a field corresponding to a ssa name
1716    described in SLOT. The type is passed in DATA.  */
1717 
1718 int
add_field_for_name(name_to_copy_elt ** slot,tree type)1719 add_field_for_name (name_to_copy_elt **slot, tree type)
1720 {
1721   struct name_to_copy_elt *const elt = *slot;
1722   tree name = ssa_name (elt->version);
1723   tree field = build_decl (UNKNOWN_LOCATION,
1724 			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1725 			   TREE_TYPE (name));
1726 
1727   insert_field_into_struct (type, field);
1728   elt->field = field;
1729 
1730   return 1;
1731 }
1732 
1733 /* Callback for htab_traverse.  A local result is the intermediate result
1734    computed by a single
1735    thread, or the initial value in case no iteration was executed.
1736    This function creates a phi node reflecting these values.
1737    The phi's result will be stored in NEW_PHI field of the
1738    reduction's data structure.  */
1739 
1740 int
create_phi_for_local_result(reduction_info ** slot,class loop * loop)1741 create_phi_for_local_result (reduction_info **slot, class loop *loop)
1742 {
1743   struct reduction_info *const reduc = *slot;
1744   edge e;
1745   gphi *new_phi;
1746   basic_block store_bb, continue_bb;
1747   tree local_res;
1748   location_t locus;
1749 
1750   /* STORE_BB is the block where the phi
1751      should be stored.  It is the destination of the loop exit.
1752      (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
1753   continue_bb = single_pred (loop->latch);
1754   store_bb = FALLTHRU_EDGE (continue_bb)->dest;
1755 
1756   /* STORE_BB has two predecessors.  One coming from  the loop
1757      (the reduction's result is computed at the loop),
1758      and another coming from a block preceding the loop,
1759      when no iterations
1760      are executed (the initial value should be taken).  */
1761   if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
1762     e = EDGE_PRED (store_bb, 1);
1763   else
1764     e = EDGE_PRED (store_bb, 0);
1765   tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1766   local_res = copy_ssa_name (lhs);
1767   locus = gimple_location (reduc->reduc_stmt);
1768   new_phi = create_phi_node (local_res, store_bb);
1769   add_phi_arg (new_phi, reduc->init, e, locus);
1770   add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
1771   reduc->new_phi = new_phi;
1772 
1773   return 1;
1774 }
1775 
1776 struct clsn_data
1777 {
1778   tree store;
1779   tree load;
1780 
1781   basic_block store_bb;
1782   basic_block load_bb;
1783 };
1784 
1785 /* Callback for htab_traverse.  Create an atomic instruction for the
1786    reduction described in SLOT.
1787    DATA annotates the place in memory the atomic operation relates to,
1788    and the basic block it needs to be generated in.  */
1789 
1790 int
create_call_for_reduction_1(reduction_info ** slot,struct clsn_data * clsn_data)1791 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1792 {
1793   struct reduction_info *const reduc = *slot;
1794   gimple_stmt_iterator gsi;
1795   tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1796   tree load_struct;
1797   basic_block bb;
1798   basic_block new_bb;
1799   edge e;
1800   tree t, addr, ref, x;
1801   tree tmp_load, name;
1802   gimple *load;
1803 
1804   if (reduc->reduc_addr == NULL_TREE)
1805     {
1806       load_struct = build_simple_mem_ref (clsn_data->load);
1807       t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1808 
1809       addr = build_addr (t);
1810     }
1811   else
1812     {
1813       /* Set the address for the atomic store.  */
1814       addr = reduc->reduc_addr;
1815 
1816       /* Remove the non-atomic store '*addr = sum'.  */
1817       tree res = PHI_RESULT (reduc->keep_res);
1818       use_operand_p use_p;
1819       gimple *stmt;
1820       bool single_use_p = single_imm_use (res, &use_p, &stmt);
1821       gcc_assert (single_use_p);
1822       replace_uses_by (gimple_vdef (stmt),
1823 		       gimple_vuse (stmt));
1824       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1825       gsi_remove (&gsi, true);
1826     }
1827 
1828   /* Create phi node.  */
1829   bb = clsn_data->load_bb;
1830 
1831   gsi = gsi_last_bb (bb);
1832   e = split_block (bb, gsi_stmt (gsi));
1833   new_bb = e->dest;
1834 
1835   tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1836   tmp_load = make_ssa_name (tmp_load);
1837   load = gimple_build_omp_atomic_load (tmp_load, addr,
1838 				       OMP_MEMORY_ORDER_RELAXED);
1839   SSA_NAME_DEF_STMT (tmp_load) = load;
1840   gsi = gsi_start_bb (new_bb);
1841   gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1842 
1843   e = split_block (new_bb, load);
1844   new_bb = e->dest;
1845   gsi = gsi_start_bb (new_bb);
1846   ref = tmp_load;
1847   x = fold_build2 (reduc->reduction_code,
1848 		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1849 		   PHI_RESULT (reduc->new_phi));
1850 
1851   name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1852 				   GSI_CONTINUE_LINKING);
1853 
1854   gimple *store = gimple_build_omp_atomic_store (name,
1855 						 OMP_MEMORY_ORDER_RELAXED);
1856   gsi_insert_after (&gsi, store, GSI_NEW_STMT);
1857   return 1;
1858 }
1859 
1860 /* Create the atomic operation at the join point of the threads.
1861    REDUCTION_LIST describes the reductions in the LOOP.
1862    LD_ST_DATA describes the shared data structure where
1863    shared data is stored in and loaded from.  */
1864 static void
create_call_for_reduction(class loop * loop,reduction_info_table_type * reduction_list,struct clsn_data * ld_st_data)1865 create_call_for_reduction (class loop *loop,
1866 			   reduction_info_table_type *reduction_list,
1867 			   struct clsn_data *ld_st_data)
1868 {
1869   reduction_list->traverse <class loop *, create_phi_for_local_result> (loop);
1870   /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
1871   basic_block continue_bb = single_pred (loop->latch);
1872   ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
1873   reduction_list
1874     ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1875 }
1876 
1877 /* Callback for htab_traverse.  Loads the final reduction value at the
1878    join point of all threads, and inserts it in the right place.  */
1879 
1880 int
create_loads_for_reductions(reduction_info ** slot,struct clsn_data * clsn_data)1881 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1882 {
1883   struct reduction_info *const red = *slot;
1884   gimple *stmt;
1885   gimple_stmt_iterator gsi;
1886   tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1887   tree load_struct;
1888   tree name;
1889   tree x;
1890 
1891   /* If there's no exit phi, the result of the reduction is unused.  */
1892   if (red->keep_res == NULL)
1893     return 1;
1894 
1895   gsi = gsi_after_labels (clsn_data->load_bb);
1896   load_struct = build_simple_mem_ref (clsn_data->load);
1897   load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1898 			NULL_TREE);
1899 
1900   x = load_struct;
1901   name = PHI_RESULT (red->keep_res);
1902   stmt = gimple_build_assign (name, x);
1903 
1904   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1905 
1906   for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1907        !gsi_end_p (gsi); gsi_next (&gsi))
1908     if (gsi_stmt (gsi) == red->keep_res)
1909       {
1910 	remove_phi_node (&gsi, false);
1911 	return 1;
1912       }
1913   gcc_unreachable ();
1914 }
1915 
1916 /* Load the reduction result that was stored in LD_ST_DATA.
1917    REDUCTION_LIST describes the list of reductions that the
1918    loads should be generated for.  */
1919 static void
create_final_loads_for_reduction(reduction_info_table_type * reduction_list,struct clsn_data * ld_st_data)1920 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1921 				  struct clsn_data *ld_st_data)
1922 {
1923   gimple_stmt_iterator gsi;
1924   tree t;
1925   gimple *stmt;
1926 
1927   gsi = gsi_after_labels (ld_st_data->load_bb);
1928   t = build_fold_addr_expr (ld_st_data->store);
1929   stmt = gimple_build_assign (ld_st_data->load, t);
1930 
1931   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1932 
1933   reduction_list
1934     ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1935 
1936 }
1937 
1938 /* Callback for htab_traverse.  Store the neutral value for the
1939   particular reduction's operation, e.g. 0 for PLUS_EXPR,
1940   1 for MULT_EXPR, etc. into the reduction field.
1941   The reduction is specified in SLOT. The store information is
1942   passed in DATA.  */
1943 
1944 int
create_stores_for_reduction(reduction_info ** slot,struct clsn_data * clsn_data)1945 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1946 {
1947   struct reduction_info *const red = *slot;
1948   tree t;
1949   gimple *stmt;
1950   gimple_stmt_iterator gsi;
1951   tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1952 
1953   gsi = gsi_last_bb (clsn_data->store_bb);
1954   t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1955   stmt = gimple_build_assign (t, red->initial_value);
1956   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1957 
1958   return 1;
1959 }
1960 
1961 /* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
1962    store to a field of STORE in STORE_BB for the ssa name and its duplicate
1963    specified in SLOT.  */
1964 
1965 int
create_loads_and_stores_for_name(name_to_copy_elt ** slot,struct clsn_data * clsn_data)1966 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1967 				  struct clsn_data *clsn_data)
1968 {
1969   struct name_to_copy_elt *const elt = *slot;
1970   tree t;
1971   gimple *stmt;
1972   gimple_stmt_iterator gsi;
1973   tree type = TREE_TYPE (elt->new_name);
1974   tree load_struct;
1975 
1976   gsi = gsi_last_bb (clsn_data->store_bb);
1977   t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1978   stmt = gimple_build_assign (t, ssa_name (elt->version));
1979   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1980 
1981   gsi = gsi_last_bb (clsn_data->load_bb);
1982   load_struct = build_simple_mem_ref (clsn_data->load);
1983   t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1984   stmt = gimple_build_assign (elt->new_name, t);
1985   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1986 
1987   return 1;
1988 }
1989 
1990 /* Moves all the variables used in LOOP and defined outside of it (including
1991    the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1992    name) to a structure created for this purpose.  The code
1993 
1994    while (1)
1995      {
1996        use (a);
1997        use (b);
1998      }
1999 
2000    is transformed this way:
2001 
2002    bb0:
2003    old.a = a;
2004    old.b = b;
2005 
2006    bb1:
2007    a' = new->a;
2008    b' = new->b;
2009    while (1)
2010      {
2011        use (a');
2012        use (b');
2013      }
2014 
2015    `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
2016    pointer `new' is intentionally not initialized (the loop will be split to a
2017    separate function later, and `new' will be initialized from its arguments).
2018    LD_ST_DATA holds information about the shared data structure used to pass
2019    information among the threads.  It is initialized here, and
2020    gen_parallel_loop will pass it to create_call_for_reduction that
2021    needs this information.  REDUCTION_LIST describes the reductions
2022    in LOOP.  */
2023 
2024 static void
separate_decls_in_region(edge entry,edge exit,reduction_info_table_type * reduction_list,tree * arg_struct,tree * new_arg_struct,struct clsn_data * ld_st_data)2025 separate_decls_in_region (edge entry, edge exit,
2026 			  reduction_info_table_type *reduction_list,
2027 			  tree *arg_struct, tree *new_arg_struct,
2028 			  struct clsn_data *ld_st_data)
2029 
2030 {
2031   basic_block bb1 = split_edge (entry);
2032   basic_block bb0 = single_pred (bb1);
2033   name_to_copy_table_type name_copies (10);
2034   int_tree_htab_type decl_copies (10);
2035   unsigned i;
2036   tree type, type_name, nvar;
2037   gimple_stmt_iterator gsi;
2038   struct clsn_data clsn_data;
2039   auto_vec<basic_block, 3> body;
2040   basic_block bb;
2041   basic_block entry_bb = bb1;
2042   basic_block exit_bb = exit->dest;
2043   bool has_debug_stmt = false;
2044 
2045   entry = single_succ_edge (entry_bb);
2046   gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
2047 
2048   FOR_EACH_VEC_ELT (body, i, bb)
2049     {
2050       if (bb != entry_bb && bb != exit_bb)
2051 	{
2052 	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2053 	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
2054 					   &name_copies, &decl_copies);
2055 
2056 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2057 	    {
2058 	      gimple *stmt = gsi_stmt (gsi);
2059 
2060 	      if (is_gimple_debug (stmt))
2061 		has_debug_stmt = true;
2062 	      else
2063 		separate_decls_in_region_stmt (entry, exit, stmt,
2064 					       &name_copies, &decl_copies);
2065 	    }
2066 	}
2067     }
2068 
2069   /* Now process debug bind stmts.  We must not create decls while
2070      processing debug stmts, so we defer their processing so as to
2071      make sure we will have debug info for as many variables as
2072      possible (all of those that were dealt with in the loop above),
2073      and discard those for which we know there's nothing we can
2074      do.  */
2075   if (has_debug_stmt)
2076     FOR_EACH_VEC_ELT (body, i, bb)
2077       if (bb != entry_bb && bb != exit_bb)
2078 	{
2079 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2080 	    {
2081 	      gimple *stmt = gsi_stmt (gsi);
2082 
2083 	      if (is_gimple_debug (stmt))
2084 		{
2085 		  if (separate_decls_in_region_debug (stmt, &name_copies,
2086 						      &decl_copies))
2087 		    {
2088 		      gsi_remove (&gsi, true);
2089 		      continue;
2090 		    }
2091 		}
2092 
2093 	      gsi_next (&gsi);
2094 	    }
2095 	}
2096 
2097   if (name_copies.is_empty () && reduction_list->is_empty ())
2098     {
2099       /* It may happen that there is nothing to copy (if there are only
2100          loop carried and external variables in the loop).  */
2101       *arg_struct = NULL;
2102       *new_arg_struct = NULL;
2103     }
2104   else
2105     {
2106       /* Create the type for the structure to store the ssa names to.  */
2107       type = lang_hooks.types.make_type (RECORD_TYPE);
2108       type_name = build_decl (UNKNOWN_LOCATION,
2109 			      TYPE_DECL, create_tmp_var_name (".paral_data"),
2110 			      type);
2111       TYPE_NAME (type) = type_name;
2112 
2113       name_copies.traverse <tree, add_field_for_name> (type);
2114       if (reduction_list && !reduction_list->is_empty ())
2115 	{
2116 	  /* Create the fields for reductions.  */
2117 	  reduction_list->traverse <tree, add_field_for_reduction> (type);
2118 	}
2119       layout_type (type);
2120 
2121       /* Create the loads and stores.  */
2122       *arg_struct = create_tmp_var (type, ".paral_data_store");
2123       nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
2124       *new_arg_struct = make_ssa_name (nvar);
2125 
2126       ld_st_data->store = *arg_struct;
2127       ld_st_data->load = *new_arg_struct;
2128       ld_st_data->store_bb = bb0;
2129       ld_st_data->load_bb = bb1;
2130 
2131       name_copies
2132 	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
2133 		  (ld_st_data);
2134 
2135       /* Load the calculation from memory (after the join of the threads).  */
2136 
2137       if (reduction_list && !reduction_list->is_empty ())
2138 	{
2139 	  reduction_list
2140 	    ->traverse <struct clsn_data *, create_stores_for_reduction>
2141 	    (ld_st_data);
2142 	  clsn_data.load = make_ssa_name (nvar);
2143 	  clsn_data.load_bb = exit->dest;
2144 	  clsn_data.store = ld_st_data->store;
2145 	  create_final_loads_for_reduction (reduction_list, &clsn_data);
2146 	}
2147     }
2148 }
2149 
2150 /* Returns true if FN was created to run in parallel.  */
2151 
2152 bool
parallelized_function_p(tree fndecl)2153 parallelized_function_p (tree fndecl)
2154 {
2155   cgraph_node *node = cgraph_node::get (fndecl);
2156   gcc_assert (node != NULL);
2157   return node->parallelized_function;
2158 }
2159 
2160 /* Creates and returns an empty function that will receive the body of
2161    a parallelized loop.  */
2162 
2163 static tree
create_loop_fn(location_t loc)2164 create_loop_fn (location_t loc)
2165 {
2166   char buf[100];
2167   char *tname;
2168   tree decl, type, name, t;
2169   struct function *act_cfun = cfun;
2170   static unsigned loopfn_num;
2171 
2172   loc = LOCATION_LOCUS (loc);
2173   snprintf (buf, 100, "%s.$loopfn", current_function_name ());
2174   ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
2175   clean_symbol_name (tname);
2176   name = get_identifier (tname);
2177   type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2178 
2179   decl = build_decl (loc, FUNCTION_DECL, name, type);
2180   TREE_STATIC (decl) = 1;
2181   TREE_USED (decl) = 1;
2182   DECL_ARTIFICIAL (decl) = 1;
2183   DECL_IGNORED_P (decl) = 0;
2184   TREE_PUBLIC (decl) = 0;
2185   DECL_UNINLINABLE (decl) = 1;
2186   DECL_EXTERNAL (decl) = 0;
2187   DECL_CONTEXT (decl) = NULL_TREE;
2188   DECL_INITIAL (decl) = make_node (BLOCK);
2189   BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
2190 
2191   t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
2192   DECL_ARTIFICIAL (t) = 1;
2193   DECL_IGNORED_P (t) = 1;
2194   DECL_RESULT (decl) = t;
2195 
2196   t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
2197 		  ptr_type_node);
2198   DECL_ARTIFICIAL (t) = 1;
2199   DECL_ARG_TYPE (t) = ptr_type_node;
2200   DECL_CONTEXT (t) = decl;
2201   TREE_USED (t) = 1;
2202   DECL_ARGUMENTS (decl) = t;
2203 
2204   allocate_struct_function (decl, false);
2205 
2206   /* The call to allocate_struct_function clobbers CFUN, so we need to restore
2207      it.  */
2208   set_cfun (act_cfun);
2209 
2210   return decl;
2211 }
2212 
2213 /* Replace uses of NAME by VAL in block BB.  */
2214 
2215 static void
replace_uses_in_bb_by(tree name,tree val,basic_block bb)2216 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
2217 {
2218   gimple *use_stmt;
2219   imm_use_iterator imm_iter;
2220 
2221   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
2222     {
2223       if (gimple_bb (use_stmt) != bb)
2224 	continue;
2225 
2226       use_operand_p use_p;
2227       FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2228 	SET_USE (use_p, val);
2229     }
2230 }
2231 
2232 /* Do transformation from:
2233 
2234      <bb preheader>:
2235      ...
2236      goto <bb header>
2237 
2238      <bb header>:
2239      ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2240      sum_a = PHI <sum_init (preheader), sum_b (latch)>
2241      ...
2242      use (ivtmp_a)
2243      ...
2244      sum_b = sum_a + sum_update
2245      ...
2246      if (ivtmp_a < n)
2247        goto <bb latch>;
2248      else
2249        goto <bb exit>;
2250 
2251      <bb latch>:
2252      ivtmp_b = ivtmp_a + 1;
2253      goto <bb header>
2254 
2255      <bb exit>:
2256      sum_z = PHI <sum_b (cond[1]), ...>
2257 
2258      [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
2259 	 that's <bb header>.
2260 
2261    to:
2262 
2263      <bb preheader>:
2264      ...
2265      goto <bb newheader>
2266 
2267      <bb header>:
2268      ivtmp_a = PHI <ivtmp_c (latch)>
2269      sum_a = PHI <sum_c (latch)>
2270      ...
2271      use (ivtmp_a)
2272      ...
2273      sum_b = sum_a + sum_update
2274      ...
2275      goto <bb latch>;
2276 
2277      <bb newheader>:
2278      ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2279      sum_c = PHI <sum_init (preheader), sum_b (latch)>
2280      if (ivtmp_c < n + 1)
2281        goto <bb header>;
2282      else
2283        goto <bb newexit>;
2284 
2285      <bb latch>:
2286      ivtmp_b = ivtmp_a + 1;
2287      goto <bb newheader>
2288 
2289      <bb newexit>:
2290      sum_y = PHI <sum_c (newheader)>
2291 
2292      <bb exit>:
2293      sum_z = PHI <sum_y (newexit), ...>
2294 
2295 
2296    In unified diff format:
2297 
2298       <bb preheader>:
2299       ...
2300 -     goto <bb header>
2301 +     goto <bb newheader>
2302 
2303       <bb header>:
2304 -     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2305 -     sum_a = PHI <sum_init (preheader), sum_b (latch)>
2306 +     ivtmp_a = PHI <ivtmp_c (latch)>
2307 +     sum_a = PHI <sum_c (latch)>
2308       ...
2309       use (ivtmp_a)
2310       ...
2311       sum_b = sum_a + sum_update
2312       ...
2313 -     if (ivtmp_a < n)
2314 -       goto <bb latch>;
2315 +     goto <bb latch>;
2316 +
2317 +     <bb newheader>:
2318 +     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2319 +     sum_c = PHI <sum_init (preheader), sum_b (latch)>
2320 +     if (ivtmp_c < n + 1)
2321 +       goto <bb header>;
2322       else
2323 	goto <bb exit>;
2324 
2325       <bb latch>:
2326       ivtmp_b = ivtmp_a + 1;
2327 -     goto <bb header>
2328 +     goto <bb newheader>
2329 
2330 +    <bb newexit>:
2331 +    sum_y = PHI <sum_c (newheader)>
2332 
2333       <bb exit>:
2334 -     sum_z = PHI <sum_b (cond[1]), ...>
2335 +     sum_z = PHI <sum_y (newexit), ...>
2336 
2337    Note: the example does not show any virtual phis, but these are handled more
2338    or less as reductions.
2339 
2340 
2341    Moves the exit condition of LOOP to the beginning of its header.
2342    REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
2343    bound.  */
2344 
2345 static void
transform_to_exit_first_loop_alt(class loop * loop,reduction_info_table_type * reduction_list,tree bound)2346 transform_to_exit_first_loop_alt (class loop *loop,
2347 				  reduction_info_table_type *reduction_list,
2348 				  tree bound)
2349 {
2350   basic_block header = loop->header;
2351   basic_block latch = loop->latch;
2352   edge exit = single_dom_exit (loop);
2353   basic_block exit_block = exit->dest;
2354   gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2355   tree control = gimple_cond_lhs (cond_stmt);
2356   edge e;
2357 
2358   /* Rewriting virtuals into loop-closed ssa normal form makes this
2359      transformation simpler.  It also ensures that the virtuals are in
2360      loop-closed ssa normal from after the transformation, which is required by
2361      create_parallel_loop.  */
2362   rewrite_virtuals_into_loop_closed_ssa (loop);
2363 
2364   /* Create the new_header block.  */
2365   basic_block new_header = split_block_before_cond_jump (exit->src);
2366   edge edge_at_split = single_pred_edge (new_header);
2367 
2368   /* Redirect entry edge to new_header.  */
2369   edge entry = loop_preheader_edge (loop);
2370   e = redirect_edge_and_branch (entry, new_header);
2371   gcc_assert (e == entry);
2372 
2373   /* Redirect post_inc_edge to new_header.  */
2374   edge post_inc_edge = single_succ_edge (latch);
2375   e = redirect_edge_and_branch (post_inc_edge, new_header);
2376   gcc_assert (e == post_inc_edge);
2377 
2378   /* Redirect post_cond_edge to header.  */
2379   edge post_cond_edge = single_pred_edge (latch);
2380   e = redirect_edge_and_branch (post_cond_edge, header);
2381   gcc_assert (e == post_cond_edge);
2382 
2383   /* Redirect edge_at_split to latch.  */
2384   e = redirect_edge_and_branch (edge_at_split, latch);
2385   gcc_assert (e == edge_at_split);
2386 
2387   /* Set the new loop bound.  */
2388   gimple_cond_set_rhs (cond_stmt, bound);
2389   update_stmt (cond_stmt);
2390 
2391   /* Repair the ssa.  */
2392   vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
2393   edge_var_map *vm;
2394   gphi_iterator gsi;
2395   int i;
2396   for (gsi = gsi_start_phis (header), i = 0;
2397        !gsi_end_p (gsi) && v->iterate (i, &vm);
2398        gsi_next (&gsi), i++)
2399     {
2400       gphi *phi = gsi.phi ();
2401       tree res_a = PHI_RESULT (phi);
2402 
2403       /* Create new phi.  */
2404       tree res_c = copy_ssa_name (res_a, phi);
2405       gphi *nphi = create_phi_node (res_c, new_header);
2406 
2407       /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
2408       replace_uses_in_bb_by (res_a, res_c, new_header);
2409 
2410       /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
2411       add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
2412 
2413       /* Replace sum_b with sum_c in exit phi.  */
2414       tree res_b = redirect_edge_var_map_def (vm);
2415       replace_uses_in_bb_by (res_b, res_c, exit_block);
2416 
2417       struct reduction_info *red = reduction_phi (reduction_list, phi);
2418       gcc_assert (virtual_operand_p (res_a)
2419 		  || res_a == control
2420 		  || red != NULL);
2421 
2422       if (red)
2423 	{
2424 	  /* Register the new reduction phi.  */
2425 	  red->reduc_phi = nphi;
2426 	  gimple_set_uid (red->reduc_phi, red->reduc_version);
2427 	}
2428     }
2429   gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
2430 
2431   /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
2432   flush_pending_stmts (entry);
2433 
2434   /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
2435   flush_pending_stmts (post_inc_edge);
2436 
2437 
2438   basic_block new_exit_block = NULL;
2439   if (!single_pred_p (exit->dest))
2440     {
2441       /* Create a new empty exit block, inbetween the new loop header and the
2442 	 old exit block.  The function separate_decls_in_region needs this block
2443 	 to insert code that is active on loop exit, but not any other path.  */
2444       new_exit_block = split_edge (exit);
2445     }
2446 
2447   /* Insert and register the reduction exit phis.  */
2448   for (gphi_iterator gsi = gsi_start_phis (exit_block);
2449        !gsi_end_p (gsi);
2450        gsi_next (&gsi))
2451     {
2452       gphi *phi = gsi.phi ();
2453       gphi *nphi = NULL;
2454       tree res_z = PHI_RESULT (phi);
2455       tree res_c;
2456 
2457       if (new_exit_block != NULL)
2458 	{
2459 	  /* Now that we have a new exit block, duplicate the phi of the old
2460 	     exit block in the new exit block to preserve loop-closed ssa.  */
2461 	  edge succ_new_exit_block = single_succ_edge (new_exit_block);
2462 	  edge pred_new_exit_block = single_pred_edge (new_exit_block);
2463 	  tree res_y = copy_ssa_name (res_z, phi);
2464 	  nphi = create_phi_node (res_y, new_exit_block);
2465 	  res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
2466 	  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
2467 	  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
2468 	}
2469       else
2470 	res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2471 
2472       if (virtual_operand_p (res_z))
2473 	continue;
2474 
2475       gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
2476       struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
2477       if (red != NULL)
2478 	red->keep_res = (nphi != NULL
2479 			 ? nphi
2480 			 : phi);
2481     }
2482 
2483   /* We're going to cancel the loop at the end of gen_parallel_loop, but until
2484      then we're still using some fields, so only bother about fields that are
2485      still used: header and latch.
2486      The loop has a new header bb, so we update it.  The latch bb stays the
2487      same.  */
2488   loop->header = new_header;
2489 
2490   /* Recalculate dominance info.  */
2491   free_dominance_info (CDI_DOMINATORS);
2492   calculate_dominance_info (CDI_DOMINATORS);
2493 
2494   checking_verify_ssa (true, true);
2495 }
2496 
2497 /* Tries to moves the exit condition of LOOP to the beginning of its header
2498    without duplication of the loop body.  NIT is the number of iterations of the
2499    loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
2500    transformation is successful.  */
2501 
2502 static bool
try_transform_to_exit_first_loop_alt(class loop * loop,reduction_info_table_type * reduction_list,tree nit)2503 try_transform_to_exit_first_loop_alt (class loop *loop,
2504 				      reduction_info_table_type *reduction_list,
2505 				      tree nit)
2506 {
2507   /* Check whether the latch contains a single statement.  */
2508   if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
2509     return false;
2510 
2511   /* Check whether the latch contains no phis.  */
2512   if (phi_nodes (loop->latch) != NULL)
2513     return false;
2514 
2515   /* Check whether the latch contains the loop iv increment.  */
2516   edge back = single_succ_edge (loop->latch);
2517   edge exit = single_dom_exit (loop);
2518   gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2519   tree control = gimple_cond_lhs (cond_stmt);
2520   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
2521   tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
2522   if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
2523     return false;
2524 
2525   /* Check whether there's no code between the loop condition and the latch.  */
2526   if (!single_pred_p (loop->latch)
2527       || single_pred (loop->latch) != exit->src)
2528     return false;
2529 
2530   tree alt_bound = NULL_TREE;
2531   tree nit_type = TREE_TYPE (nit);
2532 
2533   /* Figure out whether nit + 1 overflows.  */
2534   if (TREE_CODE (nit) == INTEGER_CST)
2535     {
2536       if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
2537 	{
2538 	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
2539 				       nit, build_one_cst (nit_type));
2540 
2541 	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
2542 	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2543 	  return true;
2544 	}
2545       else
2546 	{
2547 	  /* Todo: Figure out if we can trigger this, if it's worth to handle
2548 	     optimally, and if we can handle it optimally.  */
2549 	  return false;
2550 	}
2551     }
2552 
2553   gcc_assert (TREE_CODE (nit) == SSA_NAME);
2554 
2555   /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
2556      iv with base 0 and step 1 that is incremented in the latch, like this:
2557 
2558      <bb header>:
2559      # iv_1 = PHI <0 (preheader), iv_2 (latch)>
2560      ...
2561      if (iv_1 < nit)
2562        goto <bb latch>;
2563      else
2564        goto <bb exit>;
2565 
2566      <bb latch>:
2567      iv_2 = iv_1 + 1;
2568      goto <bb header>;
2569 
2570      The range of iv_1 is [0, nit].  The latch edge is taken for
2571      iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
2572      number of latch executions is equal to nit.
2573 
2574      The function max_loop_iterations gives us the maximum number of latch
2575      executions, so it gives us the maximum value of nit.  */
2576   widest_int nit_max;
2577   if (!max_loop_iterations (loop, &nit_max))
2578     return false;
2579 
2580   /* Check if nit + 1 overflows.  */
2581   widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
2582   if (nit_max >= type_max)
2583     return false;
2584 
2585   gimple *def = SSA_NAME_DEF_STMT (nit);
2586 
2587   /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
2588   if (def
2589       && is_gimple_assign (def)
2590       && gimple_assign_rhs_code (def) == PLUS_EXPR)
2591     {
2592       tree op1 = gimple_assign_rhs1 (def);
2593       tree op2 = gimple_assign_rhs2 (def);
2594       if (integer_minus_onep (op1))
2595 	alt_bound = op2;
2596       else if (integer_minus_onep (op2))
2597 	alt_bound = op1;
2598     }
2599 
2600   /* If not found, insert nit + 1.  */
2601   if (alt_bound == NULL_TREE)
2602     {
2603       alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
2604 			       build_int_cst_type (nit_type, 1));
2605 
2606       gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
2607 
2608       alt_bound
2609 	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
2610 				    GSI_CONTINUE_LINKING);
2611     }
2612 
2613   transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2614   return true;
2615 }
2616 
2617 /* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
2618    number of iterations of the loop.  REDUCTION_LIST describes the reductions in
2619    LOOP.  */
2620 
2621 static void
transform_to_exit_first_loop(class loop * loop,reduction_info_table_type * reduction_list,tree nit)2622 transform_to_exit_first_loop (class loop *loop,
2623 			      reduction_info_table_type *reduction_list,
2624 			      tree nit)
2625 {
2626   basic_block *bbs, *nbbs, ex_bb, orig_header;
2627   unsigned n;
2628   bool ok;
2629   edge exit = single_dom_exit (loop), hpred;
2630   tree control, control_name, res, t;
2631   gphi *phi, *nphi;
2632   gassign *stmt;
2633   gcond *cond_stmt, *cond_nit;
2634   tree nit_1;
2635 
2636   split_block_after_labels (loop->header);
2637   orig_header = single_succ (loop->header);
2638   hpred = single_succ_edge (loop->header);
2639 
2640   cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2641   control = gimple_cond_lhs (cond_stmt);
2642   gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
2643 
2644   /* Make sure that we have phi nodes on exit for all loop header phis
2645      (create_parallel_loop requires that).  */
2646   for (gphi_iterator gsi = gsi_start_phis (loop->header);
2647        !gsi_end_p (gsi);
2648        gsi_next (&gsi))
2649     {
2650       phi = gsi.phi ();
2651       res = PHI_RESULT (phi);
2652       t = copy_ssa_name (res, phi);
2653       SET_PHI_RESULT (phi, t);
2654       nphi = create_phi_node (res, orig_header);
2655       add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
2656 
2657       if (res == control)
2658 	{
2659 	  gimple_cond_set_lhs (cond_stmt, t);
2660 	  update_stmt (cond_stmt);
2661 	  control = t;
2662 	}
2663     }
2664 
2665   bbs = get_loop_body_in_dom_order (loop);
2666 
2667   for (n = 0; bbs[n] != exit->src; n++)
2668    continue;
2669   nbbs = XNEWVEC (basic_block, n);
2670   ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
2671 				   bbs + 1, n, nbbs);
2672   gcc_assert (ok);
2673   free (bbs);
2674   ex_bb = nbbs[0];
2675   free (nbbs);
2676 
2677   /* Other than reductions, the only gimple reg that should be copied
2678      out of the loop is the control variable.  */
2679   exit = single_dom_exit (loop);
2680   control_name = NULL_TREE;
2681   for (gphi_iterator gsi = gsi_start_phis (ex_bb);
2682        !gsi_end_p (gsi); )
2683     {
2684       phi = gsi.phi ();
2685       res = PHI_RESULT (phi);
2686       if (virtual_operand_p (res))
2687 	{
2688 	  gsi_next (&gsi);
2689 	  continue;
2690 	}
2691 
2692       /* Check if it is a part of reduction.  If it is,
2693          keep the phi at the reduction's keep_res field.  The
2694          PHI_RESULT of this phi is the resulting value of the reduction
2695          variable when exiting the loop.  */
2696 
2697       if (!reduction_list->is_empty ())
2698 	{
2699 	  struct reduction_info *red;
2700 
2701 	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2702 	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
2703 	  if (red)
2704 	    {
2705 	      red->keep_res = phi;
2706 	      gsi_next (&gsi);
2707 	      continue;
2708 	    }
2709 	}
2710       gcc_assert (control_name == NULL_TREE
2711 		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
2712       control_name = res;
2713       remove_phi_node (&gsi, false);
2714     }
2715   gcc_assert (control_name != NULL_TREE);
2716 
2717   /* Initialize the control variable to number of iterations
2718      according to the rhs of the exit condition.  */
2719   gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
2720   cond_nit = as_a <gcond *> (last_stmt (exit->src));
2721   nit_1 =  gimple_cond_rhs (cond_nit);
2722   nit_1 = force_gimple_operand_gsi (&gsi,
2723 				  fold_convert (TREE_TYPE (control_name), nit_1),
2724 				  false, NULL_TREE, false, GSI_SAME_STMT);
2725   stmt = gimple_build_assign (control_name, nit_1);
2726   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
2727 }
2728 
2729 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
2730    LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
2731    NEW_DATA is the variable that should be initialized from the argument
2732    of LOOP_FN.  N_THREADS is the requested number of threads, which can be 0 if
2733    that number is to be determined later.  */
2734 
2735 static void
create_parallel_loop(class loop * loop,tree loop_fn,tree data,tree new_data,unsigned n_threads,location_t loc,bool oacc_kernels_p)2736 create_parallel_loop (class loop *loop, tree loop_fn, tree data,
2737 		      tree new_data, unsigned n_threads, location_t loc,
2738 		      bool oacc_kernels_p)
2739 {
2740   gimple_stmt_iterator gsi;
2741   basic_block for_bb, ex_bb, continue_bb;
2742   tree t, param;
2743   gomp_parallel *omp_par_stmt;
2744   gimple *omp_return_stmt1, *omp_return_stmt2;
2745   gimple *phi;
2746   gcond *cond_stmt;
2747   gomp_for *for_stmt;
2748   gomp_continue *omp_cont_stmt;
2749   tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
2750   edge exit, nexit, guard, end, e;
2751 
2752   if (oacc_kernels_p)
2753     {
2754       gcc_checking_assert (lookup_attribute ("oacc kernels",
2755 					     DECL_ATTRIBUTES (cfun->decl)));
2756       /* Indicate to later processing that this is a parallelized OpenACC
2757 	 kernels construct.  */
2758       DECL_ATTRIBUTES (cfun->decl)
2759 	= tree_cons (get_identifier ("oacc kernels parallelized"),
2760 		     NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
2761     }
2762   else
2763     {
2764       /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
2765 
2766       basic_block bb = loop_preheader_edge (loop)->src;
2767       basic_block paral_bb = single_pred (bb);
2768       gsi = gsi_last_bb (paral_bb);
2769 
2770       gcc_checking_assert (n_threads != 0);
2771       t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2772       OMP_CLAUSE_NUM_THREADS_EXPR (t)
2773 	= build_int_cst (integer_type_node, n_threads);
2774       omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2775       gimple_set_location (omp_par_stmt, loc);
2776 
2777       gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2778 
2779       /* Initialize NEW_DATA.  */
2780       if (data)
2781 	{
2782 	  gassign *assign_stmt;
2783 
2784 	  gsi = gsi_after_labels (bb);
2785 
2786 	  param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2787 	  assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2788 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2789 
2790 	  assign_stmt = gimple_build_assign (new_data,
2791 					     fold_convert (TREE_TYPE (new_data), param));
2792 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2793 	}
2794 
2795       /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
2796       bb = split_loop_exit_edge (single_dom_exit (loop));
2797       gsi = gsi_last_bb (bb);
2798       omp_return_stmt1 = gimple_build_omp_return (false);
2799       gimple_set_location (omp_return_stmt1, loc);
2800       gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2801     }
2802 
2803   /* Extract data for GIMPLE_OMP_FOR.  */
2804   gcc_assert (loop->header == single_dom_exit (loop)->src);
2805   cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2806 
2807   cvar = gimple_cond_lhs (cond_stmt);
2808   cvar_base = SSA_NAME_VAR (cvar);
2809   phi = SSA_NAME_DEF_STMT (cvar);
2810   cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2811   initvar = copy_ssa_name (cvar);
2812   SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2813 	   initvar);
2814   cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2815 
2816   gsi = gsi_last_nondebug_bb (loop->latch);
2817   gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2818   gsi_remove (&gsi, true);
2819 
2820   /* Prepare cfg.  */
2821   for_bb = split_edge (loop_preheader_edge (loop));
2822   ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2823   extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2824   gcc_assert (exit == single_dom_exit (loop));
2825 
2826   guard = make_edge (for_bb, ex_bb, 0);
2827   /* FIXME: What is the probability?  */
2828   guard->probability = profile_probability::guessed_never ();
2829   /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid.  */
2830   loop->latch = split_edge (single_succ_edge (loop->latch));
2831   single_pred_edge (loop->latch)->flags = 0;
2832   end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2833   rescan_loop_exit (end, true, false);
2834 
2835   for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2836        !gsi_end_p (gpi); gsi_next (&gpi))
2837     {
2838       location_t locus;
2839       gphi *phi = gpi.phi ();
2840       tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2841       gimple *def_stmt = SSA_NAME_DEF_STMT (def);
2842 
2843       /* If the exit phi is not connected to a header phi in the same loop, this
2844 	 value is not modified in the loop, and we're done with this phi.  */
2845       if (!(gimple_code (def_stmt) == GIMPLE_PHI
2846 	    && gimple_bb (def_stmt) == loop->header))
2847 	{
2848 	  locus = gimple_phi_arg_location_from_edge (phi, exit);
2849 	  add_phi_arg (phi, def, guard, locus);
2850 	  add_phi_arg (phi, def, end, locus);
2851 	  continue;
2852 	}
2853 
2854       gphi *stmt = as_a <gphi *> (def_stmt);
2855       def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2856       locus = gimple_phi_arg_location_from_edge (stmt,
2857 						 loop_preheader_edge (loop));
2858       add_phi_arg (phi, def, guard, locus);
2859 
2860       def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2861       locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2862       add_phi_arg (phi, def, end, locus);
2863     }
2864   e = redirect_edge_and_branch (exit, nexit->dest);
2865   PENDING_STMT (e) = NULL;
2866 
2867   /* Emit GIMPLE_OMP_FOR.  */
2868   if (oacc_kernels_p)
2869     /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
2870        omp-offload.c:execute_oacc_device_lower.  */
2871     t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2872   else
2873     {
2874       t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2875       int chunk_size = param_parloops_chunk_size;
2876       switch (param_parloops_schedule)
2877 	{
2878 	case PARLOOPS_SCHEDULE_STATIC:
2879 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2880 	  break;
2881 	case PARLOOPS_SCHEDULE_DYNAMIC:
2882 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2883 	  break;
2884 	case PARLOOPS_SCHEDULE_GUIDED:
2885 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2886 	  break;
2887 	case PARLOOPS_SCHEDULE_AUTO:
2888 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2889 	  chunk_size = 0;
2890 	  break;
2891 	case PARLOOPS_SCHEDULE_RUNTIME:
2892 	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2893 	  chunk_size = 0;
2894 	  break;
2895 	default:
2896 	  gcc_unreachable ();
2897 	}
2898       if (chunk_size != 0)
2899 	OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2900 	  = build_int_cst (integer_type_node, chunk_size);
2901     }
2902 
2903   for_stmt = gimple_build_omp_for (NULL,
2904 				   (oacc_kernels_p
2905 				    ? GF_OMP_FOR_KIND_OACC_LOOP
2906 				    : GF_OMP_FOR_KIND_FOR),
2907 				   t, 1, NULL);
2908 
2909   gimple_cond_set_lhs (cond_stmt, cvar_base);
2910   type = TREE_TYPE (cvar);
2911   gimple_set_location (for_stmt, loc);
2912   gimple_omp_for_set_index (for_stmt, 0, initvar);
2913   gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2914   gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2915   gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2916   gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2917 						cvar_base,
2918 						build_int_cst (type, 1)));
2919 
2920   gsi = gsi_last_bb (for_bb);
2921   gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2922   SSA_NAME_DEF_STMT (initvar) = for_stmt;
2923 
2924   /* Emit GIMPLE_OMP_CONTINUE.  */
2925   continue_bb = single_pred (loop->latch);
2926   gsi = gsi_last_bb (continue_bb);
2927   omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2928   gimple_set_location (omp_cont_stmt, loc);
2929   gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2930   SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2931 
2932   /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
2933   gsi = gsi_last_bb (ex_bb);
2934   omp_return_stmt2 = gimple_build_omp_return (true);
2935   gimple_set_location (omp_return_stmt2, loc);
2936   gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2937 
2938   /* After the above dom info is hosed.  Re-compute it.  */
2939   free_dominance_info (CDI_DOMINATORS);
2940   calculate_dominance_info (CDI_DOMINATORS);
2941 }
2942 
2943 /* Return number of phis in bb.  If COUNT_VIRTUAL_P is false, don't count the
2944    virtual phi.  */
2945 
2946 static unsigned int
num_phis(basic_block bb,bool count_virtual_p)2947 num_phis (basic_block bb, bool count_virtual_p)
2948 {
2949   unsigned int nr_phis = 0;
2950   gphi_iterator gsi;
2951   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2952     {
2953       if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2954 	continue;
2955 
2956       nr_phis++;
2957     }
2958 
2959   return nr_phis;
2960 }
2961 
2962 /* Generates code to execute the iterations of LOOP in N_THREADS
2963    threads in parallel, which can be 0 if that number is to be determined
2964    later.
2965 
2966    NITER describes number of iterations of LOOP.
2967    REDUCTION_LIST describes the reductions existent in the LOOP.  */
2968 
2969 static void
gen_parallel_loop(class loop * loop,reduction_info_table_type * reduction_list,unsigned n_threads,class tree_niter_desc * niter,bool oacc_kernels_p)2970 gen_parallel_loop (class loop *loop,
2971 		   reduction_info_table_type *reduction_list,
2972 		   unsigned n_threads, class tree_niter_desc *niter,
2973 		   bool oacc_kernels_p)
2974 {
2975   tree many_iterations_cond, type, nit;
2976   tree arg_struct, new_arg_struct;
2977   gimple_seq stmts;
2978   edge entry, exit;
2979   struct clsn_data clsn_data;
2980   location_t loc;
2981   gimple *cond_stmt;
2982   unsigned int m_p_thread=2;
2983 
2984   /* From
2985 
2986      ---------------------------------------------------------------------
2987      loop
2988        {
2989 	 IV = phi (INIT, IV + STEP)
2990 	 BODY1;
2991 	 if (COND)
2992 	   break;
2993 	 BODY2;
2994        }
2995      ---------------------------------------------------------------------
2996 
2997      with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2998      we generate the following code:
2999 
3000      ---------------------------------------------------------------------
3001 
3002      if (MAY_BE_ZERO
3003      || NITER < MIN_PER_THREAD * N_THREADS)
3004      goto original;
3005 
3006      BODY1;
3007      store all local loop-invariant variables used in body of the loop to DATA.
3008      GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
3009      load the variables from DATA.
3010      GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
3011      BODY2;
3012      BODY1;
3013      GIMPLE_OMP_CONTINUE;
3014      GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
3015      GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
3016      goto end;
3017 
3018      original:
3019      loop
3020        {
3021 	 IV = phi (INIT, IV + STEP)
3022 	 BODY1;
3023 	 if (COND)
3024 	   break;
3025 	 BODY2;
3026        }
3027 
3028      end:
3029 
3030    */
3031 
3032   /* Create two versions of the loop -- in the old one, we know that the
3033      number of iterations is large enough, and we will transform it into the
3034      loop that will be split to loop_fn, the new one will be used for the
3035      remaining iterations.  */
3036 
3037   /* We should compute a better number-of-iterations value for outer loops.
3038      That is, if we have
3039 
3040     for (i = 0; i < n; ++i)
3041       for (j = 0; j < m; ++j)
3042         ...
3043 
3044     we should compute nit = n * m, not nit = n.
3045     Also may_be_zero handling would need to be adjusted.  */
3046 
3047   type = TREE_TYPE (niter->niter);
3048   nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
3049 			      NULL_TREE);
3050   if (stmts)
3051     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3052 
3053   if (!oacc_kernels_p)
3054     {
3055       if (loop->inner)
3056 	m_p_thread=2;
3057       else
3058 	m_p_thread=MIN_PER_THREAD;
3059 
3060       gcc_checking_assert (n_threads != 0);
3061       many_iterations_cond =
3062 	fold_build2 (GE_EXPR, boolean_type_node,
3063 		     nit, build_int_cst (type, m_p_thread * n_threads - 1));
3064 
3065       many_iterations_cond
3066 	= fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3067 		       invert_truthvalue (unshare_expr (niter->may_be_zero)),
3068 		       many_iterations_cond);
3069       many_iterations_cond
3070 	= force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
3071       if (stmts)
3072 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3073       if (!is_gimple_condexpr (many_iterations_cond))
3074 	{
3075 	  many_iterations_cond
3076 	    = force_gimple_operand (many_iterations_cond, &stmts,
3077 				    true, NULL_TREE);
3078 	  if (stmts)
3079 	    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
3080 					      stmts);
3081 	}
3082 
3083       initialize_original_copy_tables ();
3084 
3085       /* We assume that the loop usually iterates a lot.  */
3086       loop_version (loop, many_iterations_cond, NULL,
3087 		    profile_probability::likely (),
3088 		    profile_probability::unlikely (),
3089 		    profile_probability::likely (),
3090 		    profile_probability::unlikely (), true);
3091       update_ssa (TODO_update_ssa);
3092       free_original_copy_tables ();
3093     }
3094 
3095   /* Base all the induction variables in LOOP on a single control one.  */
3096   canonicalize_loop_ivs (loop, &nit, true);
3097   if (num_phis (loop->header, false) != reduction_list->elements () + 1)
3098     {
3099       /* The call to canonicalize_loop_ivs above failed to "base all the
3100 	 induction variables in LOOP on a single control one".  Do damage
3101 	 control.  */
3102       basic_block preheader = loop_preheader_edge (loop)->src;
3103       basic_block cond_bb = single_pred (preheader);
3104       gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
3105       gimple_cond_make_true (cond);
3106       update_stmt (cond);
3107       /* We've gotten rid of the duplicate loop created by loop_version, but
3108 	 we can't undo whatever canonicalize_loop_ivs has done.
3109 	 TODO: Fix this properly by ensuring that the call to
3110 	 canonicalize_loop_ivs succeeds.  */
3111       if (dump_file
3112 	  && (dump_flags & TDF_DETAILS))
3113 	fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
3114 		 " aborting transformation\n", loop->num);
3115       return;
3116     }
3117 
3118   /* Ensure that the exit condition is the first statement in the loop.
3119      The common case is that latch of the loop is empty (apart from the
3120      increment) and immediately follows the loop exit test.  Attempt to move the
3121      entry of the loop directly before the exit check and increase the number of
3122      iterations of the loop by one.  */
3123   if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
3124     {
3125       if (dump_file
3126 	  && (dump_flags & TDF_DETAILS))
3127 	fprintf (dump_file,
3128 		 "alternative exit-first loop transform succeeded"
3129 		 " for loop %d\n", loop->num);
3130     }
3131   else
3132     {
3133       if (oacc_kernels_p)
3134 	n_threads = 1;
3135 
3136       /* Fall back on the method that handles more cases, but duplicates the
3137 	 loop body: move the exit condition of LOOP to the beginning of its
3138 	 header, and duplicate the part of the last iteration that gets disabled
3139 	 to the exit of the loop.  */
3140       transform_to_exit_first_loop (loop, reduction_list, nit);
3141     }
3142 
3143   /* Generate initializations for reductions.  */
3144   if (!reduction_list->is_empty ())
3145     reduction_list->traverse <class loop *, initialize_reductions> (loop);
3146 
3147   /* Eliminate the references to local variables from the loop.  */
3148   gcc_assert (single_exit (loop));
3149   entry = loop_preheader_edge (loop);
3150   exit = single_dom_exit (loop);
3151 
3152   /* This rewrites the body in terms of new variables.  This has already
3153      been done for oacc_kernels_p in pass_lower_omp/lower_omp ().  */
3154   if (!oacc_kernels_p)
3155     {
3156       eliminate_local_variables (entry, exit);
3157       /* In the old loop, move all variables non-local to the loop to a
3158 	 structure and back, and create separate decls for the variables used in
3159 	 loop.  */
3160       separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
3161 				&new_arg_struct, &clsn_data);
3162     }
3163   else
3164     {
3165       arg_struct = NULL_TREE;
3166       new_arg_struct = NULL_TREE;
3167       clsn_data.load = NULL_TREE;
3168       clsn_data.load_bb = exit->dest;
3169       clsn_data.store = NULL_TREE;
3170       clsn_data.store_bb = NULL;
3171     }
3172 
3173   /* Create the parallel constructs.  */
3174   loc = UNKNOWN_LOCATION;
3175   cond_stmt = last_stmt (loop->header);
3176   if (cond_stmt)
3177     loc = gimple_location (cond_stmt);
3178   create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
3179 			n_threads, loc, oacc_kernels_p);
3180   if (!reduction_list->is_empty ())
3181     create_call_for_reduction (loop, reduction_list, &clsn_data);
3182 
3183   scev_reset ();
3184 
3185   /* Free loop bound estimations that could contain references to
3186      removed statements.  */
3187   free_numbers_of_iterations_estimates (cfun);
3188 }
3189 
3190 /* Returns true when LOOP contains vector phi nodes.  */
3191 
3192 static bool
loop_has_vector_phi_nodes(class loop * loop ATTRIBUTE_UNUSED)3193 loop_has_vector_phi_nodes (class loop *loop ATTRIBUTE_UNUSED)
3194 {
3195   unsigned i;
3196   basic_block *bbs = get_loop_body_in_dom_order (loop);
3197   gphi_iterator gsi;
3198   bool res = true;
3199 
3200   for (i = 0; i < loop->num_nodes; i++)
3201     for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3202       if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
3203 	goto end;
3204 
3205   res = false;
3206  end:
3207   free (bbs);
3208   return res;
3209 }
3210 
3211 /* Create a reduction_info struct, initialize it with REDUC_STMT
3212    and PHI, insert it to the REDUCTION_LIST.  */
3213 
3214 static void
build_new_reduction(reduction_info_table_type * reduction_list,gimple * reduc_stmt,gphi * phi)3215 build_new_reduction (reduction_info_table_type *reduction_list,
3216 		     gimple *reduc_stmt, gphi *phi)
3217 {
3218   reduction_info **slot;
3219   struct reduction_info *new_reduction;
3220   enum tree_code reduction_code;
3221 
3222   gcc_assert (reduc_stmt);
3223 
3224   if (gimple_code (reduc_stmt) == GIMPLE_PHI)
3225     {
3226       tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
3227       gimple *def1 = SSA_NAME_DEF_STMT (op1);
3228       reduction_code = gimple_assign_rhs_code (def1);
3229     }
3230   else
3231     reduction_code = gimple_assign_rhs_code (reduc_stmt);
3232   /* Check for OpenMP supported reduction.  */
3233   switch (reduction_code)
3234     {
3235     case PLUS_EXPR:
3236     case MULT_EXPR:
3237     case MAX_EXPR:
3238     case MIN_EXPR:
3239     case BIT_IOR_EXPR:
3240     case BIT_XOR_EXPR:
3241     case BIT_AND_EXPR:
3242     case TRUTH_OR_EXPR:
3243     case TRUTH_XOR_EXPR:
3244     case TRUTH_AND_EXPR:
3245       break;
3246     default:
3247       return;
3248     }
3249 
3250   if (dump_file && (dump_flags & TDF_DETAILS))
3251     {
3252       fprintf (dump_file,
3253 	       "Detected reduction. reduction stmt is:\n");
3254       print_gimple_stmt (dump_file, reduc_stmt, 0);
3255       fprintf (dump_file, "\n");
3256     }
3257 
3258   new_reduction = XCNEW (struct reduction_info);
3259 
3260   new_reduction->reduc_stmt = reduc_stmt;
3261   new_reduction->reduc_phi = phi;
3262   new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
3263   new_reduction->reduction_code = reduction_code;
3264   slot = reduction_list->find_slot (new_reduction, INSERT);
3265   *slot = new_reduction;
3266 }
3267 
3268 /* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */
3269 
3270 int
set_reduc_phi_uids(reduction_info ** slot,void * data ATTRIBUTE_UNUSED)3271 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
3272 {
3273   struct reduction_info *const red = *slot;
3274   gimple_set_uid (red->reduc_phi, red->reduc_version);
3275   return 1;
3276 }
3277 
3278 /* Return true if the type of reduction performed by STMT_INFO is suitable
3279    for this pass.  */
3280 
3281 static bool
valid_reduction_p(stmt_vec_info stmt_info)3282 valid_reduction_p (stmt_vec_info stmt_info)
3283 {
3284   /* Parallelization would reassociate the operation, which isn't
3285      allowed for in-order reductions.  */
3286   vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
3287   return reduc_type != FOLD_LEFT_REDUCTION;
3288 }
3289 
3290 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */
3291 
3292 static void
gather_scalar_reductions(loop_p loop,reduction_info_table_type * reduction_list)3293 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
3294 {
3295   gphi_iterator gsi;
3296   loop_vec_info simple_loop_info;
3297   auto_vec<gphi *, 4> double_reduc_phis;
3298   auto_vec<gimple *, 4> double_reduc_stmts;
3299 
3300   vec_info_shared shared;
3301   simple_loop_info = vect_analyze_loop_form (loop, &shared);
3302   if (simple_loop_info == NULL)
3303     goto gather_done;
3304 
3305   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3306     {
3307       gphi *phi = gsi.phi ();
3308       affine_iv iv;
3309       tree res = PHI_RESULT (phi);
3310       bool double_reduc;
3311 
3312       if (virtual_operand_p (res))
3313 	continue;
3314 
3315       if (simple_iv (loop, loop, res, &iv, true))
3316 	continue;
3317 
3318       stmt_vec_info reduc_stmt_info
3319 	= parloops_force_simple_reduction (simple_loop_info,
3320 					   simple_loop_info->lookup_stmt (phi),
3321 					   &double_reduc, true);
3322       if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
3323 	continue;
3324 
3325       if (double_reduc)
3326 	{
3327 	  if (loop->inner->inner != NULL)
3328 	    continue;
3329 
3330 	  double_reduc_phis.safe_push (phi);
3331 	  double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
3332 	  continue;
3333 	}
3334 
3335       build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
3336     }
3337   delete simple_loop_info;
3338 
3339   if (!double_reduc_phis.is_empty ())
3340     {
3341       vec_info_shared shared;
3342       simple_loop_info = vect_analyze_loop_form (loop->inner, &shared);
3343       if (simple_loop_info)
3344 	{
3345 	  gphi *phi;
3346 	  unsigned int i;
3347 
3348 	  FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
3349 	    {
3350 	      affine_iv iv;
3351 	      tree res = PHI_RESULT (phi);
3352 	      bool double_reduc;
3353 
3354 	      use_operand_p use_p;
3355 	      gimple *inner_stmt;
3356 	      bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
3357 	      gcc_assert (single_use_p);
3358 	      if (gimple_code (inner_stmt) != GIMPLE_PHI)
3359 		continue;
3360 	      gphi *inner_phi = as_a <gphi *> (inner_stmt);
3361 	      if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
3362 			     &iv, true))
3363 		continue;
3364 
3365 	      stmt_vec_info inner_phi_info
3366 		= simple_loop_info->lookup_stmt (inner_phi);
3367 	      stmt_vec_info inner_reduc_stmt_info
3368 		= parloops_force_simple_reduction (simple_loop_info,
3369 						   inner_phi_info,
3370 						   &double_reduc, true);
3371 	      gcc_assert (!double_reduc);
3372 	      if (!inner_reduc_stmt_info
3373 		  || !valid_reduction_p (inner_reduc_stmt_info))
3374 		continue;
3375 
3376 	      build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
3377 	    }
3378 	  delete simple_loop_info;
3379 	}
3380     }
3381 
3382  gather_done:
3383   if (reduction_list->is_empty ())
3384     return;
3385 
3386   /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
3387      and delete simple_loop_info, we can set gimple_uid of reduc_phi stmts only
3388      now.  */
3389   basic_block bb;
3390   FOR_EACH_BB_FN (bb, cfun)
3391     for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3392       gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
3393   reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
3394 }
3395 
3396 /* Try to initialize NITER for code generation part.  */
3397 
3398 static bool
try_get_loop_niter(loop_p loop,class tree_niter_desc * niter)3399 try_get_loop_niter (loop_p loop, class tree_niter_desc *niter)
3400 {
3401   edge exit = single_dom_exit (loop);
3402 
3403   gcc_assert (exit);
3404 
3405   /* We need to know # of iterations, and there should be no uses of values
3406      defined inside loop outside of it, unless the values are invariants of
3407      the loop.  */
3408   if (!number_of_iterations_exit (loop, exit, niter, false))
3409     {
3410       if (dump_file && (dump_flags & TDF_DETAILS))
3411 	fprintf (dump_file, "  FAILED: number of iterations not known\n");
3412       return false;
3413     }
3414 
3415   return true;
3416 }
3417 
3418 /* Return the default def of the first function argument.  */
3419 
3420 static tree
get_omp_data_i_param(void)3421 get_omp_data_i_param (void)
3422 {
3423   tree decl = DECL_ARGUMENTS (cfun->decl);
3424   gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
3425   return ssa_default_def (cfun, decl);
3426 }
3427 
3428 /* For PHI in loop header of LOOP, look for pattern:
3429 
3430    <bb preheader>
3431    .omp_data_i = &.omp_data_arr;
3432    addr = .omp_data_i->sum;
3433    sum_a = *addr;
3434 
3435    <bb header>:
3436    sum_b = PHI <sum_a (preheader), sum_c (latch)>
3437 
3438    and return addr.  Otherwise, return NULL_TREE.  */
3439 
3440 static tree
find_reduc_addr(class loop * loop,gphi * phi)3441 find_reduc_addr (class loop *loop, gphi *phi)
3442 {
3443   edge e = loop_preheader_edge (loop);
3444   tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
3445   gimple *stmt = SSA_NAME_DEF_STMT (arg);
3446   if (!gimple_assign_single_p (stmt))
3447     return NULL_TREE;
3448   tree memref = gimple_assign_rhs1 (stmt);
3449   if (TREE_CODE (memref) != MEM_REF)
3450     return NULL_TREE;
3451   tree addr = TREE_OPERAND (memref, 0);
3452 
3453   gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
3454   if (!gimple_assign_single_p (stmt2))
3455     return NULL_TREE;
3456   tree compref = gimple_assign_rhs1 (stmt2);
3457   if (TREE_CODE (compref) != COMPONENT_REF)
3458     return NULL_TREE;
3459   tree addr2 = TREE_OPERAND (compref, 0);
3460   if (TREE_CODE (addr2) != MEM_REF)
3461     return NULL_TREE;
3462   addr2 = TREE_OPERAND (addr2, 0);
3463   if (TREE_CODE (addr2) != SSA_NAME
3464       || addr2 != get_omp_data_i_param ())
3465     return NULL_TREE;
3466 
3467   return addr;
3468 }
3469 
3470 /* Try to initialize REDUCTION_LIST for code generation part.
3471    REDUCTION_LIST describes the reductions.  */
3472 
3473 static bool
try_create_reduction_list(loop_p loop,reduction_info_table_type * reduction_list,bool oacc_kernels_p)3474 try_create_reduction_list (loop_p loop,
3475 			   reduction_info_table_type *reduction_list,
3476 			   bool oacc_kernels_p)
3477 {
3478   edge exit = single_dom_exit (loop);
3479   gphi_iterator gsi;
3480 
3481   gcc_assert (exit);
3482 
3483   /* Try to get rid of exit phis.  */
3484   final_value_replacement_loop (loop);
3485 
3486   gather_scalar_reductions (loop, reduction_list);
3487 
3488 
3489   for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3490     {
3491       gphi *phi = gsi.phi ();
3492       struct reduction_info *red;
3493       imm_use_iterator imm_iter;
3494       use_operand_p use_p;
3495       gimple *reduc_phi;
3496       tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3497 
3498       if (!virtual_operand_p (val))
3499 	{
3500 	  if (TREE_CODE (val) != SSA_NAME)
3501 	    {
3502 	      if (dump_file && (dump_flags & TDF_DETAILS))
3503 		fprintf (dump_file,
3504 			 "  FAILED: exit PHI argument invariant.\n");
3505 	      return false;
3506 	    }
3507 
3508 	  if (dump_file && (dump_flags & TDF_DETAILS))
3509 	    {
3510 	      fprintf (dump_file, "phi is ");
3511 	      print_gimple_stmt (dump_file, phi, 0);
3512 	      fprintf (dump_file, "arg of phi to exit:   value ");
3513 	      print_generic_expr (dump_file, val);
3514 	      fprintf (dump_file, " used outside loop\n");
3515 	      fprintf (dump_file,
3516 		       "  checking if it is part of reduction pattern:\n");
3517 	    }
3518 	  if (reduction_list->is_empty ())
3519 	    {
3520 	      if (dump_file && (dump_flags & TDF_DETAILS))
3521 		fprintf (dump_file,
3522 			 "  FAILED: it is not a part of reduction.\n");
3523 	      return false;
3524 	    }
3525 	  reduc_phi = NULL;
3526 	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
3527 	    {
3528 	      if (!gimple_debug_bind_p (USE_STMT (use_p))
3529 		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
3530 		{
3531 		  reduc_phi = USE_STMT (use_p);
3532 		  break;
3533 		}
3534 	    }
3535 	  red = reduction_phi (reduction_list, reduc_phi);
3536 	  if (red == NULL)
3537 	    {
3538 	      if (dump_file && (dump_flags & TDF_DETAILS))
3539 		fprintf (dump_file,
3540 			 "  FAILED: it is not a part of reduction.\n");
3541 	      return false;
3542 	    }
3543 	  if (red->keep_res != NULL)
3544 	    {
3545 	      if (dump_file && (dump_flags & TDF_DETAILS))
3546 		fprintf (dump_file,
3547 			 "  FAILED: reduction has multiple exit phis.\n");
3548 	      return false;
3549 	    }
3550 	  red->keep_res = phi;
3551 	  if (dump_file && (dump_flags & TDF_DETAILS))
3552 	    {
3553 	      fprintf (dump_file, "reduction phi is  ");
3554 	      print_gimple_stmt (dump_file, red->reduc_phi, 0);
3555 	      fprintf (dump_file, "reduction stmt is  ");
3556 	      print_gimple_stmt (dump_file, red->reduc_stmt, 0);
3557 	    }
3558 	}
3559     }
3560 
3561   /* The iterations of the loop may communicate only through bivs whose
3562      iteration space can be distributed efficiently.  */
3563   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3564     {
3565       gphi *phi = gsi.phi ();
3566       tree def = PHI_RESULT (phi);
3567       affine_iv iv;
3568 
3569       if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
3570 	{
3571 	  struct reduction_info *red;
3572 
3573 	  red = reduction_phi (reduction_list, phi);
3574 	  if (red == NULL)
3575 	    {
3576 	      if (dump_file && (dump_flags & TDF_DETAILS))
3577 		fprintf (dump_file,
3578 			 "  FAILED: scalar dependency between iterations\n");
3579 	      return false;
3580 	    }
3581 	}
3582     }
3583 
3584   if (oacc_kernels_p)
3585     {
3586       for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
3587 	   gsi_next (&gsi))
3588 	{
3589 	  gphi *phi = gsi.phi ();
3590 	  tree def = PHI_RESULT (phi);
3591 	  affine_iv iv;
3592 
3593 	  if (!virtual_operand_p (def)
3594 	      && !simple_iv (loop, loop, def, &iv, true))
3595 	    {
3596 	      tree addr = find_reduc_addr (loop, phi);
3597 	      if (addr == NULL_TREE)
3598 		return false;
3599 	      struct reduction_info *red = reduction_phi (reduction_list, phi);
3600 	      red->reduc_addr = addr;
3601 	    }
3602 	}
3603     }
3604 
3605   return true;
3606 }
3607 
3608 /* Return true if LOOP contains phis with ADDR_EXPR in args.  */
3609 
3610 static bool
loop_has_phi_with_address_arg(class loop * loop)3611 loop_has_phi_with_address_arg (class loop *loop)
3612 {
3613   basic_block *bbs = get_loop_body (loop);
3614   bool res = false;
3615 
3616   unsigned i, j;
3617   gphi_iterator gsi;
3618   for (i = 0; i < loop->num_nodes; i++)
3619     for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3620       {
3621 	gphi *phi = gsi.phi ();
3622 	for (j = 0; j < gimple_phi_num_args (phi); j++)
3623 	  {
3624 	    tree arg = gimple_phi_arg_def (phi, j);
3625 	    if (TREE_CODE (arg) == ADDR_EXPR)
3626 	      {
3627 		/* This should be handled by eliminate_local_variables, but that
3628 		   function currently ignores phis.  */
3629 		res = true;
3630 		goto end;
3631 	      }
3632 	  }
3633       }
3634  end:
3635   free (bbs);
3636 
3637   return res;
3638 }
3639 
3640 /* Return true if memory ref REF (corresponding to the stmt at GSI in
3641    REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
3642    or the statements in REGIONS_BB[I + n].  REF_IS_STORE indicates if REF is a
3643    store.  Ignore conflicts with SKIP_STMT.  */
3644 
3645 static bool
ref_conflicts_with_region(gimple_stmt_iterator gsi,ao_ref * ref,bool ref_is_store,vec<basic_block> region_bbs,unsigned int i,gimple * skip_stmt)3646 ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
3647 			   bool ref_is_store, vec<basic_block> region_bbs,
3648 			   unsigned int i, gimple *skip_stmt)
3649 {
3650   basic_block bb = region_bbs[i];
3651   gsi_next (&gsi);
3652 
3653   while (true)
3654     {
3655       for (; !gsi_end_p (gsi);
3656 	   gsi_next (&gsi))
3657 	{
3658 	  gimple *stmt = gsi_stmt (gsi);
3659 	  if (stmt == skip_stmt)
3660 	    {
3661 	      if (dump_file)
3662 		{
3663 		  fprintf (dump_file, "skipping reduction store: ");
3664 		  print_gimple_stmt (dump_file, stmt, 0);
3665 		}
3666 	      continue;
3667 	    }
3668 
3669 	  if (!gimple_vdef (stmt)
3670 	      && !gimple_vuse (stmt))
3671 	    continue;
3672 
3673 	  if (gimple_code (stmt) == GIMPLE_RETURN)
3674 	    continue;
3675 
3676 	  if (ref_is_store)
3677 	    {
3678 	      if (ref_maybe_used_by_stmt_p (stmt, ref))
3679 		{
3680 		  if (dump_file)
3681 		    {
3682 		      fprintf (dump_file, "Stmt ");
3683 		      print_gimple_stmt (dump_file, stmt, 0);
3684 		    }
3685 		  return true;
3686 		}
3687 	    }
3688 	  else
3689 	    {
3690 	      if (stmt_may_clobber_ref_p_1 (stmt, ref))
3691 		{
3692 		  if (dump_file)
3693 		    {
3694 		      fprintf (dump_file, "Stmt ");
3695 		      print_gimple_stmt (dump_file, stmt, 0);
3696 		    }
3697 		  return true;
3698 		}
3699 	    }
3700 	}
3701       i++;
3702       if (i == region_bbs.length ())
3703 	break;
3704       bb = region_bbs[i];
3705       gsi = gsi_start_bb (bb);
3706     }
3707 
3708   return false;
3709 }
3710 
3711 /* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
3712    in parallel with REGION_BBS containing the loop.  Return the stores of
3713    reduction results in REDUCTION_STORES.  */
3714 
3715 static bool
oacc_entry_exit_ok_1(bitmap in_loop_bbs,vec<basic_block> region_bbs,reduction_info_table_type * reduction_list,bitmap reduction_stores)3716 oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3717 		      reduction_info_table_type *reduction_list,
3718 		      bitmap reduction_stores)
3719 {
3720   tree omp_data_i = get_omp_data_i_param ();
3721 
3722   unsigned i;
3723   basic_block bb;
3724   FOR_EACH_VEC_ELT (region_bbs, i, bb)
3725     {
3726       if (bitmap_bit_p (in_loop_bbs, bb->index))
3727 	continue;
3728 
3729       gimple_stmt_iterator gsi;
3730       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3731 	   gsi_next (&gsi))
3732 	{
3733 	  gimple *stmt = gsi_stmt (gsi);
3734 	  gimple *skip_stmt = NULL;
3735 
3736 	  if (is_gimple_debug (stmt)
3737 	      || gimple_code (stmt) == GIMPLE_COND)
3738 	    continue;
3739 
3740 	  ao_ref ref;
3741 	  bool ref_is_store = false;
3742 	  if (gimple_assign_load_p (stmt))
3743 	    {
3744 	      tree rhs = gimple_assign_rhs1 (stmt);
3745 	      tree base = get_base_address (rhs);
3746 	      if (TREE_CODE (base) == MEM_REF
3747 		  && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
3748 		continue;
3749 
3750 	      tree lhs = gimple_assign_lhs (stmt);
3751 	      if (TREE_CODE (lhs) == SSA_NAME
3752 		  && has_single_use (lhs))
3753 		{
3754 		  use_operand_p use_p;
3755 		  gimple *use_stmt;
3756 		  struct reduction_info *red;
3757 		  single_imm_use (lhs, &use_p, &use_stmt);
3758 		  if (gimple_code (use_stmt) == GIMPLE_PHI
3759 		      && (red = reduction_phi (reduction_list, use_stmt)))
3760 		    {
3761 		      tree val = PHI_RESULT (red->keep_res);
3762 		      if (has_single_use (val))
3763 			{
3764 			  single_imm_use (val, &use_p, &use_stmt);
3765 			  if (gimple_store_p (use_stmt))
3766 			    {
3767 			      unsigned int id
3768 				= SSA_NAME_VERSION (gimple_vdef (use_stmt));
3769 			      bitmap_set_bit (reduction_stores, id);
3770 			      skip_stmt = use_stmt;
3771 			      if (dump_file)
3772 				{
3773 				  fprintf (dump_file, "found reduction load: ");
3774 				  print_gimple_stmt (dump_file, stmt, 0);
3775 				}
3776 			    }
3777 			}
3778 		    }
3779 		}
3780 
3781 	      ao_ref_init (&ref, rhs);
3782 	    }
3783 	  else if (gimple_store_p (stmt))
3784 	    {
3785 	      ao_ref_init (&ref, gimple_assign_lhs (stmt));
3786 	      ref_is_store = true;
3787 	    }
3788 	  else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3789 	    continue;
3790 	  else if (!gimple_has_side_effects (stmt)
3791 		   && !gimple_could_trap_p (stmt)
3792 		   && !stmt_could_throw_p (cfun, stmt)
3793 		   && !gimple_vdef (stmt)
3794 		   && !gimple_vuse (stmt))
3795 	    continue;
3796 	  else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
3797 	    continue;
3798 	  else if (gimple_code (stmt) == GIMPLE_RETURN)
3799 	    continue;
3800 	  else
3801 	    {
3802 	      if (dump_file)
3803 		{
3804 		  fprintf (dump_file, "Unhandled stmt in entry/exit: ");
3805 		  print_gimple_stmt (dump_file, stmt, 0);
3806 		}
3807 	      return false;
3808 	    }
3809 
3810 	  if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3811 					 i, skip_stmt))
3812 	    {
3813 	      if (dump_file)
3814 		{
3815 		  fprintf (dump_file, "conflicts with entry/exit stmt: ");
3816 		  print_gimple_stmt (dump_file, stmt, 0);
3817 		}
3818 	      return false;
3819 	    }
3820 	}
3821     }
3822 
3823   return true;
3824 }
3825 
3826 /* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3827    gang_pos == 0, except when the stores are REDUCTION_STORES.  Return true
3828    if any changes were made.  */
3829 
3830 static bool
oacc_entry_exit_single_gang(bitmap in_loop_bbs,vec<basic_block> region_bbs,bitmap reduction_stores)3831 oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3832 			     bitmap reduction_stores)
3833 {
3834   tree gang_pos = NULL_TREE;
3835   bool changed = false;
3836 
3837   unsigned i;
3838   basic_block bb;
3839   FOR_EACH_VEC_ELT (region_bbs, i, bb)
3840     {
3841       if (bitmap_bit_p (in_loop_bbs, bb->index))
3842 	continue;
3843 
3844       gimple_stmt_iterator gsi;
3845       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3846 	{
3847 	  gimple *stmt = gsi_stmt (gsi);
3848 
3849 	  if (!gimple_store_p (stmt))
3850 	    {
3851 	      /* Update gsi to point to next stmt.  */
3852 	      gsi_next (&gsi);
3853 	      continue;
3854 	    }
3855 
3856 	  if (bitmap_bit_p (reduction_stores,
3857 			    SSA_NAME_VERSION (gimple_vdef (stmt))))
3858 	    {
3859 	      if (dump_file)
3860 		{
3861 		  fprintf (dump_file,
3862 			   "skipped reduction store for single-gang"
3863 			   " neutering: ");
3864 		  print_gimple_stmt (dump_file, stmt, 0);
3865 		}
3866 
3867 	      /* Update gsi to point to next stmt.  */
3868 	      gsi_next (&gsi);
3869 	      continue;
3870 	    }
3871 
3872 	  changed = true;
3873 
3874 	  if (gang_pos == NULL_TREE)
3875 	    {
3876 	      tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
3877 	      gcall *gang_single
3878 		= gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
3879 	      gang_pos = make_ssa_name (integer_type_node);
3880 	      gimple_call_set_lhs (gang_single, gang_pos);
3881 	      gimple_stmt_iterator start
3882 		= gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
3883 	      tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
3884 	      gimple_set_vuse (gang_single, vuse);
3885 	      gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
3886 	    }
3887 
3888 	  if (dump_file)
3889 	    {
3890 	      fprintf (dump_file,
3891 		       "found store that needs single-gang neutering: ");
3892 	      print_gimple_stmt (dump_file, stmt, 0);
3893 	    }
3894 
3895 	  {
3896 	    /* Split block before store.  */
3897 	    gimple_stmt_iterator gsi2 = gsi;
3898 	    gsi_prev (&gsi2);
3899 	    edge e;
3900 	    if (gsi_end_p (gsi2))
3901 	      {
3902 		e = split_block_after_labels (bb);
3903 		gsi2 = gsi_last_bb (bb);
3904 	      }
3905 	    else
3906 	      e = split_block (bb, gsi_stmt (gsi2));
3907 	    basic_block bb2 = e->dest;
3908 
3909 	    /* Split block after store.  */
3910 	    gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
3911 	    edge e2 = split_block (bb2, gsi_stmt (gsi3));
3912 	    basic_block bb3 = e2->dest;
3913 
3914 	    gimple *cond
3915 	      = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
3916 				   NULL_TREE, NULL_TREE);
3917 	    gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
3918 
3919 	    edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
3920 	    /* FIXME: What is the probability?  */
3921 	    e3->probability = profile_probability::guessed_never ();
3922 	    e->flags = EDGE_TRUE_VALUE;
3923 
3924 	    tree vdef = gimple_vdef (stmt);
3925 	    tree vuse = gimple_vuse (stmt);
3926 
3927 	    tree phi_res = copy_ssa_name (vdef);
3928 	    gphi *new_phi = create_phi_node (phi_res, bb3);
3929 	    replace_uses_by (vdef, phi_res);
3930 	    add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
3931 	    add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
3932 
3933 	    /* Update gsi to point to next stmt.  */
3934 	    bb = bb3;
3935 	    gsi = gsi_start_bb (bb);
3936 	  }
3937 	}
3938     }
3939 
3940   return changed;
3941 }
3942 
3943 /* Return true if the statements before and after the LOOP can be executed in
3944    parallel with the function containing the loop.  Resolve conflicting stores
3945    outside LOOP by guarding them such that only a single gang executes them.  */
3946 
3947 static bool
oacc_entry_exit_ok(class loop * loop,reduction_info_table_type * reduction_list)3948 oacc_entry_exit_ok (class loop *loop,
3949 		    reduction_info_table_type *reduction_list)
3950 {
3951   basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3952   vec<basic_block> region_bbs
3953     = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3954 
3955   bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3956   bitmap_clear (in_loop_bbs);
3957   for (unsigned int i = 0; i < loop->num_nodes; i++)
3958     bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3959 
3960   bitmap reduction_stores = BITMAP_ALLOC (NULL);
3961   bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3962 				   reduction_stores);
3963 
3964   if (res)
3965     {
3966       bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3967 						  reduction_stores);
3968       if (changed)
3969 	{
3970 	  free_dominance_info (CDI_DOMINATORS);
3971 	  calculate_dominance_info (CDI_DOMINATORS);
3972 	}
3973     }
3974 
3975   region_bbs.release ();
3976   free (loop_bbs);
3977 
3978   BITMAP_FREE (in_loop_bbs);
3979   BITMAP_FREE (reduction_stores);
3980 
3981   return res;
3982 }
3983 
3984 /* Detect parallel loops and generate parallel code using libgomp
3985    primitives.  Returns true if some loop was parallelized, false
3986    otherwise.  */
3987 
3988 static bool
parallelize_loops(bool oacc_kernels_p)3989 parallelize_loops (bool oacc_kernels_p)
3990 {
3991   unsigned n_threads;
3992   bool changed = false;
3993   class loop *loop;
3994   class loop *skip_loop = NULL;
3995   class tree_niter_desc niter_desc;
3996   struct obstack parloop_obstack;
3997   HOST_WIDE_INT estimated;
3998 
3999   /* Do not parallelize loops in the functions created by parallelization.  */
4000   if (!oacc_kernels_p
4001       && parallelized_function_p (cfun->decl))
4002     return false;
4003 
4004   /* Do not parallelize loops in offloaded functions.  */
4005   if (!oacc_kernels_p
4006       && oacc_get_fn_attrib (cfun->decl) != NULL)
4007      return false;
4008 
4009   if (cfun->has_nonlocal_label)
4010     return false;
4011 
4012   /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
4013      the argument to -ftree-parallelize-loops.  */
4014   if (oacc_kernels_p)
4015     n_threads = 0;
4016   else
4017     n_threads = flag_tree_parallelize_loops;
4018 
4019   gcc_obstack_init (&parloop_obstack);
4020   reduction_info_table_type reduction_list (10);
4021 
4022   calculate_dominance_info (CDI_DOMINATORS);
4023 
4024   FOR_EACH_LOOP (loop, 0)
4025     {
4026       if (loop == skip_loop)
4027 	{
4028 	  if (!loop->in_oacc_kernels_region
4029 	      && dump_file && (dump_flags & TDF_DETAILS))
4030 	    fprintf (dump_file,
4031 		     "Skipping loop %d as inner loop of parallelized loop\n",
4032 		     loop->num);
4033 
4034 	  skip_loop = loop->inner;
4035 	  continue;
4036 	}
4037       else
4038 	skip_loop = NULL;
4039 
4040       reduction_list.empty ();
4041 
4042       if (oacc_kernels_p)
4043 	{
4044 	  if (!loop->in_oacc_kernels_region)
4045 	    continue;
4046 
4047 	  /* Don't try to parallelize inner loops in an oacc kernels region.  */
4048 	  if (loop->inner)
4049 	    skip_loop = loop->inner;
4050 
4051 	  if (dump_file && (dump_flags & TDF_DETAILS))
4052 	    fprintf (dump_file,
4053 		     "Trying loop %d with header bb %d in oacc kernels"
4054 		     " region\n", loop->num, loop->header->index);
4055 	}
4056 
4057       if (dump_file && (dump_flags & TDF_DETAILS))
4058       {
4059         fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
4060 	if (loop->inner)
4061 	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
4062 	else
4063 	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
4064       }
4065 
4066       if (!single_dom_exit (loop))
4067       {
4068 
4069         if (dump_file && (dump_flags & TDF_DETAILS))
4070 	  fprintf (dump_file, "loop is !single_dom_exit\n");
4071 
4072 	continue;
4073       }
4074 
4075       if (/* And of course, the loop must be parallelizable.  */
4076 	  !can_duplicate_loop_p (loop)
4077 	  || loop_has_blocks_with_irreducible_flag (loop)
4078 	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
4079 	  /* FIXME: the check for vector phi nodes could be removed.  */
4080 	  || loop_has_vector_phi_nodes (loop))
4081 	continue;
4082 
4083       estimated = estimated_loop_iterations_int (loop);
4084       if (estimated == -1)
4085 	estimated = get_likely_max_loop_iterations_int (loop);
4086       /* FIXME: Bypass this check as graphite doesn't update the
4087 	 count and frequency correctly now.  */
4088       if (!flag_loop_parallelize_all
4089 	  && !oacc_kernels_p
4090 	  && ((estimated != -1
4091 	       && (estimated
4092 		   < ((HOST_WIDE_INT) n_threads
4093 		      * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
4094 	      /* Do not bother with loops in cold areas.  */
4095 	      || optimize_loop_nest_for_size_p (loop)))
4096 	continue;
4097 
4098       if (!try_get_loop_niter (loop, &niter_desc))
4099 	continue;
4100 
4101       if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
4102 	continue;
4103 
4104       if (loop_has_phi_with_address_arg (loop))
4105 	continue;
4106 
4107       if (!loop->can_be_parallel
4108 	  && !loop_parallel_p (loop, &parloop_obstack))
4109 	continue;
4110 
4111       if (oacc_kernels_p
4112 	&& !oacc_entry_exit_ok (loop, &reduction_list))
4113 	{
4114 	  if (dump_file)
4115 	    fprintf (dump_file, "entry/exit not ok: FAILED\n");
4116 	  continue;
4117 	}
4118 
4119       changed = true;
4120       skip_loop = loop->inner;
4121 
4122       if (dump_enabled_p ())
4123 	{
4124 	  dump_user_location_t loop_loc = find_loop_location (loop);
4125 	  if (loop->inner)
4126 	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4127 			     "parallelizing outer loop %d\n", loop->num);
4128 	  else
4129 	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4130 			     "parallelizing inner loop %d\n", loop->num);
4131 	}
4132 
4133       gen_parallel_loop (loop, &reduction_list,
4134 			 n_threads, &niter_desc, oacc_kernels_p);
4135     }
4136 
4137   obstack_free (&parloop_obstack, NULL);
4138 
4139   /* Parallelization will cause new function calls to be inserted through
4140      which local variables will escape.  Reset the points-to solution
4141      for ESCAPED.  */
4142   if (changed)
4143     pt_solution_reset (&cfun->gimple_df->escaped);
4144 
4145   return changed;
4146 }
4147 
4148 /* Parallelization.  */
4149 
4150 namespace {
4151 
4152 const pass_data pass_data_parallelize_loops =
4153 {
4154   GIMPLE_PASS, /* type */
4155   "parloops", /* name */
4156   OPTGROUP_LOOP, /* optinfo_flags */
4157   TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
4158   ( PROP_cfg | PROP_ssa ), /* properties_required */
4159   0, /* properties_provided */
4160   0, /* properties_destroyed */
4161   0, /* todo_flags_start */
4162   0, /* todo_flags_finish */
4163 };
4164 
4165 class pass_parallelize_loops : public gimple_opt_pass
4166 {
4167 public:
pass_parallelize_loops(gcc::context * ctxt)4168   pass_parallelize_loops (gcc::context *ctxt)
4169     : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
4170       oacc_kernels_p (false)
4171   {}
4172 
4173   /* opt_pass methods: */
gate(function *)4174   virtual bool gate (function *)
4175   {
4176     if (oacc_kernels_p)
4177       return flag_openacc;
4178     else
4179       return flag_tree_parallelize_loops > 1;
4180   }
4181   virtual unsigned int execute (function *);
clone()4182   opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
set_pass_param(unsigned int n,bool param)4183   void set_pass_param (unsigned int n, bool param)
4184     {
4185       gcc_assert (n == 0);
4186       oacc_kernels_p = param;
4187     }
4188 
4189  private:
4190   bool oacc_kernels_p;
4191 }; // class pass_parallelize_loops
4192 
4193 unsigned
execute(function * fun)4194 pass_parallelize_loops::execute (function *fun)
4195 {
4196   tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4197   if (nthreads == NULL_TREE)
4198     return 0;
4199 
4200   bool in_loop_pipeline = scev_initialized_p ();
4201   if (!in_loop_pipeline)
4202     loop_optimizer_init (LOOPS_NORMAL
4203 			 | LOOPS_HAVE_RECORDED_EXITS);
4204 
4205   if (number_of_loops (fun) <= 1)
4206     return 0;
4207 
4208   if (!in_loop_pipeline)
4209     {
4210       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
4211       scev_initialize ();
4212     }
4213 
4214   unsigned int todo = 0;
4215   if (parallelize_loops (oacc_kernels_p))
4216     {
4217       fun->curr_properties &= ~(PROP_gimple_eomp);
4218 
4219       checking_verify_loop_structure ();
4220 
4221       todo |= TODO_update_ssa;
4222     }
4223 
4224   if (!in_loop_pipeline)
4225     {
4226       scev_finalize ();
4227       loop_optimizer_finalize ();
4228     }
4229 
4230   return todo;
4231 }
4232 
4233 } // anon namespace
4234 
4235 gimple_opt_pass *
make_pass_parallelize_loops(gcc::context * ctxt)4236 make_pass_parallelize_loops (gcc::context *ctxt)
4237 {
4238   return new pass_parallelize_loops (ctxt);
4239 }
4240