1 /* Expansion pass for OMP directives.  Outlines regions of certain OMP
2    directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth.
4 
5 Copyright (C) 2005-2021 Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62 #include "opts.h"
63 
64 /* OMP region information.  Every parallel and workshare
65    directive is enclosed between two markers, the OMP_* directive
66    and a corresponding GIMPLE_OMP_RETURN statement.  */
67 
68 struct omp_region
69 {
70   /* The enclosing region.  */
71   struct omp_region *outer;
72 
73   /* First child region.  */
74   struct omp_region *inner;
75 
76   /* Next peer region.  */
77   struct omp_region *next;
78 
79   /* Block containing the omp directive as its last stmt.  */
80   basic_block entry;
81 
82   /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
83   basic_block exit;
84 
85   /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
86   basic_block cont;
87 
88   /* If this is a combined parallel+workshare region, this is a list
89      of additional arguments needed by the combined parallel+workshare
90      library call.  */
91   vec<tree, va_gc> *ws_args;
92 
93   /* The code for the omp directive of this region.  */
94   enum gimple_code type;
95 
96   /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
97   enum omp_clause_schedule_kind sched_kind;
98 
99   /* Schedule modifiers.  */
100   unsigned char sched_modifiers;
101 
102   /* True if this is a combined parallel+workshare region.  */
103   bool is_combined_parallel;
104 
105   /* Copy of fd.lastprivate_conditional != 0.  */
106   bool has_lastprivate_conditional;
107 
108   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109      a depend clause.  */
110   gomp_ordered *ord_stmt;
111 };
112 
113 static struct omp_region *root_omp_region;
114 static bool omp_any_child_fn_dumped;
115 
116 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117 				     bool = false);
118 static gphi *find_phi_with_arg_on_edge (tree, edge);
119 static void expand_omp (struct omp_region *region);
120 
121 /* Return true if REGION is a combined parallel+workshare region.  */
122 
123 static inline bool
124 is_combined_parallel (struct omp_region *region)
125 {
126   return region->is_combined_parallel;
127 }
128 
129 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
130    is the immediate dominator of PAR_ENTRY_BB, return true if there
131    are no data dependencies that would prevent expanding the parallel
132    directive at PAR_ENTRY_BB as a combined parallel+workshare region.
133 
134    When expanding a combined parallel+workshare region, the call to
135    the child function may need additional arguments in the case of
136    GIMPLE_OMP_FOR regions.  In some cases, these arguments are
137    computed out of variables passed in from the parent to the child
138    via 'struct .omp_data_s'.  For instance:
139 
140 	#pragma omp parallel for schedule (guided, i * 4)
141 	for (j ...)
142 
143    Is lowered into:
144 
145 	# BLOCK 2 (PAR_ENTRY_BB)
146 	.omp_data_o.i = i;
147 	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
148 
149 	# BLOCK 3 (WS_ENTRY_BB)
150 	.omp_data_i = &.omp_data_o;
151 	D.1667 = .omp_data_i->i;
152 	D.1598 = D.1667 * 4;
153 	#pragma omp for schedule (guided, D.1598)
154 
155    When we outline the parallel region, the call to the child function
156    'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157    that value is computed *after* the call site.  So, in principle we
158    cannot do the transformation.
159 
160    To see whether the code in WS_ENTRY_BB blocks the combined
161    parallel+workshare call, we collect all the variables used in the
162    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
163    statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
164    call.
165 
166    FIXME.  If we had the SSA form built at this point, we could merely
167    hoist the code in block 3 into block 2 and be done with it.  But at
168    this point we don't have dataflow information and though we could
169    hack something up here, it is really not worth the aggravation.  */
170 
171 static bool
172 workshare_safe_to_combine_p (basic_block ws_entry_bb)
173 {
174   struct omp_for_data fd;
175   gimple *ws_stmt = last_stmt (ws_entry_bb);
176 
177   if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178     return true;
179 
180   gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181   if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182     return false;
183 
184   omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
185 
186   if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187     return false;
188   if (fd.iter_type != long_integer_type_node)
189     return false;
190 
191   /* FIXME.  We give up too easily here.  If any of these arguments
192      are not constants, they will likely involve variables that have
193      been mapped into fields of .omp_data_s for sharing with the child
194      function.  With appropriate data flow, it would be possible to
195      see through this.  */
196   if (!is_gimple_min_invariant (fd.loop.n1)
197       || !is_gimple_min_invariant (fd.loop.n2)
198       || !is_gimple_min_invariant (fd.loop.step)
199       || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200     return false;
201 
202   return true;
203 }
204 
205 /* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on the presence
206    of the simd modifier (SIMD_SCHEDULE).  */
207 
208 static tree
209 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
210 {
211   if (!simd_schedule || integer_zerop (chunk_size))
212     return chunk_size;
213 
214   poly_uint64 vf = omp_max_vf ();
215   if (known_eq (vf, 1U))
216     return chunk_size;
217 
218   tree type = TREE_TYPE (chunk_size);
219   chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220 			    build_int_cst (type, vf - 1));
221   return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222 		      build_int_cst (type, -vf));
223 }
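
/* As a worked example (a sketch, not taken from any particular test
   case): with a vectorization factor of 8, a clause
   'schedule (simd:dynamic, 10)' gets its chunk size rounded up as

	(10 + (8 - 1)) & -8  ==  16

   so that every chunk the runtime hands out is a multiple of the
   vectorization factor.  */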
224 
225 /* Collect additional arguments needed to emit a combined
226    parallel+workshare call.  PAR_STMT is the enclosing parallel directive
227    and WS_STMT is the workshare directive being expanded.  */
228 
229 static vec<tree, va_gc> *
230 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
231 {
232   tree t;
233   location_t loc = gimple_location (ws_stmt);
234   vec<tree, va_gc> *ws_args;
235 
236   if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
237     {
238       struct omp_for_data fd;
239       tree n1, n2;
240 
241       omp_extract_for_data (for_stmt, &fd, NULL);
242       n1 = fd.loop.n1;
243       n2 = fd.loop.n2;
244 
245       if (gimple_omp_for_combined_into_p (for_stmt))
246 	{
247 	  tree innerc
248 	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249 			       OMP_CLAUSE__LOOPTEMP_);
250 	  gcc_assert (innerc);
251 	  n1 = OMP_CLAUSE_DECL (innerc);
252 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253 				    OMP_CLAUSE__LOOPTEMP_);
254 	  gcc_assert (innerc);
255 	  n2 = OMP_CLAUSE_DECL (innerc);
256 	}
257 
258       vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
259 
260       t = fold_convert_loc (loc, long_integer_type_node, n1);
261       ws_args->quick_push (t);
262 
263       t = fold_convert_loc (loc, long_integer_type_node, n2);
264       ws_args->quick_push (t);
265 
266       t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267       ws_args->quick_push (t);
268 
269       if (fd.chunk_size)
270 	{
271 	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272 	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
273 	  ws_args->quick_push (t);
274 	}
275 
276       return ws_args;
277     }
278   else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
279     {
280       /* Number of sections is equal to the number of edges from the
281 	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282 	 the exit of the sections region.  */
283       basic_block bb = single_succ (gimple_bb (ws_stmt));
284       t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285       vec_alloc (ws_args, 1);
286       ws_args->quick_push (t);
287       return ws_args;
288     }
289 
290   gcc_unreachable ();
291 }
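
/* As an illustration (hypothetical source, schematic values): for

	#pragma omp parallel for schedule (dynamic, 16)
	for (i = 0; i < n; i++)
	  ...

   the vector built above holds the bounds, step and chunk size widened
   to long, roughly { (long) 0, (long) n, (long) 1, (long) 16 }, ready
   to be spliced into the combined GOMP_parallel_loop_dynamic argument
   list.  For a sections region it instead holds the single unsigned
   section count.  */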
292 
293 /* Discover whether REGION is a combined parallel+workshare region.  */
294 
295 static void
296 determine_parallel_type (struct omp_region *region)
297 {
298   basic_block par_entry_bb, par_exit_bb;
299   basic_block ws_entry_bb, ws_exit_bb;
300 
301   if (region == NULL || region->inner == NULL
302       || region->exit == NULL || region->inner->exit == NULL
303       || region->inner->cont == NULL)
304     return;
305 
306   /* We only support parallel+for and parallel+sections.  */
307   if (region->type != GIMPLE_OMP_PARALLEL
308       || (region->inner->type != GIMPLE_OMP_FOR
309 	  && region->inner->type != GIMPLE_OMP_SECTIONS))
310     return;
311 
312   /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313      WS_EXIT_BB -> PAR_EXIT_BB.  */
314   par_entry_bb = region->entry;
315   par_exit_bb = region->exit;
316   ws_entry_bb = region->inner->entry;
317   ws_exit_bb = region->inner->exit;
318 
319   /* Give up on task reductions on the parallel; while they are
320      implementable, adding another big set of APIs or slowing down the
321      normal paths is not acceptable.  */
322   tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323   if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324     return;
325 
326   if (single_succ (par_entry_bb) == ws_entry_bb
327       && single_succ (ws_exit_bb) == par_exit_bb
328       && workshare_safe_to_combine_p (ws_entry_bb)
329       && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330 	  || (last_and_only_stmt (ws_entry_bb)
331 	      && last_and_only_stmt (par_exit_bb))))
332     {
333       gimple *par_stmt = last_stmt (par_entry_bb);
334       gimple *ws_stmt = last_stmt (ws_entry_bb);
335 
336       if (region->inner->type == GIMPLE_OMP_FOR)
337 	{
338 	  /* If this is a combined parallel loop, we need to determine
339 	     whether or not to use the combined library calls.  There
340 	     are two cases where we do not apply the transformation:
341 	     static loops and any kind of ordered loop.  In the first
342 	     case, we already open code the loop so there is no need
343 	     to do anything else.  In the latter case, the combined
344 	     parallel loop call would still need extra synchronization
345 	     to implement ordered semantics, so there would not be any
346 	     gain in using the combined call.  */
347 	  tree clauses = gimple_omp_for_clauses (ws_stmt);
348 	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349 	  if (c == NULL
350 	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351 		  == OMP_CLAUSE_SCHEDULE_STATIC)
352 	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353 	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354 	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355 		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356 	    return;
357 	}
358       else if (region->inner->type == GIMPLE_OMP_SECTIONS
359 	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 				    OMP_CLAUSE__REDUCTEMP_)
361 		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362 				       OMP_CLAUSE__CONDTEMP_)))
363 	return;
364 
365       region->is_combined_parallel = true;
366       region->inner->is_combined_parallel = true;
367       region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
368     }
369 }
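
/* For instance (schematic, not compiler output), the nest

	#pragma omp parallel
	#pragma omp for schedule (dynamic)
	for (...)
	  ...

   is marked combined here and later expanded into a single
   GOMP_parallel_loop_dynamic call, whereas the same nest with
   'schedule (static)' or an 'ordered' clause keeps the plain
   GOMP_parallel call with the workshare open coded inside the child
   function.  */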
370 
371 /* Debugging dumps for parallel regions.  */
372 void dump_omp_region (FILE *, struct omp_region *, int);
373 void debug_omp_region (struct omp_region *);
374 void debug_all_omp_regions (void);
375 
376 /* Dump the parallel region tree rooted at REGION.  */
377 
378 void
379 dump_omp_region (FILE *file, struct omp_region *region, int indent)
380 {
381   fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382 	   gimple_code_name[region->type]);
383 
384   if (region->inner)
385     dump_omp_region (file, region->inner, indent + 4);
386 
387   if (region->cont)
388     {
389       fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390 	       region->cont->index);
391     }
392 
393   if (region->exit)
394     fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395 	     region->exit->index);
396   else
397     fprintf (file, "%*s[no exit marker]\n", indent, "");
398 
399   if (region->next)
400     dump_omp_region (file, region->next, indent);
401 }
402 
403 DEBUG_FUNCTION void
404 debug_omp_region (struct omp_region *region)
405 {
406   dump_omp_region (stderr, region, 0);
407 }
408 
409 DEBUG_FUNCTION void
410 debug_all_omp_regions (void)
411 {
412   dump_omp_region (stderr, root_omp_region, 0);
413 }
414 
415 /* Create a new parallel region of TYPE starting at BB inside PARENT.  */
416 
417 static struct omp_region *
418 new_omp_region (basic_block bb, enum gimple_code type,
419 		struct omp_region *parent)
420 {
421   struct omp_region *region = XCNEW (struct omp_region);
422 
423   region->outer = parent;
424   region->entry = bb;
425   region->type = type;
426 
427   if (parent)
428     {
429       /* This is a nested region.  Add it to the list of inner
430 	 regions in PARENT.  */
431       region->next = parent->inner;
432       parent->inner = region;
433     }
434   else
435     {
436       /* This is a toplevel region.  Add it to the list of toplevel
437 	 regions in ROOT_OMP_REGION.  */
438       region->next = root_omp_region;
439       root_omp_region = region;
440     }
441 
442   return region;
443 }
444 
445 /* Release the memory associated with the region tree rooted at REGION.  */
446 
447 static void
448 free_omp_region_1 (struct omp_region *region)
449 {
450   struct omp_region *i, *n;
451 
452   for (i = region->inner; i ; i = n)
453     {
454       n = i->next;
455       free_omp_region_1 (i);
456     }
457 
458   free (region);
459 }
460 
461 /* Release the memory for the entire omp region tree.  */
462 
463 void
464 omp_free_regions (void)
465 {
466   struct omp_region *r, *n;
467   for (r = root_omp_region; r ; r = n)
468     {
469       n = r->next;
470       free_omp_region_1 (r);
471     }
472   root_omp_region = NULL;
473 }
474 
475 /* A convenience function to build an empty GIMPLE_COND with just the
476    condition.  */
477 
478 static gcond *
479 gimple_build_cond_empty (tree cond)
480 {
481   enum tree_code pred_code;
482   tree lhs, rhs;
483 
484   gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485   return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
486 }
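
/* Typical use (illustrative only): given a tree condition such as
   'a < b',

	gcond *c = gimple_build_cond_empty (cond);

   produces the GIMPLE 'if (a < b)' with NULL true/false labels,
   leaving the branch targets to be supplied by the caller's CFG
   edges.  */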
487 
488 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489    Add CHILD_FNDECL to decl chain of the supercontext of the block
490    ENTRY_BLOCK - this is the block which originally contained the
491    code from which CHILD_FNDECL was created.
492 
493    Together, these actions ensure that the debug info for the outlined
494    function will be emitted with the correct lexical scope.  */
495 
496 static void
497 adjust_context_and_scope (struct omp_region *region, tree entry_block,
498 			  tree child_fndecl)
499 {
500   tree parent_fndecl = NULL_TREE;
501   gimple *entry_stmt;
502   /* OMP expansion expands inner regions before outer ones, so if
503      we e.g. have explicit task region nested in parallel region, when
504      expanding the task region current_function_decl will be the original
505      source function, but we actually want to use as context the child
506      function of the parallel.  */
507   for (region = region->outer;
508        region && parent_fndecl == NULL_TREE; region = region->outer)
509     switch (region->type)
510       {
511       case GIMPLE_OMP_PARALLEL:
512       case GIMPLE_OMP_TASK:
513       case GIMPLE_OMP_TEAMS:
514 	entry_stmt = last_stmt (region->entry);
515 	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516 	break;
517       case GIMPLE_OMP_TARGET:
518 	entry_stmt = last_stmt (region->entry);
519 	parent_fndecl
520 	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521 	break;
522       default:
523 	break;
524       }
525 
526   if (parent_fndecl == NULL_TREE)
527     parent_fndecl = current_function_decl;
528   DECL_CONTEXT (child_fndecl) = parent_fndecl;
529 
530   if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
531     {
532       tree b = BLOCK_SUPERCONTEXT (entry_block);
533       if (TREE_CODE (b) == BLOCK)
534         {
535 	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536 	  BLOCK_VARS (b) = child_fndecl;
537 	}
538     }
539 }
540 
541 /* Build the function calls to GOMP_parallel etc. to actually
542    generate the parallel operation.  REGION is the parallel region
543    being expanded.  BB is the block where the code is to be inserted.
544    WS_ARGS will be set if this is a call to a combined
545    parallel+workshare construct; it contains the list of additional
546    arguments needed by the workshare construct.  */
547 
548 static void
549 expand_parallel_call (struct omp_region *region, basic_block bb,
550 		      gomp_parallel *entry_stmt,
551 		      vec<tree, va_gc> *ws_args)
552 {
553   tree t, t1, t2, val, cond, c, clauses, flags;
554   gimple_stmt_iterator gsi;
555   gimple *stmt;
556   enum built_in_function start_ix;
557   int start_ix2;
558   location_t clause_loc;
559   vec<tree, va_gc> *args;
560 
561   clauses = gimple_omp_parallel_clauses (entry_stmt);
562 
563   /* Determine what flavor of GOMP_parallel we will be
564      emitting.  */
565   start_ix = BUILT_IN_GOMP_PARALLEL;
566   tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567   if (rtmp)
568     start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569   else if (is_combined_parallel (region))
570     {
571       switch (region->inner->type)
572 	{
573 	case GIMPLE_OMP_FOR:
574 	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575 	  switch (region->inner->sched_kind)
576 	    {
577 	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
578 	      /* For lastprivate(conditional:), our implementation
579 		 requires monotonic behavior.  */
580 	      if (region->inner->has_lastprivate_conditional != 0)
581 		start_ix2 = 3;
582 	      else if ((region->inner->sched_modifiers
583 		       & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584 		start_ix2 = 6;
585 	      else if ((region->inner->sched_modifiers
586 			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587 		start_ix2 = 7;
588 	      else
589 		start_ix2 = 3;
590 	      break;
591 	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592 	    case OMP_CLAUSE_SCHEDULE_GUIDED:
593 	      if ((region->inner->sched_modifiers
594 		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595 		  && !region->inner->has_lastprivate_conditional)
596 		{
597 		  start_ix2 = 3 + region->inner->sched_kind;
598 		  break;
599 		}
600 	      /* FALLTHRU */
601 	    default:
602 	      start_ix2 = region->inner->sched_kind;
603 	      break;
604 	    }
605 	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606 	  start_ix = (enum built_in_function) start_ix2;
607 	  break;
608 	case GIMPLE_OMP_SECTIONS:
609 	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610 	  break;
611 	default:
612 	  gcc_unreachable ();
613 	}
614     }
615 
616   /* By default, the value of NUM_THREADS is zero (selected at run time)
617      and there is no conditional.  */
618   cond = NULL_TREE;
619   val = build_int_cst (unsigned_type_node, 0);
620   flags = build_int_cst (unsigned_type_node, 0);
621 
622   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623   if (c)
624     cond = OMP_CLAUSE_IF_EXPR (c);
625 
626   c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627   if (c)
628     {
629       val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630       clause_loc = OMP_CLAUSE_LOCATION (c);
631     }
632   else
633     clause_loc = gimple_location (entry_stmt);
634 
635   c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636   if (c)
637     flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
638 
639   /* Ensure 'val' is of the correct type.  */
640   val = fold_convert_loc (clause_loc, unsigned_type_node, val);
641 
642   /* If we found the clause 'if (cond)', build either
643      (cond != 0) or (cond ? val : 1u).  */
644   if (cond)
645     {
646       cond = gimple_boolify (cond);
647 
648       if (integer_zerop (val))
649 	val = fold_build2_loc (clause_loc,
650 			   EQ_EXPR, unsigned_type_node, cond,
651 			   build_int_cst (TREE_TYPE (cond), 0));
652       else
653 	{
654 	  basic_block cond_bb, then_bb, else_bb;
655 	  edge e, e_then, e_else;
656 	  tree tmp_then, tmp_else, tmp_join, tmp_var;
657 
658 	  tmp_var = create_tmp_var (TREE_TYPE (val));
659 	  if (gimple_in_ssa_p (cfun))
660 	    {
661 	      tmp_then = make_ssa_name (tmp_var);
662 	      tmp_else = make_ssa_name (tmp_var);
663 	      tmp_join = make_ssa_name (tmp_var);
664 	    }
665 	  else
666 	    {
667 	      tmp_then = tmp_var;
668 	      tmp_else = tmp_var;
669 	      tmp_join = tmp_var;
670 	    }
671 
672 	  e = split_block_after_labels (bb);
673 	  cond_bb = e->src;
674 	  bb = e->dest;
675 	  remove_edge (e);
676 
677 	  then_bb = create_empty_bb (cond_bb);
678 	  else_bb = create_empty_bb (then_bb);
679 	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680 	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
681 
682 	  stmt = gimple_build_cond_empty (cond);
683 	  gsi = gsi_start_bb (cond_bb);
684 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
685 
686 	  gsi = gsi_start_bb (then_bb);
687 	  expand_omp_build_assign (&gsi, tmp_then, val, true);
688 
689 	  gsi = gsi_start_bb (else_bb);
690 	  expand_omp_build_assign (&gsi, tmp_else,
691 				   build_int_cst (unsigned_type_node, 1),
692 				   true);
693 
694 	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695 	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696 	  add_bb_to_loop (then_bb, cond_bb->loop_father);
697 	  add_bb_to_loop (else_bb, cond_bb->loop_father);
698 	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699 	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
700 
701 	  if (gimple_in_ssa_p (cfun))
702 	    {
703 	      gphi *phi = create_phi_node (tmp_join, bb);
704 	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705 	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
706 	    }
707 
708 	  val = tmp_join;
709 	}
710 
711       gsi = gsi_start_bb (bb);
712       val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713 				      false, GSI_CONTINUE_LINKING);
714     }
715 
716   gsi = gsi_last_nondebug_bb (bb);
717   t = gimple_omp_parallel_data_arg (entry_stmt);
718   if (t == NULL)
719     t1 = null_pointer_node;
720   else
721     t1 = build_fold_addr_expr (t);
722   tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723   t2 = build_fold_addr_expr (child_fndecl);
724 
725   vec_alloc (args, 4 + vec_safe_length (ws_args));
726   args->quick_push (t2);
727   args->quick_push (t1);
728   args->quick_push (val);
729   if (ws_args)
730     args->splice (*ws_args);
731   args->quick_push (flags);
732 
733   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734 			       builtin_decl_explicit (start_ix), args);
735 
736   if (rtmp)
737     {
738       tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739       t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740 		  fold_convert (type,
741 				fold_convert (pointer_sized_int_node, t)));
742     }
743   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744 			    false, GSI_CONTINUE_LINKING);
745 }
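
/* To make the emitted call concrete (a sketch; the exact temporaries
   vary):

	#pragma omp parallel num_threads (4)
	  body;

   becomes roughly

	.omp_data_o.1.i = i;
	GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 4, 0);

   where the third argument is VAL computed above and the last is the
   FLAGS word carrying the proc_bind kind.  */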
746 
747 /* Build the function call to GOMP_task to actually generate the
748    task operation.  BB is the block where the code is to be inserted.  */
749 
750 static void
751 expand_task_call (struct omp_region *region, basic_block bb,
752 		  gomp_task *entry_stmt)
753 {
754   tree t1, t2, t3;
755   gimple_stmt_iterator gsi;
756   location_t loc = gimple_location (entry_stmt);
757 
758   tree clauses = gimple_omp_task_clauses (entry_stmt);
759 
760   tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761   tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762   tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764   tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765   tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766   tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
767 
768   unsigned int iflags
769     = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770       | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771       | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
772 
773   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775   tree num_tasks = NULL_TREE;
776   bool ull = false;
777   if (taskloop_p)
778     {
779       gimple *g = last_stmt (region->outer->entry);
780       gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781 		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782       struct omp_for_data fd;
783       omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784       startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785       endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786 				OMP_CLAUSE__LOOPTEMP_);
787       startvar = OMP_CLAUSE_DECL (startvar);
788       endvar = OMP_CLAUSE_DECL (endvar);
789       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790       if (fd.loop.cond_code == LT_EXPR)
791 	iflags |= GOMP_TASK_FLAG_UP;
792       tree tclauses = gimple_omp_for_clauses (g);
793       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794       if (num_tasks)
795 	{
796 	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797 	    iflags |= GOMP_TASK_FLAG_STRICT;
798 	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
799 	}
800       else
801 	{
802 	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803 	  if (num_tasks)
804 	    {
805 	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806 	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807 		iflags |= GOMP_TASK_FLAG_STRICT;
808 	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
809 	    }
810 	  else
811 	    num_tasks = integer_zero_node;
812 	}
813       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814       if (ifc == NULL_TREE)
815 	iflags |= GOMP_TASK_FLAG_IF;
816       if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817 	iflags |= GOMP_TASK_FLAG_NOGROUP;
818       ull = fd.iter_type == long_long_unsigned_type_node;
819       if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820 	iflags |= GOMP_TASK_FLAG_REDUCTION;
821     }
822   else
823     {
824       if (priority)
825 	iflags |= GOMP_TASK_FLAG_PRIORITY;
826       if (detach)
827 	iflags |= GOMP_TASK_FLAG_DETACH;
828     }
829 
830   tree flags = build_int_cst (unsigned_type_node, iflags);
831 
832   tree cond = boolean_true_node;
833   if (ifc)
834     {
835       if (taskloop_p)
836 	{
837 	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838 	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839 			       build_int_cst (unsigned_type_node,
840 					      GOMP_TASK_FLAG_IF),
841 			       build_int_cst (unsigned_type_node, 0));
842 	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843 				   flags, t);
844 	}
845       else
846 	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
847     }
848 
849   if (finalc)
850     {
851       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853 			   build_int_cst (unsigned_type_node,
854 					  GOMP_TASK_FLAG_FINAL),
855 			   build_int_cst (unsigned_type_node, 0));
856       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
857     }
858   if (depend)
859     depend = OMP_CLAUSE_DECL (depend);
860   else
861     depend = build_int_cst (ptr_type_node, 0);
862   if (priority)
863     priority = fold_convert (integer_type_node,
864 			     OMP_CLAUSE_PRIORITY_EXPR (priority));
865   else
866     priority = integer_zero_node;
867 
868   gsi = gsi_last_nondebug_bb (bb);
869 
870   detach = (detach
871 	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872 	    : null_pointer_node);
873 
874   tree t = gimple_omp_task_data_arg (entry_stmt);
875   if (t == NULL)
876     t2 = null_pointer_node;
877   else
878     t2 = build_fold_addr_expr_loc (loc, t);
879   t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880   t = gimple_omp_task_copy_fn (entry_stmt);
881   if (t == NULL)
882     t3 = null_pointer_node;
883   else
884     t3 = build_fold_addr_expr_loc (loc, t);
885 
886   if (taskloop_p)
887     t = build_call_expr (ull
888 			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889 			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890 			 11, t1, t2, t3,
891 			 gimple_omp_task_arg_size (entry_stmt),
892 			 gimple_omp_task_arg_align (entry_stmt), flags,
893 			 num_tasks, priority, startvar, endvar, step);
894   else
895     t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896 			 10, t1, t2, t3,
897 			 gimple_omp_task_arg_size (entry_stmt),
898 			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
899 			 depend, priority, detach);
900 
901   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902 			    false, GSI_CONTINUE_LINKING);
903 }
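
/* Schematically (not verbatim output), a clause-less '#pragma omp task'
   ends up as a call along the lines of

	GOMP_task (foo._omp_fn.0, &.omp_data_o.1, 0,
		   sizeof (.omp_data_o.1), __alignof__ (.omp_data_o.1),
		   1, 0, 0, 0, 0);

   i.e. no copy function, COND true and zero FLAGS, DEPEND, PRIORITY
   and DETACH, while a taskloop instead calls GOMP_taskloop or
   GOMP_taskloop_ull with the extra NUM_TASKS, START, END and STEP
   arguments built above.  */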
904 
905 /* Build the function call to GOMP_taskwait_depend to actually
906    generate the taskwait operation.  BB is the block where the code
907    is to be inserted.  */
908 
909 static void
910 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
911 {
912   tree clauses = gimple_omp_task_clauses (entry_stmt);
913   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914   if (depend == NULL_TREE)
915     return;
916 
917   depend = OMP_CLAUSE_DECL (depend);
918 
919   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
920   tree t
921     = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
922 		       1, depend);
923 
924   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
925 			    false, GSI_CONTINUE_LINKING);
926 }
927 
928 /* Build the function call to GOMP_teams_reg to actually
929    generate the host teams operation.  BB is the block where the code
930    is to be inserted.  */
931 
932 static void
933 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
934 {
935   tree clauses = gimple_omp_teams_clauses (entry_stmt);
936   tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
937   if (num_teams == NULL_TREE)
938     num_teams = build_int_cst (unsigned_type_node, 0);
939   else
940     {
941       num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
942       num_teams = fold_convert (unsigned_type_node, num_teams);
943     }
944   tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
945   if (thread_limit == NULL_TREE)
946     thread_limit = build_int_cst (unsigned_type_node, 0);
947   else
948     {
949       thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
950       thread_limit = fold_convert (unsigned_type_node, thread_limit);
951     }
952 
953   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
954   tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
955   if (t == NULL)
956     t1 = null_pointer_node;
957   else
958     t1 = build_fold_addr_expr (t);
959   tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
960   tree t2 = build_fold_addr_expr (child_fndecl);
961 
962   vec<tree, va_gc> *args;
963   vec_alloc (args, 5);
964   args->quick_push (t2);
965   args->quick_push (t1);
966   args->quick_push (num_teams);
967   args->quick_push (thread_limit);
968   /* For future extensibility.  */
969   args->quick_push (build_zero_cst (unsigned_type_node));
970 
971   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
972 			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
973 			       args);
974 
975   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
976 			    false, GSI_CONTINUE_LINKING);
977 }
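
/* E.g. (illustrative), a host '#pragma omp teams num_teams (8)
   thread_limit (4)' becomes approximately

	GOMP_teams_reg (foo._omp_fn.0, &.omp_data_o.1, 8, 4, 0);

   with the trailing zero being the flags word reserved above for
   future extensibility.  */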
978 
979 /* Chain all the DECLs in V by their TREE_CHAIN fields.  */
980 
981 static tree
982 vec2chain (vec<tree, va_gc> *v)
983 {
984   tree chain = NULL_TREE, t;
985   unsigned ix;
986 
987   FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
988     {
989       DECL_CHAIN (t) = chain;
990       chain = t;
991     }
992 
993   return chain;
994 }
995 
996 /* Remove barriers in REGION->EXIT's block.  Note that this is only
997    valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
998    is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
999    left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1000    removed.  */
1001 
1002 static void
1003 remove_exit_barrier (struct omp_region *region)
1004 {
1005   gimple_stmt_iterator gsi;
1006   basic_block exit_bb;
1007   edge_iterator ei;
1008   edge e;
1009   gimple *stmt;
1010   int any_addressable_vars = -1;
1011 
1012   exit_bb = region->exit;
1013 
1014   /* If the parallel region doesn't return, we don't have a REGION->EXIT
1015      block at all.  */
1016   if (! exit_bb)
1017     return;
1018 
1019   /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
1020      workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
1021      statements that can appear in between are extremely limited -- no
1022      memory operations at all.  Here, we allow nothing at all, so the
1023      only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
1024   gsi = gsi_last_nondebug_bb (exit_bb);
1025   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1026   gsi_prev_nondebug (&gsi);
1027   if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1028     return;
1029 
1030   FOR_EACH_EDGE (e, ei, exit_bb->preds)
1031     {
1032       gsi = gsi_last_nondebug_bb (e->src);
1033       if (gsi_end_p (gsi))
1034 	continue;
1035       stmt = gsi_stmt (gsi);
1036       if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1037 	  && !gimple_omp_return_nowait_p (stmt))
1038 	{
1039 	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
1040 	     in many cases.  If there could be tasks queued, the barrier
1041 	     might be needed to let the tasks run before some local
1042 	     variable of the parallel that the task uses as shared
1043 	     runs out of scope.  The task can be spawned either
1044 	     from within current function (this would be easy to check)
1045 	     or from some function it calls and gets passed an address
1046 	     of such a variable.  */
1047 	  if (any_addressable_vars < 0)
1048 	    {
1049 	      gomp_parallel *parallel_stmt
1050 		= as_a <gomp_parallel *> (last_stmt (region->entry));
1051 	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1052 	      tree local_decls, block, decl;
1053 	      unsigned ix;
1054 
1055 	      any_addressable_vars = 0;
1056 	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1057 		if (TREE_ADDRESSABLE (decl))
1058 		  {
1059 		    any_addressable_vars = 1;
1060 		    break;
1061 		  }
1062 	      for (block = gimple_block (stmt);
1063 		   !any_addressable_vars
1064 		   && block
1065 		   && TREE_CODE (block) == BLOCK;
1066 		   block = BLOCK_SUPERCONTEXT (block))
1067 		{
1068 		  for (local_decls = BLOCK_VARS (block);
1069 		       local_decls;
1070 		       local_decls = DECL_CHAIN (local_decls))
1071 		    if (TREE_ADDRESSABLE (local_decls))
1072 		      {
1073 			any_addressable_vars = 1;
1074 			break;
1075 		      }
1076 		  if (block == gimple_block (parallel_stmt))
1077 		    break;
1078 		}
1079 	    }
1080 	  if (!any_addressable_vars)
1081 	    gimple_omp_return_set_nowait (stmt);
1082 	}
1083     }
1084 }
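
/* A sketch of the case that blocks the optimization (hypothetical
   source): if a task inside the parallel captures an addressable
   local as shared, e.g.

	#pragma omp parallel
	{
	  int local = 0;
	  #pragma omp task shared (local)
	    local++;
	  #pragma omp for
	  for (...)
	    ...
	}

   then the workshare's barrier cannot be turned into a nowait return:
   it may be what lets the queued task finish while 'local' is still
   in scope.  */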
1085 
1086 static void
1087 remove_exit_barriers (struct omp_region *region)
1088 {
1089   if (region->type == GIMPLE_OMP_PARALLEL)
1090     remove_exit_barrier (region);
1091 
1092   if (region->inner)
1093     {
1094       region = region->inner;
1095       remove_exit_barriers (region);
1096       while (region->next)
1097 	{
1098 	  region = region->next;
1099 	  remove_exit_barriers (region);
1100 	}
1101     }
1102 }
1103 
1104 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1105    calls.  These can't be declared as const functions, but
1106    within one parallel body they are constant, so they can be
1107    transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1108    which are declared const.  Similarly for the task body, except
1109    that in an untied task omp_get_thread_num () can change at any
1110    task scheduling point.  */
1111 
1112 static void
1113 optimize_omp_library_calls (gimple *entry_stmt)
1114 {
1115   basic_block bb;
1116   gimple_stmt_iterator gsi;
1117   tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1118   tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1119   tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1120   tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1121   bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1122 		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1123 					  OMP_CLAUSE_UNTIED) != NULL);
1124 
1125   FOR_EACH_BB_FN (bb, cfun)
1126     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1127       {
1128 	gimple *call = gsi_stmt (gsi);
1129 	tree decl;
1130 
1131 	if (is_gimple_call (call)
1132 	    && (decl = gimple_call_fndecl (call))
1133 	    && DECL_EXTERNAL (decl)
1134 	    && TREE_PUBLIC (decl)
1135 	    && DECL_INITIAL (decl) == NULL)
1136 	  {
1137 	    tree built_in;
1138 
1139 	    if (DECL_NAME (decl) == thr_num_id)
1140 	      {
1141 		/* In #pragma omp task untied omp_get_thread_num () can change
1142 		   during the execution of the task region.  */
1143 		if (untied_task)
1144 		  continue;
1145 		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1146 	      }
1147 	    else if (DECL_NAME (decl) == num_thr_id)
1148 	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1149 	    else
1150 	      continue;
1151 
1152 	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1153 		|| gimple_call_num_args (call) != 0)
1154 	      continue;
1155 
1156 	    if (flag_exceptions && !TREE_NOTHROW (decl))
1157 	      continue;
1158 
1159 	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1160 		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1161 					TREE_TYPE (TREE_TYPE (built_in))))
1162 	      continue;
1163 
1164 	    gimple_call_set_fndecl (call, built_in);
1165 	  }
1166       }
1167 }
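
/* In effect (a schematic before/after), inside an outlined parallel
   body

	i = omp_get_thread_num ();

   is rewritten to

	i = __builtin_omp_get_thread_num ();

   which is declared const and so can be CSEd by later passes, unlike
   the external library call.  */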
1168 
1169 /* Callback used by expand_omp_build_assign and expand_omp_build_cond.
1170    Return non-NULL if *tp needs to be regimplified.  */
1171 
1172 static tree
1173 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1174 {
1175   tree t = *tp;
1176 
1177   /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1178   if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1179     return t;
1180 
1181   if (TREE_CODE (t) == ADDR_EXPR)
1182     recompute_tree_invariant_for_addr_expr (t);
1183 
1184   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1185   return NULL_TREE;
1186 }
1187 
1188 /* Prepend or append TO = FROM assignment before or after *GSI_P.  */
1189 
1190 static void
1191 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1192 			 bool after)
1193 {
1194   bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1195   from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1196 				   !after, after ? GSI_CONTINUE_LINKING
1197 						 : GSI_SAME_STMT);
1198   gimple *stmt = gimple_build_assign (to, from);
1199   if (after)
1200     gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1201   else
1202     gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1203   if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1204       || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1205     {
1206       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207       gimple_regimplify_operands (stmt, &gsi);
1208     }
1209 }
1210 
1211 /* Prepend or append LHS CODE RHS condition before or after *GSI_P.  */
1212 
1213 static gcond *
1214 expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
1215 		       tree lhs, tree rhs, bool after = false)
1216 {
1217   gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
1218   if (after)
1219     gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
1220   else
1221     gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
1222   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
1223 		 NULL, NULL)
1224       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
1225 		    NULL, NULL))
1226     {
1227       gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
1228       gimple_regimplify_operands (cond_stmt, &gsi);
1229     }
1230   return cond_stmt;
1231 }
1232 
1233 /* Expand the OpenMP parallel or task directive starting at REGION.  */
1234 
1235 static void
1236 expand_omp_taskreg (struct omp_region *region)
1237 {
1238   basic_block entry_bb, exit_bb, new_bb;
1239   struct function *child_cfun;
1240   tree child_fn, block, t;
1241   gimple_stmt_iterator gsi;
1242   gimple *entry_stmt, *stmt;
1243   edge e;
1244   vec<tree, va_gc> *ws_args;
1245 
1246   entry_stmt = last_stmt (region->entry);
1247   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1248       && gimple_omp_task_taskwait_p (entry_stmt))
1249     {
1250       new_bb = region->entry;
1251       gsi = gsi_last_nondebug_bb (region->entry);
1252       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1253       gsi_remove (&gsi, true);
1254       expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1255       return;
1256     }
1257 
1258   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1259   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1260 
1261   entry_bb = region->entry;
1262   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1263     exit_bb = region->cont;
1264   else
1265     exit_bb = region->exit;
1266 
1267   if (is_combined_parallel (region))
1268     ws_args = region->ws_args;
1269   else
1270     ws_args = NULL;
1271 
1272   if (child_cfun->cfg)
1273     {
1274       /* Due to inlining, it may happen that we have already outlined
1275 	 the region, in which case all we need to do is make the
1276 	 sub-graph unreachable and emit the parallel call.  */
1277       edge entry_succ_e, exit_succ_e;
1278 
1279       entry_succ_e = single_succ_edge (entry_bb);
1280 
1281       gsi = gsi_last_nondebug_bb (entry_bb);
1282       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1283 		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1284 		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1285       gsi_remove (&gsi, true);
1286 
1287       new_bb = entry_bb;
1288       if (exit_bb)
1289 	{
1290 	  exit_succ_e = single_succ_edge (exit_bb);
1291 	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1292 	}
1293       remove_edge_and_dominated_blocks (entry_succ_e);
1294     }
1295   else
1296     {
1297       unsigned srcidx, dstidx, num;
1298 
1299       /* If the parallel region needs data sent from the parent
1300 	 function, then the very first statement (except possible
1301 	 tree profile counter updates) of the parallel body
1302 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1303 	 &.OMP_DATA_O is passed as an argument to the child function,
1304 	 we need to replace it with the argument as seen by the child
1305 	 function.
1306 
1307 	 In most cases, this will end up being the identity assignment
1308 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1309 	 a function call that has been inlined, the original PARM_DECL
1310 	 .OMP_DATA_I may have been converted into a different local
1311 	 variable, in which case we need to keep the assignment.  */
1312       if (gimple_omp_taskreg_data_arg (entry_stmt))
1313 	{
1314 	  basic_block entry_succ_bb
1315 	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
1316 				       : FALLTHRU_EDGE (entry_bb)->dest;
1317 	  tree arg;
1318 	  gimple *parcopy_stmt = NULL;
1319 
1320 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1321 	    {
1322 	      gimple *stmt;
1323 
1324 	      gcc_assert (!gsi_end_p (gsi));
1325 	      stmt = gsi_stmt (gsi);
1326 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
1327 		continue;
1328 
1329 	      if (gimple_num_ops (stmt) == 2)
1330 		{
1331 		  tree arg = gimple_assign_rhs1 (stmt);
1332 
1333 		  /* We're ignoring the subcode because we're
1334 		     effectively doing a STRIP_NOPS.  */
1335 
1336 		  if (TREE_CODE (arg) == ADDR_EXPR
1337 		      && (TREE_OPERAND (arg, 0)
1338 			  == gimple_omp_taskreg_data_arg (entry_stmt)))
1339 		    {
1340 		      parcopy_stmt = stmt;
1341 		      break;
1342 		    }
1343 		}
1344 	    }
1345 
1346 	  gcc_assert (parcopy_stmt != NULL);
1347 	  arg = DECL_ARGUMENTS (child_fn);
1348 
1349 	  if (!gimple_in_ssa_p (cfun))
1350 	    {
1351 	      if (gimple_assign_lhs (parcopy_stmt) == arg)
1352 		gsi_remove (&gsi, true);
1353 	      else
1354 		{
1355 		  /* ?? Is setting the subcode really necessary ??  */
1356 		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1357 		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
1358 		}
1359 	    }
1360 	  else
1361 	    {
1362 	      tree lhs = gimple_assign_lhs (parcopy_stmt);
1363 	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
1364 	      /* We'd like to set the rhs to the default def in the child_fn,
1365 		 but it's too early to create ssa names in the child_fn.
1366 		 Instead, we set the rhs to the parm.  In
1367 		 move_sese_region_to_fn, we introduce a default def for the
1368 		 parm, map the parm to its default def, and once we encounter
1369 		 this stmt, replace the parm with the default def.  */
1370 	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
1371 	      update_stmt (parcopy_stmt);
1372 	    }
1373 	}
1374 
1375       /* Declare local variables needed in CHILD_CFUN.  */
1376       block = DECL_INITIAL (child_fn);
1377       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1378       /* The gimplifier could record temporaries in parallel/task block
1379 	 rather than in containing function's local_decls chain,
1380 	 which would mean cgraph missed finalizing them.  Do it now.  */
1381       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1382 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1383 	  varpool_node::finalize_decl (t);
1384       DECL_SAVED_TREE (child_fn) = NULL;
1385       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1386       gimple_set_body (child_fn, NULL);
1387       TREE_USED (block) = 1;
1388 
1389       /* Reset DECL_CONTEXT on function arguments.  */
1390       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1391 	DECL_CONTEXT (t) = child_fn;
1392 
1393       /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1394 	 so that it can be moved to the child function.  */
1395       gsi = gsi_last_nondebug_bb (entry_bb);
1396       stmt = gsi_stmt (gsi);
1397       gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1398 			   || gimple_code (stmt) == GIMPLE_OMP_TASK
1399 			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1400       e = split_block (entry_bb, stmt);
1401       gsi_remove (&gsi, true);
1402       entry_bb = e->dest;
1403       edge e2 = NULL;
1404       if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1405 	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1406       else
1407 	{
1408 	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1409 	  gcc_assert (e2->dest == region->exit);
1410 	  remove_edge (BRANCH_EDGE (entry_bb));
1411 	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1412 	  gsi = gsi_last_nondebug_bb (region->exit);
1413 	  gcc_assert (!gsi_end_p (gsi)
1414 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1415 	  gsi_remove (&gsi, true);
1416 	}
1417 
1418       /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1419       if (exit_bb)
1420 	{
1421 	  gsi = gsi_last_nondebug_bb (exit_bb);
1422 	  gcc_assert (!gsi_end_p (gsi)
1423 		      && (gimple_code (gsi_stmt (gsi))
1424 			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1425 	  stmt = gimple_build_return (NULL);
1426 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1427 	  gsi_remove (&gsi, true);
1428 	}
1429 
1430       /* Move the parallel region into CHILD_CFUN.  */
1431 
1432       if (gimple_in_ssa_p (cfun))
1433 	{
1434 	  init_tree_ssa (child_cfun);
1435 	  init_ssa_operands (child_cfun);
1436 	  child_cfun->gimple_df->in_ssa_p = true;
1437 	  block = NULL_TREE;
1438 	}
1439       else
1440 	block = gimple_block (entry_stmt);
1441 
1442       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1443       if (exit_bb)
1444 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1445       if (e2)
1446 	{
1447 	  basic_block dest_bb = e2->dest;
1448 	  if (!exit_bb)
1449 	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1450 	  remove_edge (e2);
1451 	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1452 	}
1453       /* When the OMP expansion process cannot guarantee an up-to-date
1454 	 loop tree, arrange for the child function to fix up its loops.  */
1455       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1456 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1457 
1458       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1459       num = vec_safe_length (child_cfun->local_decls);
1460       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1461 	{
1462 	  t = (*child_cfun->local_decls)[srcidx];
1463 	  if (DECL_CONTEXT (t) == cfun->decl)
1464 	    continue;
1465 	  if (srcidx != dstidx)
1466 	    (*child_cfun->local_decls)[dstidx] = t;
1467 	  dstidx++;
1468 	}
1469       if (dstidx != num)
1470 	vec_safe_truncate (child_cfun->local_decls, dstidx);
1471 
1472       /* Inform the callgraph about the new function.  */
1473       child_cfun->curr_properties = cfun->curr_properties;
1474       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1475       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1476       cgraph_node *node = cgraph_node::get_create (child_fn);
1477       node->parallelized_function = 1;
1478       cgraph_node::add_new_function (child_fn, true);
1479 
1480       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1481 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1482 
1483       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1484 	 fixed in a following pass.  */
1485       push_cfun (child_cfun);
1486       if (need_asm)
1487 	assign_assembler_name_if_needed (child_fn);
1488 
1489       if (optimize)
1490 	optimize_omp_library_calls (entry_stmt);
1491       update_max_bb_count ();
1492       cgraph_edge::rebuild_edges ();
1493 
1494       /* Some EH regions might become dead, see PR34608.  If
1495 	 pass_cleanup_cfg isn't the first pass to happen with the
1496 	 new child, these dead EH edges might cause problems.
1497 	 Clean them up now.  */
1498       if (flag_exceptions)
1499 	{
1500 	  basic_block bb;
1501 	  bool changed = false;
1502 
1503 	  FOR_EACH_BB_FN (bb, cfun)
1504 	    changed |= gimple_purge_dead_eh_edges (bb);
1505 	  if (changed)
1506 	    cleanup_tree_cfg ();
1507 	}
1508       if (gimple_in_ssa_p (cfun))
1509 	update_ssa (TODO_update_ssa);
1510       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1511 	verify_loop_structure ();
1512       pop_cfun ();
1513 
1514       if (dump_file && !gimple_in_ssa_p (cfun))
1515 	{
1516 	  omp_any_child_fn_dumped = true;
1517 	  dump_function_header (dump_file, child_fn, dump_flags);
1518 	  dump_function_to_file (child_fn, dump_file, dump_flags);
1519 	}
1520     }
1521 
1522   adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1523 
1524   if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1525     expand_parallel_call (region, new_bb,
1526 			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1527   else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1528     expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1529   else
1530     expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1531   if (gimple_in_ssa_p (cfun))
1532     update_ssa (TODO_update_ssa_only_virtuals);
1533 }
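
/* Putting the pieces together (a schematic view, not literal dump
   output): after this function runs on a '#pragma omp parallel'
   region, the parent function is left with just

	.omp_data_o.1.i = i;
	GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 0, 0);

   while the former region body now lives in the outlined child

	void foo._omp_fn.0 (struct .omp_data_s.0 * .omp_data_i)
	{
	  ... body, with GIMPLE_OMP_RETURN turned into a plain return ...
	}

   which has been registered with the callgraph above.  */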
1534 
1535 /* Information about members of an OpenACC collapsed loop nest.  */
1536 
1537 struct oacc_collapse
1538 {
1539   tree base;  /* Base value.  */
1540   tree iters; /* Number of steps.  */
1541   tree step;  /* Step size.  */
1542   tree tile;  /* Tile increment (if tiled).  */
1543   tree outer; /* Tile iterator var.  */
1544 };
1545 
1546 /* Helper for expand_oacc_for.  Determine collapsed loop information.
1547    Fill in COUNTS array.  Emit any initialization code before GSI.
1548    Return the calculated outer loop bound of BOUND_TYPE.  */
1549 
1550 static tree
1551 expand_oacc_collapse_init (const struct omp_for_data *fd,
1552 			   gimple_stmt_iterator *gsi,
1553 			   oacc_collapse *counts, tree diff_type,
1554 			   tree bound_type, location_t loc)
1555 {
1556   tree tiling = fd->tiling;
1557   tree total = build_int_cst (bound_type, 1);
1558   int ix;
1559 
1560   gcc_assert (integer_onep (fd->loop.step));
1561   gcc_assert (integer_zerop (fd->loop.n1));
1562 
1563   /* When tiling, the first operand of the tile clause applies to the
1564      innermost loop, and we work outwards from there.  Seems
1565      backwards, but whatever.  */
1566   for (ix = fd->collapse; ix--;)
1567     {
1568       const omp_for_data_loop *loop = &fd->loops[ix];
1569 
1570       tree iter_type = TREE_TYPE (loop->v);
1571       tree plus_type = iter_type;
1572 
1573       gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1574 
1575       if (POINTER_TYPE_P (iter_type))
1576 	plus_type = sizetype;
1577 
1578       if (tiling)
1579 	{
1580 	  tree num = build_int_cst (integer_type_node, fd->collapse);
1581 	  tree loop_no = build_int_cst (integer_type_node, ix);
1582 	  tree tile = TREE_VALUE (tiling);
1583 	  gcall *call
1584 	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1585 					  /* gwv-outer=*/integer_zero_node,
1586 					  /* gwv-inner=*/integer_zero_node);
1587 
1588 	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1589 	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1590 	  gimple_call_set_lhs (call, counts[ix].tile);
1591 	  gimple_set_location (call, loc);
1592 	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1593 
1594 	  tiling = TREE_CHAIN (tiling);
1595 	}
1596       else
1597 	{
1598 	  counts[ix].tile = NULL;
1599 	  counts[ix].outer = loop->v;
1600 	}
1601 
1602       tree b = loop->n1;
1603       tree e = loop->n2;
1604       tree s = loop->step;
1605       bool up = loop->cond_code == LT_EXPR;
1606       tree dir = build_int_cst (diff_type, up ? +1 : -1);
1607       bool negating;
1608       tree expr;
1609 
1610       b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1611 				    true, GSI_SAME_STMT);
1612       e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1613 				    true, GSI_SAME_STMT);
1614 
1615       /* Convert the step, avoiding possible unsigned->signed overflow.  */
1616       negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1617       if (negating)
1618 	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1619       s = fold_convert (diff_type, s);
1620       if (negating)
1621 	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1622       s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1623 				    true, GSI_SAME_STMT);
1624 
1625       /* Determine the range, avoiding possible unsigned->signed overflow.  */
1626       negating = !up && TYPE_UNSIGNED (iter_type);
1627       expr = fold_build2 (MINUS_EXPR, plus_type,
1628 			  fold_convert (plus_type, negating ? b : e),
1629 			  fold_convert (plus_type, negating ? e : b));
1630       expr = fold_convert (diff_type, expr);
1631       if (negating)
1632 	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1633       tree range = force_gimple_operand_gsi
1634 	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1635 
1636       /* Determine number of iterations.  */
1637       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1638       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1639       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1640 
1641       tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1642 					     true, GSI_SAME_STMT);
1643 
1644       counts[ix].base = b;
1645       counts[ix].iters = iters;
1646       counts[ix].step = s;
1647 
1648       total = fold_build2 (MULT_EXPR, bound_type, total,
1649 			   fold_convert (bound_type, iters));
1650     }
1651 
1652   return total;
1653 }
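
/* As a worked example (illustrative values, not taken from any caller):
   for a loop normalized to B = p, E = p + 10, S = 3 with cond_code
   LT_EXPR, the sequence above computes RANGE = 10 and then
	ITERS = (RANGE - DIR + S) / S = (10 - 1 + 3) / 3 = 4,
   matching the four iterations at p, p + 3, p + 6 and p + 9.  */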

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar,
			   tree diff_type)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  fold_convert (diff_type, collapse->step));
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}
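
/* Illustrative decomposition (hypothetical counts): with a collapse of
   two loops where the innermost loop has counts[1].iters == 5, a
   combined index IVAR in [0, 20) is unpacked innermost-first as
	v1 = base1 + (IVAR % 5) * step1;  IVAR = IVAR / 5;
	v0 = base0 + IVAR * step0;
   which is exactly the TRUNC_MOD_EXPR / TRUNC_DIV_EXPR sequence built
   by the loop above.  */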

/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  For loop nests with non-rectangular
   loops, do this only for the rectangular loops.  Then pick
   the loops which reference outer vars in their bound expressions
   and the loops which they refer to and for this sub-nest compute
   number of iterations.  For triangular loops use Faulhaber's formula,
   otherwise as a fallback, compute by iterating the loops.
   If e.g. the sub-nest is
	for (I = N11; I COND1 N12; I += STEP1)
	for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
	for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
   do:
	COUNT = 0;
	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
	for (tmpj = M21 * tmpi + N21;
	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
	  {
	    int tmpk1 = M31 * tmpj + N31;
	    int tmpk2 = M32 * tmpj + N32;
	    if (tmpk1 COND3 tmpk2)
	      {
		if (COND3 is <)
		  adj = STEP3 - 1;
		else
		  adj = STEP3 + 1;
		COUNT += (adj + tmpk2 - tmpk1) / STEP3;
	      }
	  }
   and finally multiply the counts of the rectangular loops not
   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
   store number of iterations of the loops from fd->first_nonrect
   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
   by the counts of rectangular loops not referenced in any non-rectangular
   loops sandwiched in between those.  */
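
/* Concrete illustration (not part of the transformation itself): for the
   triangular sub-nest
	for (i = 0; i < N; i++)
	  for (j = 0; j < i; j++)
   Faulhaber's formula yields the closed form COUNT = N * (N - 1) / 2,
   avoiding the runtime double loop of the fallback sketched above.  */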

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

   bool zero3, zero2, zero1, zero;

   zero3 = N32 c3 N31;
   count3 = (N32 - N31) /[cl] STEP3;
   zero2 = N22 c2 N21;
   count2 = (N22 - N21) /[cl] STEP2;
   zero1 = N12 c1 N11;
   count1 = (N12 - N11) /[cl] STEP1;
   zero = zero3 || zero2 || zero1;
   count = count1 * count2 * count3;
   if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect zero to be false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.  */

static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      if (fd->non_rect
	  && fd->last_nonrect == fd->first_nonrect + 1
	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
	{
	  tree c[4];
	  for (i = 0; i < 4; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      c[i] = OMP_CLAUSE_DECL (innerc);
	    }
	  counts[0] = c[0];
	  fd->first_inner_iterations = c[1];
	  fd->factor = c[2];
	  fd->adjn1 = c[3];
	}
      return;
    }

  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  bool rect_count_seen = false;
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      if (fd->non_rect)
	{
	  /* Skip loops that use outer iterators in their expressions
	     during this phase.  */
	  if (fd->loops[i].m1 || fd->loops[i].m2)
	    {
	      counts[i] = build_zero_cst (type);
	      continue;
	    }
	}
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
					     n1, n2);
	  e = split_block (entry_bb, cond_stmt);
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::very_unlikely ();
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = ne->probability.invert ();
	  if (l2_dom_bb == NULL)
	    l2_dom_bb = entry_bb;
	  entry_bb = e->dest;
	  *gsi = gsi_last_nondebug_bb (entry_bb);
	}

      if (POINTER_TYPE_P (itype))
	itype = signed_type_for (itype);
      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
				 ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
		       fold_convert (itype, fd->loops[i].step), t);
      t = fold_build2 (PLUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n1));
      /* ?? We could probably use CEIL_DIV_EXPR instead of
	 TRUNC_DIV_EXPR and adjust by hand.  Unless we can't
	 generate the same code in the end because generically we
	 don't know that the values involved must be negative for
	 GT??  */
      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype,
						    fd->loops[i].step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, fd->loops[i].step));
      t = fold_convert (type, t);
      if (TREE_CODE (t) == INTEGER_CST)
	counts[i] = t;
      else
	{
	  if (i < fd->collapse || i != first_zero_iter2)
	    counts[i] = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, counts[i], t);
	}
      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
	{
	  if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
	    continue;
	  if (!rect_count_seen)
	    {
	      t = counts[i];
	      rect_count_seen = true;
	    }
	  else
	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
	  expand_omp_build_assign (gsi, fd->loop.n2, t);
	}
    }
  if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
    {
      gcc_assert (fd->last_nonrect != -1);

      counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
      expand_omp_build_assign (gsi, counts[fd->last_nonrect],
			       build_zero_cst (type));
      for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
	if (fd->loops[i].m1
	    || fd->loops[i].m2
	    || fd->loops[i].non_rect_referenced)
	  break;
      if (i == fd->last_nonrect
	  && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
	  && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
	{
	  int o = fd->first_nonrect;
	  tree itype = TREE_TYPE (fd->loops[o].v);
	  tree n1o = create_tmp_reg (itype, ".n1o");
	  t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
	  expand_omp_build_assign (gsi, n1o, t);
	  tree n2o = create_tmp_reg (itype, ".n2o");
	  t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
	  expand_omp_build_assign (gsi, n2o, t);
	  if (fd->loops[i].m1 && fd->loops[i].m2)
	    t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
			     unshare_expr (fd->loops[i].m1));
	  else if (fd->loops[i].m1)
	    t = fold_unary (NEGATE_EXPR, itype,
			    unshare_expr (fd->loops[i].m1));
	  else
	    t = unshare_expr (fd->loops[i].m2);
	  tree m2minusm1
	    = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);

	  gimple_stmt_iterator gsi2 = *gsi;
	  gsi_prev (&gsi2);
	  e = split_block (entry_bb, gsi_stmt (gsi2));
	  e = split_block (e->dest, (gimple *) NULL);
	  basic_block bb1 = e->src;
	  entry_bb = e->dest;
	  *gsi = gsi_after_labels (entry_bb);

	  gsi2 = gsi_after_labels (bb1);
	  tree ostep = fold_convert (itype, fd->loops[o].step);
	  t = build_int_cst (itype, (fd->loops[o].cond_code
				     == LT_EXPR ? -1 : 1));
	  t = fold_build2 (PLUS_EXPR, itype, ostep, t);
	  t = fold_build2 (PLUS_EXPR, itype, t, n2o);
	  t = fold_build2 (MINUS_EXPR, itype, t, n1o);
	  if (TYPE_UNSIGNED (itype)
	      && fd->loops[o].cond_code == GT_EXPR)
	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
			     fold_build1 (NEGATE_EXPR, itype, t),
			     fold_build1 (NEGATE_EXPR, itype, ostep));
	  else
	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
	  tree outer_niters
	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  t = fold_build2 (MINUS_EXPR, itype, outer_niters,
			   build_one_cst (itype));
	  t = fold_build2 (MULT_EXPR, itype, t, ostep);
	  t = fold_build2 (PLUS_EXPR, itype, n1o, t);
	  tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
						true, GSI_SAME_STMT);
	  tree n1, n2, n1e, n2e;
	  t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  if (fd->loops[i].m1)
	    {
	      n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
	      n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
	      n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
	    }
	  else
	    n1 = t;
	  n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  if (fd->loops[i].m2)
	    {
	      n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
	      n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
	      n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
	    }
	  else
	    n2 = t;
	  n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  if (fd->loops[i].m1)
	    {
	      n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
	      n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
	      n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
	    }
	  else
	    n1e = t;
	  n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
					  true, GSI_SAME_STMT);
	  t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  if (fd->loops[i].m2)
	    {
	      n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
	      n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
	      n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
	    }
	  else
	    n2e = t;
	  n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
					  true, GSI_SAME_STMT);
	  gcond *cond_stmt
	    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
				     n1, n2);
	  e = split_block (bb1, cond_stmt);
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = profile_probability::likely ().guessed ();
	  basic_block bb2 = e->dest;
	  gsi2 = gsi_after_labels (bb2);

	  cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
					     n1e, n2e);
	  e = split_block (bb2, cond_stmt);
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = profile_probability::likely ().guessed ();
	  gsi2 = gsi_after_labels (e->dest);

	  tree step = fold_convert (itype, fd->loops[i].step);
	  t = build_int_cst (itype, (fd->loops[i].cond_code
				     == LT_EXPR ? -1 : 1));
	  t = fold_build2 (PLUS_EXPR, itype, step, t);
	  t = fold_build2 (PLUS_EXPR, itype, t, n2);
	  t = fold_build2 (MINUS_EXPR, itype, t, n1);
	  if (TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
			     fold_build1 (NEGATE_EXPR, itype, t),
			     fold_build1 (NEGATE_EXPR, itype, step));
	  else
	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
	  tree first_inner_iterations
	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
	  if (TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
			     fold_build1 (NEGATE_EXPR, itype, t),
			     fold_build1 (NEGATE_EXPR, itype, step));
	  else
	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
	  tree factor
	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
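	  /* The iteration count of the whole sub-nest is the sum of an
	     arithmetic progression:
		OUTER_NITERS * FIRST_INNER_ITERATIONS
		+ FACTOR * (OUTER_NITERS * (OUTER_NITERS - 1) / 2)
	     where FACTOR is how much the inner iteration count changes
	     per outer iteration.  */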
	  t = fold_build2 (MINUS_EXPR, itype, outer_niters,
			   build_one_cst (itype));
	  t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
	  t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
	  t = fold_build2 (MULT_EXPR, itype, factor, t);
	  t = fold_build2 (PLUS_EXPR, itype,
			   fold_build2 (MULT_EXPR, itype, outer_niters,
					first_inner_iterations), t);
	  expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
				   fold_convert (type, t));

	  basic_block bb3 = create_empty_bb (bb1);
	  add_bb_to_loop (bb3, bb1->loop_father);

	  e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
	  e->probability = profile_probability::unlikely ().guessed ();

	  gsi2 = gsi_after_labels (bb3);
	  cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
					     n1e, n2e);
	  e = split_block (bb3, cond_stmt);
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = profile_probability::likely ().guessed ();
	  basic_block bb4 = e->dest;

	  ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
	  ne->probability = e->probability.invert ();

	  basic_block bb5 = create_empty_bb (bb2);
	  add_bb_to_loop (bb5, bb2->loop_father);

	  ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::unlikely ().guessed ();

	  for (int j = 0; j < 2; j++)
	    {
	      gsi2 = gsi_after_labels (j ? bb5 : bb4);
	      t = fold_build2 (MINUS_EXPR, itype,
			       unshare_expr (fd->loops[i].n1),
			       unshare_expr (fd->loops[i].n2));
	      t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
	      tree tem
		= force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	      t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
	      t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
	      t = fold_build2 (MINUS_EXPR, itype, tem, t);
	      tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					      true, GSI_SAME_STMT);
	      t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	      if (fd->loops[i].m1)
		{
		  n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
		  n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
		}
	      else
		n1 = t;
	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	      if (fd->loops[i].m2)
		{
		  n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
		  n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
		}
	      else
		n2 = t;
	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);

	      cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
						 n1, n2);
	      e = split_block (gsi_bb (gsi2), cond_stmt);
	      e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
	      e->probability = profile_probability::unlikely ().guessed ();
	      ne = make_edge (e->src, bb1,
			      j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
	      ne->probability = e->probability.invert ();
	      gsi2 = gsi_after_labels (e->dest);

	      t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
	      expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);

	      make_edge (e->dest, bb1, EDGE_FALLTHRU);
	    }

	  set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
	  set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);

	  if (fd->first_nonrect + 1 == fd->last_nonrect)
	    {
	      fd->first_inner_iterations = first_inner_iterations;
	      fd->factor = factor;
	      fd->adjn1 = n1o;
	    }
	}
      else
	{
	  /* Fallback implementation.  Evaluate the loops with m1/m2
	     non-NULL as well as their outer loops at runtime using temporaries
	     instead of the original iteration variables, and in the
	     body just bump the counter.  */
	  gimple_stmt_iterator gsi2 = *gsi;
	  gsi_prev (&gsi2);
	  e = split_block (entry_bb, gsi_stmt (gsi2));
	  e = split_block (e->dest, (gimple *) NULL);
	  basic_block cur_bb = e->src;
	  basic_block next_bb = e->dest;
	  entry_bb = e->dest;
	  *gsi = gsi_after_labels (entry_bb);

	  tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
	  memset (vs, 0, fd->last_nonrect * sizeof (tree));

	  for (i = 0; i <= fd->last_nonrect; i++)
	    {
	      if (fd->loops[i].m1 == NULL_TREE
		  && fd->loops[i].m2 == NULL_TREE
		  && !fd->loops[i].non_rect_referenced)
		continue;

	      tree itype = TREE_TYPE (fd->loops[i].v);

	      gsi2 = gsi_after_labels (cur_bb);
	      tree n1, n2;
	      t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	      if (fd->loops[i].m1 == NULL_TREE)
		n1 = t;
	      else if (POINTER_TYPE_P (itype))
		{
		  gcc_assert (integer_onep (fd->loops[i].m1));
		  t = fold_convert (sizetype,
				    unshare_expr (fd->loops[i].n1));
		  n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
		}
	      else
		{
		  n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
		  n1 = fold_build2 (MULT_EXPR, itype,
				    vs[i - fd->loops[i].outer], n1);
		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
		}
	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      if (i < fd->last_nonrect)
		{
		  vs[i] = create_tmp_reg (itype, ".it");
		  expand_omp_build_assign (&gsi2, vs[i], n1);
		}
	      t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	      if (fd->loops[i].m2 == NULL_TREE)
		n2 = t;
	      else if (POINTER_TYPE_P (itype))
		{
		  gcc_assert (integer_onep (fd->loops[i].m2));
		  t = fold_convert (sizetype,
				    unshare_expr (fd->loops[i].n2));
		  n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
		}
	      else
		{
		  n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
		  n2 = fold_build2 (MULT_EXPR, itype,
				    vs[i - fd->loops[i].outer], n2);
		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
		}
	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      if (i == fd->last_nonrect)
		{
		  gcond *cond_stmt
		    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
					     n1, n2);
		  e = split_block (cur_bb, cond_stmt);
		  e->flags = EDGE_TRUE_VALUE;
		  ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
		  e->probability = profile_probability::likely ().guessed ();
		  ne->probability = e->probability.invert ();
		  gsi2 = gsi_after_labels (e->dest);

		  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
					     ? -1 : 1));
		  t = fold_build2 (PLUS_EXPR, itype,
				   fold_convert (itype, fd->loops[i].step), t);
		  t = fold_build2 (PLUS_EXPR, itype, t,
				   fold_convert (itype, n2));
		  t = fold_build2 (MINUS_EXPR, itype, t,
				   fold_convert (itype, n1));
		  tree step = fold_convert (itype, fd->loops[i].step);
		  if (TYPE_UNSIGNED (itype)
		      && fd->loops[i].cond_code == GT_EXPR)
		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
				     fold_build1 (NEGATE_EXPR, itype, t),
				     fold_build1 (NEGATE_EXPR, itype, step));
		  else
		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
		  t = fold_convert (type, t);
		  t = fold_build2 (PLUS_EXPR, type,
				   counts[fd->last_nonrect], t);
		  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
						true, GSI_SAME_STMT);
		  expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
		  e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
		  set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
		  break;
		}
	      e = split_block (cur_bb, last_stmt (cur_bb));

	      basic_block new_cur_bb = create_empty_bb (cur_bb);
	      add_bb_to_loop (new_cur_bb, cur_bb->loop_father);

	      gsi2 = gsi_after_labels (e->dest);
	      tree step = fold_convert (itype,
					unshare_expr (fd->loops[i].step));
	      if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
		t = fold_build_pointer_plus (vs[i],
					     fold_convert (sizetype, step));
	      else
		t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi2, vs[i], t);

	      ne = split_block (e->dest, last_stmt (e->dest));
	      gsi2 = gsi_after_labels (ne->dest);

	      expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
	      edge e3, e4;
	      if (next_bb == entry_bb)
		{
		  e3 = find_edge (ne->dest, next_bb);
		  e3->flags = EDGE_FALSE_VALUE;
		}
	      else
		e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
	      e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
	      e4->probability = profile_probability::likely ().guessed ();
	      e3->probability = e4->probability.invert ();
	      basic_block esrc = e->src;
	      make_edge (e->src, ne->dest, EDGE_FALLTHRU);
	      cur_bb = new_cur_bb;
	      basic_block latch_bb = next_bb;
	      next_bb = e->dest;
	      remove_edge (e);
	      set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
	      set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
	      set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
	    }
	}
      t = NULL_TREE;
      for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
	if (!fd->loops[i].non_rect_referenced
	    && fd->loops[i].m1 == NULL_TREE
	    && fd->loops[i].m2 == NULL_TREE)
	  {
	    if (t == NULL_TREE)
	      t = counts[i];
	    else
	      t = fold_build2 (MULT_EXPR, type, t, counts[i]);
	  }
      if (t)
	{
	  t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
	  expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
	}
      if (!rect_count_seen)
	t = counts[fd->last_nonrect];
      else
	t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
			 counts[fd->last_nonrect]);
      expand_omp_build_assign (gsi, fd->loop.n2, t);
    }
  else if (fd->non_rect)
    {
      tree t = fd->loop.n2;
      gcc_assert (TREE_CODE (t) == INTEGER_CST);
      int non_rect_referenced = 0, non_rect = 0;
      for (i = 0; i < fd->collapse; i++)
	{
	  if ((i < fd->first_nonrect || i > fd->last_nonrect)
	      && !integer_zerop (counts[i]))
	    t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
	  if (fd->loops[i].non_rect_referenced)
	    non_rect_referenced++;
	  if (fd->loops[i].m1 || fd->loops[i].m2)
	    non_rect++;
	}
      gcc_assert (non_rect == 1 && non_rect_referenced == 1);
      counts[fd->last_nonrect] = t;
    }
}

/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
   if this loop doesn't have an inner loop construct combined with it.
   If it does have an inner loop construct combined with it and the
   iteration count isn't known constant, store values from counts array
   into its _looptemp_ temporaries instead.
   For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
   inclusive), use the count of all those loops together, and either
   find quadratic etc. equation roots, or as a fallback, do:
	COUNT = 0;
	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
	for (tmpj = M21 * tmpi + N21;
	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
	  {
	    int tmpk1 = M31 * tmpj + N31;
	    int tmpk2 = M32 * tmpj + N32;
	    if (tmpk1 COND3 tmpk2)
	      {
		if (COND3 is <)
		  adj = STEP3 - 1;
		else
		  adj = STEP3 + 1;
		int temp = (adj + tmpk2 - tmpk1) / STEP3;
		if (COUNT + temp > T)
		  {
		    V1 = tmpi;
		    V2 = tmpj;
		    V3 = tmpk1 + (T - COUNT) * STEP3;
		    goto done;
		  }
		else
		  COUNT += temp;
	      }
	  }
	done:;
   but for optional innermost or outermost rectangular loops that aren't
   referenced by other loop expressions keep doing the division/modulo.  */
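
/* For the triangular case handled below, the outer iteration C that
   contains logical iteration T is found by solving the quadratic
	(FACTOR / 2) * C * C + (FIRST_INNER_ITERS - FACTOR / 2) * C = T,
   i.e. with T3 = FIRST_INNER_ITERS - FACTOR / 2,
	C = (sqrt (T3 * T3 + 2 * FACTOR * T) - T3) / FACTOR,
   computed in double precision and verified against integer arithmetic
   afterwards, with the loop-based search as a fallback when the checks
   fail.  */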

static void
expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			  tree *counts, tree *nonrect_bounds,
			  gimple *inner_stmt, tree startvar)
{
  int i;
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* If fd->loop.n2 is constant, then no propagation of the counts
	 is needed, they are constant.  */
      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
	return;

      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
		     ? gimple_omp_taskreg_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      int count = 0;
      if (fd->non_rect
	  && fd->last_nonrect == fd->first_nonrect + 1
	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
	count = 4;
      for (i = 0; i < fd->collapse + count; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    {
	      tree tem = OMP_CLAUSE_DECL (innerc);
	      tree t;
	      if (i < fd->collapse)
		t = counts[i];
	      else
		switch (i - fd->collapse)
		  {
		  case 0: t = counts[0]; break;
		  case 1: t = fd->first_inner_iterations; break;
		  case 2: t = fd->factor; break;
		  case 3: t = fd->adjn1; break;
		  default: gcc_unreachable ();
		  }
	      t = fold_convert (TREE_TYPE (tem), t);
	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      gassign *stmt = gimple_build_assign (tem, t);
	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	    }
	}
      return;
    }

  tree type = TREE_TYPE (fd->loop.v);
  tree tem = create_tmp_reg (type, ".tem");
  gassign *stmt = gimple_build_assign (tem, startvar);
  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);

  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
      itype = vtype;
      if (POINTER_TYPE_P (vtype))
	itype = signed_type_for (vtype);
      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
      else
	t = tem;
      if (i == fd->last_nonrect)
	{
	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  tree stopval = t;
	  tree idx = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, idx,
				   build_zero_cst (type), true);
	  basic_block bb_triang = NULL, bb_triang_dom = NULL;
	  if (fd->first_nonrect + 1 == fd->last_nonrect
	      && (TREE_CODE (fd->loop.n2) == INTEGER_CST
		  || fd->first_inner_iterations)
	      && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
		  != CODE_FOR_nothing)
	      && !integer_zerop (fd->loop.n2))
	    {
	      tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
	      tree itype = TREE_TYPE (fd->loops[i].v);
	      tree first_inner_iterations = fd->first_inner_iterations;
	      tree factor = fd->factor;
	      gcond *cond_stmt
		= expand_omp_build_cond (gsi, NE_EXPR, factor,
					 build_zero_cst (TREE_TYPE (factor)));
	      edge e = split_block (gsi_bb (*gsi), cond_stmt);
	      basic_block bb0 = e->src;
	      e->flags = EDGE_TRUE_VALUE;
	      e->probability = profile_probability::likely ();
	      bb_triang_dom = bb0;
	      *gsi = gsi_after_labels (e->dest);
	      tree slltype = long_long_integer_type_node;
	      tree ulltype = long_long_unsigned_type_node;
	      tree stopvalull = fold_convert (ulltype, stopval);
	      stopvalull
		= force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      first_inner_iterations
		= fold_convert (slltype, first_inner_iterations);
	      first_inner_iterations
		= force_gimple_operand_gsi (gsi, first_inner_iterations, true,
					    NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      factor = fold_convert (slltype, factor);
	      factor
		= force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      tree first_inner_iterationsd
		= fold_build1 (FLOAT_EXPR, double_type_node,
			       first_inner_iterations);
	      first_inner_iterationsd
		= force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
					    NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
					  factor);
	      factord = force_gimple_operand_gsi (gsi, factord, true,
						  NULL_TREE, false,
						  GSI_CONTINUE_LINKING);
	      tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
					   stopvalull);
	      stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
						   NULL_TREE, false,
						   GSI_CONTINUE_LINKING);
	      /* Temporarily disable flag_rounding_math, values will be
		 decimal numbers divided by 2 and worst case imprecisions
		 due to too large values ought to be caught later by the
		 checks for fallback.  */
	      int save_flag_rounding_math = flag_rounding_math;
	      flag_rounding_math = 0;
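	      /* Compute T3 = FIRST_INNER_ITERS - FACTOR / 2 and the
		 discriminant T3 * T3 + 2 * FACTOR * STOPVAL of the
		 quadratic described above the function.  */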
	      t = fold_build2 (RDIV_EXPR, double_type_node, factord,
			       build_real (double_type_node, dconst2));
	      tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
				     first_inner_iterationsd, t);
	      t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	      t = fold_build2 (MULT_EXPR, double_type_node, factord,
			       build_real (double_type_node, dconst2));
	      t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
	      t = fold_build2 (PLUS_EXPR, double_type_node, t,
			       fold_build2 (MULT_EXPR, double_type_node,
					    t3, t3));
	      flag_rounding_math = save_flag_rounding_math;
	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      if (flag_exceptions
		  && cfun->can_throw_non_call_exceptions
		  && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
		{
		  tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
					  build_zero_cst (double_type_node));
		  tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
						  false, GSI_CONTINUE_LINKING);
		  cond_stmt = gimple_build_cond (NE_EXPR, tem,
						 boolean_false_node,
						 NULL_TREE, NULL_TREE);
		}
	      else
		cond_stmt
		  = gimple_build_cond (LT_EXPR, t,
				       build_zero_cst (double_type_node),
				       NULL_TREE, NULL_TREE);
	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
	      e = split_block (gsi_bb (*gsi), cond_stmt);
	      basic_block bb1 = e->src;
	      e->flags = EDGE_FALSE_VALUE;
	      e->probability = profile_probability::very_likely ();
	      *gsi = gsi_after_labels (e->dest);
	      gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
	      tree sqrtr = create_tmp_var (double_type_node);
	      gimple_call_set_lhs (call, sqrtr);
	      gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
	      t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
	      t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
	      t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
	      tree c = create_tmp_var (ulltype);
	      tree d = create_tmp_var (ulltype);
	      expand_omp_build_assign (gsi, c, t, true);
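	      /* D = FACTOR * C * (C - 1) / 2 + C * FIRST_INNER_ITERS is
		 the number of logical iterations preceding outer
		 iteration C; STOPVAL must lie in [D, D + inner count of
		 iteration C) for the computed C to be correct.  */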
	      t = fold_build2 (MINUS_EXPR, ulltype, c,
			       build_one_cst (ulltype));
	      t = fold_build2 (MULT_EXPR, ulltype, c, t);
	      t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
	      t = fold_build2 (MULT_EXPR, ulltype,
			       fold_convert (ulltype, fd->factor), t);
	      tree t2
		= fold_build2 (MULT_EXPR, ulltype, c,
			       fold_convert (ulltype,
					     fd->first_inner_iterations));
	      t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
	      expand_omp_build_assign (gsi, d, t, true);
	      t = fold_build2 (MULT_EXPR, ulltype,
			       fold_convert (ulltype, fd->factor), c);
	      t = fold_build2 (PLUS_EXPR, ulltype,
			       t, fold_convert (ulltype,
						fd->first_inner_iterations));
	      t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
					     NULL_TREE, NULL_TREE);
	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
	      e = split_block (gsi_bb (*gsi), cond_stmt);
	      basic_block bb2 = e->src;
	      e->flags = EDGE_TRUE_VALUE;
	      e->probability = profile_probability::very_likely ();
	      *gsi = gsi_after_labels (e->dest);
	      t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
					     NULL_TREE, NULL_TREE);
	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
	      e = split_block (gsi_bb (*gsi), cond_stmt);
	      basic_block bb3 = e->src;
	      e->flags = EDGE_FALSE_VALUE;
	      e->probability = profile_probability::very_likely ();
	      *gsi = gsi_after_labels (e->dest);
	      t = fold_convert (itype, c);
	      t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
	      t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
	      t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
	      t2 = fold_convert (itype, t2);
	      t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
	      t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
	      if (fd->loops[i].m1)
		{
		  t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
		  t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
		}
	      expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
	      bb_triang = e->src;
	      *gsi = gsi_after_labels (e->dest);
	      remove_edge (e);
	      e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
	      e->probability = profile_probability::very_unlikely ();
	      e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
	      e->probability = profile_probability::very_unlikely ();
	      e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
	      e->probability = profile_probability::very_unlikely ();

	      basic_block bb4 = create_empty_bb (bb0);
	      add_bb_to_loop (bb4, bb0->loop_father);
	      e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
	      e->probability = profile_probability::unlikely ();
	      make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
	      set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
	      set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
	      gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
	      t2 = fold_build2 (TRUNC_DIV_EXPR, type,
				counts[i], counts[i - 1]);
	      t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	      t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
	      t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
	      t = fold_convert (itype, t);
	      t2 = fold_convert (itype, t2);
	      t = fold_build2 (MULT_EXPR, itype, t,
			       fold_convert (itype, fd->loops[i].step));
	      t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
	      t2 = fold_build2 (MULT_EXPR, itype, t2,
				fold_convert (itype, fd->loops[i - 1].step));
	      t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
	      t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
					     false, GSI_CONTINUE_LINKING);
	      stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
	      if (fd->loops[i].m1)
		{
		  t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
				    fd->loops[i - 1].v);
		  t = fold_build2 (PLUS_EXPR, itype, t, t2);
		}
	      t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      stmt = gimple_build_assign (fd->loops[i].v, t);
	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
	    }
	  /* Fallback implementation.  Evaluate the loops in between
	     (inclusive) fd->first_nonrect and fd->last_nonrect at
	     runtime using temporaries instead of the original iteration
	     variables, in the body just bump the counter and compare
	     with the desired value.  */
	  gimple_stmt_iterator gsi2 = *gsi;
	  basic_block entry_bb = gsi_bb (gsi2);
	  edge e = split_block (entry_bb, gsi_stmt (gsi2));
	  e = split_block (e->dest, (gimple *) NULL);
	  basic_block dom_bb = NULL;
	  basic_block cur_bb = e->src;
	  basic_block next_bb = e->dest;
	  entry_bb = e->dest;
	  *gsi = gsi_after_labels (entry_bb);

	  tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
	  tree n1 = NULL_TREE, n2 = NULL_TREE;
	  memset (vs, 0, fd->last_nonrect * sizeof (tree));

	  for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
	    {
	      tree itype = TREE_TYPE (fd->loops[j].v);
	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
			     && fd->loops[j].m2 == NULL_TREE
			     && !fd->loops[j].non_rect_referenced);
	      gsi2 = gsi_after_labels (cur_bb);
	      t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
	      if (fd->loops[j].m1 == NULL_TREE)
		n1 = rect_p ? build_zero_cst (type) : t;
	      else if (POINTER_TYPE_P (itype))
		{
		  gcc_assert (integer_onep (fd->loops[j].m1));
		  t = fold_convert (sizetype,
				    unshare_expr (fd->loops[j].n1));
		  n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
		}
	      else
		{
		  n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
		  n1 = fold_build2 (MULT_EXPR, itype,
				    vs[j - fd->loops[j].outer], n1);
		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
		}
	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      if (j < fd->last_nonrect)
		{
		  vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
		  expand_omp_build_assign (&gsi2, vs[j], n1);
		}
	      t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
	      if (fd->loops[j].m2 == NULL_TREE)
		n2 = rect_p ? counts[j] : t;
	      else if (POINTER_TYPE_P (itype))
		{
		  gcc_assert (integer_onep (fd->loops[j].m2));
		  t = fold_convert (sizetype,
				    unshare_expr (fd->loops[j].n2));
		  n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
		}
	      else
		{
		  n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
		  n2 = fold_build2 (MULT_EXPR, itype,
				    vs[j - fd->loops[j].outer], n2);
		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
		}
	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      if (j == fd->last_nonrect)
		{
		  gcond *cond_stmt
		    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
					     n1, n2);
		  e = split_block (cur_bb, cond_stmt);
		  e->flags = EDGE_TRUE_VALUE;
		  edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
		  e->probability = profile_probability::likely ().guessed ();
		  ne->probability = e->probability.invert ();
		  gsi2 = gsi_after_labels (e->dest);

		  t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
					     ? -1 : 1));
		  t = fold_build2 (PLUS_EXPR, itype,
				   fold_convert (itype, fd->loops[j].step), t);
		  t = fold_build2 (PLUS_EXPR, itype, t,
				   fold_convert (itype, n2));
		  t = fold_build2 (MINUS_EXPR, itype, t,
				   fold_convert (itype, n1));
		  tree step = fold_convert (itype, fd->loops[j].step);
		  if (TYPE_UNSIGNED (itype)
		      && fd->loops[j].cond_code == GT_EXPR)
		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
				     fold_build1 (NEGATE_EXPR, itype, t),
				     fold_build1 (NEGATE_EXPR, itype, step));
		  else
		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
		  t = fold_convert (type, t);
		  t = fold_build2 (PLUS_EXPR, type, idx, t);
		  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
						true, GSI_SAME_STMT);
		  e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
		  set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
		  cond_stmt
		    = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
					 NULL_TREE);
		  gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
		  e = split_block (gsi_bb (gsi2), cond_stmt);
		  e->flags = EDGE_TRUE_VALUE;
		  e->probability = profile_probability::likely ().guessed ();
		  ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
		  ne->probability = e->probability.invert ();
		  gsi2 = gsi_after_labels (e->dest);
		  expand_omp_build_assign (&gsi2, idx, t);
		  set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
		  break;
		}
	      e = split_block (cur_bb, last_stmt (cur_bb));

	      basic_block new_cur_bb = create_empty_bb (cur_bb);
	      add_bb_to_loop (new_cur_bb, cur_bb->loop_father);

	      gsi2 = gsi_after_labels (e->dest);
	      if (rect_p)
		t = fold_build2 (PLUS_EXPR, type, vs[j],
				 build_one_cst (type));
	      else
		{
		  tree step
		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
		  if (POINTER_TYPE_P (vtype))
		    t = fold_build_pointer_plus (vs[j], fold_convert (sizetype,
								      step));
		  else
		    t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
		}
	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi2, vs[j], t);

	      edge ne = split_block (e->dest, last_stmt (e->dest));
	      gsi2 = gsi_after_labels (ne->dest);

	      gcond *cond_stmt;
	      if (next_bb == entry_bb)
		/* No need to actually check the outermost condition.  */
		cond_stmt
		  = gimple_build_cond (EQ_EXPR, boolean_true_node,
				       boolean_true_node,
				       NULL_TREE, NULL_TREE);
	      else
		cond_stmt
		  = gimple_build_cond (rect_p ? LT_EXPR
					      : fd->loops[j].cond_code,
				       vs[j], n2, NULL_TREE, NULL_TREE);
	      gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
	      edge e3, e4;
	      if (next_bb == entry_bb)
		{
		  e3 = find_edge (ne->dest, next_bb);
		  e3->flags = EDGE_FALSE_VALUE;
		  dom_bb = ne->dest;
		}
	      else
		e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
	      e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
	      e4->probability = profile_probability::likely ().guessed ();
	      e3->probability = e4->probability.invert ();
	      basic_block esrc = e->src;
	      make_edge (e->src, ne->dest, EDGE_FALLTHRU);
	      cur_bb = new_cur_bb;
	      basic_block latch_bb = next_bb;
	      next_bb = e->dest;
	      remove_edge (e);
	      set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
	      set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
	      set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
	    }
	  for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
	    {
	      tree vtype = TREE_TYPE (fd->loops[j].v);
	      tree itype = vtype;
	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
			     && fd->loops[j].m2 == NULL_TREE
			     && !fd->loops[j].non_rect_referenced);
	      if (j == fd->last_nonrect)
		{
		  t = fold_build2 (MINUS_EXPR, type, stopval, idx);
		  t = fold_convert (itype, t);
		  tree t2
		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
		  t = fold_build2 (MULT_EXPR, itype, t, t2);
		  if (POINTER_TYPE_P (vtype))
		    t = fold_build_pointer_plus (n1,
						 fold_convert (sizetype, t));
		  else
		    t = fold_build2 (PLUS_EXPR, itype, n1, t);
		}
	      else if (rect_p)
		{
		  t = fold_convert (itype, vs[j]);
		  t = fold_build2 (MULT_EXPR, itype, t,
				   fold_convert (itype, fd->loops[j].step));
		  if (POINTER_TYPE_P (vtype))
		    t = fold_build_pointer_plus (fd->loops[j].n1,
						 fold_convert (sizetype, t));
		  else
		    t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
		}
	      else
		t = vs[j];
	      t = force_gimple_operand_gsi (gsi, t, false,
					    NULL_TREE, true,
					    GSI_SAME_STMT);
	      stmt = gimple_build_assign (fd->loops[j].v, t);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	  if (gsi_end_p (*gsi))
	    *gsi = gsi_last_bb (gsi_bb (*gsi));
	  else
	    gsi_prev (gsi);
	  if (bb_triang)
	    {
	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
	      make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
	      *gsi = gsi_after_labels (e->dest);
	      if (!gsi_end_p (*gsi))
		gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2995 	      set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2996 	    }
2997 	}
2998       else
2999 	{
3000 	  t = fold_convert (itype, t);
3001 	  t = fold_build2 (MULT_EXPR, itype, t,
3002 			   fold_convert (itype, fd->loops[i].step));
3003 	  if (POINTER_TYPE_P (vtype))
3004 	    t = fold_build_pointer_plus (fd->loops[i].n1, t);
3005 	  else
3006 	    t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3007 	  t = force_gimple_operand_gsi (gsi, t,
3008 					DECL_P (fd->loops[i].v)
3009 					&& TREE_ADDRESSABLE (fd->loops[i].v),
3010 					NULL_TREE, false,
3011 					GSI_CONTINUE_LINKING);
3012 	  stmt = gimple_build_assign (fd->loops[i].v, t);
3013 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3014 	}
3015       if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3016 	{
3017 	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3018 	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3019 					false, GSI_CONTINUE_LINKING);
3020 	  stmt = gimple_build_assign (tem, t);
3021 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3022 	}
3023       if (i == fd->last_nonrect)
3024 	i = fd->first_nonrect;
3025     }
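  /* Illustrative sketch (not emitted verbatim): for a non-rectangular
     nest such as
	for (i = 0; i < n; i++)
	  for (j = 0; j < m2 * i + n2; j++)
     the loop below precomputes the inner upper bound into a .bound
     temporary (nonrect_bounds[] = m2 * i + n2), so that the update code
     generated by extract_omp_for_update_vars can compare j against a
     simple variable.  */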
3026   if (fd->non_rect)
3027     for (i = 0; i <= fd->last_nonrect; i++)
3028       if (fd->loops[i].m2)
3029 	{
3030 	  tree itype = TREE_TYPE (fd->loops[i].v);
3031 
3032 	  tree t;
3033 	  if (POINTER_TYPE_P (itype))
3034 	    {
3035 	      gcc_assert (integer_onep (fd->loops[i].m2));
3036 	      t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2));
3037 	      t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3038 					   t);
3039 	    }
3040 	  else
3041 	    {
3042 	      t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3043 	      t = fold_build2 (MULT_EXPR, itype,
3044 			       fd->loops[i - fd->loops[i].outer].v, t);
3045 	      t = fold_build2 (PLUS_EXPR, itype, t,
3046 			       fold_convert (itype,
3047 					     unshare_expr (fd->loops[i].n2)));
3048 	    }
3049 	  nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3050 	  t = force_gimple_operand_gsi (gsi, t, false,
3051 					NULL_TREE, false,
3052 					GSI_CONTINUE_LINKING);
3053 	  stmt = gimple_build_assign (nonrect_bounds[i], t);
3054 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3055 	}
3056 }
3057 
3058 /* Helper function for expand_omp_for_*.  Generate code like:
3059     L10:
3060 	V3 += STEP3;
3061 	if (V3 cond3 N32) goto BODY_BB; else goto L11;
3062     L11:
3063 	V3 = N31;
3064 	V2 += STEP2;
3065 	if (V2 cond2 N22) goto BODY_BB; else goto L12;
3066     L12:
3067 	V2 = N21;
3068 	V1 += STEP1;
3069 	goto BODY_BB;
3070    For non-rectangular loops, use temporaries stored in nonrect_bounds
3071    for the upper bounds if M?2 multiplier is present.  Given e.g.
3072    for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3073    for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3074    for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3075    for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3076    do:
3077     L10:
3078 	V4 += STEP4;
3079 	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3080     L11:
3081 	V4 = N41 + M41 * V2; // This can be left out if the loop
3082 			     // refers to the immediate parent loop
3083 	V3 += STEP3;
3084 	if (V3 cond3 N32) goto BODY_BB; else goto L12;
3085     L12:
3086 	V3 = N31;
3087 	V2 += STEP2;
3088 	if (V2 cond2 N22) goto L120; else goto L13;
3089     L120:
3090 	V4 = N41 + M41 * V2;
3091 	NONRECT_BOUND4 = N42 + M42 * V2;
3092 	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3093     L13:
3094 	V2 = N21;
3095 	V1 += STEP1;
3096 	goto L120;  */
3097 
3098 static basic_block
3099 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3100 			     basic_block cont_bb, basic_block body_bb)
3101 {
3102   basic_block last_bb, bb, collapse_bb = NULL;
3103   int i;
3104   gimple_stmt_iterator gsi;
3105   edge e;
3106   tree t;
3107   gimple *stmt;
3108 
3109   last_bb = cont_bb;
3110   for (i = fd->collapse - 1; i >= 0; i--)
3111     {
3112       tree vtype = TREE_TYPE (fd->loops[i].v);
3113 
3114       bb = create_empty_bb (last_bb);
3115       add_bb_to_loop (bb, last_bb->loop_father);
3116       gsi = gsi_start_bb (bb);
3117 
3118       if (i < fd->collapse - 1)
3119 	{
3120 	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3121 	  e->probability
3122 	    = profile_probability::guessed_always ().apply_scale (1, 8);
3123 
3124 	  struct omp_for_data_loop *l = &fd->loops[i + 1];
3125 	  if (l->m1 == NULL_TREE || l->outer != 1)
3126 	    {
3127 	      t = l->n1;
3128 	      if (l->m1)
3129 		{
3130 		  if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3131 		    t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3132 						 fold_convert (sizetype, t));
3133 		  else
3134 		    {
3135 		      tree t2
3136 			= fold_build2 (MULT_EXPR, TREE_TYPE (t),
3137 				       fd->loops[i + 1 - l->outer].v, l->m1);
3138 		      t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3139 		    }
3140 		}
3141 	      t = force_gimple_operand_gsi (&gsi, t,
3142 					    DECL_P (l->v)
3143 					    && TREE_ADDRESSABLE (l->v),
3144 					    NULL_TREE, false,
3145 					    GSI_CONTINUE_LINKING);
3146 	      stmt = gimple_build_assign (l->v, t);
3147 	      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3148 	    }
3149 	}
3150       else
3151 	collapse_bb = bb;
3152 
3153       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3154 
3155       if (POINTER_TYPE_P (vtype))
3156 	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3157       else
3158 	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3159       t = force_gimple_operand_gsi (&gsi, t,
3160 				    DECL_P (fd->loops[i].v)
3161 				    && TREE_ADDRESSABLE (fd->loops[i].v),
3162 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
3163       stmt = gimple_build_assign (fd->loops[i].v, t);
3164       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3165 
3166       if (fd->loops[i].non_rect_referenced)
3167 	{
3168 	  basic_block update_bb = NULL, prev_bb = NULL;
3169 	  for (int j = i + 1; j <= fd->last_nonrect; j++)
3170 	    if (j - fd->loops[j].outer == i)
3171 	      {
3172 		tree n1, n2;
3173 		struct omp_for_data_loop *l = &fd->loops[j];
3174 		basic_block this_bb = create_empty_bb (last_bb);
3175 		add_bb_to_loop (this_bb, last_bb->loop_father);
3176 		gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3177 		if (prev_bb)
3178 		  {
3179 		    e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3180 		    e->probability
3181 		      = profile_probability::guessed_always ().apply_scale (7,
3182 									    8);
3183 		    set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3184 		  }
3185 		if (l->m1)
3186 		  {
3187 		    if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3188 		      t = fold_build_pointer_plus (fd->loops[i].v,
3189 						   fold_convert (sizetype,
3190 								 l->n1));
3191 		    else
3192 		      {
3193 			t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3194 					 fd->loops[i].v);
3195 			t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3196 					 t, l->n1);
3197 		      }
3198 		    n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3199 						   false,
3200 						   GSI_CONTINUE_LINKING);
3201 		    stmt = gimple_build_assign (l->v, n1);
3202 		    gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3203 		    n1 = l->v;
3204 		  }
3205 		else
3206 		  n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3207 						 NULL_TREE, false,
3208 						 GSI_CONTINUE_LINKING);
3209 		if (l->m2)
3210 		  {
3211 		    if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3212 		      t = fold_build_pointer_plus (fd->loops[i].v,
3213 						   fold_convert (sizetype,
3214 								 l->n2));
3215 		    else
3216 		      {
3217 			t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3218 					 fd->loops[i].v);
3219 			t = fold_build2 (PLUS_EXPR,
3220 					 TREE_TYPE (nonrect_bounds[j]),
3221 					 t, unshare_expr (l->n2));
3222 		      }
3223 		    n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3224 						   false,
3225 						   GSI_CONTINUE_LINKING);
3226 		    stmt = gimple_build_assign (nonrect_bounds[j], n2);
3227 		    gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3228 		    n2 = nonrect_bounds[j];
3229 		  }
3230 		else
3231 		  n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3232 						 true, NULL_TREE, false,
3233 						 GSI_CONTINUE_LINKING);
3234 		gcond *cond_stmt
3235 		  = gimple_build_cond (l->cond_code, n1, n2,
3236 				       NULL_TREE, NULL_TREE);
3237 		gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3238 		if (update_bb == NULL)
3239 		  update_bb = this_bb;
3240 		e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3241 		e->probability
3242 		  = profile_probability::guessed_always ().apply_scale (1, 8);
3243 		if (prev_bb == NULL)
3244 		  set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3245 		prev_bb = this_bb;
3246 	      }
3247 	  e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3248 	  e->probability
3249 	    = profile_probability::guessed_always ().apply_scale (7, 8);
3250 	  body_bb = update_bb;
3251 	}
3252 
3253       if (i > 0)
3254 	{
3255 	  if (fd->loops[i].m2)
3256 	    t = nonrect_bounds[i];
3257 	  else
3258 	    t = unshare_expr (fd->loops[i].n2);
3259 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3260 					false, GSI_CONTINUE_LINKING);
3261 	  tree v = fd->loops[i].v;
3262 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
3263 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3264 					  false, GSI_CONTINUE_LINKING);
3265 	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3266 	  stmt = gimple_build_cond_empty (t);
3267 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3268 	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3269 			 expand_omp_regimplify_p, NULL, NULL)
3270 	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3271 			    expand_omp_regimplify_p, NULL, NULL))
3272 	    gimple_regimplify_operands (stmt, &gsi);
3273 	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3274 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3275 	}
3276       else
3277 	make_edge (bb, body_bb, EDGE_FALLTHRU);
3278       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3279       last_bb = bb;
3280     }
3281 
3282   return collapse_bb;
3283 }
3284 
3285 /* Expand #pragma omp ordered depend(source).  */
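/* For illustration (a sketch): within a "#pragma omp for ordered(1)"
   loop, the directive becomes a call
	GOMP_doacross_post (&.orditera[0]);
   (GOMP_doacross_ull_post for the unsigned long long iteration type),
   where the .orditera array is maintained by the surrounding loop code
   to hold the current iteration counters.  */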
3286 
3287 static void
3288 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3289 			   tree *counts, location_t loc)
3290 {
3291   enum built_in_function source_ix
3292     = fd->iter_type == long_integer_type_node
3293       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3294   gimple *g
3295     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3296 			 build_fold_addr_expr (counts[fd->ordered]));
3297   gimple_set_location (g, loc);
3298   gsi_insert_before (gsi, g, GSI_SAME_STMT);
3299 }
3300 
3301 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
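/* For illustration (a sketch, with n1i/n2j standing for the bounds of
   hypothetical i and j loops): given "ordered(2)" loops over i and j,
   both with unit step, "depend(sink: i-1, j+1)" expands to roughly
	if (i - 1 >= n1i && j + 1 < n2j)
	  GOMP_doacross_wait (i - 1 - n1i, j + 1 - n1j);
   i.e. the wait is skipped for sink iterations that fall outside the
   iteration space.  */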
3302 
3303 static void
3304 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3305 			 tree *counts, tree c, location_t loc)
3306 {
3307   auto_vec<tree, 10> args;
3308   enum built_in_function sink_ix
3309     = fd->iter_type == long_integer_type_node
3310       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3311   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3312   int i;
3313   gimple_stmt_iterator gsi2 = *gsi;
3314   bool warned_step = false;
3315 
3316   for (i = 0; i < fd->ordered; i++)
3317     {
3318       tree step = NULL_TREE;
3319       off = TREE_PURPOSE (deps);
3320       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3321 	{
3322 	  step = TREE_OPERAND (off, 1);
3323 	  off = TREE_OPERAND (off, 0);
3324 	}
3325       if (!integer_zerop (off))
3326 	{
3327 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
3328 		      || fd->loops[i].cond_code == GT_EXPR);
3329 	  bool forward = fd->loops[i].cond_code == LT_EXPR;
3330 	  if (step)
3331 	    {
3332 	      /* Non-simple Fortran DO loops.  If step is variable,
3333 		 we don't even know the direction at compile time,
3334 		 so we can't warn.  */
3335 	      if (TREE_CODE (step) != INTEGER_CST)
3336 		break;
3337 	      forward = tree_int_cst_sgn (step) != -1;
3338 	    }
3339 	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3340 	    warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3341 				"waiting for lexically later iteration");
3342 	  break;
3343 	}
3344       deps = TREE_CHAIN (deps);
3345     }
3346   /* If all offsets corresponding to the collapsed loops are zero,
3347      this depend clause can be ignored.  FIXME: but there is still a
3348      flush needed.  We need to emit one __sync_synchronize () for it
3349      though (perhaps conditionally)?  Solve this together with the
3350      conservative dependence folding optimization.
3351   if (i >= fd->collapse)
3352     return;  */
3353 
3354   deps = OMP_CLAUSE_DECL (c);
3355   gsi_prev (&gsi2);
3356   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3357   edge e2 = split_block_after_labels (e1->dest);
3358 
3359   gsi2 = gsi_after_labels (e1->dest);
3360   *gsi = gsi_last_bb (e1->src);
3361   for (i = 0; i < fd->ordered; i++)
3362     {
3363       tree itype = TREE_TYPE (fd->loops[i].v);
3364       tree step = NULL_TREE;
3365       tree orig_off = NULL_TREE;
3366       if (POINTER_TYPE_P (itype))
3367 	itype = sizetype;
3368       if (i)
3369 	deps = TREE_CHAIN (deps);
3370       off = TREE_PURPOSE (deps);
3371       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3372 	{
3373 	  step = TREE_OPERAND (off, 1);
3374 	  off = TREE_OPERAND (off, 0);
3375 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
3376 		      && integer_onep (fd->loops[i].step)
3377 		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3378 	}
3379       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3380       if (step)
3381 	{
3382 	  off = fold_convert_loc (loc, itype, off);
3383 	  orig_off = off;
3384 	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3385 	}
3386 
3387       if (integer_zerop (off))
3388 	t = boolean_true_node;
3389       else
3390 	{
3391 	  tree a;
3392 	  tree co = fold_convert_loc (loc, itype, off);
3393 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3394 	    {
3395 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3396 		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3397 	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3398 				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3399 				   co);
3400 	    }
3401 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3402 	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3403 				 fd->loops[i].v, co);
3404 	  else
3405 	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3406 				 fd->loops[i].v, co);
3407 	  if (step)
3408 	    {
3409 	      tree t1, t2;
3410 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3411 		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3412 				      fd->loops[i].n1);
3413 	      else
3414 		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3415 				      fd->loops[i].n2);
3416 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3417 		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3418 				      fd->loops[i].n2);
3419 	      else
3420 		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3421 				      fd->loops[i].n1);
3422 	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3423 				   step, build_int_cst (TREE_TYPE (step), 0));
3424 	      if (TREE_CODE (step) != INTEGER_CST)
3425 		{
3426 		  t1 = unshare_expr (t1);
3427 		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3428 						 false, GSI_CONTINUE_LINKING);
3429 		  t2 = unshare_expr (t2);
3430 		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3431 						 false, GSI_CONTINUE_LINKING);
3432 		}
3433 	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3434 				   t, t2, t1);
3435 	    }
3436 	  else if (fd->loops[i].cond_code == LT_EXPR)
3437 	    {
3438 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3439 		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3440 				     fd->loops[i].n1);
3441 	      else
3442 		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3443 				     fd->loops[i].n2);
3444 	    }
3445 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3446 	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3447 				 fd->loops[i].n2);
3448 	  else
3449 	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3450 				 fd->loops[i].n1);
3451 	}
3452       if (cond)
3453 	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3454       else
3455 	cond = t;
3456 
3457       off = fold_convert_loc (loc, itype, off);
3458 
3459       if (step
3460 	  || (fd->loops[i].cond_code == LT_EXPR
3461 	      ? !integer_onep (fd->loops[i].step)
3462 	      : !integer_minus_onep (fd->loops[i].step)))
3463 	{
3464 	  if (step == NULL_TREE
3465 	      && TYPE_UNSIGNED (itype)
3466 	      && fd->loops[i].cond_code == GT_EXPR)
3467 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3468 				 fold_build1_loc (loc, NEGATE_EXPR, itype,
3469 						  s));
3470 	  else
3471 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3472 				 orig_off ? orig_off : off, s);
3473 	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3474 			       build_int_cst (itype, 0));
3475 	  if (integer_zerop (t) && !warned_step)
3476 	    {
3477 	      warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3478 				  "refers to iteration never in the iteration "
3479 				  "space");
3480 	      warned_step = true;
3481 	    }
3482 	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3483 				  cond, t);
3484 	}
3485 
3486       if (i <= fd->collapse - 1 && fd->collapse > 1)
3487 	t = fd->loop.v;
3488       else if (counts[i])
3489 	t = counts[i];
3490       else
3491 	{
3492 	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3493 			       fd->loops[i].v, fd->loops[i].n1);
3494 	  t = fold_convert_loc (loc, fd->iter_type, t);
3495 	}
3496       if (step)
3497 	/* We have already divided off by step earlier.  */;
3498       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3499 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3500 			       fold_build1_loc (loc, NEGATE_EXPR, itype,
3501 						s));
3502       else
3503 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3504       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3505 	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3506       off = fold_convert_loc (loc, fd->iter_type, off);
3507       if (i <= fd->collapse - 1 && fd->collapse > 1)
3508 	{
3509 	  if (i)
3510 	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3511 				   off);
3512 	  if (i < fd->collapse - 1)
3513 	    {
3514 	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3515 				      counts[i]);
3516 	      continue;
3517 	    }
3518 	}
3519       off = unshare_expr (off);
3520       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3521       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3522 				    true, GSI_SAME_STMT);
3523       args.safe_push (t);
3524     }
3525   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3526   gimple_set_location (g, loc);
3527   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3528 
3529   cond = unshare_expr (cond);
3530   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3531 				   GSI_CONTINUE_LINKING);
3532   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3533   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3534   e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3535   e1->probability = e3->probability.invert ();
3536   e1->flags = EDGE_TRUE_VALUE;
3537   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3538 
3539   *gsi = gsi_after_labels (e2->dest);
3540 }
3541 
3542 /* Expand all #pragma omp ordered depend(source) and
3543    #pragma omp ordered depend(sink:...) constructs in the current
3544    #pragma omp for ordered(n) region.  */
3545 
3546 static void
3547 expand_omp_ordered_source_sink (struct omp_region *region,
3548 				struct omp_for_data *fd, tree *counts,
3549 				basic_block cont_bb)
3550 {
3551   struct omp_region *inner;
3552   int i;
3553   for (i = fd->collapse - 1; i < fd->ordered; i++)
3554     if (i == fd->collapse - 1 && fd->collapse > 1)
3555       counts[i] = NULL_TREE;
3556     else if (i >= fd->collapse && !cont_bb)
3557       counts[i] = build_zero_cst (fd->iter_type);
3558     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3559 	     && integer_onep (fd->loops[i].step))
3560       counts[i] = NULL_TREE;
3561     else
3562       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3563   tree atype
3564     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3565   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3566   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3567 
3568   for (inner = region->inner; inner; inner = inner->next)
3569     if (inner->type == GIMPLE_OMP_ORDERED)
3570       {
3571 	gomp_ordered *ord_stmt = inner->ord_stmt;
3572 	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3573 	location_t loc = gimple_location (ord_stmt);
3574 	tree c;
3575 	for (c = gimple_omp_ordered_clauses (ord_stmt);
3576 	     c; c = OMP_CLAUSE_CHAIN (c))
3577 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3578 	    break;
3579 	if (c)
3580 	  expand_omp_ordered_source (&gsi, fd, counts, loc);
3581 	for (c = gimple_omp_ordered_clauses (ord_stmt);
3582 	     c; c = OMP_CLAUSE_CHAIN (c))
3583 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3584 	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3585 	gsi_remove (&gsi, true);
3586       }
3587 }
3588 
3589 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3590    collapsed.  */
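/* For illustration (a sketch): with "ordered(2)" and no collapse clause,
   the body is wrapped as
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  {
	    .orditera[1] = <iteration count of V2>;
	    BODY;
	  }
   with .orditera[1] reset to zero before the wrapping loop starts.  */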
3591 
3592 static basic_block
3593 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3594 			      basic_block cont_bb, basic_block body_bb,
3595 			      bool ordered_lastprivate)
3596 {
3597   if (fd->ordered == fd->collapse)
3598     return cont_bb;
3599 
3600   if (!cont_bb)
3601     {
3602       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3603       for (int i = fd->collapse; i < fd->ordered; i++)
3604 	{
3605 	  tree type = TREE_TYPE (fd->loops[i].v);
3606 	  tree n1 = fold_convert (type, fd->loops[i].n1);
3607 	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3608 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3609 			      size_int (i - fd->collapse + 1),
3610 			      NULL_TREE, NULL_TREE);
3611 	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3612 	}
3613       return NULL;
3614     }
3615 
3616   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3617     {
3618       tree t, type = TREE_TYPE (fd->loops[i].v);
3619       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3620       expand_omp_build_assign (&gsi, fd->loops[i].v,
3621 			       fold_convert (type, fd->loops[i].n1));
3622       if (counts[i])
3623 	expand_omp_build_assign (&gsi, counts[i],
3624 				 build_zero_cst (fd->iter_type));
3625       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3626 			  size_int (i - fd->collapse + 1),
3627 			  NULL_TREE, NULL_TREE);
3628       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3629       if (!gsi_end_p (gsi))
3630 	gsi_prev (&gsi);
3631       else
3632 	gsi = gsi_last_bb (body_bb);
3633       edge e1 = split_block (body_bb, gsi_stmt (gsi));
3634       basic_block new_body = e1->dest;
3635       if (body_bb == cont_bb)
3636 	cont_bb = new_body;
3637       edge e2 = NULL;
3638       basic_block new_header;
3639       if (EDGE_COUNT (cont_bb->preds) > 0)
3640 	{
3641 	  gsi = gsi_last_bb (cont_bb);
3642 	  if (POINTER_TYPE_P (type))
3643 	    t = fold_build_pointer_plus (fd->loops[i].v,
3644 					 fold_convert (sizetype,
3645 						       fd->loops[i].step));
3646 	  else
3647 	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3648 			     fold_convert (type, fd->loops[i].step));
3649 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3650 	  if (counts[i])
3651 	    {
3652 	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3653 			       build_int_cst (fd->iter_type, 1));
3654 	      expand_omp_build_assign (&gsi, counts[i], t);
3655 	      t = counts[i];
3656 	    }
3657 	  else
3658 	    {
3659 	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3660 			       fd->loops[i].v, fd->loops[i].n1);
3661 	      t = fold_convert (fd->iter_type, t);
3662 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3663 					    true, GSI_SAME_STMT);
3664 	    }
3665 	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3666 			 size_int (i - fd->collapse + 1),
3667 			 NULL_TREE, NULL_TREE);
3668 	  expand_omp_build_assign (&gsi, aref, t);
3669 	  gsi_prev (&gsi);
3670 	  e2 = split_block (cont_bb, gsi_stmt (gsi));
3671 	  new_header = e2->dest;
3672 	}
3673       else
3674 	new_header = cont_bb;
3675       gsi = gsi_after_labels (new_header);
3676       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3677 					 true, GSI_SAME_STMT);
3678       tree n2
3679 	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3680 				    true, NULL_TREE, true, GSI_SAME_STMT);
3681       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3682       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3683       edge e3 = split_block (new_header, gsi_stmt (gsi));
3684       cont_bb = e3->dest;
3685       remove_edge (e1);
3686       make_edge (body_bb, new_header, EDGE_FALLTHRU);
3687       e3->flags = EDGE_FALSE_VALUE;
3688       e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3689       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3690       e1->probability = e3->probability.invert ();
3691 
3692       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3693       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3694 
3695       if (e2)
3696 	{
3697 	  class loop *loop = alloc_loop ();
3698 	  loop->header = new_header;
3699 	  loop->latch = e2->src;
3700 	  add_loop (loop, body_bb->loop_father);
3701 	}
3702     }
3703 
3704   /* If there are any lastprivate clauses and it is possible some loops
3705      might have zero iterations, ensure all the decls are initialized,
3706      otherwise we could crash evaluating C++ class iterators with lastprivate
3707      clauses.  */
3708   bool need_inits = false;
3709   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3710     if (need_inits)
3711       {
3712 	tree type = TREE_TYPE (fd->loops[i].v);
3713 	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3714 	expand_omp_build_assign (&gsi, fd->loops[i].v,
3715 				 fold_convert (type, fd->loops[i].n1));
3716       }
3717     else
3718       {
3719 	tree type = TREE_TYPE (fd->loops[i].v);
3720 	tree this_cond = fold_build2 (fd->loops[i].cond_code,
3721 				      boolean_type_node,
3722 				      fold_convert (type, fd->loops[i].n1),
3723 				      fold_convert (type, fd->loops[i].n2));
3724 	if (!integer_onep (this_cond))
3725 	  need_inits = true;
3726       }
3727 
3728   return cont_bb;
3729 }
3730 
3731 /* A subroutine of expand_omp_for.  Generate code for a parallel
3732    loop with any schedule.  Given parameters:
3733 
3734 	for (V = N1; V cond N2; V += STEP) BODY;
3735 
3736    where COND is "<" or ">", we generate pseudocode
3737 
3738 	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3739 	if (more) goto L0; else goto L3;
3740     L0:
3741 	V = istart0;
3742 	iend = iend0;
3743     L1:
3744 	BODY;
3745 	V += STEP;
3746 	if (V cond iend) goto L1; else goto L2;
3747     L2:
3748 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3749     L3:
3750 
3751     If this is a combined omp parallel loop, instead of the call to
3752     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3753     If this is gimple_omp_for_combined_p loop, then instead of assigning
3754     V and iend in L0 we assign the first two _looptemp_ clause decls of the
3755     inner GIMPLE_OMP_FOR and V += STEP; and
3756     if (V cond iend) goto L1; else goto L2; are removed.
3757 
3758     For collapsed loops, given parameters:
3759       collapse(3)
3760       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3761 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3762 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3763 	    BODY;
3764 
3765     we generate pseudocode
3766 
3767 	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3768 	if (cond3 is <)
3769 	  adj = STEP3 - 1;
3770 	else
3771 	  adj = STEP3 + 1;
3772 	count3 = (adj + N32 - N31) / STEP3;
3773 	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3774 	if (cond2 is <)
3775 	  adj = STEP2 - 1;
3776 	else
3777 	  adj = STEP2 + 1;
3778 	count2 = (adj + N22 - N21) / STEP2;
3779 	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3780 	if (cond1 is <)
3781 	  adj = STEP1 - 1;
3782 	else
3783 	  adj = STEP1 + 1;
3784 	count1 = (adj + N12 - N11) / STEP1;
3785 	count = count1 * count2 * count3;
3786 	goto Z1;
3787     Z0:
3788 	count = 0;
3789     Z1:
3790 	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3791 	if (more) goto L0; else goto L3;
3792     L0:
3793 	V = istart0;
3794 	T = V;
3795 	V3 = N31 + (T % count3) * STEP3;
3796 	T = T / count3;
3797 	V2 = N21 + (T % count2) * STEP2;
3798 	T = T / count2;
3799 	V1 = N11 + T * STEP1;
3800 	iend = iend0;
3801     L1:
3802 	BODY;
3803 	V += 1;
3804 	if (V < iend) goto L10; else goto L2;
3805     L10:
3806 	V3 += STEP3;
3807 	if (V3 cond3 N32) goto L1; else goto L11;
3808     L11:
3809 	V3 = N31;
3810 	V2 += STEP2;
3811 	if (V2 cond2 N22) goto L1; else goto L12;
3812     L12:
3813 	V2 = N21;
3814 	V1 += STEP1;
3815 	goto L1;
3816     L2:
3817 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3818     L3:
3819 
3820       */
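/* For instance (a sketch): with "schedule(dynamic, CHUNK)" on a long
   iteration type, START_FN and NEXT_FN resolve to the libgomp entry
   points
	bool GOMP_loop_dynamic_start (long start, long end, long incr,
				      long chunk_size, long *istart,
				      long *iend);
	bool GOMP_loop_dynamic_next (long *istart, long *iend);
   and the generated code keeps fetching [istart0, iend0) chunks until
   the next function returns false.  */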
3821 
3822 static void
3823 expand_omp_for_generic (struct omp_region *region,
3824 			struct omp_for_data *fd,
3825 			enum built_in_function start_fn,
3826 			enum built_in_function next_fn,
3827 			tree sched_arg,
3828 			gimple *inner_stmt)
3829 {
3830   tree type, istart0, iend0, iend;
3831   tree t, vmain, vback, bias = NULL_TREE;
3832   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3833   basic_block l2_bb = NULL, l3_bb = NULL;
3834   gimple_stmt_iterator gsi;
3835   gassign *assign_stmt;
3836   bool in_combined_parallel = is_combined_parallel (region);
3837   bool broken_loop = region->cont == NULL;
3838   edge e, ne;
3839   tree *counts = NULL;
3840   int i;
3841   bool ordered_lastprivate = false;
3842 
3843   gcc_assert (!broken_loop || !in_combined_parallel);
3844   gcc_assert (fd->iter_type == long_integer_type_node
3845 	      || !in_combined_parallel);
3846 
3847   entry_bb = region->entry;
3848   cont_bb = region->cont;
3849   collapse_bb = NULL;
3850   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3851   gcc_assert (broken_loop
3852 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3853   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3854   l1_bb = single_succ (l0_bb);
3855   if (!broken_loop)
3856     {
3857       l2_bb = create_empty_bb (cont_bb);
3858       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3859 		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3860 		      == l1_bb));
3861       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3862     }
3863   else
3864     l2_bb = NULL;
3865   l3_bb = BRANCH_EDGE (entry_bb)->dest;
3866   exit_bb = region->exit;
3867 
3868   gsi = gsi_last_nondebug_bb (entry_bb);
3869 
3870   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3871   if (fd->ordered
3872       && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3873 			  OMP_CLAUSE_LASTPRIVATE))
3874     ordered_lastprivate = true;
3875   tree reductions = NULL_TREE;
3876   tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3877   tree memv = NULL_TREE;
3878   if (fd->lastprivate_conditional)
3879     {
3880       tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3881 				OMP_CLAUSE__CONDTEMP_);
3882       if (fd->have_pointer_condtemp)
3883 	condtemp = OMP_CLAUSE_DECL (c);
3884       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3885       cond_var = OMP_CLAUSE_DECL (c);
3886     }
3887   if (sched_arg)
3888     {
3889       if (fd->have_reductemp)
3890 	{
3891 	  tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3892 				    OMP_CLAUSE__REDUCTEMP_);
3893 	  reductions = OMP_CLAUSE_DECL (c);
3894 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3895 	  gimple *g = SSA_NAME_DEF_STMT (reductions);
3896 	  reductions = gimple_assign_rhs1 (g);
3897 	  OMP_CLAUSE_DECL (c) = reductions;
3898 	  entry_bb = gimple_bb (g);
3899 	  edge e = split_block (entry_bb, g);
3900 	  if (region->entry == entry_bb)
3901 	    region->entry = e->dest;
3902 	  gsi = gsi_last_bb (entry_bb);
3903 	}
3904       else
3905 	reductions = null_pointer_node;
3906       if (fd->have_pointer_condtemp)
3907 	{
3908 	  tree type = TREE_TYPE (condtemp);
3909 	  memv = create_tmp_var (type);
3910 	  TREE_ADDRESSABLE (memv) = 1;
3911 	  unsigned HOST_WIDE_INT sz
3912 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3913 	  sz *= fd->lastprivate_conditional;
3914 	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3915 				   false);
3916 	  mem = build_fold_addr_expr (memv);
3917 	}
3918       else
3919 	mem = null_pointer_node;
3920     }
3921   if (fd->collapse > 1 || fd->ordered)
3922     {
3923       int first_zero_iter1 = -1, first_zero_iter2 = -1;
3924       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3925 
3926       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3927       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3928 				  zero_iter1_bb, first_zero_iter1,
3929 				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3930 
3931       if (zero_iter1_bb)
3932 	{
3933 	  /* Some counts[i] vars might be uninitialized if
3934 	     some loop has zero iterations.  But the body shouldn't
3935 	     be executed in that case, so just avoid uninit warnings.  */
3936 	  for (i = first_zero_iter1;
3937 	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3938 	    if (SSA_VAR_P (counts[i]))
3939 	      suppress_warning (counts[i], OPT_Wuninitialized);
3940 	  gsi_prev (&gsi);
3941 	  e = split_block (entry_bb, gsi_stmt (gsi));
3942 	  entry_bb = e->dest;
3943 	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3944 	  gsi = gsi_last_nondebug_bb (entry_bb);
3945 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3946 				   get_immediate_dominator (CDI_DOMINATORS,
3947 							    zero_iter1_bb));
3948 	}
3949       if (zero_iter2_bb)
3950 	{
3951 	  /* Some counts[i] vars might be uninitialized if
3952 	     some loop has zero iterations.  But the body shouldn't
3953 	     be executed in that case, so just avoid uninit warnings.  */
3954 	  for (i = first_zero_iter2; i < fd->ordered; i++)
3955 	    if (SSA_VAR_P (counts[i]))
3956 	      suppress_warning (counts[i], OPT_Wuninitialized);
3957 	  if (zero_iter1_bb)
3958 	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3959 	  else
3960 	    {
3961 	      gsi_prev (&gsi);
3962 	      e = split_block (entry_bb, gsi_stmt (gsi));
3963 	      entry_bb = e->dest;
3964 	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3965 	      gsi = gsi_last_nondebug_bb (entry_bb);
3966 	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3967 				       get_immediate_dominator
3968 					 (CDI_DOMINATORS, zero_iter2_bb));
3969 	    }
3970 	}
3971       if (fd->collapse == 1)
3972 	{
3973 	  counts[0] = fd->loop.n2;
3974 	  fd->loop = fd->loops[0];
3975 	}
3976     }
3977 
3978   type = TREE_TYPE (fd->loop.v);
3979   istart0 = create_tmp_var (fd->iter_type, ".istart0");
3980   iend0 = create_tmp_var (fd->iter_type, ".iend0");
3981   TREE_ADDRESSABLE (istart0) = 1;
3982   TREE_ADDRESSABLE (iend0) = 1;
3983 
3984   /* See if we need to bias by LLONG_MIN.  */
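  /* Illustration: for "for (long long v = -3; v < 3; v++)" iterated in
     unsigned long long, adding LLONG_MIN to N1 and N2 keeps the unsigned
     comparisons done by the runtime consistent with the signed iteration
     order; the bias is subtracted again below when computing V from
     istart0.  */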
3985   if (fd->iter_type == long_long_unsigned_type_node
3986       && TREE_CODE (type) == INTEGER_TYPE
3987       && !TYPE_UNSIGNED (type)
3988       && fd->ordered == 0)
3989     {
3990       tree n1, n2;
3991 
3992       if (fd->loop.cond_code == LT_EXPR)
3993 	{
3994 	  n1 = fd->loop.n1;
3995 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3996 	}
3997       else
3998 	{
3999 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4000 	  n2 = fd->loop.n1;
4001 	}
4002       if (TREE_CODE (n1) != INTEGER_CST
4003 	  || TREE_CODE (n2) != INTEGER_CST
4004 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4005 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4006     }
4007 
4008   gimple_stmt_iterator gsif = gsi;
4009   gsi_prev (&gsif);
4010 
4011   tree arr = NULL_TREE;
4012   if (in_combined_parallel)
4013     {
4014       gcc_assert (fd->ordered == 0);
4015       /* In a combined parallel loop, emit a call to
4016 	 GOMP_loop_foo_next.  */
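      /* Sketch for illustration: with schedule(dynamic) this is
	 GOMP_loop_dynamic_next (&istart0, &iend0); the corresponding
	 *_start work was already done by the GOMP_parallel_loop_*
	 call emitted for the enclosing parallel construct.  */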
4017       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4018 			   build_fold_addr_expr (istart0),
4019 			   build_fold_addr_expr (iend0));
4020     }
4021   else
4022     {
4023       tree t0, t1, t2, t3, t4;
4024       /* If this is not a combined parallel loop, emit a call to
4025 	 GOMP_loop_foo_start in ENTRY_BB.  */
4026       t4 = build_fold_addr_expr (iend0);
4027       t3 = build_fold_addr_expr (istart0);
4028       if (fd->ordered)
4029 	{
4030 	  t0 = build_int_cst (unsigned_type_node,
4031 			      fd->ordered - fd->collapse + 1);
4032 	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4033 							fd->ordered
4034 							- fd->collapse + 1),
4035 				".omp_counts");
4036 	  DECL_NAMELESS (arr) = 1;
4037 	  TREE_ADDRESSABLE (arr) = 1;
4038 	  TREE_STATIC (arr) = 1;
4039 	  vec<constructor_elt, va_gc> *v;
4040 	  vec_alloc (v, fd->ordered - fd->collapse + 1);
4041 	  int idx;
4042 
4043 	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4044 	    {
4045 	      tree c;
4046 	      if (idx == 0 && fd->collapse > 1)
4047 		c = fd->loop.n2;
4048 	      else
4049 		c = counts[idx + fd->collapse - 1];
4050 	      tree purpose = size_int (idx);
4051 	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4052 	      if (TREE_CODE (c) != INTEGER_CST)
4053 		TREE_STATIC (arr) = 0;
4054 	    }
4055 
4056 	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4057 	  if (!TREE_STATIC (arr))
4058 	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4059 						    void_type_node, arr),
4060 				      true, NULL_TREE, true, GSI_SAME_STMT);
4061 	  t1 = build_fold_addr_expr (arr);
4062 	  t2 = NULL_TREE;
4063 	}
4064       else
4065 	{
4066 	  t2 = fold_convert (fd->iter_type, fd->loop.step);
4067 	  t1 = fd->loop.n2;
4068 	  t0 = fd->loop.n1;
4069 	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
4070 	    {
4071 	      tree innerc
4072 		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4073 				   OMP_CLAUSE__LOOPTEMP_);
4074 	      gcc_assert (innerc);
4075 	      t0 = OMP_CLAUSE_DECL (innerc);
4076 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4077 					OMP_CLAUSE__LOOPTEMP_);
4078 	      gcc_assert (innerc);
4079 	      t1 = OMP_CLAUSE_DECL (innerc);
4080 	    }
4081 	  if (POINTER_TYPE_P (TREE_TYPE (t0))
4082 	      && TYPE_PRECISION (TREE_TYPE (t0))
4083 		 != TYPE_PRECISION (fd->iter_type))
4084 	    {
4085 	      /* Avoid casting pointers to integer of a different size.  */
4086 	      tree itype = signed_type_for (type);
4087 	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4088 	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4089 	    }
4090 	  else
4091 	    {
4092 	      t1 = fold_convert (fd->iter_type, t1);
4093 	      t0 = fold_convert (fd->iter_type, t0);
4094 	    }
4095 	  if (bias)
4096 	    {
4097 	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4098 	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4099 	    }
4100 	}
4101       if (fd->iter_type == long_integer_type_node || fd->ordered)
4102 	{
4103 	  if (fd->chunk_size)
4104 	    {
4105 	      t = fold_convert (fd->iter_type, fd->chunk_size);
4106 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
4107 	      if (sched_arg)
4108 		{
4109 		  if (fd->ordered)
4110 		    t = build_call_expr (builtin_decl_explicit (start_fn),
4111 					 8, t0, t1, sched_arg, t, t3, t4,
4112 					 reductions, mem);
4113 		  else
4114 		    t = build_call_expr (builtin_decl_explicit (start_fn),
4115 					 9, t0, t1, t2, sched_arg, t, t3, t4,
4116 					 reductions, mem);
4117 		}
4118 	      else if (fd->ordered)
4119 		t = build_call_expr (builtin_decl_explicit (start_fn),
4120 				     5, t0, t1, t, t3, t4);
4121 	      else
4122 		t = build_call_expr (builtin_decl_explicit (start_fn),
4123 				     6, t0, t1, t2, t, t3, t4);
4124 	    }
4125 	  else if (fd->ordered)
4126 	    t = build_call_expr (builtin_decl_explicit (start_fn),
4127 				 4, t0, t1, t3, t4);
4128 	  else
4129 	    t = build_call_expr (builtin_decl_explicit (start_fn),
4130 				 5, t0, t1, t2, t3, t4);
4131 	}
4132       else
4133 	{
4134 	  tree t5;
4135 	  tree c_bool_type;
4136 	  tree bfn_decl;
4137 
4138 	  /* The GOMP_loop_ull_*start functions have additional boolean
4139 	     argument, true for < loops and false for > loops.
4140 	     In Fortran, the C bool type can be different from
4141 	     boolean_type_node.  */
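	  /* For illustration (a sketch), GOMP_loop_ull_dynamic_start is
	     declared in libgomp roughly as
		bool GOMP_loop_ull_dynamic_start (bool up,
						  unsigned long long start,
						  unsigned long long end,
						  unsigned long long incr,
						  unsigned long long chunk,
						  unsigned long long *istart,
						  unsigned long long *iend);
	     hence the extra T5 argument built below.  */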
4142 	  bfn_decl = builtin_decl_explicit (start_fn);
4143 	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4144 	  t5 = build_int_cst (c_bool_type,
4145 			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
4146 	  if (fd->chunk_size)
4147 	    {
4148 	      tree bfn_decl = builtin_decl_explicit (start_fn);
4149 	      t = fold_convert (fd->iter_type, fd->chunk_size);
4150 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
4151 	      if (sched_arg)
4152 		t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4153 				     t, t3, t4, reductions, mem);
4154 	      else
4155 		t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4156 	    }
4157 	  else
4158 	    t = build_call_expr (builtin_decl_explicit (start_fn),
4159 				 6, t5, t0, t1, t2, t3, t4);
4160 	}
4161     }
4162   if (TREE_TYPE (t) != boolean_type_node)
4163     t = fold_build2 (NE_EXPR, boolean_type_node,
4164 		     t, build_int_cst (TREE_TYPE (t), 0));
4165   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4166 				true, GSI_SAME_STMT);
4167   if (arr && !TREE_STATIC (arr))
4168     {
4169       tree clobber = build_clobber (TREE_TYPE (arr));
4170       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4171 			 GSI_SAME_STMT);
4172     }
4173   if (fd->have_pointer_condtemp)
4174     expand_omp_build_assign (&gsi, condtemp, memv, false);
4175   if (fd->have_reductemp)
4176     {
4177       gimple *g = gsi_stmt (gsi);
4178       gsi_remove (&gsi, true);
4179       release_ssa_name (gimple_assign_lhs (g));
4180 
4181       entry_bb = region->entry;
4182       gsi = gsi_last_nondebug_bb (entry_bb);
4183 
4184       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4185     }
4186   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4187 
4188   /* Remove the GIMPLE_OMP_FOR statement.  */
4189   gsi_remove (&gsi, true);
4190 
4191   if (gsi_end_p (gsif))
4192     gsif = gsi_after_labels (gsi_bb (gsif));
4193   gsi_next (&gsif);
4194 
4195   /* Iteration setup for sequential loop goes in L0_BB.  */
4196   tree startvar = fd->loop.v;
4197   tree endvar = NULL_TREE;
4198 
4199   if (gimple_omp_for_combined_p (fd->for_stmt))
4200     {
4201       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4202 		  && gimple_omp_for_kind (inner_stmt)
4203 		     == GF_OMP_FOR_KIND_SIMD);
4204       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4205 				     OMP_CLAUSE__LOOPTEMP_);
4206       gcc_assert (innerc);
4207       startvar = OMP_CLAUSE_DECL (innerc);
4208       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4209 				OMP_CLAUSE__LOOPTEMP_);
4210       gcc_assert (innerc);
4211       endvar = OMP_CLAUSE_DECL (innerc);
4212     }
4213 
4214   gsi = gsi_start_bb (l0_bb);
4215   t = istart0;
4216   if (fd->ordered && fd->collapse == 1)
4217     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4218 		     fold_convert (fd->iter_type, fd->loop.step));
4219   else if (bias)
4220     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4221   if (fd->ordered && fd->collapse == 1)
4222     {
4223       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4224 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4225 			 fd->loop.n1, fold_convert (sizetype, t));
4226       else
4227 	{
4228 	  t = fold_convert (TREE_TYPE (startvar), t);
4229 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4230 			   fd->loop.n1, t);
4231 	}
4232     }
4233   else
4234     {
4235       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4236 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4237       t = fold_convert (TREE_TYPE (startvar), t);
4238     }
4239   t = force_gimple_operand_gsi (&gsi, t,
4240 				DECL_P (startvar)
4241 				&& TREE_ADDRESSABLE (startvar),
4242 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4243   assign_stmt = gimple_build_assign (startvar, t);
4244   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4245   if (cond_var)
4246     {
4247       tree itype = TREE_TYPE (cond_var);
4248       /* For lastprivate(conditional:) itervar, we need some iteration
4249 	 counter that starts at unsigned non-zero and increases.
4250 	 Prefer as few IVs as possible, so if we can use startvar
4251 	 itself, use that, or startvar + constant (those would be
4252 	 incremented with step), and as a last resort use s0 + 1,
4253 	 incremented by 1 on each iteration.  */
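      /* Illustration: for "for (i = 4; i < n; i++)" startvar itself can
	 serve as the counter (it is always nonzero); for n1 == 0 the
	 code below uses startvar + 1 instead.  */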
4254       if ((fd->ordered && fd->collapse == 1)
4255 	  || bias
4256 	  || POINTER_TYPE_P (type)
4257 	  || TREE_CODE (fd->loop.n1) != INTEGER_CST
4258 	  || fd->loop.cond_code != LT_EXPR)
4259 	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4260 			 build_int_cst (itype, 1));
4261       else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4262 	t = fold_convert (itype, t);
4263       else
4264 	{
4265 	  tree c = fold_convert (itype, fd->loop.n1);
4266 	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4267 	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4268 	}
4269       t = force_gimple_operand_gsi (&gsi, t, false,
4270 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
4271       assign_stmt = gimple_build_assign (cond_var, t);
4272       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4273     }
4274 
4275   t = iend0;
4276   if (fd->ordered && fd->collapse == 1)
4277     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4278 		     fold_convert (fd->iter_type, fd->loop.step));
4279   else if (bias)
4280     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4281   if (fd->ordered && fd->collapse == 1)
4282     {
4283       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4284 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4285 			 fd->loop.n1, fold_convert (sizetype, t));
4286       else
4287 	{
4288 	  t = fold_convert (TREE_TYPE (startvar), t);
4289 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4290 			   fd->loop.n1, t);
4291 	}
4292     }
4293   else
4294     {
4295       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4296 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4297       t = fold_convert (TREE_TYPE (startvar), t);
4298     }
4299   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4300 				   false, GSI_CONTINUE_LINKING);
4301   if (endvar)
4302     {
4303       assign_stmt = gimple_build_assign (endvar, iend);
4304       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4305       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4306 	assign_stmt = gimple_build_assign (fd->loop.v, iend);
4307       else
4308 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4309       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4310     }
4311   /* Handle linear clause adjustments.  */
4312   tree itercnt = NULL_TREE;
4313   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4314     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4315 	 c; c = OMP_CLAUSE_CHAIN (c))
4316       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4317 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4318 	{
4319 	  tree d = OMP_CLAUSE_DECL (c);
4320 	  tree t = d, a, dest;
4321 	  if (omp_privatize_by_reference (t))
4322 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4323 	  tree type = TREE_TYPE (t);
4324 	  if (POINTER_TYPE_P (type))
4325 	    type = sizetype;
4326 	  dest = unshare_expr (t);
4327 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4328 	  expand_omp_build_assign (&gsif, v, t);
4329 	  if (itercnt == NULL_TREE)
4330 	    {
4331 	      itercnt = startvar;
4332 	      tree n1 = fd->loop.n1;
4333 	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4334 		{
4335 		  itercnt
4336 		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4337 				    itercnt);
4338 		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
4339 		}
4340 	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4341 				     itercnt, n1);
4342 	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4343 				     itercnt, fd->loop.step);
4344 	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4345 						  NULL_TREE, false,
4346 						  GSI_CONTINUE_LINKING);
4347 	    }
4348 	  a = fold_build2 (MULT_EXPR, type,
4349 			   fold_convert (type, itercnt),
4350 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4351 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4352 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4353 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4354 					false, GSI_CONTINUE_LINKING);
4355 	  expand_omp_build_assign (&gsi, dest, t, true);
4356 	}
4357   if (fd->collapse > 1)
4358     expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4359 
4360   if (fd->ordered)
4361     {
4362       /* Until now, the counts array contained the number of iterations
4363 	 (or a variable holding it) for the ith loop.  From now on, we need
4364 	 those counts only for collapsed loops, and only for the 2nd
4365 	 till the last collapsed one.  Move those one element earlier,
4366 	 we'll use counts[fd->collapse - 1] for the first source/sink
4367 	 iteration counter and so on and counts[fd->ordered]
4368 	 as the array holding the current counter values for
4369 	 depend(source).  */
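      /* Illustration (a sketch): with collapse(2) ordered(4), the count
	 of the second collapsed loop moves from counts[1] to counts[0];
	 counts[1], counts[2] and counts[3] are then reused as the
	 source/sink iteration counters of the ordered dimensions, and
	 counts[4] is the array variable holding the current counter
	 values for depend(source).  */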
      if (fd->collapse > 1)
	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
      if (broken_loop)
	{
	  int i;
	  for (i = fd->collapse; i < fd->ordered; i++)
	    {
	      tree type = TREE_TYPE (fd->loops[i].v);
	      tree this_cond
		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
			       fold_convert (type, fd->loops[i].n1),
			       fold_convert (type, fd->loops[i].n2));
	      if (!integer_onep (this_cond))
		break;
	    }
	  if (i < fd->ordered)
	    {
	      cont_bb
		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
	      make_edge (cont_bb, l1_bb, 0);
	      l2_bb = create_empty_bb (cont_bb);
	      broken_loop = false;
	    }
	}
      expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
      cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
					      ordered_lastprivate);
      if (counts[fd->collapse - 1])
	{
	  gcc_assert (fd->collapse == 1);
	  gsi = gsi_last_bb (l0_bb);
	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
				   istart0, true);
	  if (cont_bb)
	    {
	      gsi = gsi_last_bb (cont_bb);
	      t = fold_build2 (PLUS_EXPR, fd->iter_type,
			       counts[fd->collapse - 1],
			       build_int_cst (fd->iter_type, 1));
	      expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
	      tree aref = build4 (ARRAY_REF, fd->iter_type,
				  counts[fd->ordered], size_zero_node,
				  NULL_TREE, NULL_TREE);
	      expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
	    }
	  t = counts[fd->collapse - 1];
	}
      else if (fd->collapse > 1)
	t = fd->loop.v;
      else
	{
	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
			   fd->loops[0].v, fd->loops[0].n1);
	  t = fold_convert (fd->iter_type, t);
	}
      gsi = gsi_last_bb (l0_bb);
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_zero_node, NULL_TREE, NULL_TREE);
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      expand_omp_build_assign (&gsi, aref, t, true);
    }

  if (!broken_loop)
    {
      /* Code to control the increment and predicate for the sequential
	 loop goes in the CONT_BB.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
	{
	  tree itype = TREE_TYPE (cond_var);
	  tree t2;
	  if ((fd->ordered && fd->collapse == 1)
	       || bias
	       || POINTER_TYPE_P (type)
	       || TREE_CODE (fd->loop.n1) != INTEGER_CST
	       || fd->loop.cond_code != LT_EXPR)
	    t2 = build_int_cst (itype, 1);
	  else
	    t2 = fold_convert (itype, fd->loop.step);
	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
					 NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (cond_var, t2);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, fd->loop.step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
	    {
	      tree tem;
	      if (fd->collapse > 1)
		tem = fd->loop.v;
	      else
		{
		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
				     fd->loops[0].v, fd->loops[0].n1);
		  tem = fold_convert (fd->iter_type, tem);
		}
	      tree aref = build4 (ARRAY_REF, fd->iter_type,
				  counts[fd->ordered], size_zero_node,
				  NULL_TREE, NULL_TREE);
	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
					      true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi, aref, tem);
	    }

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
		      iend);
	  gcond *cond_stmt = gimple_build_cond_empty (t);
	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);

      /* Emit code to get the next parallel iteration in L2_BB.  */
      gsi = gsi_start_bb (l2_bb);

      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
			   build_fold_addr_expr (istart0),
			   build_fold_addr_expr (iend0));
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      if (TREE_TYPE (t) != boolean_type_node)
	t = fold_build2 (NE_EXPR, boolean_type_node,
			 t, build_int_cst (TREE_TYPE (t), 0));
      gcond *cond_stmt = gimple_build_cond_empty (t);
      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
    }

  /* Add the loop cleanup function.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
  gcall *call_stmt = gimple_build_call (t, 0);
  if (fd->ordered)
    {
      tree arr = counts[fd->ordered];
      tree clobber = build_clobber (TREE_TYPE (arr));
      gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
			GSI_SAME_STMT);
    }
  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    {
      gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
      if (fd->have_reductemp)
	{
	  gimple *g = gimple_build_assign (reductions, NOP_EXPR,
					   gimple_call_lhs (call_stmt));
	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
	}
    }
  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      gimple_seq phis;

      e = find_edge (cont_bb, l3_bb);
      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);

      phis = phi_nodes (l3_bb);
      for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
		   PHI_ARG_DEF_FROM_EDGE (phi, e));
	}
      remove_edge (e);

      make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
      e = find_edge (cont_bb, l1_bb);
      if (e == NULL)
	{
	  e = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (e->dest) == l1_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (e);
	  e = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (e);
	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	e->flags = EDGE_TRUE_VALUE;
      if (e)
	{
	  e->probability
	    = profile_probability::guessed_always ().apply_scale (7, 8);
	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
	}
      else
	{
	  e = find_edge (cont_bb, l2_bb);
	  e->flags = EDGE_FALLTHRU;
	}
      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);

      if (gimple_in_ssa_p (cfun))
	{
	  /* Add phis to the outer loop that connect to the phis in the inner,
	     original loop, and move the loop entry value of the inner phi to
	     the loop entry value of the outer phi.  */
	  gphi_iterator psi;
	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
	    {
	      location_t locus;
	      gphi *nphi;
	      gphi *exit_phi = psi.phi ();

	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
		continue;

	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);

	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
	      edge latch_to_l1 = find_edge (latch, l1_bb);
	      gphi *inner_phi
		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);

	      tree t = gimple_phi_result (exit_phi);
	      tree new_res = copy_ssa_name (t, NULL);
	      nphi = create_phi_node (new_res, l0_bb);

	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
	      add_phi_arg (nphi, t, entry_to_l0, locus);

	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);

	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
	    }
	}

      set_immediate_dominator (CDI_DOMINATORS, l2_bb,
			       recompute_dominator (CDI_DOMINATORS, l2_bb));
      set_immediate_dominator (CDI_DOMINATORS, l3_bb,
			       recompute_dominator (CDI_DOMINATORS, l3_bb));
      set_immediate_dominator (CDI_DOMINATORS, l0_bb,
			       recompute_dominator (CDI_DOMINATORS, l0_bb));
      set_immediate_dominator (CDI_DOMINATORS, l1_bb,
			       recompute_dominator (CDI_DOMINATORS, l1_bb));

      /* We enter expand_omp_for_generic with a loop.  This original loop may
	 have its own loop struct, or it may be part of an outer loop struct
	 (which may be the fake loop).  */
      class loop *outer_loop = entry_bb->loop_father;
      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;

      add_bb_to_loop (l2_bb, outer_loop);

      /* We've added a new loop around the original loop.  Allocate the
	 corresponding loop struct.  */
      class loop *new_loop = alloc_loop ();
      new_loop->header = l0_bb;
      new_loop->latch = l2_bb;
      add_loop (new_loop, outer_loop);

      /* Allocate a loop structure for the original loop unless we already
	 had one.  */
      if (!orig_loop_has_loop_struct
	  && !gimple_omp_for_combined_p (fd->for_stmt))
	{
	  class loop *orig_loop = alloc_loop ();
	  orig_loop->header = l1_bb;
	  /* The loop may have multiple latches.  */
	  add_loop (orig_loop, new_loop);
	}
    }
}

/* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
   compute the needed allocation size: for !ALLOC, of the team
   allocations, and for ALLOC, of the thread allocation.  SZ is the
   initial size needed for other purposes, ALLOC_ALIGN the guaranteed
   alignment of the allocation in bytes, and CNT the number of elements
   of each array: for !ALLOC this is omp_get_num_threads (), for ALLOC
   the number of iterations handled by the current thread.  If PTR is
   non-NULL, it is the start of the allocation and this routine shall
   assign to OMP_CLAUSE_DECL (c) of those _scantemp_ clauses pointers
   to the corresponding arrays.  */
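
/* An illustrative size computation (type sizes invented here, not taken
   from any caller): for SZ = 4, ALLOC_ALIGN = 8 and two matching
   _scantemp_ clauses whose pointee types are double (size 8, align 8)
   and int (size 4, align 4), the sizing pass (PTR == NULL) first rounds
   SZ up to 8 so that the array of doubles starts aligned, then
   accumulates the per-element sizes, returning 8 + CNT * (8 + 4)
   bytes.  */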

static tree
expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
			   unsigned HOST_WIDE_INT alloc_align, tree cnt,
			   gimple_stmt_iterator *gsi, bool alloc)
{
  tree eltsz = NULL_TREE;
  unsigned HOST_WIDE_INT preval = 0;
  if (ptr && sz)
    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
		       ptr, size_int (sz));
  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	&& !OMP_CLAUSE__SCANTEMP__CONTROL (c)
	&& (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
      {
	tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
	unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
	  {
	    unsigned HOST_WIDE_INT szl
	      = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
	    szl = least_bit_hwi (szl);
	    if (szl)
	      al = MIN (al, szl);
	  }
	if (ptr == NULL_TREE)
	  {
	    if (eltsz == NULL_TREE)
	      eltsz = TYPE_SIZE_UNIT (pointee_type);
	    else
	      eltsz = size_binop (PLUS_EXPR, eltsz,
				  TYPE_SIZE_UNIT (pointee_type));
	  }
	if (preval == 0 && al <= alloc_align)
	  {
	    unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
	    sz += diff;
	    if (diff && ptr)
	      ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
				 ptr, size_int (diff));
	  }
	else if (al > preval)
	  {
	    if (ptr)
	      {
		ptr = fold_convert (pointer_sized_int_node, ptr);
		ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
				   build_int_cst (pointer_sized_int_node,
						  al - 1));
		ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
				   build_int_cst (pointer_sized_int_node,
						  -(HOST_WIDE_INT) al));
		ptr = fold_convert (ptr_type_node, ptr);
	      }
	    else
	      sz += al - 1;
	  }
	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
	  preval = al;
	else
	  preval = 1;
	if (ptr)
	  {
	    expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
	    ptr = OMP_CLAUSE_DECL (c);
	    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
			       size_binop (MULT_EXPR, cnt,
					   TYPE_SIZE_UNIT (pointee_type)));
	  }
      }

  if (ptr == NULL_TREE)
    {
      eltsz = size_binop (MULT_EXPR, eltsz, cnt);
      if (sz)
	eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
      return eltsz;
    }
  else
    return ptr;
}

/* Return the last _looptemp_ clause if one has been created for
   lastprivate on distribute parallel for{, simd} or taskloop.
   FD is the loop data and INNERC should be the second _looptemp_
   clause (the one holding the end of the range).
   This is followed by collapse - 1 _looptemp_ clauses for counts[1]
   and up, and for triangular loops by 4 further _looptemp_ clauses
   (one for counts[0], one for first_inner_iterations, one for factor
   and one for adjn1).  After this there is optionally one more
   _looptemp_ clause that this function returns.  */
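
/* For instance (a hypothetical clause chain, for illustration only):
   for a rectangular collapse(2) taskloop with lastprivate, INNERC is
   the second _looptemp_ (end of range), preceded by the one holding
   the start; it is followed by one _looptemp_ for counts[1], and the
   clause after that, if present, is the one returned here.  */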

static tree
find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
{
  gcc_assert (innerc);
  int count = fd->collapse - 1;
  if (fd->non_rect
      && fd->last_nonrect == fd->first_nonrect + 1
      && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
    count += 4;
  for (int i = 0; i < count; i++)
    {
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
    }
  return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
			  OMP_CLAUSE__LOOPTEMP_);
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
*/
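
/* A worked instance of the partitioning above (numbers invented for
   illustration): for n = 10 iterations and nthreads = 4, q = 2 and
   tt = 2, so threads 0 and 1 execute q + 1 = 3 iterations each while
   threads 2 and 3 execute 2, giving the ranges [0,3), [3,6), [6,8)
   and [8,10) -- each iteration is assigned exactly once and the
   per-thread counts differ by at most one.  */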

static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
  gimple_stmt_iterator gsi, gsip;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  gsip = gsi;
  gsi_prev (&gsip);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
						n1, n2);
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = profile_probability::very_likely ();
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  if (fd->lastprivate_conditional)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
	condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp
      /* For scan, we don't want to reinitialize condtemp before the
	 second loop.  */
      || (fd->have_pointer_condtemp && !fd->have_scantemp)
      || fd->have_nonctrl_scantemp)
    {
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
				(HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      unsigned HOST_WIDE_INT condtemp_sz = 0;
      unsigned HOST_WIDE_INT alloc_align = 0;
      if (fd->have_reductemp)
	{
	  gcc_assert (!fd->have_nonctrl_scantemp);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
	  reductions = OMP_CLAUSE_DECL (c);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (c) = reductions;
	  gsi2 = gsi_for_stmt (g);
	}
      else
	{
	  if (gsi_end_p (gsip))
	    gsi2 = gsi_after_labels (region->entry);
	  else
	    gsi2 = gsip;
	  reductions = null_pointer_node;
	}
      if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
	{
	  tree type;
	  if (fd->have_pointer_condtemp)
	    type = TREE_TYPE (condtemp);
	  else
	    type = ptr_type_node;
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  unsigned HOST_WIDE_INT sz = 0;
	  tree size = NULL_TREE;
	  if (fd->have_pointer_condtemp)
	    {
	      sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
	      sz *= fd->lastprivate_conditional;
	      condtemp_sz = sz;
	    }
	  if (fd->have_nonctrl_scantemp)
	    {
	      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	      gimple *g = gimple_build_call (nthreads, 0);
	      nthreads = create_tmp_var (integer_type_node);
	      gimple_call_set_lhs (g, nthreads);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      nthreads = fold_convert (sizetype, nthreads);
	      alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
	      size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
						alloc_align, nthreads, NULL,
						false);
	      size = fold_convert (type, size);
	    }
	  else
	    size = build_int_cst (type, sz);
	  expand_omp_build_assign (&gsi2, memv, size, false);
	  mem = build_fold_addr_expr (memv);
	}
      tree t
	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
			   9, t1, t2, t2, t3, t1, null_pointer_node,
			   null_pointer_node, reductions, mem);
      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
      if (fd->have_pointer_condtemp)
	expand_omp_build_assign (&gsi2, condtemp, memv, false);
      if (fd->have_nonctrl_scantemp)
	{
	  tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
	  expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
				     alloc_align, nthreads, &gsi2, false);
	}
      if (fd->have_reductemp)
	{
	  gsi_remove (&gsi2, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_nondebug_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_nondebug_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->have_nonctrl_scantemp)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      tree cnt = create_tmp_var (sizetype);
      gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
      tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
					   alloc_align, cnt, NULL, true);
      tree size = create_tmp_var (sizetype);
      expand_omp_build_assign (&gsi, size, sz, false);
      tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
			      size, size_int (16384));
      expand_omp_build_assign (&gsi, controlb, cmp);
      g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
			     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      fourth_bb = split_block (third_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fourth_bb);
      /* FIXME: Once we have allocators, this should use allocator.  */
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
				 &gsi, true);
      gsi_prev (&gsi);
      g = gsi_stmt (gsi);
      fifth_bb = split_block (fourth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fifth_bb);

      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__ALLOC (c))
	  {
	    tree tmp = create_tmp_var (sizetype);
	    tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
	    g = gimple_build_assign (tmp, MULT_EXPR, cnt,
				     TYPE_SIZE_UNIT (pointee_type));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	    g = gimple_build_call (alloca_decl, 2, tmp,
				   size_int (TYPE_ALIGN (pointee_type)));
	    gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  }

      sixth_bb = split_block (fifth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (sixth_bb);
    }

  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  innerc = find_lastprivate_looptemp (fd, innerc);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (cond_var)
    {
      tree itype = TREE_TYPE (cond_var);
      /* For the lastprivate(conditional:) itervar, we need an iteration
	 counter that starts at a non-zero unsigned value and increases.
	 Prefer as few IVs as possible, so if we can use startvar
	 itself, use that, or startvar + constant (those would be
	 incremented with step), and as a last resort use s0 + 1,
	 incremented by 1 on each iteration.  */
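      /* Illustration (constants invented here): for (V = 5; V < N; V++)
	 can use the start value directly, since it is positive and
	 increasing; for (V = -3; V < N; V++) we add 1 - (-3) = 4 so the
	 counter begins at 1; pointer iterators or non-LT conditions
	 fall back to s0 + 1.  */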
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR)
	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
			 build_int_cst (itype, 1));
      else if (tree_int_cst_sgn (n1) == 1)
	t = fold_convert (itype, t);
      else
	{
	  tree c = fold_convert (itype, n1);
	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
	}
      t = force_gimple_operand_gsi (&gsi, t, false,
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      assign_stmt = gimple_build_assign (cond_var, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  tree *nonrect_bounds = NULL;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  tree t = d, a, dest;
	  if (omp_privatize_by_reference (t))
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  expand_omp_build_assign (&gsi, dest, t, true);
	}
  if (fd->collapse > 1)
    {
      if (fd->non_rect)
	{
	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
	}
      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
				startvar);
    }

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
	{
	  tree itype = TREE_TYPE (cond_var);
	  tree t2;
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR)
	    t2 = build_int_cst (itype, 1);
	  else
	    t2 = fold_convert (itype, step);
	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
					 NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (cond_var, t2);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
						   cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      if (fd->have_reductemp
	  || ((fd->have_pointer_condtemp || fd->have_scantemp)
	      && !fd->have_nonctrl_scantemp))
	{
	  tree fn;
	  if (t)
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
	  else
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
	  gcall *g = gimple_build_call (fn, 0);
	  if (t)
	    {
	      gimple_call_set_lhs (g, t);
	      if (fd->have_reductemp)
		gsi_insert_after (&gsi, gimple_build_assign (reductions,
							     NOP_EXPR, t),
				  GSI_SAME_STMT);
	    }
	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
	}
      else
	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  else if ((fd->have_pointer_condtemp || fd->have_scantemp)
	   && !fd->have_nonctrl_scantemp)
    {
      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
      gcall *g = gimple_build_call (fn, 0);
      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
    }
  if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
				     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit1_bb = split_block (exit_bb, g)->dest;
      gsi = gsi_after_labels (exit1_bb);
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit2_bb = split_block (exit1_bb, g)->dest;
      gsi = gsi_after_labels (exit2_bb);
      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit3_bb = split_block (exit2_bb, g)->dest;
      gsi = gsi_after_labels (exit3_bb);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
  if (fourth_bb)
    {
      ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (third_bb, fourth_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (fourth_bb, fifth_bb);
      redirect_edge_and_branch (ep, sixth_bb);
    }
  else
    sixth_bb = third_bb;
  find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
  if (exit1_bb)
    {
      ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (exit_bb, exit1_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (exit1_bb, exit2_bb);
      redirect_edge_and_branch (ep, exit3_bb);
    }

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  if (fourth_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
      set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
    }
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  if (exit1_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
      set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
    }

  class loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* Return phi in E->DEST with ARG on edge E.  */

static gphi *
find_phi_with_arg_on_edge (tree arg, edge e)
{
  basic_block bb = e->dest;

  for (gphi_iterator gpi = gsi_start_phis (bb);
       !gsi_end_p (gpi);
       gsi_next (&gpi))
    {
      gphi *phi = gpi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
	return phi;
    }

  return NULL;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	trip = 0;
	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
					      here so that V is defined
					      if the loop is not entered
    L0:
	s0 = (trip * nthreads + threadid) * CHUNK;
	e0 = min (s0 + CHUNK, n);
	if (s0 < n) goto L1; else goto L4;
    L1:
	V = s0 * STEP + N1;
	e = e0 * STEP + N1;
    L2:
	BODY;
	V += STEP;
	if (V cond e) goto L2; else goto L3;
    L3:
	trip += 1;
	goto L0;
    L4:
*/
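
/* A worked instance of the chunked schedule above (numbers invented
   for illustration): for n = 10, nthreads = 2 and CHUNK = 2, thread 0
   runs trips 0, 1 and 2 covering [0,2), [4,6) and [8,10), and thread 1
   runs trips 0 and 1 covering [2,4) and [6,8); on thread 1's third
   trip s0 = 10 is not less than n, so it exits to L4.  */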
5634 
5635 static void
expand_omp_for_static_chunk(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)5636 expand_omp_for_static_chunk (struct omp_region *region,
5637 			     struct omp_for_data *fd, gimple *inner_stmt)
5638 {
5639   tree n, s0, e0, e, t;
5640   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5641   tree type, itype, vmain, vback, vextra;
5642   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5643   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5644   gimple_stmt_iterator gsi, gsip;
5645   edge se;
5646   bool broken_loop = region->cont == NULL;
5647   tree *counts = NULL;
5648   tree n1, n2, step;
5649   tree reductions = NULL_TREE;
5650   tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5651 
5652   itype = type = TREE_TYPE (fd->loop.v);
5653   if (POINTER_TYPE_P (type))
5654     itype = signed_type_for (type);
5655 
5656   entry_bb = region->entry;
5657   se = split_block (entry_bb, last_stmt (entry_bb));
5658   entry_bb = se->src;
5659   iter_part_bb = se->dest;
5660   cont_bb = region->cont;
5661   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5662   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5663   gcc_assert (broken_loop
5664 	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5665   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5666   body_bb = single_succ (seq_start_bb);
5667   if (!broken_loop)
5668     {
5669       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5670 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5671       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5672       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5673     }
5674   exit_bb = region->exit;
5675 
5676   /* Trip and adjustment setup goes in ENTRY_BB.  */
5677   gsi = gsi_last_nondebug_bb (entry_bb);
5678   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5679   gsip = gsi;
5680   gsi_prev (&gsip);
5681 
5682   if (fd->collapse > 1)
5683     {
5684       int first_zero_iter = -1, dummy = -1;
5685       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5686 
5687       counts = XALLOCAVEC (tree, fd->collapse);
5688       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5689 				  fin_bb, first_zero_iter,
5690 				  dummy_bb, dummy, l2_dom_bb);
5691       t = NULL_TREE;
5692     }
5693   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5694     t = integer_one_node;
5695   else
5696     t = fold_binary (fd->loop.cond_code, boolean_type_node,
5697 		     fold_convert (type, fd->loop.n1),
5698 		     fold_convert (type, fd->loop.n2));
5699   if (fd->collapse == 1
5700       && TYPE_UNSIGNED (type)
5701       && (t == NULL_TREE || !integer_onep (t)))
5702     {
5703       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5704       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5705 				     true, GSI_SAME_STMT);
5706       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5707       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5708 				     true, GSI_SAME_STMT);
5709       gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5710 						n1, n2);
5711       se = split_block (entry_bb, cond_stmt);
5712       se->flags = EDGE_TRUE_VALUE;
5713       entry_bb = se->dest;
5714       se->probability = profile_probability::very_likely ();
5715       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5716       se->probability = profile_probability::very_unlikely ();
5717       if (gimple_in_ssa_p (cfun))
5718 	{
5719 	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5720 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5721 	       !gsi_end_p (gpi); gsi_next (&gpi))
5722 	    {
5723 	      gphi *phi = gpi.phi ();
5724 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5725 			   se, UNKNOWN_LOCATION);
5726 	    }
5727 	}
5728       gsi = gsi_last_bb (entry_bb);
5729     }
5730 
5731   if (fd->lastprivate_conditional)
5732     {
5733       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5734       tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5735       if (fd->have_pointer_condtemp)
5736 	condtemp = OMP_CLAUSE_DECL (c);
5737       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5738       cond_var = OMP_CLAUSE_DECL (c);
5739     }
5740   if (fd->have_reductemp || fd->have_pointer_condtemp)
5741     {
5742       tree t1 = build_int_cst (long_integer_type_node, 0);
5743       tree t2 = build_int_cst (long_integer_type_node, 1);
5744       tree t3 = build_int_cstu (long_integer_type_node,
5745 				(HOST_WIDE_INT_1U << 31) + 1);
5746       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5747       gimple_stmt_iterator gsi2 = gsi_none ();
5748       gimple *g = NULL;
5749       tree mem = null_pointer_node, memv = NULL_TREE;
5750       if (fd->have_reductemp)
5751 	{
5752 	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5753 	  reductions = OMP_CLAUSE_DECL (c);
5754 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5755 	  g = SSA_NAME_DEF_STMT (reductions);
5756 	  reductions = gimple_assign_rhs1 (g);
5757 	  OMP_CLAUSE_DECL (c) = reductions;
5758 	  gsi2 = gsi_for_stmt (g);
5759 	}
5760       else
5761 	{
5762 	  if (gsi_end_p (gsip))
5763 	    gsi2 = gsi_after_labels (region->entry);
5764 	  else
5765 	    gsi2 = gsip;
5766 	  reductions = null_pointer_node;
5767 	}
5768       if (fd->have_pointer_condtemp)
5769 	{
5770 	  tree type = TREE_TYPE (condtemp);
5771 	  memv = create_tmp_var (type);
5772 	  TREE_ADDRESSABLE (memv) = 1;
5773 	  unsigned HOST_WIDE_INT sz
5774 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5775 	  sz *= fd->lastprivate_conditional;
5776 	  expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5777 				   false);
5778 	  mem = build_fold_addr_expr (memv);
5779 	}
5780       tree t
5781 	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5782 			   9, t1, t2, t2, t3, t1, null_pointer_node,
5783 			   null_pointer_node, reductions, mem);
5784       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5785 				true, GSI_SAME_STMT);
5786       if (fd->have_pointer_condtemp)
5787 	expand_omp_build_assign (&gsi2, condtemp, memv, false);
5788       if (fd->have_reductemp)
5789 	{
5790 	  gsi_remove (&gsi2, true);
5791 	  release_ssa_name (gimple_assign_lhs (g));
5792 	}
5793     }
5794   switch (gimple_omp_for_kind (fd->for_stmt))
5795     {
5796     case GF_OMP_FOR_KIND_FOR:
5797       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5798       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5799       break;
5800     case GF_OMP_FOR_KIND_DISTRIBUTE:
5801       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5802       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5803       break;
5804     default:
5805       gcc_unreachable ();
5806     }
5807   nthreads = build_call_expr (nthreads, 0);
5808   nthreads = fold_convert (itype, nthreads);
5809   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5810 				       true, GSI_SAME_STMT);
5811   threadid = build_call_expr (threadid, 0);
5812   threadid = fold_convert (itype, threadid);
5813   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5814 				       true, GSI_SAME_STMT);
5815 
5816   n1 = fd->loop.n1;
5817   n2 = fd->loop.n2;
5818   step = fd->loop.step;
5819   if (gimple_omp_for_combined_into_p (fd->for_stmt))
5820     {
5821       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5822 				     OMP_CLAUSE__LOOPTEMP_);
5823       gcc_assert (innerc);
5824       n1 = OMP_CLAUSE_DECL (innerc);
5825       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5826 				OMP_CLAUSE__LOOPTEMP_);
5827       gcc_assert (innerc);
5828       n2 = OMP_CLAUSE_DECL (innerc);
5829     }
5830   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5831 				 true, NULL_TREE, true, GSI_SAME_STMT);
5832   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5833 				 true, NULL_TREE, true, GSI_SAME_STMT);
5834   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5835 				   true, NULL_TREE, true, GSI_SAME_STMT);
5836   tree chunk_size = fold_convert (itype, fd->chunk_size);
5837   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5838   chunk_size
5839     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5840 				GSI_SAME_STMT);
5841 
5842   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5843   t = fold_build2 (PLUS_EXPR, itype, step, t);
5844   t = fold_build2 (PLUS_EXPR, itype, t, n2);
5845   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5846   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5847     t = fold_build2 (TRUNC_DIV_EXPR, itype,
5848 		     fold_build1 (NEGATE_EXPR, itype, t),
5849 		     fold_build1 (NEGATE_EXPR, itype, step));
5850   else
5851     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5852   t = fold_convert (itype, t);
5853   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5854 				true, GSI_SAME_STMT);
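  /* N now holds the total number of iterations, computed as
     (n2 - n1 + step + adj) / step with adj being -1 for LT_EXPR and 1
     for GT_EXPR; e.g. for (V = 0; V < 10; V += 3) this yields
     (10 - 0 + 3 - 1) / 3 = 4.  */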
5855 
5856   trip_var = create_tmp_reg (itype, ".trip");
5857   if (gimple_in_ssa_p (cfun))
5858     {
5859       trip_init = make_ssa_name (trip_var);
5860       trip_main = make_ssa_name (trip_var);
5861       trip_back = make_ssa_name (trip_var);
5862     }
5863   else
5864     {
5865       trip_init = trip_var;
5866       trip_main = trip_var;
5867       trip_back = trip_var;
5868     }
5869 
5870   gassign *assign_stmt
5871     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5872   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5873 
5874   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5875   t = fold_build2 (MULT_EXPR, itype, t, step);
5876   if (POINTER_TYPE_P (type))
5877     t = fold_build_pointer_plus (n1, t);
5878   else
5879     t = fold_build2 (PLUS_EXPR, type, t, n1);
5880   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5881 				     true, GSI_SAME_STMT);
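  /* VEXTRA is the value the IV would have at the start of this
     thread's very first chunk, N1 + THREADID * CHUNK_SIZE * STEP; it
     stands in for fd->loop.v in the PHI fixups for iter_part_bb
     below.  */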
5882 
5883   /* Remove the GIMPLE_OMP_FOR.  */
5884   gsi_remove (&gsi, true);
5885 
5886   gimple_stmt_iterator gsif = gsi;
5887 
5888   /* Iteration space partitioning goes in ITER_PART_BB.  */
5889   gsi = gsi_last_bb (iter_part_bb);
5890 
5891   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5892   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5893   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5894   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5895 				 false, GSI_CONTINUE_LINKING);
5896 
5897   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5898   t = fold_build2 (MIN_EXPR, itype, t, n);
5899   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5900 				 false, GSI_CONTINUE_LINKING);
5901 
5902   t = build2 (LT_EXPR, boolean_type_node, s0, n);
5903   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
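  /* On each trip the current thread grabs chunk number
     TRIP * NTHREADS + THREADID, i.e. iterations [S0, E0); e.g. with 4
     threads and a chunk_size of 2, thread 1 executes iterations 2-3,
     10-11, 18-19, and so on.  */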
5904 
5905   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
5906   gsi = gsi_start_bb (seq_start_bb);
5907 
5908   tree startvar = fd->loop.v;
5909   tree endvar = NULL_TREE;
5910 
5911   if (gimple_omp_for_combined_p (fd->for_stmt))
5912     {
5913       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5914 		     ? gimple_omp_parallel_clauses (inner_stmt)
5915 		     : gimple_omp_for_clauses (inner_stmt);
5916       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5917       gcc_assert (innerc);
5918       startvar = OMP_CLAUSE_DECL (innerc);
5919       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5920 				OMP_CLAUSE__LOOPTEMP_);
5921       gcc_assert (innerc);
5922       endvar = OMP_CLAUSE_DECL (innerc);
5923       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5924 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5925 	{
5926 	  innerc = find_lastprivate_looptemp (fd, innerc);
5927 	  if (innerc)
5928 	    {
5929 	      /* If needed (distribute parallel for with lastprivate),
5930 		 propagate down the total number of iterations.  */
5931 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5932 				     fd->loop.n2);
5933 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5934 					    GSI_CONTINUE_LINKING);
5935 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5936 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5937 	    }
5938 	}
5939     }
5940 
5941   t = fold_convert (itype, s0);
5942   t = fold_build2 (MULT_EXPR, itype, t, step);
5943   if (POINTER_TYPE_P (type))
5944     {
5945       t = fold_build_pointer_plus (n1, t);
5946       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5947 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5948 	t = fold_convert (signed_type_for (type), t);
5949     }
5950   else
5951     t = fold_build2 (PLUS_EXPR, type, t, n1);
5952   t = fold_convert (TREE_TYPE (startvar), t);
5953   t = force_gimple_operand_gsi (&gsi, t,
5954 				DECL_P (startvar)
5955 				&& TREE_ADDRESSABLE (startvar),
5956 				NULL_TREE, false, GSI_CONTINUE_LINKING);
5957   assign_stmt = gimple_build_assign (startvar, t);
5958   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5959   if (cond_var)
5960     {
5961       tree itype = TREE_TYPE (cond_var);
5962       /* For lastprivate(conditional:) itervar, we need some iteration
5963 	 counter that starts at unsigned non-zero and increases.
5964 	 Prefer as few IVs as possible, so if we can use startvar
5965 	 itself, use that, or startvar + constant (those would be
5966 	 incremented with step), and as a last resort use s0 + 1,
5967 	 incremented by 1.  */
5968       if (POINTER_TYPE_P (type)
5969 	  || TREE_CODE (n1) != INTEGER_CST
5970 	  || fd->loop.cond_code != LT_EXPR)
5971 	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5972 			 build_int_cst (itype, 1));
5973       else if (tree_int_cst_sgn (n1) == 1)
5974 	t = fold_convert (itype, t);
5975       else
5976 	{
5977 	  tree c = fold_convert (itype, n1);
5978 	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5979 	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5980 	}
5981       t = force_gimple_operand_gsi (&gsi, t, false,
5982 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
5983       assign_stmt = gimple_build_assign (cond_var, t);
5984       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5985     }
5986 
5987   t = fold_convert (itype, e0);
5988   t = fold_build2 (MULT_EXPR, itype, t, step);
5989   if (POINTER_TYPE_P (type))
5990     {
5991       t = fold_build_pointer_plus (n1, t);
5992       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5993 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5994 	t = fold_convert (signed_type_for (type), t);
5995     }
5996   else
5997     t = fold_build2 (PLUS_EXPR, type, t, n1);
5998   t = fold_convert (TREE_TYPE (startvar), t);
5999   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6000 				false, GSI_CONTINUE_LINKING);
6001   if (endvar)
6002     {
6003       assign_stmt = gimple_build_assign (endvar, e);
6004       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6005       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6006 	assign_stmt = gimple_build_assign (fd->loop.v, e);
6007       else
6008 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6009       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6010     }
6011   /* Handle linear clause adjustments.  */
6012   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6013   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6014     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6015 	 c; c = OMP_CLAUSE_CHAIN (c))
6016       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6017 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6018 	{
6019 	  tree d = OMP_CLAUSE_DECL (c);
6020 	  tree t = d, a, dest;
6021 	  if (omp_privatize_by_reference (t))
6022 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6023 	  tree type = TREE_TYPE (t);
6024 	  if (POINTER_TYPE_P (type))
6025 	    type = sizetype;
6026 	  dest = unshare_expr (t);
6027 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
6028 	  expand_omp_build_assign (&gsif, v, t);
6029 	  if (itercnt == NULL_TREE)
6030 	    {
6031 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
6032 		{
6033 		  itercntbias
6034 		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6035 				   fold_convert (itype, fd->loop.n1));
6036 		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6037 					     itercntbias, step);
6038 		  itercntbias
6039 		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
6040 						NULL_TREE, true,
6041 						GSI_SAME_STMT);
6042 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6043 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6044 						      NULL_TREE, false,
6045 						      GSI_CONTINUE_LINKING);
6046 		}
6047 	      else
6048 		itercnt = s0;
6049 	    }
6050 	  a = fold_build2 (MULT_EXPR, type,
6051 			   fold_convert (type, itercnt),
6052 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6053 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6054 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6055 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6056 					false, GSI_CONTINUE_LINKING);
6057 	  expand_omp_build_assign (&gsi, dest, t, true);
6058 	}
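  /* The adjustment above sets each linear variable to its pre-loop
     value plus ITERCNT * OMP_CLAUSE_LINEAR_STEP, ITERCNT being the
     iteration number at which this chunk starts (S0, rebased via
     ITERCNTBIAS for combined loops).  */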
6059   if (fd->collapse > 1)
6060     expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6061 
6062   if (!broken_loop)
6063     {
6064       /* The code controlling the sequential loop goes in CONT_BB,
6065 	 replacing the GIMPLE_OMP_CONTINUE.  */
6066       gsi = gsi_last_nondebug_bb (cont_bb);
6067       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6068       vmain = gimple_omp_continue_control_use (cont_stmt);
6069       vback = gimple_omp_continue_control_def (cont_stmt);
6070 
6071       if (cond_var)
6072 	{
6073 	  tree itype = TREE_TYPE (cond_var);
6074 	  tree t2;
6075 	  if (POINTER_TYPE_P (type)
6076 	      || TREE_CODE (n1) != INTEGER_CST
6077 	      || fd->loop.cond_code != LT_EXPR)
6078 	    t2 = build_int_cst (itype, 1);
6079 	  else
6080 	    t2 = fold_convert (itype, step);
6081 	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6082 	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
6083 					 NULL_TREE, true, GSI_SAME_STMT);
6084 	  assign_stmt = gimple_build_assign (cond_var, t2);
6085 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6086 	}
6087 
6088       if (!gimple_omp_for_combined_p (fd->for_stmt))
6089 	{
6090 	  if (POINTER_TYPE_P (type))
6091 	    t = fold_build_pointer_plus (vmain, step);
6092 	  else
6093 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
6094 	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6095 	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6096 					  true, GSI_SAME_STMT);
6097 	  assign_stmt = gimple_build_assign (vback, t);
6098 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6099 
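	  /* With a chunk_size of 1 every chunk is a single iteration,
	     so the sequential loop never repeats; emit a constant
	     false (0 == 1) condition.  */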
6100 	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6101 	    t = build2 (EQ_EXPR, boolean_type_node,
6102 			build_int_cst (itype, 0),
6103 			build_int_cst (itype, 1));
6104 	  else
6105 	    t = build2 (fd->loop.cond_code, boolean_type_node,
6106 			DECL_P (vback) && TREE_ADDRESSABLE (vback)
6107 			? t : vback, e);
6108 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6109 	}
6110 
6111       /* Remove GIMPLE_OMP_CONTINUE.  */
6112       gsi_remove (&gsi, true);
6113 
6114       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6115 	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6116 
6117       /* Trip update code goes into TRIP_UPDATE_BB.  */
6118       gsi = gsi_start_bb (trip_update_bb);
6119 
6120       t = build_int_cst (itype, 1);
6121       t = build2 (PLUS_EXPR, itype, trip_main, t);
6122       assign_stmt = gimple_build_assign (trip_back, t);
6123       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6124     }
6125 
6126   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
6127   gsi = gsi_last_nondebug_bb (exit_bb);
6128   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6129     {
6130       t = gimple_omp_return_lhs (gsi_stmt (gsi));
6131       if (fd->have_reductemp || fd->have_pointer_condtemp)
6132 	{
6133 	  tree fn;
6134 	  if (t)
6135 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6136 	  else
6137 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6138 	  gcall *g = gimple_build_call (fn, 0);
6139 	  if (t)
6140 	    {
6141 	      gimple_call_set_lhs (g, t);
6142 	      if (fd->have_reductemp)
6143 		gsi_insert_after (&gsi, gimple_build_assign (reductions,
6144 							     NOP_EXPR, t),
6145 				  GSI_SAME_STMT);
6146 	    }
6147 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6148 	}
6149       else
6150 	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6151     }
6152   else if (fd->have_pointer_condtemp)
6153     {
6154       tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6155       gcall *g = gimple_build_call (fn, 0);
6156       gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6157     }
6158   gsi_remove (&gsi, true);
6159 
6160   /* Connect the new blocks.  */
6161   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6162   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6163 
6164   if (!broken_loop)
6165     {
6166       se = find_edge (cont_bb, body_bb);
6167       if (se == NULL)
6168 	{
6169 	  se = BRANCH_EDGE (cont_bb);
6170 	  gcc_assert (single_succ (se->dest) == body_bb);
6171 	}
6172       if (gimple_omp_for_combined_p (fd->for_stmt))
6173 	{
6174 	  remove_edge (se);
6175 	  se = NULL;
6176 	}
6177       else if (fd->collapse > 1)
6178 	{
6179 	  remove_edge (se);
6180 	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6181 	}
6182       else
6183 	se->flags = EDGE_TRUE_VALUE;
6184       find_edge (cont_bb, trip_update_bb)->flags
6185 	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6186 
6187       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6188 				iter_part_bb);
6189     }
6190 
6191   if (gimple_in_ssa_p (cfun))
6192     {
6193       gphi_iterator psi;
6194       gphi *phi;
6195       edge re, ene;
6196       edge_var_map *vm;
6197       size_t i;
6198 
6199       gcc_assert (fd->collapse == 1 && !broken_loop);
6200 
6201       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6202 	 remove arguments of the phi nodes in fin_bb.  We need to create
6203 	 appropriate phi nodes in iter_part_bb instead.  */
6204       se = find_edge (iter_part_bb, fin_bb);
6205       re = single_succ_edge (trip_update_bb);
6206       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6207       ene = single_succ_edge (entry_bb);
6208 
6209       psi = gsi_start_phis (fin_bb);
6210       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6211 	   gsi_next (&psi), ++i)
6212 	{
6213 	  gphi *nphi;
6214 	  location_t locus;
6215 
6216 	  phi = psi.phi ();
6217 	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6218 			       redirect_edge_var_map_def (vm), 0))
6219 	    continue;
6220 
6221 	  t = gimple_phi_result (phi);
6222 	  gcc_assert (t == redirect_edge_var_map_result (vm));
6223 
6224 	  if (!single_pred_p (fin_bb))
6225 	    t = copy_ssa_name (t, phi);
6226 
6227 	  nphi = create_phi_node (t, iter_part_bb);
6228 
6229 	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6230 	  locus = gimple_phi_arg_location_from_edge (phi, se);
6231 
6232 	  /* A special case -- fd->loop.v is not yet computed in
6233 	     iter_part_bb, we need to use vextra instead.  */
6234 	  if (t == fd->loop.v)
6235 	    t = vextra;
6236 	  add_phi_arg (nphi, t, ene, locus);
6237 	  locus = redirect_edge_var_map_location (vm);
6238 	  tree back_arg = redirect_edge_var_map_def (vm);
6239 	  add_phi_arg (nphi, back_arg, re, locus);
6240 	  edge ce = find_edge (cont_bb, body_bb);
6241 	  if (ce == NULL)
6242 	    {
6243 	      ce = BRANCH_EDGE (cont_bb);
6244 	      gcc_assert (single_succ (ce->dest) == body_bb);
6245 	      ce = single_succ_edge (ce->dest);
6246 	    }
6247 	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6248 	  gcc_assert (inner_loop_phi != NULL);
6249 	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6250 		       find_edge (seq_start_bb, body_bb), locus);
6251 
6252 	  if (!single_pred_p (fin_bb))
6253 	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6254 	}
6255       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6256       redirect_edge_var_map_clear (re);
6257       if (single_pred_p (fin_bb))
6258 	while (1)
6259 	  {
6260 	    psi = gsi_start_phis (fin_bb);
6261 	    if (gsi_end_p (psi))
6262 	      break;
6263 	    remove_phi_node (&psi, false);
6264 	  }
6265 
6266       /* Make phi node for trip.  */
6267       phi = create_phi_node (trip_main, iter_part_bb);
6268       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6269 		   UNKNOWN_LOCATION);
6270       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6271 		   UNKNOWN_LOCATION);
6272     }
6273 
6274   if (!broken_loop)
6275     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6276   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6277 			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6278   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6279 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
6280   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6281 			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6282   set_immediate_dominator (CDI_DOMINATORS, body_bb,
6283 			   recompute_dominator (CDI_DOMINATORS, body_bb));
6284 
6285   if (!broken_loop)
6286     {
6287       class loop *loop = body_bb->loop_father;
6288       class loop *trip_loop = alloc_loop ();
6289       trip_loop->header = iter_part_bb;
6290       trip_loop->latch = trip_update_bb;
6291       add_loop (trip_loop, iter_part_bb->loop_father);
6292 
6293       if (loop != entry_bb->loop_father)
6294 	{
6295 	  gcc_assert (loop->header == body_bb);
6296 	  gcc_assert (loop->latch == region->cont
6297 		      || single_pred (loop->latch) == region->cont);
6298 	  trip_loop->inner = loop;
6299 	  return;
6300 	}
6301 
6302       if (!gimple_omp_for_combined_p (fd->for_stmt))
6303 	{
6304 	  loop = alloc_loop ();
6305 	  loop->header = body_bb;
6306 	  if (collapse_bb == NULL)
6307 	    loop->latch = cont_bb;
6308 	  add_loop (loop, trip_loop);
6309 	}
6310     }
6311 }
6312 
6313 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
6314    loop.  Given parameters:
6315 
6316 	for (V = N1; V cond N2; V += STEP) BODY;
6317 
6318    where COND is "<" or ">", we generate pseudocode
6319 
6320 	V = N1;
6321 	goto L1;
6322     L0:
6323 	BODY;
6324 	V += STEP;
6325     L1:
6326 	if (V cond N2) goto L0; else goto L2;
6327     L2:
6328 
6329     For collapsed loops, emit the outer loops as scalar
6330     and only try to vectorize the innermost loop.  */
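
/* For example, given

	#pragma omp simd collapse(2)
	for (i = 0; i < N; i++)
	  for (j = 0; j < M; j++) BODY;

   only the innermost j loop is set up for vectorization (safelen,
   force_vectorize); the i loop around it stays scalar.  */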
6331 
6332 static void
6333 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6334 {
6335   tree type, t;
6336   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6337   gimple_stmt_iterator gsi;
6338   gimple *stmt;
6339   gcond *cond_stmt;
6340   bool broken_loop = region->cont == NULL;
6341   edge e, ne;
6342   tree *counts = NULL;
6343   int i;
6344   int safelen_int = INT_MAX;
6345   bool dont_vectorize = false;
6346   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6347 				  OMP_CLAUSE_SAFELEN);
6348   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6349 				  OMP_CLAUSE__SIMDUID_);
6350   tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6351 			      OMP_CLAUSE_IF);
6352   tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6353 				  OMP_CLAUSE_SIMDLEN);
6354   tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6355 				   OMP_CLAUSE__CONDTEMP_);
6356   tree n1, n2;
6357   tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6358 
6359   if (safelen)
6360     {
6361       poly_uint64 val;
6362       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6363       if (!poly_int_tree_p (safelen, &val))
6364 	safelen_int = 0;
6365       else
6366 	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6367       if (safelen_int == 1)
6368 	safelen_int = 0;
6369     }
6370   if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6371       || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6372     {
6373       safelen_int = 0;
6374       dont_vectorize = true;
6375     }
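  /* An if (simd: false) or simdlen (1) clause means the loop must run
     in scalar form; SAFELEN_INT 0 together with DONT_VECTORIZE
     arranges for that when the loop structure is created below.  */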
6376   type = TREE_TYPE (fd->loop.v);
6377   entry_bb = region->entry;
6378   cont_bb = region->cont;
6379   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6380   gcc_assert (broken_loop
6381 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6382   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6383   if (!broken_loop)
6384     {
6385       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6386       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6387       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6388       l2_bb = BRANCH_EDGE (entry_bb)->dest;
6389     }
6390   else
6391     {
6392       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6393       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6394       l2_bb = single_succ (l1_bb);
6395     }
6396   exit_bb = region->exit;
6397   l2_dom_bb = NULL;
6398 
6399   gsi = gsi_last_nondebug_bb (entry_bb);
6400 
6401   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6402   /* Not needed in SSA form right now.  */
6403   gcc_assert (!gimple_in_ssa_p (cfun));
6404   if (fd->collapse > 1
6405       && (gimple_omp_for_combined_into_p (fd->for_stmt)
6406 	  || broken_loop))
6407     {
6408       int first_zero_iter = -1, dummy = -1;
6409       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6410 
6411       counts = XALLOCAVEC (tree, fd->collapse);
6412       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6413 				  zero_iter_bb, first_zero_iter,
6414 				  dummy_bb, dummy, l2_dom_bb);
6415     }
6416   if (l2_dom_bb == NULL)
6417     l2_dom_bb = l1_bb;
6418 
6419   n1 = fd->loop.n1;
6420   n2 = fd->loop.n2;
6421   if (gimple_omp_for_combined_into_p (fd->for_stmt))
6422     {
6423       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6424 				     OMP_CLAUSE__LOOPTEMP_);
6425       gcc_assert (innerc);
6426       n1 = OMP_CLAUSE_DECL (innerc);
6427       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6428 				OMP_CLAUSE__LOOPTEMP_);
6429       gcc_assert (innerc);
6430       n2 = OMP_CLAUSE_DECL (innerc);
6431     }
6432   tree step = fd->loop.step;
6433   tree orig_step = step; /* May be different from step if is_simt.  */
6434 
6435   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6436 				  OMP_CLAUSE__SIMT_);
6437   if (is_simt)
6438     {
6439       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6440       is_simt = safelen_int > 1;
6441     }
6442   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6443   if (is_simt)
6444     {
6445       simt_lane = create_tmp_var (unsigned_type_node);
6446       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6447       gimple_call_set_lhs (g, simt_lane);
6448       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6449       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6450 				 fold_convert (TREE_TYPE (step), simt_lane));
6451       n1 = fold_convert (type, n1);
6452       if (POINTER_TYPE_P (type))
6453 	n1 = fold_build_pointer_plus (n1, offset);
6454       else
6455 	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6456 
6457       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
6458       if (fd->collapse > 1)
6459 	simt_maxlane = build_one_cst (unsigned_type_node);
6460       else if (safelen_int < omp_max_simt_vf ())
6461 	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6462       tree vf
6463 	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6464 					unsigned_type_node, 0);
6465       if (simt_maxlane)
6466 	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6467       vf = fold_convert (TREE_TYPE (step), vf);
6468       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6469     }
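  /* After the SIMT adjustment, lane L starts at N1 + L * ORIG_STEP and
     advances by ORIG_STEP * VF per iteration, so the lanes interleave
     across the iteration space; the overshoot is compensated by the
     'V -= STEP * (SIMT_VF - 1)' fixup emitted after the loop.  */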
6470 
6471   tree n2var = NULL_TREE;
6472   tree n2v = NULL_TREE;
6473   tree *nonrect_bounds = NULL;
6474   tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6475   if (fd->collapse > 1)
6476     {
6477       if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6478 	{
6479 	  if (fd->non_rect)
6480 	    {
6481 	      nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6482 	      memset (nonrect_bounds, 0,
6483 		      sizeof (tree) * (fd->last_nonrect + 1));
6484 	    }
6485 	  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6486 	  gcc_assert (entry_bb == gsi_bb (gsi));
6487 	  gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6488 	  gsi_prev (&gsi);
6489 	  entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6490 	  expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6491 				    NULL, n1);
6492 	  gsi = gsi_for_stmt (fd->for_stmt);
6493 	}
6494       if (broken_loop)
6495 	;
6496       else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6497 	{
6498 	  /* Compute in n2var the limit for the first innermost loop,
6499 	     i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6500 	     where cnt is how many iterations would the loop have if
6501 	     all further iterations were assigned to the current task.  */
6502 	  n2var = create_tmp_var (type);
6503 	  i = fd->collapse - 1;
6504 	  tree itype = TREE_TYPE (fd->loops[i].v);
6505 	  if (POINTER_TYPE_P (itype))
6506 	    itype = signed_type_for (itype);
6507 	  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6508 				     ? -1 : 1));
6509 	  t = fold_build2 (PLUS_EXPR, itype,
6510 			   fold_convert (itype, fd->loops[i].step), t);
6511 	  t = fold_build2 (PLUS_EXPR, itype, t,
6512 			   fold_convert (itype, fd->loops[i].n2));
6513 	  if (fd->loops[i].m2)
6514 	    {
6515 	      tree t2 = fold_convert (itype,
6516 				      fd->loops[i - fd->loops[i].outer].v);
6517 	      tree t3 = fold_convert (itype, fd->loops[i].m2);
6518 	      t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6519 	      t = fold_build2 (PLUS_EXPR, itype, t, t2);
6520 	    }
6521 	  t = fold_build2 (MINUS_EXPR, itype, t,
6522 			   fold_convert (itype, fd->loops[i].v));
6523 	  if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6524 	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
6525 			     fold_build1 (NEGATE_EXPR, itype, t),
6526 			     fold_build1 (NEGATE_EXPR, itype,
6527 					  fold_convert (itype,
6528 							fd->loops[i].step)));
6529 	  else
6530 	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6531 			     fold_convert (itype, fd->loops[i].step));
6532 	  t = fold_convert (type, t);
6533 	  tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6534 	  min_arg1 = create_tmp_var (type);
6535 	  expand_omp_build_assign (&gsi, min_arg1, t2);
6536 	  min_arg2 = create_tmp_var (type);
6537 	  expand_omp_build_assign (&gsi, min_arg2, t);
6538 	}
6539       else
6540 	{
6541 	  if (TREE_CODE (n2) == INTEGER_CST)
6542 	    {
6543 	      /* Indicate for lastprivate handling that at least one iteration
6544 		 has been performed, without wasting runtime.  */
6545 	      if (integer_nonzerop (n2))
6546 		expand_omp_build_assign (&gsi, fd->loop.v,
6547 					 fold_convert (type, n2));
6548 	      else
6549 		/* Indicate that no iteration has been performed.  */
6550 		expand_omp_build_assign (&gsi, fd->loop.v,
6551 					 build_one_cst (type));
6552 	    }
6553 	  else
6554 	    {
6555 	      expand_omp_build_assign (&gsi, fd->loop.v,
6556 				       build_zero_cst (type));
6557 	      expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6558 	    }
6559 	  for (i = 0; i < fd->collapse; i++)
6560 	    {
6561 	      t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6562 	      if (fd->loops[i].m1)
6563 		{
6564 		  tree t2
6565 		    = fold_convert (TREE_TYPE (t),
6566 				    fd->loops[i - fd->loops[i].outer].v);
6567 		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6568 		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6569 		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6570 		}
6571 	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6572 	      /* For normal non-combined collapsed loops just initialize
6573 		 the outermost iterator in the entry_bb.  */
6574 	      if (!broken_loop)
6575 		break;
6576 	    }
6577 	}
6578     }
6579   else
6580     expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6581   tree altv = NULL_TREE, altn2 = NULL_TREE;
6582   if (fd->collapse == 1
6583       && !broken_loop
6584       && TREE_CODE (orig_step) != INTEGER_CST)
6585     {
6586       /* The vectorizer currently punts on loops with non-constant steps
6587 	 for the main IV (can't compute number of iterations and gives up
6588 	 because of that).  Since for OpenMP loops the number of
6589 	 iterations can always be computed upfront, use an alternate IV
6590 	 as the loop iterator:
6591 	 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6592 	 for (i = n1, altv = 0; altv < altn2; altv++, i += step)  */
6593       altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6594       expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6595       tree itype = TREE_TYPE (fd->loop.v);
6596       if (POINTER_TYPE_P (itype))
6597 	itype = signed_type_for (itype);
6598       t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6599       t = fold_build2 (PLUS_EXPR, itype,
6600 		       fold_convert (itype, step), t);
6601       t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6602       t = fold_build2 (MINUS_EXPR, itype, t,
6603 		       fold_convert (itype, fd->loop.v));
6604       if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6605 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
6606 			 fold_build1 (NEGATE_EXPR, itype, t),
6607 			 fold_build1 (NEGATE_EXPR, itype,
6608 				      fold_convert (itype, step)));
6609       else
6610 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6611 			 fold_convert (itype, step));
6612       t = fold_convert (TREE_TYPE (altv), t);
6613       altn2 = create_tmp_var (TREE_TYPE (altv));
6614       expand_omp_build_assign (&gsi, altn2, t);
6615       tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6616       t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6617 				     true, GSI_SAME_STMT);
6618       t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6619       gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6620 					build_zero_cst (TREE_TYPE (altv)));
6621       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6622     }
6623   else if (fd->collapse > 1
6624 	   && !broken_loop
6625 	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
6626 	   && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6627     {
6628       altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6629       altn2 = create_tmp_var (TREE_TYPE (altv));
6630     }
6631   if (cond_var)
6632     {
6633       if (POINTER_TYPE_P (type)
6634 	  || TREE_CODE (n1) != INTEGER_CST
6635 	  || fd->loop.cond_code != LT_EXPR
6636 	  || tree_int_cst_sgn (n1) != 1)
6637 	expand_omp_build_assign (&gsi, cond_var,
6638 				 build_one_cst (TREE_TYPE (cond_var)));
6639       else
6640 	expand_omp_build_assign (&gsi, cond_var,
6641 				 fold_convert (TREE_TYPE (cond_var), n1));
6642     }
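  /* COND_VAR, the helper IV for lastprivate(conditional:), must start
     at a non-zero value and increase monotonically; it is seeded with
     1, or with N1 itself when the loop counts upwards from a positive
     integer constant.  */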
6643 
6644   /* Remove the GIMPLE_OMP_FOR statement.  */
6645   gsi_remove (&gsi, true);
6646 
6647   if (!broken_loop)
6648     {
6649       /* Code to control the increment goes in the CONT_BB.  */
6650       gsi = gsi_last_nondebug_bb (cont_bb);
6651       stmt = gsi_stmt (gsi);
6652       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6653 
6654       if (fd->collapse == 1
6655 	  || gimple_omp_for_combined_into_p (fd->for_stmt))
6656 	{
6657 	  if (POINTER_TYPE_P (type))
6658 	    t = fold_build_pointer_plus (fd->loop.v, step);
6659 	  else
6660 	    t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6661 	  expand_omp_build_assign (&gsi, fd->loop.v, t);
6662 	}
6663       else if (TREE_CODE (n2) != INTEGER_CST)
6664 	expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6665       if (altv)
6666 	{
6667 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6668 			   build_one_cst (TREE_TYPE (altv)));
6669 	  expand_omp_build_assign (&gsi, altv, t);
6670 	}
6671 
6672       if (fd->collapse > 1)
6673 	{
6674 	  i = fd->collapse - 1;
6675 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6676 	    {
6677 	      t = fold_convert (sizetype, fd->loops[i].step);
6678 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
6679 	    }
6680 	  else
6681 	    {
6682 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
6683 				fd->loops[i].step);
6684 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6685 			       fd->loops[i].v, t);
6686 	    }
6687 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6688 	}
6689       if (cond_var)
6690 	{
6691 	  if (POINTER_TYPE_P (type)
6692 	      || TREE_CODE (n1) != INTEGER_CST
6693 	      || fd->loop.cond_code != LT_EXPR
6694 	      || tree_int_cst_sgn (n1) != 1)
6695 	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6696 			     build_one_cst (TREE_TYPE (cond_var)));
6697 	  else
6698 	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6699 			     fold_convert (TREE_TYPE (cond_var), step));
6700 	  expand_omp_build_assign (&gsi, cond_var, t);
6701 	}
6702 
6703       /* Remove GIMPLE_OMP_CONTINUE.  */
6704       gsi_remove (&gsi, true);
6705     }
6706 
6707   /* Emit the condition in L1_BB.  */
6708   gsi = gsi_start_bb (l1_bb);
6709 
6710   if (altv)
6711     t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6712   else if (fd->collapse > 1
6713 	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
6714 	   && !broken_loop)
6715     {
6716       i = fd->collapse - 1;
6717       tree itype = TREE_TYPE (fd->loops[i].v);
6718       if (fd->loops[i].m2)
6719 	t = n2v = create_tmp_var (itype);
6720       else
6721 	t = fold_convert (itype, fd->loops[i].n2);
6722       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6723 				    false, GSI_CONTINUE_LINKING);
6724       tree v = fd->loops[i].v;
6725       if (DECL_P (v) && TREE_ADDRESSABLE (v))
6726 	v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6727 				      false, GSI_CONTINUE_LINKING);
6728       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6729     }
6730   else
6731     {
6732       if (fd->collapse > 1 && !broken_loop)
6733 	t = n2var;
6734       else
6735 	t = fold_convert (type, n2);
6736       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6737 				    false, GSI_CONTINUE_LINKING);
6738       tree v = fd->loop.v;
6739       if (DECL_P (v) && TREE_ADDRESSABLE (v))
6740 	v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6741 				      false, GSI_CONTINUE_LINKING);
6742       t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6743     }
6744   cond_stmt = gimple_build_cond_empty (t);
6745   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6746   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6747 		 NULL, NULL)
6748       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6749 		    NULL, NULL))
6750     {
6751       gsi = gsi_for_stmt (cond_stmt);
6752       gimple_regimplify_operands (cond_stmt, &gsi);
6753     }
6754 
6755   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
6756   if (is_simt)
6757     {
6758       gsi = gsi_start_bb (l2_bb);
6759       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6760       if (POINTER_TYPE_P (type))
6761 	t = fold_build_pointer_plus (fd->loop.v, step);
6762       else
6763 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6764       expand_omp_build_assign (&gsi, fd->loop.v, t);
6765     }
6766 
6767   /* Remove GIMPLE_OMP_RETURN.  */
6768   gsi = gsi_last_nondebug_bb (exit_bb);
6769   gsi_remove (&gsi, true);
6770 
6771   /* Connect the new blocks.  */
6772   remove_edge (FALLTHRU_EDGE (entry_bb));
6773 
6774   if (!broken_loop)
6775     {
6776       remove_edge (BRANCH_EDGE (entry_bb));
6777       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6778 
6779       e = BRANCH_EDGE (l1_bb);
6780       ne = FALLTHRU_EDGE (l1_bb);
6781       e->flags = EDGE_TRUE_VALUE;
6782     }
6783   else
6784     {
6785       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6786 
6787       ne = single_succ_edge (l1_bb);
6788       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6790     }
6791   ne->flags = EDGE_FALSE_VALUE;
6792   e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6793   ne->probability = e->probability.invert ();
6794 
6795   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6796   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6797 
6798   if (simt_maxlane)
6799     {
6800       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6801 				     NULL_TREE, NULL_TREE);
6802       gsi = gsi_last_bb (entry_bb);
6803       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6804       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6805       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6806       FALLTHRU_EDGE (entry_bb)->probability
6807 	 = profile_probability::guessed_always ().apply_scale (7, 8);
6808       BRANCH_EDGE (entry_bb)->probability
6809 	 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6810       l2_dom_bb = entry_bb;
6811     }
6812   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6813 
6814   if (!broken_loop && fd->collapse > 1)
6815     {
6816       basic_block last_bb = l1_bb;
6817       basic_block init_bb = NULL;
6818       for (i = fd->collapse - 2; i >= 0; i--)
6819 	{
6820 	  tree nextn2v = NULL_TREE;
6821 	  if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6822 	    e = EDGE_SUCC (last_bb, 0);
6823 	  else
6824 	    e = EDGE_SUCC (last_bb, 1);
6825 	  basic_block bb = split_edge (e);
6826 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6827 	    {
6828 	      t = fold_convert (sizetype, fd->loops[i].step);
6829 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
6830 	    }
6831 	  else
6832 	    {
6833 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
6834 				fd->loops[i].step);
6835 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6836 			       fd->loops[i].v, t);
6837 	    }
6838 	  gsi = gsi_after_labels (bb);
6839 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6840 
6841 	  bb = split_block (bb, last_stmt (bb))->dest;
6842 	  gsi = gsi_start_bb (bb);
6843 	  tree itype = TREE_TYPE (fd->loops[i].v);
6844 	  if (fd->loops[i].m2)
6845 	    t = nextn2v = create_tmp_var (itype);
6846 	  else
6847 	    t = fold_convert (itype, fd->loops[i].n2);
6848 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6849 					false, GSI_CONTINUE_LINKING);
6850 	  tree v = fd->loops[i].v;
6851 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
6852 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6853 					  false, GSI_CONTINUE_LINKING);
6854 	  t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6855 	  cond_stmt = gimple_build_cond_empty (t);
6856 	  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6857 	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6858 			 expand_omp_regimplify_p, NULL, NULL)
6859 	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6860 			    expand_omp_regimplify_p, NULL, NULL))
6861 	    {
6862 	      gsi = gsi_for_stmt (cond_stmt);
6863 	      gimple_regimplify_operands (cond_stmt, &gsi);
6864 	    }
6865 	  ne = single_succ_edge (bb);
6866 	  ne->flags = EDGE_FALSE_VALUE;
6867 
6868 	  init_bb = create_empty_bb (bb);
6869 	  set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6870 	  add_bb_to_loop (init_bb, bb->loop_father);
6871 	  e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6872 	  e->probability
6873 	    = profile_probability::guessed_always ().apply_scale (7, 8);
6874 	  ne->probability = e->probability.invert ();
6875 
6876 	  gsi = gsi_after_labels (init_bb);
6877 	  t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6878 			    fd->loops[i + 1].n1);
6879 	  if (fd->loops[i + 1].m1)
6880 	    {
6881 	      tree t2 = fold_convert (TREE_TYPE (t),
6882 				      fd->loops[i + 1
6883 						- fd->loops[i + 1].outer].v);
6884 	      tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6885 	      t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6886 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6887 	    }
6888 	  expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6889 	  if (fd->loops[i + 1].m2)
6890 	    {
6891 	      if (i + 2 == fd->collapse && (n2var || altv))
6892 		{
6893 		  gcc_assert (n2v == NULL_TREE);
6894 		  n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6895 		}
6896 	      t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6897 				fd->loops[i + 1].n2);
6898 	      tree t2 = fold_convert (TREE_TYPE (t),
6899 				      fd->loops[i + 1
6900 						- fd->loops[i + 1].outer].v);
6901 	      tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6902 	      t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6903 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6904 	      expand_omp_build_assign (&gsi, n2v, t);
6905 	    }
6906 	  if (i + 2 == fd->collapse && n2var)
6907 	    {
6908 	      /* For composite simd, n2 is the first iteration the current
6909 		 task shouldn't already handle, so we effectively want to use
6910 		 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6911 		 as the vectorized loop.  Except the vectorizer will not
6912 		 vectorize that, so instead compute N2VAR as
6913 		 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6914 		 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6915 		 as the loop to vectorize.  */
6916 	      tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6917 	      if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6918 		{
6919 		  t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6920 					     == LT_EXPR ? -1 : 1));
6921 		  t = fold_build2 (PLUS_EXPR, itype,
6922 				   fold_convert (itype,
6923 						 fd->loops[i + 1].step), t);
6924 		  if (fd->loops[i + 1].m2)
6925 		    t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6926 		  else
6927 		    t = fold_build2 (PLUS_EXPR, itype, t,
6928 				     fold_convert (itype,
6929 						   fd->loops[i + 1].n2));
6930 		  t = fold_build2 (MINUS_EXPR, itype, t,
6931 				   fold_convert (itype, fd->loops[i + 1].v));
6932 		  tree step = fold_convert (itype, fd->loops[i + 1].step);
6933 		  if (TYPE_UNSIGNED (itype)
6934 		      && fd->loops[i + 1].cond_code == GT_EXPR)
6935 		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
6936 				     fold_build1 (NEGATE_EXPR, itype, t),
6937 				     fold_build1 (NEGATE_EXPR, itype, step));
6938 		  else
6939 		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6940 		  t = fold_convert (type, t);
6941 		}
6942 	      else
6943 		t = counts[i + 1];
6944 	      expand_omp_build_assign (&gsi, min_arg1, t2);
6945 	      expand_omp_build_assign (&gsi, min_arg2, t);
6946 	      e = split_block (init_bb, last_stmt (init_bb));
6947 	      gsi = gsi_after_labels (e->dest);
6948 	      init_bb = e->dest;
6949 	      remove_edge (FALLTHRU_EDGE (entry_bb));
6950 	      make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6951 	      set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6952 	      set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6953 	      t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6954 	      t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6955 	      expand_omp_build_assign (&gsi, n2var, t);
6956 	    }
6957 	  if (i + 2 == fd->collapse && altv)
6958 	    {
6959 	      /* The vectorizer currently punts on loops with non-constant
6960 		 steps for the main IV (can't compute number of iterations
6961 		 and gives up because of that).  Since for OpenMP loops the
6962 		 number of iterations can always be computed upfront, use an
6963 		 alternate IV as the loop iterator.  */
6964 	      expand_omp_build_assign (&gsi, altv,
6965 				       build_zero_cst (TREE_TYPE (altv)));
6966 	      tree itype = TREE_TYPE (fd->loops[i + 1].v);
6967 	      if (POINTER_TYPE_P (itype))
6968 		itype = signed_type_for (itype);
6969 	      t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6970 					 ? -1 : 1));
6971 	      t = fold_build2 (PLUS_EXPR, itype,
6972 			       fold_convert (itype, fd->loops[i + 1].step), t);
6973 	      t = fold_build2 (PLUS_EXPR, itype, t,
6974 			       fold_convert (itype,
6975 					     fd->loops[i + 1].m2
6976 					     ? n2v : fd->loops[i + 1].n2));
6977 	      t = fold_build2 (MINUS_EXPR, itype, t,
6978 			       fold_convert (itype, fd->loops[i + 1].v));
6979 	      tree step = fold_convert (itype, fd->loops[i + 1].step);
6980 	      if (TYPE_UNSIGNED (itype)
6981 		  && fd->loops[i + 1].cond_code == GT_EXPR)
6982 		t = fold_build2 (TRUNC_DIV_EXPR, itype,
6983 				 fold_build1 (NEGATE_EXPR, itype, t),
6984 				 fold_build1 (NEGATE_EXPR, itype, step));
6985 	      else
6986 		t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6987 	      t = fold_convert (TREE_TYPE (altv), t);
6988 	      expand_omp_build_assign (&gsi, altn2, t);
6989 	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6990 				      fd->loops[i + 1].m2
6991 				      ? n2v : fd->loops[i + 1].n2);
6992 	      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6993 					     true, GSI_SAME_STMT);
6994 	      t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6995 				fd->loops[i + 1].v, t2);
6996 	      gassign *g
6997 		= gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6998 				       build_zero_cst (TREE_TYPE (altv)));
6999 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7000 	    }
7001 	  n2v = nextn2v;
7002 
7003 	  make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7004 	  if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7005 	    {
7006 	      e = find_edge (entry_bb, last_bb);
7007 	      redirect_edge_succ (e, bb);
7008 	      set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7009 	      set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7010 	    }
7011 
7012 	  last_bb = bb;
7013 	}
7014     }
7015   if (!broken_loop)
7016     {
7017       class loop *loop = alloc_loop ();
7018       loop->header = l1_bb;
7019       loop->latch = cont_bb;
7020       add_loop (loop, l1_bb->loop_father);
7021       loop->safelen = safelen_int;
7022       if (simduid)
7023 	{
7024 	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7025 	  cfun->has_simduid_loops = true;
7026 	}
7027       /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7028 	 the loop.  */
7029       if ((flag_tree_loop_vectorize
7030 	   || !OPTION_SET_P (flag_tree_loop_vectorize))
7031 	  && flag_tree_loop_optimize
7032 	  && loop->safelen > 1)
7033 	{
7034 	  loop->force_vectorize = true;
7035 	  if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7036 	    {
7037 	      unsigned HOST_WIDE_INT v
7038 		= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7039 	      if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7040 		loop->simdlen = v;
7041 	    }
7042 	  cfun->has_force_vectorize_loops = true;
7043 	}
7044       else if (dont_vectorize)
7045 	loop->dont_vectorize = true;
7046     }
7047   else if (simduid)
7048     cfun->has_simduid_loops = true;
7049 }
7050 
7051 /* A taskloop construct is represented after gimplification as
7052    two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
7053    in between them.  This routine expands the outer GIMPLE_OMP_FOR,
7054    which should just compute all the needed loop temporaries
7055    for GIMPLE_OMP_TASK.  */
7056 
7057 static void
7058 expand_omp_taskloop_for_outer (struct omp_region *region,
7059 			       struct omp_for_data *fd,
7060 			       gimple *inner_stmt)
7061 {
7062   tree type, bias = NULL_TREE;
7063   basic_block entry_bb, cont_bb, exit_bb;
7064   gimple_stmt_iterator gsi;
7065   gassign *assign_stmt;
7066   tree *counts = NULL;
7067   int i;
7068 
7069   gcc_assert (inner_stmt);
7070   gcc_assert (region->cont);
7071   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7072 	      && gimple_omp_task_taskloop_p (inner_stmt));
7073   type = TREE_TYPE (fd->loop.v);
7074 
7075   /* See if we need to bias by LLONG_MIN.  */
7076   if (fd->iter_type == long_long_unsigned_type_node
7077       && TREE_CODE (type) == INTEGER_TYPE
7078       && !TYPE_UNSIGNED (type))
7079     {
7080       tree n1, n2;
7081 
7082       if (fd->loop.cond_code == LT_EXPR)
7083 	{
7084 	  n1 = fd->loop.n1;
7085 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7086 	}
7087       else
7088 	{
7089 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7090 	  n2 = fd->loop.n1;
7091 	}
7092       if (TREE_CODE (n1) != INTEGER_CST
7093 	  || TREE_CODE (n2) != INTEGER_CST
7094 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7095 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7096     }
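  /* Adding BIAS (LLONG_MIN reinterpreted as unsigned) maps the signed
     range [LLONG_MIN, LLONG_MAX] monotonically onto [0, ULLONG_MAX];
     e.g. a signed iteration space of [-5, 5) becomes an ordinary
     unsigned range, so the unsigned comparisons done by
     GOMP_taskloop_ull stay correct.  */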
7097 
7098   entry_bb = region->entry;
7099   cont_bb = region->cont;
7100   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7101   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7102   exit_bb = region->exit;
7103 
7104   gsi = gsi_last_nondebug_bb (entry_bb);
7105   gimple *for_stmt = gsi_stmt (gsi);
7106   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7107   if (fd->collapse > 1)
7108     {
7109       int first_zero_iter = -1, dummy = -1;
7110       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7111 
7112       counts = XALLOCAVEC (tree, fd->collapse);
7113       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7114 				  zero_iter_bb, first_zero_iter,
7115 				  dummy_bb, dummy, l2_dom_bb);
7116 
7117       if (zero_iter_bb)
7118 	{
7119 	  /* Some counts[i] vars might be uninitialized if
7120 	     some loop has zero iterations.  But the body shouldn't
7121 	     be executed in that case, so just avoid uninit warnings.  */
7122 	  for (i = first_zero_iter; i < fd->collapse; i++)
7123 	    if (SSA_VAR_P (counts[i]))
7124 	      suppress_warning (counts[i], OPT_Wuninitialized);
7125 	  gsi_prev (&gsi);
7126 	  edge e = split_block (entry_bb, gsi_stmt (gsi));
7127 	  entry_bb = e->dest;
7128 	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7129 	  gsi = gsi_last_bb (entry_bb);
7130 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7131 				   get_immediate_dominator (CDI_DOMINATORS,
7132 							    zero_iter_bb));
7133 	}
7134     }
7135 
7136   tree t0, t1;
7137   t1 = fd->loop.n2;
7138   t0 = fd->loop.n1;
7139   if (POINTER_TYPE_P (TREE_TYPE (t0))
7140       && TYPE_PRECISION (TREE_TYPE (t0))
7141 	 != TYPE_PRECISION (fd->iter_type))
7142     {
7143       /* Avoid casting pointers to integer of a different size.  */
7144       tree itype = signed_type_for (type);
7145       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7146       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7147     }
7148   else
7149     {
7150       t1 = fold_convert (fd->iter_type, t1);
7151       t0 = fold_convert (fd->iter_type, t0);
7152     }
7153   if (bias)
7154     {
7155       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7156       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7157     }
7158 
7159   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7160 				 OMP_CLAUSE__LOOPTEMP_);
7161   gcc_assert (innerc);
7162   tree startvar = OMP_CLAUSE_DECL (innerc);
7163   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7164   gcc_assert (innerc);
7165   tree endvar = OMP_CLAUSE_DECL (innerc);
7166   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7167     {
7168       innerc = find_lastprivate_looptemp (fd, innerc);
7169       if (innerc)
7170 	{
7171 	  /* If needed (inner taskloop has lastprivate clause), propagate
7172 	     down the total number of iterations.  */
7173 	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7174 					     NULL_TREE, false,
7175 					     GSI_CONTINUE_LINKING);
7176 	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7177 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7178 	}
7179     }
7180 
7181   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7182 				 GSI_CONTINUE_LINKING);
7183   assign_stmt = gimple_build_assign (startvar, t0);
7184   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7185 
7186   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7187 				 GSI_CONTINUE_LINKING);
7188   assign_stmt = gimple_build_assign (endvar, t1);
7189   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7190   if (fd->collapse > 1)
7191     expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7192 
7193   /* Remove the GIMPLE_OMP_FOR statement.  */
7194   gsi = gsi_for_stmt (for_stmt);
7195   gsi_remove (&gsi, true);
7196 
7197   gsi = gsi_last_nondebug_bb (cont_bb);
7198   gsi_remove (&gsi, true);
7199 
7200   gsi = gsi_last_nondebug_bb (exit_bb);
7201   gsi_remove (&gsi, true);
7202 
7203   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7204   remove_edge (BRANCH_EDGE (entry_bb));
7205   FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7206   remove_edge (BRANCH_EDGE (cont_bb));
7207   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7208   set_immediate_dominator (CDI_DOMINATORS, region->entry,
7209 			   recompute_dominator (CDI_DOMINATORS, region->entry));
7210 }
7211 
7212 /* A taskloop construct is represented after gimplification as
7213    two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
7214    in between them.  This routine expands the inner GIMPLE_OMP_FOR.
7215    The GOMP_taskloop{,_ull} function arranges for each task to be
7216    given just a single range of iterations.  */
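/* Each generated task therefore just reads its half-open [start, end)
   range from the two _LOOPTEMP_ variables below; no further
   scheduling is done here.  */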
7217 
7218 static void
7219 expand_omp_taskloop_for_inner (struct omp_region *region,
7220 			       struct omp_for_data *fd,
7221 			       gimple *inner_stmt)
7222 {
7223   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7224   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7225   basic_block fin_bb;
7226   gimple_stmt_iterator gsi;
7227   edge ep;
7228   bool broken_loop = region->cont == NULL;
7229   tree *counts = NULL;
7230   tree n1, n2, step;
7231 
7232   itype = type = TREE_TYPE (fd->loop.v);
7233   if (POINTER_TYPE_P (type))
7234     itype = signed_type_for (type);
7235 
7236   /* See if we need to bias by LLONG_MIN.  */
7237   if (fd->iter_type == long_long_unsigned_type_node
7238       && TREE_CODE (type) == INTEGER_TYPE
7239       && !TYPE_UNSIGNED (type))
7240     {
7241       tree n1, n2;
7242 
7243       if (fd->loop.cond_code == LT_EXPR)
7244 	{
7245 	  n1 = fd->loop.n1;
7246 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7247 	}
7248       else
7249 	{
7250 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7251 	  n2 = fd->loop.n1;
7252 	}
7253       if (TREE_CODE (n1) != INTEGER_CST
7254 	  || TREE_CODE (n2) != INTEGER_CST
7255 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7256 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7257     }
7258 
7259   entry_bb = region->entry;
7260   cont_bb = region->cont;
7261   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7262   fin_bb = BRANCH_EDGE (entry_bb)->dest;
7263   gcc_assert (broken_loop
7264 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7265   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7266   if (!broken_loop)
7267     {
7268       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7269       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7270     }
7271   exit_bb = region->exit;
7272 
7273   /* Iteration space partitioning goes in ENTRY_BB.  */
7274   gsi = gsi_last_nondebug_bb (entry_bb);
7275   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7276 
7277   if (fd->collapse > 1)
7278     {
7279       int first_zero_iter = -1, dummy = -1;
7280       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7281 
7282       counts = XALLOCAVEC (tree, fd->collapse);
7283       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7284 				  fin_bb, first_zero_iter,
7285 				  dummy_bb, dummy, l2_dom_bb);
7286       t = NULL_TREE;
7287     }
7288   else
7289     t = integer_one_node;
7290 
7291   step = fd->loop.step;
7292   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7293 				 OMP_CLAUSE__LOOPTEMP_);
7294   gcc_assert (innerc);
7295   n1 = OMP_CLAUSE_DECL (innerc);
7296   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7297   gcc_assert (innerc);
7298   n2 = OMP_CLAUSE_DECL (innerc);
7299   if (bias)
7300     {
7301       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7302       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7303     }
7304   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7305 				 true, NULL_TREE, true, GSI_SAME_STMT);
7306   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7307 				 true, NULL_TREE, true, GSI_SAME_STMT);
7308   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7309 				   true, NULL_TREE, true, GSI_SAME_STMT);
7310 
7311   tree startvar = fd->loop.v;
7312   tree endvar = NULL_TREE;
7313 
7314   if (gimple_omp_for_combined_p (fd->for_stmt))
7315     {
7316       tree clauses = gimple_omp_for_clauses (inner_stmt);
7317       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7318       gcc_assert (innerc);
7319       startvar = OMP_CLAUSE_DECL (innerc);
7320       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7321 				OMP_CLAUSE__LOOPTEMP_);
7322       gcc_assert (innerc);
7323       endvar = OMP_CLAUSE_DECL (innerc);
7324     }
7325   t = fold_convert (TREE_TYPE (startvar), n1);
7326   t = force_gimple_operand_gsi (&gsi, t,
7327 				DECL_P (startvar)
7328 				&& TREE_ADDRESSABLE (startvar),
7329 				NULL_TREE, false, GSI_CONTINUE_LINKING);
7330   gimple *assign_stmt = gimple_build_assign (startvar, t);
7331   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7332 
7333   t = fold_convert (TREE_TYPE (startvar), n2);
7334   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7335 				false, GSI_CONTINUE_LINKING);
7336   if (endvar)
7337     {
7338       assign_stmt = gimple_build_assign (endvar, e);
7339       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7340       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7341 	assign_stmt = gimple_build_assign (fd->loop.v, e);
7342       else
7343 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7344       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7345     }
7346 
7347   tree *nonrect_bounds = NULL;
7348   if (fd->collapse > 1)
7349     {
7350       if (fd->non_rect)
7351 	{
7352 	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7353 	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7354 	}
7355       gcc_assert (gsi_bb (gsi) == entry_bb);
7356       expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7357 				startvar);
7358       entry_bb = gsi_bb (gsi);
7359     }
7360 
7361   if (!broken_loop)
7362     {
7363       /* The code controlling the sequential loop replaces the
7364 	 GIMPLE_OMP_CONTINUE.  */
7365       gsi = gsi_last_nondebug_bb (cont_bb);
7366       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7367       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7368       vmain = gimple_omp_continue_control_use (cont_stmt);
7369       vback = gimple_omp_continue_control_def (cont_stmt);
7370 
7371       if (!gimple_omp_for_combined_p (fd->for_stmt))
7372 	{
7373 	  if (POINTER_TYPE_P (type))
7374 	    t = fold_build_pointer_plus (vmain, step);
7375 	  else
7376 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
7377 	  t = force_gimple_operand_gsi (&gsi, t,
7378 					DECL_P (vback)
7379 					&& TREE_ADDRESSABLE (vback),
7380 					NULL_TREE, true, GSI_SAME_STMT);
7381 	  assign_stmt = gimple_build_assign (vback, t);
7382 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7383 
7384 	  t = build2 (fd->loop.cond_code, boolean_type_node,
7385 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
7386 		      ? t : vback, e);
7387 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7388 	}
7389 
7390       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7391       gsi_remove (&gsi, true);
7392 
7393       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7394 	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7395 						   cont_bb, body_bb);
7396     }
7397 
7398   /* Remove the GIMPLE_OMP_FOR statement.  */
7399   gsi = gsi_for_stmt (fd->for_stmt);
7400   gsi_remove (&gsi, true);
7401 
7402   /* Remove the GIMPLE_OMP_RETURN statement.  */
7403   gsi = gsi_last_nondebug_bb (exit_bb);
7404   gsi_remove (&gsi, true);
7405 
7406   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7407   if (!broken_loop)
7408     remove_edge (BRANCH_EDGE (entry_bb));
7409   else
7410     {
7411       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7412       region->outer->cont = NULL;
7413     }
7414 
7415   /* Connect all the blocks.  */
7416   if (!broken_loop)
7417     {
7418       ep = find_edge (cont_bb, body_bb);
7419       if (gimple_omp_for_combined_p (fd->for_stmt))
7420 	{
7421 	  remove_edge (ep);
7422 	  ep = NULL;
7423 	}
7424       else if (fd->collapse > 1)
7425 	{
7426 	  remove_edge (ep);
7427 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7428 	}
7429       else
7430 	ep->flags = EDGE_TRUE_VALUE;
7431       find_edge (cont_bb, fin_bb)->flags
7432 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7433     }
7434 
7435   set_immediate_dominator (CDI_DOMINATORS, body_bb,
7436 			   recompute_dominator (CDI_DOMINATORS, body_bb));
7437   if (!broken_loop)
7438     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7439 			     recompute_dominator (CDI_DOMINATORS, fin_bb));
7440 
7441   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7442     {
7443       class loop *loop = alloc_loop ();
7444       loop->header = body_bb;
7445       if (collapse_bb == NULL)
7446 	loop->latch = cont_bb;
7447       add_loop (loop, body_bb->loop_father);
7448     }
7449 }
7450 
7451 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
7452    partitioned loop.  The lowering here is abstracted, in that the
7453    loop parameters are passed through internal functions, which are
7454    further lowered by oacc_device_lower, once we get to the target
7455    compiler.  The loop is of the form:
7456 
7457    for (V = B; V LTGT E; V += S) {BODY}
7458 
7459    where LTGT is < or >.  We may have a specified chunking size, CHUNKING
7460    (constant 0 for no chunking) and we will have a GWV partitioning
7461    mask, specifying dimensions over which the loop is to be
7462    partitioned (see note below).  We generate code that looks like
7463    (this ignores tiling):
7464 
7465    <entry_bb> [incoming FALL->body, BRANCH->exit]
7466      typedef signedintify (typeof (V)) T;  // underlying signed integral type
7467      T range = E - B;
7468      T chunk_no = 0;
7469      T dir = LTGT == '<' ? +1 : -1;
7470      T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7471      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7472 
7473    <head_bb> [created by splitting end of entry_bb]
7474      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7475      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7476      if (!(offset LTGT bound)) goto bottom_bb;
7477 
7478    <body_bb> [incoming]
7479      V = B + offset;
7480      {BODY}
7481 
7482    <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7483      offset += step;
7484      if (offset LTGT bound) goto body_bb; [*]
7485 
7486    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7487      chunk_no++;
7488      if (chunk_no < chunk_max) goto head_bb;
7489 
7490    <exit_bb> [incoming]
7491      V = B + ((range -/+ 1) / S +/- 1) * S [*]
7492 
7493    [*] Needed if V live at end of loop.  */
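
/* For example (hypothetical user code): with

       #pragma acc parallel loop gang vector
       for (i = 0; i < n; i++)
	 a[i] += b[i];

   B is 0, E is n, S is 1 and LTGT is '<'; CHUNKING is 0 and the GWV mask
   covers the gang and vector dimensions.  The GOACC_LOOP_* internal
   function calls emitted here are what oacc_device_lower later turns
   into target-specific code.  */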
7494 
7495 static void
7496 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7497 {
7498   bool is_oacc_kernels_parallelized
7499     = (lookup_attribute ("oacc kernels parallelized",
7500 			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7501   {
7502     bool is_oacc_kernels
7503       = (lookup_attribute ("oacc kernels",
7504 			   DECL_ATTRIBUTES (current_function_decl)) != NULL);
7505     if (is_oacc_kernels_parallelized)
7506       gcc_checking_assert (is_oacc_kernels);
7507   }
7508   gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7509   /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7510      for SSA specifics, and some are for 'parloops' OpenACC
7511      'kernels'-parallelized specifics.  */
7512 
7513   tree v = fd->loop.v;
7514   enum tree_code cond_code = fd->loop.cond_code;
7515   enum tree_code plus_code = PLUS_EXPR;
7516 
7517   tree chunk_size = integer_minus_one_node;
7518   tree gwv = integer_zero_node;
7519   tree iter_type = TREE_TYPE (v);
7520   tree diff_type = iter_type;
7521   tree plus_type = iter_type;
7522   struct oacc_collapse *counts = NULL;
7523 
7524   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7525 		       == GF_OMP_FOR_KIND_OACC_LOOP);
7526   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7527   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7528 
7529   if (POINTER_TYPE_P (iter_type))
7530     {
7531       plus_code = POINTER_PLUS_EXPR;
7532       plus_type = sizetype;
7533     }
7534   for (int ix = fd->collapse; ix--;)
7535     {
7536       tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7537       if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7538 	diff_type = diff_type2;
7539     }
7540   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7541     diff_type = signed_type_for (diff_type);
7542   if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7543     diff_type = integer_type_node;
7544 
7545   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7546   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7547   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
7548   basic_block bottom_bb = NULL;
7549 
7550   /* entry_bb has two successors; the branch edge is to the exit
7551      block, fallthrough edge to body.  */
7552   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7553 	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7554 
7555   /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
7556      body_bb, or to a block whose only successor is the body_bb.  Its
7557      fallthrough successor is the final block (same as the branch
7558      successor of the entry_bb).  */
7559   if (cont_bb)
7560     {
7561       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7562       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7563 
7564       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7565       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7566     }
7567   else
7568     gcc_assert (!gimple_in_ssa_p (cfun));
7569 
7570   /* The exit block only has entry_bb and cont_bb as predecessors.  */
7571   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7572 
7573   tree chunk_no;
7574   tree chunk_max = NULL_TREE;
7575   tree bound, offset;
7576   tree step = create_tmp_var (diff_type, ".step");
7577   bool up = cond_code == LT_EXPR;
7578   tree dir = build_int_cst (diff_type, up ? +1 : -1);
7579   bool chunking = !gimple_in_ssa_p (cfun);
7580   bool negating;
7581 
7582   /* Tiling vars.  */
7583   tree tile_size = NULL_TREE;
7584   tree element_s = NULL_TREE;
7585   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7586   basic_block elem_body_bb = NULL;
7587   basic_block elem_cont_bb = NULL;
7588 
7589   /* SSA instances.  */
7590   tree offset_incr = NULL_TREE;
7591   tree offset_init = NULL_TREE;
7592 
7593   gimple_stmt_iterator gsi;
7594   gassign *ass;
7595   gcall *call;
7596   gimple *stmt;
7597   tree expr;
7598   location_t loc;
7599   edge split, be, fte;
7600 
7601   /* Split the end of entry_bb to create head_bb.  */
7602   split = split_block (entry_bb, last_stmt (entry_bb));
7603   basic_block head_bb = split->dest;
7604   entry_bb = split->src;
7605 
7606   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
7607   gsi = gsi_last_nondebug_bb (entry_bb);
7608   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7609   loc = gimple_location (for_stmt);
7610 
7611   if (gimple_in_ssa_p (cfun))
7612     {
7613       offset_init = gimple_omp_for_index (for_stmt, 0);
7614       gcc_assert (integer_zerop (fd->loop.n1));
7615       /* The SSA parallelizer does gang parallelism.  */
7616       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7617     }
7618 
7619   if (fd->collapse > 1 || fd->tiling)
7620     {
7621       gcc_assert (!gimple_in_ssa_p (cfun) && up);
7622       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7623       tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7624 					      TREE_TYPE (fd->loop.n2), loc);
7625 
7626       if (SSA_VAR_P (fd->loop.n2))
7627 	{
7628 	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7629 					    true, GSI_SAME_STMT);
7630 	  ass = gimple_build_assign (fd->loop.n2, total);
7631 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7632 	}
7633     }
7634 
7635   tree b = fd->loop.n1;
7636   tree e = fd->loop.n2;
7637   tree s = fd->loop.step;
7638 
7639   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7640   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7641 
7642   /* Convert the step, avoiding possible unsigned->signed overflow.  */
7643   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7644   if (negating)
7645     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7646   s = fold_convert (diff_type, s);
7647   if (negating)
7648     s = fold_build1 (NEGATE_EXPR, diff_type, s);
7649   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7650 
7651   if (!chunking)
7652     chunk_size = integer_zero_node;
7653   expr = fold_convert (diff_type, chunk_size);
7654   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7655 					 NULL_TREE, true, GSI_SAME_STMT);
7656 
7657   if (fd->tiling)
7658     {
7659       /* Determine the tile size and element step,
7660 	 modify the outer loop step size.  */
7661       tile_size = create_tmp_var (diff_type, ".tile_size");
7662       expr = build_int_cst (diff_type, 1);
7663       for (int ix = 0; ix < fd->collapse; ix++)
7664 	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7665       expr = force_gimple_operand_gsi (&gsi, expr, true,
7666 				       NULL_TREE, true, GSI_SAME_STMT);
7667       ass = gimple_build_assign (tile_size, expr);
7668       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7669 
7670       element_s = create_tmp_var (diff_type, ".element_s");
7671       ass = gimple_build_assign (element_s, s);
7672       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7673 
7674       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7675       s = force_gimple_operand_gsi (&gsi, expr, true,
7676 				    NULL_TREE, true, GSI_SAME_STMT);
7677     }
7678 
7679   /* Determine the range, avoiding possible unsigned->signed overflow.  */
7680   negating = !up && TYPE_UNSIGNED (iter_type);
7681   expr = fold_build2 (MINUS_EXPR, plus_type,
7682 		      fold_convert (plus_type, negating ? b : e),
7683 		      fold_convert (plus_type, negating ? e : b));
7684   expr = fold_convert (diff_type, expr);
7685   if (negating)
7686     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7687   tree range = force_gimple_operand_gsi (&gsi, expr, true,
7688 					 NULL_TREE, true, GSI_SAME_STMT);
7689 
7690   chunk_no = build_int_cst (diff_type, 0);
7691   if (chunking)
7692     {
7693       gcc_assert (!gimple_in_ssa_p (cfun));
7694 
7695       expr = chunk_no;
7696       chunk_max = create_tmp_var (diff_type, ".chunk_max");
7697       chunk_no = create_tmp_var (diff_type, ".chunk_no");
7698 
7699       ass = gimple_build_assign (chunk_no, expr);
7700       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7701 
7702       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7703 					 build_int_cst (integer_type_node,
7704 							IFN_GOACC_LOOP_CHUNKS),
7705 					 dir, range, s, chunk_size, gwv);
7706       gimple_call_set_lhs (call, chunk_max);
7707       gimple_set_location (call, loc);
7708       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7709     }
7710   else
7711     chunk_size = chunk_no;
7712 
7713   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7714 				     build_int_cst (integer_type_node,
7715 						    IFN_GOACC_LOOP_STEP),
7716 				     dir, range, s, chunk_size, gwv);
7717   gimple_call_set_lhs (call, step);
7718   gimple_set_location (call, loc);
7719   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7720 
7721   /* Remove the GIMPLE_OMP_FOR.  */
7722   gsi_remove (&gsi, true);
7723 
7724   /* Fixup edges from head_bb.  */
7725   be = BRANCH_EDGE (head_bb);
7726   fte = FALLTHRU_EDGE (head_bb);
7727   be->flags |= EDGE_FALSE_VALUE;
7728   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7729 
7730   basic_block body_bb = fte->dest;
7731 
7732   if (gimple_in_ssa_p (cfun))
7733     {
7734       gsi = gsi_last_nondebug_bb (cont_bb);
7735       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7736 
7737       offset = gimple_omp_continue_control_use (cont_stmt);
7738       offset_incr = gimple_omp_continue_control_def (cont_stmt);
7739     }
7740   else
7741     {
7742       offset = create_tmp_var (diff_type, ".offset");
7743       offset_init = offset_incr = offset;
7744     }
7745   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7746 
7747   /* Loop offset & bound go into head_bb.  */
7748   gsi = gsi_start_bb (head_bb);
7749 
7750   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7751 				     build_int_cst (integer_type_node,
7752 						    IFN_GOACC_LOOP_OFFSET),
7753 				     dir, range, s,
7754 				     chunk_size, gwv, chunk_no);
7755   gimple_call_set_lhs (call, offset_init);
7756   gimple_set_location (call, loc);
7757   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7758 
7759   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7760 				     build_int_cst (integer_type_node,
7761 						    IFN_GOACC_LOOP_BOUND),
7762 				     dir, range, s,
7763 				     chunk_size, gwv, offset_init);
7764   gimple_call_set_lhs (call, bound);
7765   gimple_set_location (call, loc);
7766   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7767 
7768   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7769   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7770 		    GSI_CONTINUE_LINKING);
7771 
7772   /* V assignment goes into body_bb.  */
7773   if (!gimple_in_ssa_p (cfun))
7774     {
7775       gsi = gsi_start_bb (body_bb);
7776 
7777       expr = build2 (plus_code, iter_type, b,
7778 		     fold_convert (plus_type, offset));
7779       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7780 				       true, GSI_SAME_STMT);
7781       ass = gimple_build_assign (v, expr);
7782       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7783 
7784       if (fd->collapse > 1 || fd->tiling)
7785 	expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7786 
7787       if (fd->tiling)
7788 	{
7789 	  /* Determine the range of the element loop -- usually simply
7790 	     the tile_size, but could be smaller if the final
7791 	     iteration of the outer loop is a partial tile.  */
7792 	  tree e_range = create_tmp_var (diff_type, ".e_range");
7793 
7794 	  expr = build2 (MIN_EXPR, diff_type,
7795 			 build2 (MINUS_EXPR, diff_type, bound, offset),
7796 			 build2 (MULT_EXPR, diff_type, tile_size,
7797 				 element_s));
7798 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7799 					   true, GSI_SAME_STMT);
7800 	  ass = gimple_build_assign (e_range, expr);
7801 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7802 
7803 	  /* Determine bound, offset & step of inner loop. */
7804 	  e_bound = create_tmp_var (diff_type, ".e_bound");
7805 	  e_offset = create_tmp_var (diff_type, ".e_offset");
7806 	  e_step = create_tmp_var (diff_type, ".e_step");
7807 
7808 	  /* Mark these as element loops.  */
7809 	  tree t, e_gwv = integer_minus_one_node;
7810 	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
7811 
7812 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7813 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7814 					     element_s, chunk, e_gwv, chunk);
7815 	  gimple_call_set_lhs (call, e_offset);
7816 	  gimple_set_location (call, loc);
7817 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7818 
7819 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7820 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7821 					     element_s, chunk, e_gwv, e_offset);
7822 	  gimple_call_set_lhs (call, e_bound);
7823 	  gimple_set_location (call, loc);
7824 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7825 
7826 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7827 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7828 					     element_s, chunk, e_gwv);
7829 	  gimple_call_set_lhs (call, e_step);
7830 	  gimple_set_location (call, loc);
7831 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7832 
7833 	  /* Add test and split block.  */
7834 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7835 	  stmt = gimple_build_cond_empty (expr);
7836 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7837 	  split = split_block (body_bb, stmt);
7838 	  elem_body_bb = split->dest;
7839 	  if (cont_bb == body_bb)
7840 	    cont_bb = elem_body_bb;
7841 	  body_bb = split->src;
7842 
7843 	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7844 
7845 	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
7846 	  if (cont_bb == NULL)
7847 	    {
7848 	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7849 	      e->probability = profile_probability::even ();
7850 	      split->probability = profile_probability::even ();
7851 	    }
7852 
7853 	  /* Initialize the user's loop vars.  */
7854 	  gsi = gsi_start_bb (elem_body_bb);
7855 	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7856 				     diff_type);
7857 	}
7858     }
7859 
7860   /* Loop increment goes into cont_bb.  If this is not a loop, we
7861      will have spawned threads as if it was, and each one will
7862      execute one iteration.  The specification is not explicit about
7863      whether such constructs are ill-formed or not, and they can
7864      occur, especially when noreturn routines are involved.  */
7865   if (cont_bb)
7866     {
7867       gsi = gsi_last_nondebug_bb (cont_bb);
7868       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7869       loc = gimple_location (cont_stmt);
7870 
7871       if (fd->tiling)
7872 	{
7873 	  /* Insert element loop increment and test.  */
7874 	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7875 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7876 					   true, GSI_SAME_STMT);
7877 	  ass = gimple_build_assign (e_offset, expr);
7878 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7879 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7880 
7881 	  stmt = gimple_build_cond_empty (expr);
7882 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7883 	  split = split_block (cont_bb, stmt);
7884 	  elem_cont_bb = split->src;
7885 	  cont_bb = split->dest;
7886 
7887 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7888 	  split->probability = profile_probability::unlikely ().guessed ();
7889 	  edge latch_edge
7890 	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7891 	  latch_edge->probability = profile_probability::likely ().guessed ();
7892 
7893 	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7894 	  skip_edge->probability = profile_probability::unlikely ().guessed ();
7895 	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7896 	  loop_entry_edge->probability
7897 	    = profile_probability::likely ().guessed ();
7898 
7899 	  gsi = gsi_for_stmt (cont_stmt);
7900 	}
7901 
7902       /* Increment offset.  */
7903       if (gimple_in_ssa_p (cfun))
7904 	expr = build2 (plus_code, iter_type, offset,
7905 		       fold_convert (plus_type, step));
7906       else
7907 	expr = build2 (PLUS_EXPR, diff_type, offset, step);
7908       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7909 				       true, GSI_SAME_STMT);
7910       ass = gimple_build_assign (offset_incr, expr);
7911       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7912       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7913       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7914 
7915       /* Remove the GIMPLE_OMP_CONTINUE.  */
7916       gsi_remove (&gsi, true);
7917 
7918       /* Fixup edges from cont_bb.  */
7919       be = BRANCH_EDGE (cont_bb);
7920       fte = FALLTHRU_EDGE (cont_bb);
7921       be->flags |= EDGE_TRUE_VALUE;
7922       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7923 
7924       if (chunking)
7925 	{
7926 	  /* Split the beginning of exit_bb to make bottom_bb.  We
7927 	     need to insert a nop at the start, because splitting is
7928 	     after a stmt, not before.  */
7929 	  gsi = gsi_start_bb (exit_bb);
7930 	  stmt = gimple_build_nop ();
7931 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7932 	  split = split_block (exit_bb, stmt);
7933 	  bottom_bb = split->src;
7934 	  exit_bb = split->dest;
7935 	  gsi = gsi_last_bb (bottom_bb);
7936 
7937 	  /* Chunk increment and test goes into bottom_bb.  */
7938 	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7939 			 build_int_cst (diff_type, 1));
7940 	  ass = gimple_build_assign (chunk_no, expr);
7941 	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7942 
7943 	  /* Chunk test at end of bottom_bb.  */
7944 	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7945 	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7946 			    GSI_CONTINUE_LINKING);
7947 
7948 	  /* Fixup edges from bottom_bb.  */
7949 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7950 	  split->probability = profile_probability::unlikely ().guessed ();
7951 	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7952 	  latch_edge->probability = profile_probability::likely ().guessed ();
7953 	}
7954     }
7955 
7956   gsi = gsi_last_nondebug_bb (exit_bb);
7957   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7958   loc = gimple_location (gsi_stmt (gsi));
7959 
7960   if (!gimple_in_ssa_p (cfun))
7961     {
7962       /* Insert the final value of V, in case it is live.  This is the
7963 	 value for the only thread that survives past the join.  */
7964       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7965       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7966       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7967       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7968       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7969       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7970 				       true, GSI_SAME_STMT);
7971       ass = gimple_build_assign (v, expr);
7972       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7973     }
7974 
7975   /* Remove the OMP_RETURN.  */
7976   gsi_remove (&gsi, true);
7977 
7978   if (cont_bb)
7979     {
7980       /* We now have one, two or three nested loops.  Update the loop
7981 	 structures.  */
7982       class loop *parent = entry_bb->loop_father;
7983       class loop *body = body_bb->loop_father;
7984 
7985       if (chunking)
7986 	{
7987 	  class loop *chunk_loop = alloc_loop ();
7988 	  chunk_loop->header = head_bb;
7989 	  chunk_loop->latch = bottom_bb;
7990 	  add_loop (chunk_loop, parent);
7991 	  parent = chunk_loop;
7992 	}
7993       else if (parent != body)
7994 	{
7995 	  gcc_assert (body->header == body_bb);
7996 	  gcc_assert (body->latch == cont_bb
7997 		      || single_pred (body->latch) == cont_bb);
7998 	  parent = NULL;
7999 	}
8000 
8001       if (parent)
8002 	{
8003 	  class loop *body_loop = alloc_loop ();
8004 	  body_loop->header = body_bb;
8005 	  body_loop->latch = cont_bb;
8006 	  add_loop (body_loop, parent);
8007 
8008 	  if (fd->tiling)
8009 	    {
8010 	      /* Insert tiling's element loop.  */
8011 	      class loop *inner_loop = alloc_loop ();
8012 	      inner_loop->header = elem_body_bb;
8013 	      inner_loop->latch = elem_cont_bb;
8014 	      add_loop (inner_loop, body_loop);
8015 	    }
8016 	}
8017     }
8018 }
8019 
8020 /* Expand the OMP loop defined by REGION.  */
8021 
8022 static void
8023 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8024 {
8025   struct omp_for_data fd;
8026   struct omp_for_data_loop *loops;
8027 
8028   loops = XALLOCAVEC (struct omp_for_data_loop,
8029 		      gimple_omp_for_collapse (last_stmt (region->entry)));
8030   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8031 			&fd, loops);
8032   region->sched_kind = fd.sched_kind;
8033   region->sched_modifiers = fd.sched_modifiers;
8034   region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8035   if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8036     {
8037       for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8038 	if ((loops[i].m1 || loops[i].m2)
8039 	    && (loops[i].m1 == NULL_TREE
8040 		|| TREE_CODE (loops[i].m1) == INTEGER_CST)
8041 	    && (loops[i].m2 == NULL_TREE
8042 		|| TREE_CODE (loops[i].m2) == INTEGER_CST)
8043 	    && TREE_CODE (loops[i].step) == INTEGER_CST
8044 	    && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8045 	  {
8046 	    tree t;
8047 	    tree itype = TREE_TYPE (loops[i].v);
8048 	    if (loops[i].m1 && loops[i].m2)
8049 	      t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8050 	    else if (loops[i].m1)
8051 	      t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8052 	    else
8053 	      t = loops[i].m2;
8054 	    t = fold_build2 (MULT_EXPR, itype, t,
8055 			     fold_convert (itype,
8056 					   loops[i - loops[i].outer].step));
8057 	    if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8058 	      t = fold_build2 (TRUNC_MOD_EXPR, itype,
8059 			       fold_build1 (NEGATE_EXPR, itype, t),
8060 			       fold_build1 (NEGATE_EXPR, itype,
8061 					    fold_convert (itype,
8062 							  loops[i].step)));
8063 	    else
8064 	      t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8065 			       fold_convert (itype, loops[i].step));
8066 	    if (integer_nonzerop (t))
8067 	      error_at (gimple_location (fd.for_stmt),
8068 			"invalid OpenMP non-rectangular loop step; "
8069 			"%<(%E - %E) * %E%> is not a multiple of loop %d "
8070 			"step %qE",
8071 			loops[i].m2 ? loops[i].m2 : integer_zero_node,
8072 			loops[i].m1 ? loops[i].m1 : integer_zero_node,
8073 			loops[i - loops[i].outer].step, i + 1,
8074 			loops[i].step);
8075 	  }
8076     }
8077 
8078   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8079   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8080   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8081   if (region->cont)
8082     {
8083       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8084       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8085       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8086     }
8087   else
8088     /* If there isn't a continue then this is a degenerate case where
8089        the introduction of abnormal edges during lowering will prevent
8090        original loops from being detected.  Fix that up.  */
8091     loops_state_set (LOOPS_NEED_FIXUP);
8092 
8093   if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8094     expand_omp_simd (region, &fd);
8095   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8096     {
8097       gcc_assert (!inner_stmt && !fd.non_rect);
8098       expand_oacc_for (region, &fd);
8099     }
8100   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8101     {
8102       if (gimple_omp_for_combined_into_p (fd.for_stmt))
8103 	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8104       else
8105 	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8106     }
8107   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8108 	   && !fd.have_ordered)
8109     {
8110       if (fd.chunk_size == NULL)
8111 	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8112       else
8113 	expand_omp_for_static_chunk (region, &fd, inner_stmt);
8114     }
8115   else
8116     {
8117       int fn_index, start_ix, next_ix;
8118       unsigned HOST_WIDE_INT sched = 0;
8119       tree sched_arg = NULL_TREE;
8120 
8121       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8122 		  == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8123       if (fd.chunk_size == NULL
8124 	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8125 	fd.chunk_size = integer_zero_node;
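      /* The SCHED value computed below is only passed on, via SCHED_ARG,
	 to the unified GOMP_loop*_start entry points.  Presumably (an
	 inference from libgomp, not spelled out here) it mirrors
	 libgomp's enum gomp_schedule_type -- 0 runtime, 1 static,
	 2 dynamic, 3 guided, 4 auto -- with bit 31 acting as the
	 monotonic flag.  */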
8126       switch (fd.sched_kind)
8127 	{
8128 	case OMP_CLAUSE_SCHEDULE_RUNTIME:
8129 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8130 	      && fd.lastprivate_conditional == 0)
8131 	    {
8132 	      gcc_assert (!fd.have_ordered);
8133 	      fn_index = 6;
8134 	      sched = 4;
8135 	    }
8136 	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8137 		   && !fd.have_ordered
8138 		   && fd.lastprivate_conditional == 0)
8139 	    fn_index = 7;
8140 	  else
8141 	    {
8142 	      fn_index = 3;
8143 	      sched = (HOST_WIDE_INT_1U << 31);
8144 	    }
8145 	  break;
8146 	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8147 	case OMP_CLAUSE_SCHEDULE_GUIDED:
8148 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8149 	      && !fd.have_ordered
8150 	      && fd.lastprivate_conditional == 0)
8151 	    {
8152 	      fn_index = 3 + fd.sched_kind;
8153 	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8154 	      break;
8155 	    }
8156 	  fn_index = fd.sched_kind;
8157 	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8158 	  sched += (HOST_WIDE_INT_1U << 31);
8159 	  break;
8160 	case OMP_CLAUSE_SCHEDULE_STATIC:
8161 	  gcc_assert (fd.have_ordered);
8162 	  fn_index = 0;
8163 	  sched = (HOST_WIDE_INT_1U << 31) + 1;
8164 	  break;
8165 	default:
8166 	  gcc_unreachable ();
8167 	}
8168       if (!fd.ordered)
8169 	fn_index += fd.have_ordered * 8;
8170       if (fd.ordered)
8171 	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8172       else
8173 	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8174       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8175       if (fd.have_reductemp || fd.have_pointer_condtemp)
8176 	{
8177 	  if (fd.ordered)
8178 	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8179 	  else if (fd.have_ordered)
8180 	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8181 	  else
8182 	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8183 	  sched_arg = build_int_cstu (long_integer_type_node, sched);
8184 	  if (!fd.chunk_size)
8185 	    fd.chunk_size = integer_zero_node;
8186 	}
8187       if (fd.iter_type == long_long_unsigned_type_node)
8188 	{
8189 	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8190 			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8191 	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8192 		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8193 	}
8194       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8195 			      (enum built_in_function) next_ix, sched_arg,
8196 			      inner_stmt);
8197     }
8198 
8199   if (gimple_in_ssa_p (cfun))
8200     update_ssa (TODO_update_ssa_only_virtuals);
8201 }
8202 
8203 /* Expand code for an OpenMP sections directive.  In pseudo code, we generate
8204 
8205 	v = GOMP_sections_start (n);
8206     L0:
8207 	switch (v)
8208 	  {
8209 	  case 0:
8210 	    goto L2;
8211 	  case 1:
8212 	    section 1;
8213 	    goto L1;
8214 	  case 2:
8215 	    ...
8216 	  case n:
8217 	    ...
8218 	  default:
8219 	    abort ();
8220 	  }
8221     L1:
8222 	v = GOMP_sections_next ();
8223 	goto L0;
8224     L2:
8225 	reduction;
8226 
8227     If this is a combined parallel sections, replace the call to
8228     GOMP_sections_start with call to GOMP_sections_next.  */
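
/* For illustration (hypothetical user code): the switch above would be
   produced for a construct such as

	#pragma omp sections
	{
	  #pragma omp section
	    work1 ();
	  #pragma omp section
	    work2 ();
	}

   with one case per section; each thread keeps calling
   GOMP_sections_next and executing the case it is handed until the
   returned value is 0.  */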
8229 
8230 static void
8231 expand_omp_sections (struct omp_region *region)
8232 {
8233   tree t, u, vin = NULL, vmain, vnext, l2;
8234   unsigned len;
8235   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8236   gimple_stmt_iterator si, switch_si;
8237   gomp_sections *sections_stmt;
8238   gimple *stmt;
8239   gomp_continue *cont;
8240   edge_iterator ei;
8241   edge e;
8242   struct omp_region *inner;
8243   unsigned i, casei;
8244   bool exit_reachable = region->cont != NULL;
8245 
8246   gcc_assert (region->exit != NULL);
8247   entry_bb = region->entry;
8248   l0_bb = single_succ (entry_bb);
8249   l1_bb = region->cont;
8250   l2_bb = region->exit;
8251   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8252     l2 = gimple_block_label (l2_bb);
8253   else
8254     {
8255       /* This can happen if there are reductions.  */
8256       len = EDGE_COUNT (l0_bb->succs);
8257       gcc_assert (len > 0);
8258       e = EDGE_SUCC (l0_bb, len - 1);
8259       si = gsi_last_nondebug_bb (e->dest);
8260       l2 = NULL_TREE;
8261       if (gsi_end_p (si)
8262 	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8263 	l2 = gimple_block_label (e->dest);
8264       else
8265 	FOR_EACH_EDGE (e, ei, l0_bb->succs)
8266 	  {
8267 	    si = gsi_last_nondebug_bb (e->dest);
8268 	    if (gsi_end_p (si)
8269 		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8270 	      {
8271 		l2 = gimple_block_label (e->dest);
8272 		break;
8273 	      }
8274 	  }
8275     }
8276   if (exit_reachable)
8277     default_bb = create_empty_bb (l1_bb->prev_bb);
8278   else
8279     default_bb = create_empty_bb (l0_bb);
8280 
8281   /* We will build a switch() with enough cases for all the
8282      GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work,
8283      and a default case to abort if something goes wrong.  */
8284   len = EDGE_COUNT (l0_bb->succs);
8285 
8286   /* Use vec::quick_push on label_vec throughout, since we know the size
8287      in advance.  */
8288   auto_vec<tree> label_vec (len);
8289 
8290   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8291      GIMPLE_OMP_SECTIONS statement.  */
8292   si = gsi_last_nondebug_bb (entry_bb);
8293   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8294   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8295   vin = gimple_omp_sections_control (sections_stmt);
8296   tree clauses = gimple_omp_sections_clauses (sections_stmt);
8297   tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8298   tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8299   tree cond_var = NULL_TREE;
8300   if (reductmp || condtmp)
8301     {
8302       tree reductions = null_pointer_node, mem = null_pointer_node;
8303       tree memv = NULL_TREE, condtemp = NULL_TREE;
8304       gimple_stmt_iterator gsi = gsi_none ();
8305       gimple *g = NULL;
8306       if (reductmp)
8307 	{
8308 	  reductions = OMP_CLAUSE_DECL (reductmp);
8309 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8310 	  g = SSA_NAME_DEF_STMT (reductions);
8311 	  reductions = gimple_assign_rhs1 (g);
8312 	  OMP_CLAUSE_DECL (reductmp) = reductions;
8313 	  gsi = gsi_for_stmt (g);
8314 	}
8315       else
8316 	gsi = si;
8317       if (condtmp)
8318 	{
8319 	  condtemp = OMP_CLAUSE_DECL (condtmp);
8320 	  tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8321 				    OMP_CLAUSE__CONDTEMP_);
8322 	  cond_var = OMP_CLAUSE_DECL (c);
8323 	  tree type = TREE_TYPE (condtemp);
8324 	  memv = create_tmp_var (type);
8325 	  TREE_ADDRESSABLE (memv) = 1;
8326 	  unsigned cnt = 0;
8327 	  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8328 	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8329 		&& OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8330 	      ++cnt;
8331 	  unsigned HOST_WIDE_INT sz
8332 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8333 	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8334 				   false);
8335 	  mem = build_fold_addr_expr (memv);
8336 	}
8337       t = build_int_cst (unsigned_type_node, len - 1);
8338       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8339       stmt = gimple_build_call (u, 3, t, reductions, mem);
8340       gimple_call_set_lhs (stmt, vin);
8341       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8342       if (condtmp)
8343 	{
8344 	  expand_omp_build_assign (&gsi, condtemp, memv, false);
8345 	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8346 			   vin, build_one_cst (TREE_TYPE (cond_var)));
8347 	  expand_omp_build_assign (&gsi, cond_var, t, false);
8348 	}
8349       if (reductmp)
8350 	{
8351 	  gsi_remove (&gsi, true);
8352 	  release_ssa_name (gimple_assign_lhs (g));
8353 	}
8354     }
8355   else if (!is_combined_parallel (region))
8356     {
8357       /* If we are not inside a combined parallel+sections region,
8358 	 call GOMP_sections_start.  */
8359       t = build_int_cst (unsigned_type_node, len - 1);
8360       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8361       stmt = gimple_build_call (u, 1, t);
8362     }
8363   else
8364     {
8365       /* Otherwise, call GOMP_sections_next.  */
8366       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8367       stmt = gimple_build_call (u, 0);
8368     }
8369   if (!reductmp && !condtmp)
8370     {
8371       gimple_call_set_lhs (stmt, vin);
8372       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8373     }
8374   gsi_remove (&si, true);
8375 
8376   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8377      L0_BB.  */
8378   switch_si = gsi_last_nondebug_bb (l0_bb);
8379   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8380   if (exit_reachable)
8381     {
8382       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8383       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8384       vmain = gimple_omp_continue_control_use (cont);
8385       vnext = gimple_omp_continue_control_def (cont);
8386     }
8387   else
8388     {
8389       vmain = vin;
8390       vnext = NULL_TREE;
8391     }
8392 
8393   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8394   label_vec.quick_push (t);
8395   i = 1;
8396 
8397   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
8398   for (inner = region->inner, casei = 1;
8399        inner;
8400        inner = inner->next, i++, casei++)
8401     {
8402       basic_block s_entry_bb, s_exit_bb;
8403 
8404       /* Skip optional reduction region.  */
8405       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8406 	{
8407 	  --i;
8408 	  --casei;
8409 	  continue;
8410 	}
8411 
8412       s_entry_bb = inner->entry;
8413       s_exit_bb = inner->exit;
8414 
8415       t = gimple_block_label (s_entry_bb);
8416       u = build_int_cst (unsigned_type_node, casei);
8417       u = build_case_label (u, NULL, t);
8418       label_vec.quick_push (u);
8419 
8420       si = gsi_last_nondebug_bb (s_entry_bb);
8421       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8422       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8423       gsi_remove (&si, true);
8424       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8425 
8426       if (s_exit_bb == NULL)
8427 	continue;
8428 
8429       si = gsi_last_nondebug_bb (s_exit_bb);
8430       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8431       gsi_remove (&si, true);
8432 
8433       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8434     }
8435 
8436   /* Error handling code goes in DEFAULT_BB.  */
8437   t = gimple_block_label (default_bb);
8438   u = build_case_label (NULL, NULL, t);
8439   make_edge (l0_bb, default_bb, 0);
8440   add_bb_to_loop (default_bb, current_loops->tree_root);
8441 
8442   stmt = gimple_build_switch (vmain, u, label_vec);
8443   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8444   gsi_remove (&switch_si, true);
8445 
8446   si = gsi_start_bb (default_bb);
8447   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8448   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8449 
8450   if (exit_reachable)
8451     {
8452       tree bfn_decl;
8453 
8454       /* Code to get the next section goes in L1_BB.  */
8455       si = gsi_last_nondebug_bb (l1_bb);
8456       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8457 
8458       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8459       stmt = gimple_build_call (bfn_decl, 0);
8460       gimple_call_set_lhs (stmt, vnext);
8461       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8462       if (cond_var)
8463 	{
8464 	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8465 			   vnext, build_one_cst (TREE_TYPE (cond_var)));
8466 	  expand_omp_build_assign (&si, cond_var, t, false);
8467 	}
8468       gsi_remove (&si, true);
8469 
8470       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8471     }
8472 
8473   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
8474   si = gsi_last_nondebug_bb (l2_bb);
8475   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8476     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8477   else if (gimple_omp_return_lhs (gsi_stmt (si)))
8478     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8479   else
8480     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8481   stmt = gimple_build_call (t, 0);
8482   if (gimple_omp_return_lhs (gsi_stmt (si)))
8483     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8484   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8485   gsi_remove (&si, true);
8486 
8487   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8488 }
8489 
8490 /* Expand code for an OpenMP single or scope directive.  We've already expanded
8491    much of the code; here we simply place the GOMP_barrier call.  */
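
/* A minimal sketch (hypothetical user code):

       #pragma omp single
	 body ();

   reaches this point with most of its lowering already done, so all
   that is left is stripping the entry/exit markers and, unless nowait
   was given, emitting the implicit GOMP_barrier at the exit.  */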
8492 
8493 static void
8494 expand_omp_single (struct omp_region *region)
8495 {
8496   basic_block entry_bb, exit_bb;
8497   gimple_stmt_iterator si;
8498 
8499   entry_bb = region->entry;
8500   exit_bb = region->exit;
8501 
8502   si = gsi_last_nondebug_bb (entry_bb);
8503   enum gimple_code code = gimple_code (gsi_stmt (si));
8504   gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8505   gsi_remove (&si, true);
8506   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8507 
8508   if (exit_bb == NULL)
8509     {
8510       gcc_assert (code == GIMPLE_OMP_SCOPE);
8511       return;
8512     }
8513 
8514   si = gsi_last_nondebug_bb (exit_bb);
8515   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8516     {
8517       tree t = gimple_omp_return_lhs (gsi_stmt (si));
8518       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8519     }
8520   gsi_remove (&si, true);
8521   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8522 }
8523 
8524 /* Generic expansion for OpenMP synchronization directives: master,
8525    ordered and critical.  All we need to do here is remove the entry
8526    and exit markers for REGION.  */
8527 
8528 static void
8529 expand_omp_synch (struct omp_region *region)
8530 {
8531   basic_block entry_bb, exit_bb;
8532   gimple_stmt_iterator si;
8533 
8534   entry_bb = region->entry;
8535   exit_bb = region->exit;
8536 
8537   si = gsi_last_nondebug_bb (entry_bb);
8538   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8539 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8540 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8541 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8542 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8543 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8544 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8545   if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8546       && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8547     {
8548       expand_omp_taskreg (region);
8549       return;
8550     }
8551   gsi_remove (&si, true);
8552   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8553 
8554   if (exit_bb)
8555     {
8556       si = gsi_last_nondebug_bb (exit_bb);
8557       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8558       gsi_remove (&si, true);
8559       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8560     }
8561 }
8562 
8563 /* Translate enum omp_memory_order to enum memmodel for the fail
8564    clause embedded in it.  */
8565 
8566 static enum memmodel
8567 omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8568 {
8569   switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8570     {
8571     case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8572       switch (mo & OMP_MEMORY_ORDER_MASK)
8573 	{
8574 	case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8575 	case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8576 	case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8577 	case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8578 	case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8579 	default: break;
8580 	}
8581       gcc_unreachable ();
8582     case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8583     case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8584     case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8585     default: gcc_unreachable ();
8586     }
8587 }
8588 
8589 /* Translate enum omp_memory_order to enum memmodel.  The two enums
8590    use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED can
8591    be 0, and omp_memory_order additionally encodes the fail mode.  */
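
/* For example, OMP_MEMORY_ORDER_ACQ_REL with an unspecified fail clause
   maps to MEMMODEL_ACQ_REL here, while omp_memory_order_to_fail_memmodel
   gives MEMMODEL_ACQUIRE for the corresponding failure path.  */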
8592 
8593 static enum memmodel
8594 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8595 {
8596   enum memmodel ret, fail_ret;
8597   switch (mo & OMP_MEMORY_ORDER_MASK)
8598     {
8599     case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8600     case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8601     case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8602     case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8603     case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8604     default: gcc_unreachable ();
8605     }
8606   /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8607      we can just return ret here unconditionally.  Otherwise, work around
8608      it here and make sure fail memmodel is not stronger.  */
8609   if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8610     return ret;
8611   fail_ret = omp_memory_order_to_fail_memmodel (mo);
8612   if (fail_ret > ret)
8613     return fail_ret;
8614   return ret;
8615 }
8616 
8617 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8618    operation as a normal volatile load.  */
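
/* Sketch of the emitted call (variable names hypothetical): for a
   4-byte type INDEX is 2, and the statement built below is roughly

       loaded_val = __atomic_load_4 (addr, mo);

   with MO derived from the directive's memory-order clause.  */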
8619 
8620 static bool
8621 expand_omp_atomic_load (basic_block load_bb, tree addr,
8622 			tree loaded_val, int index)
8623 {
8624   enum built_in_function tmpbase;
8625   gimple_stmt_iterator gsi;
8626   basic_block store_bb;
8627   location_t loc;
8628   gimple *stmt;
8629   tree decl, call, type, itype;
8630 
8631   gsi = gsi_last_nondebug_bb (load_bb);
8632   stmt = gsi_stmt (gsi);
8633   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8634   loc = gimple_location (stmt);
8635 
8636   /* ??? If the target does not implement atomic_load_optab[mode], and mode
8637      is smaller than word size, then expand_atomic_load assumes that the load
8638      is atomic.  We could avoid the builtin entirely in this case.  */
8639 
8640   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8641   decl = builtin_decl_explicit (tmpbase);
8642   if (decl == NULL_TREE)
8643     return false;
8644 
8645   type = TREE_TYPE (loaded_val);
8646   itype = TREE_TYPE (TREE_TYPE (decl));
8647 
8648   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8649   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8650   call = build_call_expr_loc (loc, decl, 2, addr, mo);
8651   if (!useless_type_conversion_p (type, itype))
8652     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8653   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8654 
8655   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8656   gsi_remove (&gsi, true);
8657 
8658   store_bb = single_succ (load_bb);
8659   gsi = gsi_last_nondebug_bb (store_bb);
8660   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8661   gsi_remove (&gsi, true);
8662 
8663   if (gimple_in_ssa_p (cfun))
8664     update_ssa (TODO_update_ssa_no_phi);
8665 
8666   return true;
8667 }
8668 
8669 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8670    operation as a normal volatile store.  */
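
/* Sketch of the emitted call (variable names hypothetical): for a
   4-byte type this becomes roughly

       __atomic_store_4 (addr, stored_val, mo);

   or, when the old value is still needed,

       loaded_val = __atomic_exchange_4 (addr, stored_val, mo);  */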
8671 
8672 static bool
8673 expand_omp_atomic_store (basic_block load_bb, tree addr,
8674 			 tree loaded_val, tree stored_val, int index)
8675 {
8676   enum built_in_function tmpbase;
8677   gimple_stmt_iterator gsi;
8678   basic_block store_bb = single_succ (load_bb);
8679   location_t loc;
8680   gimple *stmt;
8681   tree decl, call, type, itype;
8682   machine_mode imode;
8683   bool exchange;
8684 
8685   gsi = gsi_last_nondebug_bb (load_bb);
8686   stmt = gsi_stmt (gsi);
8687   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8688 
8689   /* If the load value is needed, then this isn't a store but an exchange.  */
8690   exchange = gimple_omp_atomic_need_value_p (stmt);
8691 
8692   gsi = gsi_last_nondebug_bb (store_bb);
8693   stmt = gsi_stmt (gsi);
8694   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8695   loc = gimple_location (stmt);
8696 
8697   /* ??? If the target does not implement atomic_store_optab[mode], and mode
8698      is smaller than word size, then expand_atomic_store assumes that the store
8699      is atomic.  We could avoid the builtin entirely in this case.  */
8700 
8701   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8702   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8703   decl = builtin_decl_explicit (tmpbase);
8704   if (decl == NULL_TREE)
8705     return false;
8706 
8707   type = TREE_TYPE (stored_val);
8708 
8709   /* Dig out the type of the function's second argument.  */
8710   itype = TREE_TYPE (decl);
8711   itype = TYPE_ARG_TYPES (itype);
8712   itype = TREE_CHAIN (itype);
8713   itype = TREE_VALUE (itype);
8714   imode = TYPE_MODE (itype);
8715 
8716   if (exchange && !can_atomic_exchange_p (imode, true))
8717     return false;
8718 
8719   if (!useless_type_conversion_p (itype, type))
8720     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8721   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8722   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8723   call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8724   if (exchange)
8725     {
8726       if (!useless_type_conversion_p (type, itype))
8727 	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8728       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8729     }
8730 
8731   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8732   gsi_remove (&gsi, true);
8733 
8734   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
8735   gsi = gsi_last_nondebug_bb (load_bb);
8736   gsi_remove (&gsi, true);
8737 
8738   if (gimple_in_ssa_p (cfun))
8739     update_ssa (TODO_update_ssa_no_phi);
8740 
8741   return true;
8742 }
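
/* For illustration (a sketch): with a 4-byte int X,

     #pragma omp atomic write
       x = expr;

   becomes roughly  __atomic_store_4 (&x, expr, mo);  whereas a capture form
   that still needs the old value, e.g.

     #pragma omp atomic capture
       { v = x; x = expr; }

   takes the exchange path and becomes  v = __atomic_exchange_4 (&x, expr, mo);
   with MO again derived from the directive's memory-order clause.  */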

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_ATOMIC_STORE (val)

     ??? FIXME: Allow a more flexible sequence.
     Perhaps use data flow to pick the statements.  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
	return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  enum omp_memory_order omo
    = gimple_omp_atomic_memory_order (last_stmt (load_bb));
  enum memmodel mo = omp_memory_order_to_memmodel (omo);
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implement compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically, so the RELAXED
     memory model suffices unless the directive's memory-order clause asked
     for something stronger.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL, mo));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}
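
/* For illustration (a sketch): a user-level

     #pragma omp atomic update
       x = x + n;

   matches the expected form above and, for a 4-byte X, expands into roughly

     __atomic_fetch_add_4 (&x, n, mo);

   while a capture form that needs the updated value selects the
   __atomic_add_fetch_4 variant instead (NEED_NEW above), and one that needs
   the original value keeps the fetch-op variant and assigns its result
   (NEED_OLD).  */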

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
   Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_cas (basic_block load_bb, tree addr,
		       tree loaded_val, tree stored_val, int index)
{
  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp == e ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

     or in store_bb instead:
       tmp2 = tmp == e;
       val = tmp2 ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

     or:
       tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
       val = e == tmp3 ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

     etc.  */

  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
  gimple *store_stmt = gsi_stmt (gsi);
  if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return false;
  gimple *condexpr_stmt = gsi_stmt (gsi);
  if (!is_gimple_assign (condexpr_stmt)
      || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
    return false;
  if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
    return false;
  gimple *cond_stmt = NULL;
  gimple *vce_stmt = NULL;
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi))
    {
      cond_stmt = gsi_stmt (gsi);
      if (!is_gimple_assign (cond_stmt))
	return false;
      if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
	{
	  gsi_prev_nondebug (&gsi);
	  if (!gsi_end_p (gsi))
	    {
	      vce_stmt = gsi_stmt (gsi);
	      if (!is_gimple_assign (vce_stmt)
		  || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
		return false;
	    }
	}
      else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
	std::swap (vce_stmt, cond_stmt);
      else
	return false;
      if (vce_stmt)
	{
	  tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
	  if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
	      || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
	    return false;
	  if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
	      || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
	      || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
				      TYPE_SIZE (TREE_TYPE (loaded_val))))
	    return false;
	  gsi_prev_nondebug (&gsi);
	  if (!gsi_end_p (gsi))
	    return false;
	}
    }
  tree cond = gimple_assign_rhs1 (condexpr_stmt);
  tree cond_op1, cond_op2;
  if (cond_stmt)
    {
      if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
	return false;
      cond_op1 = gimple_assign_rhs1 (cond_stmt);
      cond_op2 = gimple_assign_rhs2 (cond_stmt);
    }
  else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
    return false;
  else
    {
      cond_op1 = TREE_OPERAND (cond, 0);
      cond_op2 = TREE_OPERAND (cond, 1);
    }
  tree d;
  if (TREE_CODE (cond) == NE_EXPR)
    {
      if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
	return false;
      d = gimple_assign_rhs3 (condexpr_stmt);
    }
  else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
    return false;
  else
    d = gimple_assign_rhs2 (condexpr_stmt);
  tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
  if (operand_equal_p (e, cond_op1))
    e = cond_op2;
  else if (operand_equal_p (e, cond_op2))
    e = cond_op1;
  else
    return false;

  location_t loc = gimple_location (store_stmt);
  gimple *load_stmt = last_stmt (load_bb);
  bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
  bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
  bool weak = gimple_omp_atomic_weak_p (load_stmt);
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
  gcc_checking_assert (!need_old || !need_new);

  enum built_in_function fncode
    = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				+ index + 1);
  tree cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
    return false;

  gsi = gsi_for_stmt (store_stmt);
  if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
    {
      tree ne = create_tmp_reg (itype);
      gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      e = ne;
    }
  if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
    {
      tree nd = create_tmp_reg (itype);
      enum tree_code code;
      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
	{
	  code = VIEW_CONVERT_EXPR;
	  d = build1 (VIEW_CONVERT_EXPR, itype, d);
	}
      else
	code = NOP_EXPR;
      gimple *g = gimple_build_assign (nd, code, d);
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      d = nd;
    }

  tree ctype = build_complex_type (itype);
  int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
  gimple *g
    = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
				  build_int_cst (integer_type_node, flag),
				  mo, fmo);
  tree cres = create_tmp_reg (ctype);
  gimple_call_set_lhs (g, cres);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  if (cond_stmt || need_old || need_new)
    {
      tree im = create_tmp_reg (itype);
      g = gimple_build_assign (im, IMAGPART_EXPR,
			       build1 (IMAGPART_EXPR, itype, cres));
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);

      tree re = NULL_TREE;
      if (need_old || need_new)
	{
	  re = create_tmp_reg (itype);
	  g = gimple_build_assign (re, REALPART_EXPR,
				   build1 (REALPART_EXPR, itype, cres));
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	}

      if (cond_stmt)
	{
	  g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
				   NOP_EXPR, im);
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	}
      else if (need_new)
	{
	  g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
				   build2 (NE_EXPR, boolean_type_node,
					   im, build_zero_cst (itype)),
				   d, re);
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  re = gimple_assign_lhs (g);
	}

      if (need_old || need_new)
	{
	  tree v = need_old ? loaded_val : stored_val;
	  enum tree_code code;
	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
	    {
	      code = VIEW_CONVERT_EXPR;
	      re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
	    }
	  else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
	    code = NOP_EXPR;
	  else
	    code = TREE_CODE (re);
	  g = gimple_build_assign (v, code, re);
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	}
    }

  gsi_remove (&gsi, true);
  gsi = gsi_for_stmt (load_stmt);
  gsi_remove (&gsi, true);
  gsi = gsi_for_stmt (condexpr_stmt);
  gsi_remove (&gsi, true);
  if (cond_stmt)
    {
      gsi = gsi_for_stmt (cond_stmt);
      gsi_remove (&gsi, true);
    }
  if (vce_stmt)
    {
      gsi = gsi_for_stmt (vce_stmt);
      gsi_remove (&gsi, true);
    }

  return true;
}
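
/* For illustration (a sketch): with a 4-byte int X, an OpenMP 5.1

     #pragma omp atomic compare
       x = x == e ? d : x;

   matches the first form above and collapses into one internal-function
   call, roughly

     cres = .ATOMIC_COMPARE_EXCHANGE (&x, e, d, 4, mo, fmo);

   where the real part of CRES is the value X had before the operation, the
   imaginary part is the "compare succeeded" flag, and the 4 (plus 256 for a
   weak CAS) encodes the operand size.  */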

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	 // with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr, atype;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  atype = type;
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
  location_t loc = gimple_location (gsi_stmt (si));
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      atype = itype;
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (atype,
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    {
      tree off
	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
						      true), 0);
      initial = build2 (MEM_REF, atype, iaddr, off);
    }

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  tree ctype = build_complex_type (itype);
  int flag = int_size_in_bytes (itype);
  new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
					      ctype, 6, iaddr, loadedi,
					      storedi,
					      build_int_cst (integer_type_node,
							     flag),
					      mo, fmo);
  new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  class loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
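
/* For illustration (a sketch): for a 4-byte float X the loop built above
   amounts to

     oldi = *(view of X as unsigned int);
   repeat:
     newi = VIEW_CONVERT_EXPR<unsigned int> (rhs);  // with oldi for old X
     resi = REALPART of .ATOMIC_COMPARE_EXCHANGE (&x, oldi, newi, 4, mo, fmo);
     if (resi != oldi) { oldi = resi; goto repeat; }

   The compare is done on the integer view of the bits, so the loop
   terminates correctly even for NaNs and -0.0.  */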

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

				  GOMP_atomic_start ();
				  *addr = rhs;
				  GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, this appears to be within spec,
   which makes sense given that several other compilers handle this
   situation the same way.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val) with
   *addr = stored_val;  */

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  tree mem = build_simple_mem_ref (addr);
  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
  TREE_OPERAND (mem, 1)
    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
						 true),
		    TREE_OPERAND (mem, 1));
  stmt = gimple_build_assign (loaded_val, mem);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
  return true;
}
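
/* For illustration: the mutex fallback serializes only code that goes
   through the same GOMP_atomic_start/GOMP_atomic_end pair, so e.g. (a
   sketch; whether the fallback triggers is target-dependent) an update of
   a long double on a target without a wide-enough compare-and-swap

     #pragma omp atomic update
       ld += 1.0L;

   is protected against other "#pragma omp atomic" references to LD, but
   not against a plain unprotected "ld = 0.0L;" elsewhere.  */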

/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand it using
   expand_omp_atomic_fetch_op.  If that fails, we try to call
   expand_omp_atomic_pipeline, and if that fails too, the ultimate
   fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  */
	  scalar_mode smode;
	  if (loaded_val == stored_val
	      && (is_int_mode (TYPE_MODE (type), &smode)
		  || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.  */
	  if ((is_int_mode (TYPE_MODE (type), &smode)
	       || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop.  */
	  if (store_bb == single_succ (load_bb)
	      && !gimple_in_ssa_p (cfun)
	      && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
					index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}
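
/* For example, a 4-byte type has TYPE_SIZE_UNIT 4, so INDEX
   == exact_log2 (4) == 2, and the subroutines above select the "_4" builtin
   variants via BASE + INDEX + 1 (the builtins are enumerated _N, _1, _2,
   _4, _8, _16).  Only sizes of 1, 2, 4, 8 and 16 bytes, i.e. INDEX 0 to 4,
   are handled; everything else takes the mutex fallback.  */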

/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  class loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  class loop *single_outer = NULL;
  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  for (class loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}
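
/* For illustration: a kernels region containing a single outer loop with
   singly nested inner loops,

     #pragma acc kernels
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 ...

   gets its whole loop spine marked, whereas two sibling loops at any
   nesting depth inside the region leave all of its loops unmarked.  */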

/* Build target argument identifier from the DEVICE identifier, value
   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
{
  tree t = build_int_cst (integer_type_node, device);
  if (subseqent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		     build_int_cst (integer_type_node,
				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   build_int_cst (integer_type_node, id));
  return t;
}
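
/* For illustration: the identifier is simply the bitwise OR of the three
   GOMP_TARGET_ARG_* bitfields, e.g. get_target_argument_identifier_1
   (GOMP_TARGET_ARG_DEVICE_ALL, false, GOMP_TARGET_ARG_NUM_TEAMS); setting
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM tells the runtime that the actual value
   travels in the next array element rather than in this one.  */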

/* Like above but return it in type that can be directly stored as an element
   of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subseqent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
  return fold_convert (ptr_type_node, t);
}

/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
			   tree value)
{
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
			fold_convert (integer_type_node, value),
			build_int_cst (unsigned_type_node,
				       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}

/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   separate arguments.  */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
					 int id, tree value, vec <tree> *args)
{
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
					GSI_SAME_STMT);
      args->quick_push (value);
    }
}
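
/* For illustration: a constant that fits in the +-2^15 range, say a
   num_teams of 4, is pushed as a single element with the value shifted up
   by GOMP_TARGET_ARG_VALUE_SHIFT and ORed into the identifier bits, whereas
   a value only known at run time is pushed as two elements: the identifier
   with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, then the value itself cast to
   a pointer.  */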

/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Produce more, perhaps device specific, arguments here.  */

  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
							  args.length () + 1),
				  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
			 build_int_cst (integer_type_node, i),
			 NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
			 GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
		     build_int_cst (integer_type_node, args.length ()),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}
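
/* For illustration: for "#pragma omp target teams num_teams (4)" the
   .omp_target_args array built above holds (a sketch) the encoded NUM_TEAMS
   argument, the encoded THREAD_LIMIT argument (-1, i.e. not specified), and
   a terminating NULL pointer; its address becomes the trailing argument of
   the GOMP_target launch call emitted below.  */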

/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded;
  int target_kind;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  target_kind = gimple_omp_target_kind (entry_stmt);
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
      break;
    default:
      gcc_unreachable ();
    }

  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
    mark_loops_in_oacc_kernels_region (region->entry, region->exit);

  /* Going on, all OpenACC compute constructs are mapped to
     'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
     To distinguish between them, we attach attributes.  */
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc kernels"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc serial"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    default:
      /* Make sure we don't miss any.  */
      gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
			     && is_gimple_omp_offloaded (entry_stmt)));
      break;
    }

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the offloading body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
	{
	  basic_block entry_succ_bb = single_succ (entry_bb);
	  gimple_stmt_iterator gsi;
	  tree arg;
	  gimple *tgtcopy_stmt = NULL;
	  tree sender = TREE_VEC_ELT (data_arg, 0);

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0) == sender)
		    {
		      tgtcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (tgtcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
	  gsi_remove (&gsi, true);
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_*,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
		  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
	{
	  if (in_lto_p)
	    DECL_PRESERVE_P (child_fn) = 1;
	  vec_safe_push (offload_funcs, child_fn);
	}

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}

      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
  tree t1, t2, t3, t4, depend, c, clauses;
  enum built_in_function start_ix;
  unsigned int flags_i = 0;

  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  clauses = gimple_omp_target_clauses (entry_stmt);

  tree device = NULL_TREE;
  location_t device_loc = UNKNOWN_LOCATION;
  tree goacc_flags = NULL_TREE;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      /* By default, no GOACC_FLAGs are set.  */
      goacc_flags = integer_zero_node;
    }
  else
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (c)
	{
	  device = OMP_CLAUSE_DEVICE_ID (c);
	  device_loc = OMP_CLAUSE_LOCATION (c);
	  if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
	    sorry_at (device_loc, "%<ancestor%> not yet supported");
	}
      else
	{
	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
	     library choose).  */
	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
	  device_loc = gimple_location (entry_stmt);
	}

      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
      /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
	 nowait doesn't appear.  */
      if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
	c = NULL;
      if (c)
	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
    }
10013 
10014   /* By default, there is no conditional.  */
10015   tree cond = NULL_TREE;
10016   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10017   if (c)
10018     cond = OMP_CLAUSE_IF_EXPR (c);
10019   /* If we found the clause 'if (cond)', build:
10020      OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
10021      OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10022   if (cond)
10023     {
10024       tree *tp;
10025       if (is_gimple_omp_oacc (entry_stmt))
10026 	tp = &goacc_flags;
10027       else
10028 	{
10029 	  /* Ensure 'device' is of the correct type.  */
10030 	  device = fold_convert_loc (device_loc, integer_type_node, device);
10031 
10032 	  tp = &device;
10033 	}
10034 
10035       cond = gimple_boolify (cond);
10036 
10037       basic_block cond_bb, then_bb, else_bb;
10038       edge e;
10039       tree tmp_var;
10040 
10041       tmp_var = create_tmp_var (TREE_TYPE (*tp));
10042       if (offloaded)
10043 	e = split_block_after_labels (new_bb);
10044       else
10045 	{
10046 	  gsi = gsi_last_nondebug_bb (new_bb);
10047 	  gsi_prev (&gsi);
10048 	  e = split_block (new_bb, gsi_stmt (gsi));
10049 	}
10050       cond_bb = e->src;
10051       new_bb = e->dest;
10052       remove_edge (e);
10053 
10054       then_bb = create_empty_bb (cond_bb);
10055       else_bb = create_empty_bb (then_bb);
10056       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10057       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10058 
10059       stmt = gimple_build_cond_empty (cond);
10060       gsi = gsi_last_bb (cond_bb);
10061       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10062 
10063       gsi = gsi_start_bb (then_bb);
10064       stmt = gimple_build_assign (tmp_var, *tp);
10065       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10066 
10067       gsi = gsi_start_bb (else_bb);
10068       if (is_gimple_omp_oacc (entry_stmt))
10069 	stmt = gimple_build_assign (tmp_var,
10070 				    BIT_IOR_EXPR,
10071 				    *tp,
10072 				    build_int_cst (integer_type_node,
10073 						   GOACC_FLAG_HOST_FALLBACK));
10074       else
10075 	stmt = gimple_build_assign (tmp_var,
10076 				    build_int_cst (integer_type_node,
10077 						   GOMP_DEVICE_HOST_FALLBACK));
10078       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10079 
10080       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10081       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10082       add_bb_to_loop (then_bb, cond_bb->loop_father);
10083       add_bb_to_loop (else_bb, cond_bb->loop_father);
10084       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10085       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10086 
10087       *tp = tmp_var;
10088 
10089       gsi = gsi_last_nondebug_bb (new_bb);
10090     }
10091   else
10092     {
10093       gsi = gsi_last_nondebug_bb (new_bb);
10094 
10095       if (device != NULL_TREE)
10096 	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10097 					   true, GSI_SAME_STMT);
10098     }
10099 
10100   t = gimple_omp_target_data_arg (entry_stmt);
10101   if (t == NULL)
10102     {
10103       t1 = size_zero_node;
10104       t2 = build_zero_cst (ptr_type_node);
10105       t3 = t2;
10106       t4 = t2;
10107     }
10108   else
10109     {
10110       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10111       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10112       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10113       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10114       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10115     }
10116 
10117   gimple *g;
10118   bool tagging = false;
10119   /* The maximum number used by any start_ix, without varargs.  */
10120   auto_vec<tree, 11> args;
10121   if (is_gimple_omp_oacc (entry_stmt))
10122     {
10123       tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10124 					TREE_TYPE (goacc_flags), goacc_flags);
10125       goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10126 						NULL_TREE, true,
10127 						GSI_SAME_STMT);
10128       args.quick_push (goacc_flags_m);
10129     }
10130   else
10131     args.quick_push (device);
10132   if (offloaded)
10133     args.quick_push (build_fold_addr_expr (child_fn));
10134   args.quick_push (t1);
10135   args.quick_push (t2);
10136   args.quick_push (t3);
10137   args.quick_push (t4);
10138   switch (start_ix)
10139     {
10140     case BUILT_IN_GOACC_DATA_START:
10141     case BUILT_IN_GOACC_DECLARE:
10142     case BUILT_IN_GOMP_TARGET_DATA:
10143       break;
10144     case BUILT_IN_GOMP_TARGET:
10145     case BUILT_IN_GOMP_TARGET_UPDATE:
10146     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10147       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10148       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10149       if (c)
10150 	depend = OMP_CLAUSE_DECL (c);
10151       else
10152 	depend = build_int_cst (ptr_type_node, 0);
10153       args.quick_push (depend);
10154       if (start_ix == BUILT_IN_GOMP_TARGET)
10155 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
10156       break;
10157     case BUILT_IN_GOACC_PARALLEL:
10158       if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10159 	{
10160 	  tree dims = NULL_TREE;
10161 	  unsigned int ix;
10162 
10163 	  /* For serial constructs we set all dimensions to 1.  */
10164 	  for (ix = GOMP_DIM_MAX; ix--;)
10165 	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
10166 	  oacc_replace_fn_attrib (child_fn, dims);
10167 	}
10168       else
10169 	oacc_set_fn_attrib (child_fn, clauses, &args);
10170       tagging = true;
10171       /* FALLTHRU */
10172     case BUILT_IN_GOACC_ENTER_DATA:
10173     case BUILT_IN_GOACC_EXIT_DATA:
10174     case BUILT_IN_GOACC_UPDATE:
10175       {
10176 	tree t_async = NULL_TREE;
10177 
10178 	/* If present, use the value specified by the respective
10179 	   clause, making sure that is of the correct type.  */
10180 	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10181 	if (c)
10182 	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10183 				      integer_type_node,
10184 				      OMP_CLAUSE_ASYNC_EXPR (c));
10185 	else if (!tagging)
10186 	  /* Default values for t_async.  */
10187 	  t_async = fold_convert_loc (gimple_location (entry_stmt),
10188 				      integer_type_node,
10189 				      build_int_cst (integer_type_node,
10190 						     GOMP_ASYNC_SYNC));
10191 	if (tagging && t_async)
10192 	  {
10193 	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10194 
10195 	    if (TREE_CODE (t_async) == INTEGER_CST)
10196 	      {
10197 		/* See if we can pack the async arg in to the tag's
10198 		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
						    NULL_TREE, true,
						    GSI_SAME_STMT));

	/* Save the argument index, and ... */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
					   integer_type_node,
					   OMP_CLAUSE_WAIT_EXPR (c));
	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
					      GSI_SAME_STMT);
	      args.safe_push (arg);
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
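
/* For the tagged (OpenACC launch) case, the trailing arguments built
   above form a sequence of GOMP_LAUNCH_* tags.  Schematically, and
   only as a sketch (the exact bit layout is owned by oacc_launch_pack
   and gomp-constants.h, not spelled out here), the varargs tail looks
   like:

       <GOMP_LAUNCH_ASYNC tag, async id packed into the operand>
       <GOMP_LAUNCH_WAIT tag, operand N>  wait-1 ... wait-N
       <tag 0>			<-- terminal marker pushed above

   When the async id is not a small enough constant, the ASYNC tag
   instead carries GOMP_LAUNCH_OP_MAX and the id follows as a separate
   argument.  */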

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
}
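
/* For reference, a sketch of the call built above for a plain
   `#pragma omp target' region (argument names are illustrative only;
   t1..t4 stand for the map count and the addresses of the
   host-address, size and kind vectors computed earlier):

       GOMP_target_ext (device, child_fn, t1, t2, t3, t4,
			flags, depend, target_args);

   Stand-alone data directives such as BUILT_IN_GOMP_TARGET_DATA pass
   only the device and the mapping vectors; the switch above adds
   nothing further for them.  */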

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);

      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	case GIMPLE_OMP_SCOPE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding the corresponding
		   worksharing region with an ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_MASKED:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}
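
/* As an illustration of the depth-first order above, given

     #pragma omp parallel
       #pragma omp for
	 for (i = 0; i < n; i++)
	   ...

   the inner GIMPLE_OMP_FOR region is expanded first, and only then is
   the enclosing GIMPLE_OMP_PARALLEL outlined into a child function by
   expand_omp_taskreg (by which point the loop body no longer depends
   on anything inside the region).  */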

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, a
   whole forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_SERIAL:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
		case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASK
		   && gimple_omp_task_taskwait_p (stmt))
	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}
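
/* A sketch of the region tree this builds for

     #pragma omp parallel	<- region A (GIMPLE_OMP_PARALLEL)
     {
       #pragma omp for		<- region B: A->inner
	 ...
       #pragma omp single	<- region C: B->next, inside A
	 ...
     }

   Each region records its entry block (the one ending in the OMP
   directive), its exit block (ending in GIMPLE_OMP_RETURN) and, where
   applicable, its cont block (ending in GIMPLE_OMP_CONTINUE).  */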

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions, storing its root in
   root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}
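
/* The clone method above is worth noting: the pass manager requires a
   pass to provide clone () before it can be scheduled more than once,
   and ompexpssa is meant to pick up OMP-GIMPLE that appears (or
   survives) after SSA form is built; its gate keys off the absence of
   PROP_gimple_eomp.  */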

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_MASKED:
    case GIMPLE_OMP_SCOPE:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_SERIAL:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
	case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     successor edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}
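
/* A sketch of the edges the GIMPLE_OMP_FOR/GIMPLE_OMP_CONTINUE case
   above creates (block letters are illustrative):

     bb A: ... GIMPLE_OMP_FOR	   (region entry)
     bb B: loop body
     bb C: ... GIMPLE_OMP_CONTINUE (region cont)
     bb D: ... GIMPLE_OMP_RETURN   (region exit)

   A->B is marked abnormal so it cannot be split, C->B is the abnormal
   loopback edge, A->D covers the case of a loop body that never runs,
   and C->D is the abnormal fallthru edge out of the loop.  */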