1 /* Expansion pass for OMP directives.  Outlines regions of certain OMP
2    directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp), and so forth.
4 
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 
62 /* OMP region information.  Every parallel and workshare
63    directive is enclosed between two markers, the OMP_* directive
64    and a corresponding GIMPLE_OMP_RETURN statement.  */
65 
66 struct omp_region
67 {
68   /* The enclosing region.  */
69   struct omp_region *outer;
70 
71   /* First child region.  */
72   struct omp_region *inner;
73 
74   /* Next peer region.  */
75   struct omp_region *next;
76 
77   /* Block containing the omp directive as its last stmt.  */
78   basic_block entry;
79 
80   /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
81   basic_block exit;
82 
83   /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
84   basic_block cont;
85 
86   /* If this is a combined parallel+workshare region, this is a list
87      of additional arguments needed by the combined parallel+workshare
88      library call.  */
89   vec<tree, va_gc> *ws_args;
90 
91   /* The code for the omp directive of this region.  */
92   enum gimple_code type;
93 
94   /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
95   enum omp_clause_schedule_kind sched_kind;
96 
97   /* Schedule modifiers.  */
98   unsigned char sched_modifiers;
99 
100   /* True if this is a combined parallel+workshare region.  */
101   bool is_combined_parallel;
102 
103   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104      a depend clause.  */
105   gomp_ordered *ord_stmt;
106 };
107 
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
110 
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 				     bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
115 
116 /* Return true if REGION is a combined parallel+workshare region.  */
117 
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
120 {
121   return region->is_combined_parallel;
122 }
123 
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125    is the immediate dominator of PAR_ENTRY_BB, return true if there
126    are no data dependencies that would prevent expanding the parallel
127    directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128 
129    When expanding a combined parallel+workshare region, the call to
130    the child function may need additional arguments in the case of
131    GIMPLE_OMP_FOR regions.  In some cases, these arguments are
132    computed out of variables passed in from the parent to the child
133    via 'struct .omp_data_s'.  For instance:
134 
135 	#pragma omp parallel for schedule (guided, i * 4)
136 	for (j ...)
137 
138    Is lowered into:
139 
140 	# BLOCK 2 (PAR_ENTRY_BB)
141 	.omp_data_o.i = i;
142 	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
143 
144 	# BLOCK 3 (WS_ENTRY_BB)
145 	.omp_data_i = &.omp_data_o;
146 	D.1667 = .omp_data_i->i;
147 	D.1598 = D.1667 * 4;
148 	#pragma omp for schedule (guided, D.1598)
149 
150    When we outline the parallel region, the call to the child function
151    'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152    that value is computed *after* the call site.  So, in principle we
153    cannot do the transformation.
154 
155    To see whether the code in WS_ENTRY_BB blocks the combined
156    parallel+workshare call, we collect all the variables used in the
157    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158    statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
159    call.
160 
161    FIXME.  If we had the SSA form built at this point, we could merely
162    hoist the code in block 3 into block 2 and be done with it.  But at
163    this point we don't have dataflow information and though we could
164    hack something up here, it is really not worth the aggravation.  */
165 
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
168 {
169   struct omp_for_data fd;
170   gimple *ws_stmt = last_stmt (ws_entry_bb);
171 
172   if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173     return true;
174 
175   gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176 
177   omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
178 
179   if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
180     return false;
181   if (fd.iter_type != long_integer_type_node)
182     return false;
183 
184   /* FIXME.  We give up too easily here.  If any of these arguments
185      are not constants, they will likely involve variables that have
186      been mapped into fields of .omp_data_s for sharing with the child
187      function.  With appropriate data flow, it would be possible to
188      see through this.  */
189   if (!is_gimple_min_invariant (fd.loop.n1)
190       || !is_gimple_min_invariant (fd.loop.n2)
191       || !is_gimple_min_invariant (fd.loop.step)
192       || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
193     return false;
194 
195   return true;
196 }
197 
198 /* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on whether the
199    simd modifier is present (SIMD_SCHEDULE).  */
200 
201 static tree
202 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
203 {
204   if (!simd_schedule)
205     return chunk_size;
206 
207   poly_uint64 vf = omp_max_vf ();
208   if (known_eq (vf, 1U))
209     return chunk_size;
210 
211   tree type = TREE_TYPE (chunk_size);
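  /* Round CHUNK_SIZE up to the next multiple of the vectorization factor:
     (CHUNK_SIZE + VF - 1) & -VF.  */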
212   chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
213 			    build_int_cst (type, vf - 1));
214   return fold_build2 (BIT_AND_EXPR, type, chunk_size,
215 		      build_int_cst (type, -vf));
216 }
217 
218 /* Collect additional arguments needed to emit a combined
219    parallel+workshare call.  WS_STMT is the workshare directive being
220    expanded.  */
221 
222 static vec<tree, va_gc> *
223 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
224 {
225   tree t;
226   location_t loc = gimple_location (ws_stmt);
227   vec<tree, va_gc> *ws_args;
228 
229   if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
230     {
231       struct omp_for_data fd;
232       tree n1, n2;
233 
234       omp_extract_for_data (for_stmt, &fd, NULL);
235       n1 = fd.loop.n1;
236       n2 = fd.loop.n2;
237 
238       if (gimple_omp_for_combined_into_p (for_stmt))
239 	{
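	  /* For a loop combined into the enclosing parallel, the actual bounds
	     are passed via the _looptemp_ clauses on the parallel statement.  */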
240 	  tree innerc
241 	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
242 			       OMP_CLAUSE__LOOPTEMP_);
243 	  gcc_assert (innerc);
244 	  n1 = OMP_CLAUSE_DECL (innerc);
245 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
246 				    OMP_CLAUSE__LOOPTEMP_);
247 	  gcc_assert (innerc);
248 	  n2 = OMP_CLAUSE_DECL (innerc);
249 	}
250 
251       vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
252 
253       t = fold_convert_loc (loc, long_integer_type_node, n1);
254       ws_args->quick_push (t);
255 
256       t = fold_convert_loc (loc, long_integer_type_node, n2);
257       ws_args->quick_push (t);
258 
259       t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
260       ws_args->quick_push (t);
261 
262       if (fd.chunk_size)
263 	{
264 	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
265 	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
266 	  ws_args->quick_push (t);
267 	}
268 
269       return ws_args;
270     }
271   else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
272     {
273       /* Number of sections is equal to the number of edges from the
274 	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
275 	 the exit of the sections region.  */
276       basic_block bb = single_succ (gimple_bb (ws_stmt));
277       t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
278       vec_alloc (ws_args, 1);
279       ws_args->quick_push (t);
280       return ws_args;
281     }
282 
283   gcc_unreachable ();
284 }
285 
286 /* Discover whether REGION is a combined parallel+workshare region.  */
287 
288 static void
289 determine_parallel_type (struct omp_region *region)
290 {
291   basic_block par_entry_bb, par_exit_bb;
292   basic_block ws_entry_bb, ws_exit_bb;
293 
294   if (region == NULL || region->inner == NULL
295       || region->exit == NULL || region->inner->exit == NULL
296       || region->inner->cont == NULL)
297     return;
298 
299   /* We only support parallel+for and parallel+sections.  */
300   if (region->type != GIMPLE_OMP_PARALLEL
301       || (region->inner->type != GIMPLE_OMP_FOR
302 	  && region->inner->type != GIMPLE_OMP_SECTIONS))
303     return;
304 
305   /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
306      WS_EXIT_BB -> PAR_EXIT_BB.  */
307   par_entry_bb = region->entry;
308   par_exit_bb = region->exit;
309   ws_entry_bb = region->inner->entry;
310   ws_exit_bb = region->inner->exit;
311 
312   if (single_succ (par_entry_bb) == ws_entry_bb
313       && single_succ (ws_exit_bb) == par_exit_bb
314       && workshare_safe_to_combine_p (ws_entry_bb)
315       && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
316 	  || (last_and_only_stmt (ws_entry_bb)
317 	      && last_and_only_stmt (par_exit_bb))))
318     {
319       gimple *par_stmt = last_stmt (par_entry_bb);
320       gimple *ws_stmt = last_stmt (ws_entry_bb);
321 
322       if (region->inner->type == GIMPLE_OMP_FOR)
323 	{
324 	  /* If this is a combined parallel loop, we need to determine
325 	     whether or not to use the combined library calls.  There
326 	     are two cases where we do not apply the transformation:
327 	     static loops and any kind of ordered loop.  In the first
328 	     case, we already open code the loop so there is no need
329 	     to do anything else.  In the latter case, the combined
330 	     parallel loop call would still need extra synchronization
331 	     to implement ordered semantics, so there would not be any
332 	     gain in using the combined call.  */
333 	  tree clauses = gimple_omp_for_clauses (ws_stmt);
334 	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
335 	  if (c == NULL
336 	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
337 		  == OMP_CLAUSE_SCHEDULE_STATIC)
338 	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
339 	    {
340 	      region->is_combined_parallel = false;
341 	      region->inner->is_combined_parallel = false;
342 	      return;
343 	    }
344 	}
345 
346       region->is_combined_parallel = true;
347       region->inner->is_combined_parallel = true;
348       region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
349     }
350 }
351 
352 /* Debugging dumps for parallel regions.  */
353 void dump_omp_region (FILE *, struct omp_region *, int);
354 void debug_omp_region (struct omp_region *);
355 void debug_all_omp_regions (void);
356 
357 /* Dump the parallel region tree rooted at REGION.  */
358 
359 void
360 dump_omp_region (FILE *file, struct omp_region *region, int indent)
361 {
362   fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
363 	   gimple_code_name[region->type]);
364 
365   if (region->inner)
366     dump_omp_region (file, region->inner, indent + 4);
367 
368   if (region->cont)
369     {
370       fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
371 	       region->cont->index);
372     }
373 
374   if (region->exit)
375     fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
376 	     region->exit->index);
377   else
378     fprintf (file, "%*s[no exit marker]\n", indent, "");
379 
380   if (region->next)
381     dump_omp_region (file, region->next, indent);
382 }
383 
384 DEBUG_FUNCTION void
385 debug_omp_region (struct omp_region *region)
386 {
387   dump_omp_region (stderr, region, 0);
388 }
389 
390 DEBUG_FUNCTION void
391 debug_all_omp_regions (void)
392 {
393   dump_omp_region (stderr, root_omp_region, 0);
394 }
395 
396 /* Create a new parallel region of type TYPE starting at BB inside PARENT.  */
397 
398 static struct omp_region *
399 new_omp_region (basic_block bb, enum gimple_code type,
400 		struct omp_region *parent)
401 {
402   struct omp_region *region = XCNEW (struct omp_region);
403 
404   region->outer = parent;
405   region->entry = bb;
406   region->type = type;
407 
408   if (parent)
409     {
410       /* This is a nested region.  Add it to the list of inner
411 	 regions in PARENT.  */
412       region->next = parent->inner;
413       parent->inner = region;
414     }
415   else
416     {
417       /* This is a toplevel region.  Add it to the list of toplevel
418 	 regions in ROOT_OMP_REGION.  */
419       region->next = root_omp_region;
420       root_omp_region = region;
421     }
422 
423   return region;
424 }
425 
426 /* Release the memory associated with the region tree rooted at REGION.  */
427 
428 static void
429 free_omp_region_1 (struct omp_region *region)
430 {
431   struct omp_region *i, *n;
432 
433   for (i = region->inner; i ; i = n)
434     {
435       n = i->next;
436       free_omp_region_1 (i);
437     }
438 
439   free (region);
440 }
441 
442 /* Release the memory for the entire omp region tree.  */
443 
444 void
445 omp_free_regions (void)
446 {
447   struct omp_region *r, *n;
448   for (r = root_omp_region; r ; r = n)
449     {
450       n = r->next;
451       free_omp_region_1 (r);
452     }
453   root_omp_region = NULL;
454 }
455 
456 /* A convenience function to build an empty GIMPLE_COND with just the
457    condition.  */
458 
459 static gcond *
460 gimple_build_cond_empty (tree cond)
461 {
462   enum tree_code pred_code;
463   tree lhs, rhs;
464 
465   gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
466   return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
467 }
468 
469 /* Return true if a parallel REGION is within a declare target function or
470    within a target region and is not a part of a gridified target.  */
471 
472 static bool
473 parallel_needs_hsa_kernel_p (struct omp_region *region)
474 {
475   bool indirect = false;
476   for (region = region->outer; region; region = region->outer)
477     {
478       if (region->type == GIMPLE_OMP_PARALLEL)
479 	indirect = true;
480       else if (region->type == GIMPLE_OMP_TARGET)
481 	{
482 	  gomp_target *tgt_stmt
483 	    = as_a <gomp_target *> (last_stmt (region->entry));
484 
485 	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
486 			       OMP_CLAUSE__GRIDDIM_))
487 	    return indirect;
488 	  else
489 	    return true;
490 	}
491     }
492 
493   if (lookup_attribute ("omp declare target",
494 			DECL_ATTRIBUTES (current_function_decl)))
495     return true;
496 
497   return false;
498 }
499 
500 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
501    Add CHILD_FNDECL to decl chain of the supercontext of the block
502    ENTRY_BLOCK - this is the block which originally contained the
503    code from which CHILD_FNDECL was created.
504 
505    Together, these actions ensure that the debug info for the outlined
506    function will be emitted with the correct lexical scope.  */
507 
508 static void
509 adjust_context_and_scope (struct omp_region *region, tree entry_block,
510 			  tree child_fndecl)
511 {
512   tree parent_fndecl = NULL_TREE;
513   gimple *entry_stmt;
514   /* OMP expansion expands inner regions before outer ones, so if
515      we e.g. have explicit task region nested in parallel region, when
516      expanding the task region current_function_decl will be the original
517      source function, but we actually want to use as context the child
518      function of the parallel.  */
519   for (region = region->outer;
520        region && parent_fndecl == NULL_TREE; region = region->outer)
521     switch (region->type)
522       {
523       case GIMPLE_OMP_PARALLEL:
524       case GIMPLE_OMP_TASK:
525 	entry_stmt = last_stmt (region->entry);
526 	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
527 	break;
528       case GIMPLE_OMP_TARGET:
529 	entry_stmt = last_stmt (region->entry);
530 	parent_fndecl
531 	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
532 	break;
533       default:
534 	break;
535       }
536 
537   if (parent_fndecl == NULL_TREE)
538     parent_fndecl = current_function_decl;
539   DECL_CONTEXT (child_fndecl) = parent_fndecl;
540 
541   if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
542     {
543       tree b = BLOCK_SUPERCONTEXT (entry_block);
544       if (TREE_CODE (b) == BLOCK)
545         {
546 	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
547 	  BLOCK_VARS (b) = child_fndecl;
548 	}
549     }
550 }
551 
552 /* Build the call to GOMP_parallel (or one of its combined variants) to
553    actually generate the parallel operation.  REGION is the parallel region
554    being expanded.  BB is the block where the code is to be inserted.  WS_ARGS
555    will be set if this is a call to a combined parallel+workshare
556    construct; it contains the list of additional arguments needed by
557    the workshare construct.  */
558 
559 static void
560 expand_parallel_call (struct omp_region *region, basic_block bb,
561 		      gomp_parallel *entry_stmt,
562 		      vec<tree, va_gc> *ws_args)
563 {
564   tree t, t1, t2, val, cond, c, clauses, flags;
565   gimple_stmt_iterator gsi;
566   gimple *stmt;
567   enum built_in_function start_ix;
568   int start_ix2;
569   location_t clause_loc;
570   vec<tree, va_gc> *args;
571 
572   clauses = gimple_omp_parallel_clauses (entry_stmt);
573 
574   /* Determine what flavor of GOMP_parallel we will be
575      emitting.  */
576   start_ix = BUILT_IN_GOMP_PARALLEL;
577   if (is_combined_parallel (region))
578     {
579       switch (region->inner->type)
580 	{
581 	case GIMPLE_OMP_FOR:
582 	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
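	  /* START_IX2 is an offset from BUILT_IN_GOMP_PARALLEL_LOOP_STATIC:
	     the schedule kind itself for static/dynamic/guided, 3 for runtime,
	     and 3 + kind for the nonmonotonic dynamic/guided variants.  */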
583 	  switch (region->inner->sched_kind)
584 	    {
585 	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
586 	      start_ix2 = 3;
587 	      break;
588 	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
589 	    case OMP_CLAUSE_SCHEDULE_GUIDED:
590 	      if (region->inner->sched_modifiers
591 		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
592 		{
593 		  start_ix2 = 3 + region->inner->sched_kind;
594 		  break;
595 		}
596 	      /* FALLTHRU */
597 	    default:
598 	      start_ix2 = region->inner->sched_kind;
599 	      break;
600 	    }
601 	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
602 	  start_ix = (enum built_in_function) start_ix2;
603 	  break;
604 	case GIMPLE_OMP_SECTIONS:
605 	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
606 	  break;
607 	default:
608 	  gcc_unreachable ();
609 	}
610     }
611 
612   /* By default, the value of NUM_THREADS is zero (selected at run time)
613      and there is no conditional.  */
614   cond = NULL_TREE;
615   val = build_int_cst (unsigned_type_node, 0);
616   flags = build_int_cst (unsigned_type_node, 0);
617 
618   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
619   if (c)
620     cond = OMP_CLAUSE_IF_EXPR (c);
621 
622   c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
623   if (c)
624     {
625       val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
626       clause_loc = OMP_CLAUSE_LOCATION (c);
627     }
628   else
629     clause_loc = gimple_location (entry_stmt);
630 
631   c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
632   if (c)
633     flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
634 
635   /* Ensure 'val' is of the correct type.  */
636   val = fold_convert_loc (clause_loc, unsigned_type_node, val);
637 
638   /* If we found the clause 'if (cond)', build either
639      (cond != 0) or (cond ? val : 1u).  */
640   if (cond)
641     {
642       cond = gimple_boolify (cond);
643 
644       if (integer_zerop (val))
645 	val = fold_build2_loc (clause_loc,
646 			   EQ_EXPR, unsigned_type_node, cond,
647 			   build_int_cst (TREE_TYPE (cond), 0));
648       else
649 	{
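	  /* Materialize (cond ? val : 1u) with explicit control flow: branch
	     on COND into two new blocks that assign VAL or 1 to a temporary,
	     and merge the result with a PHI node when in SSA form.  */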
650 	  basic_block cond_bb, then_bb, else_bb;
651 	  edge e, e_then, e_else;
652 	  tree tmp_then, tmp_else, tmp_join, tmp_var;
653 
654 	  tmp_var = create_tmp_var (TREE_TYPE (val));
655 	  if (gimple_in_ssa_p (cfun))
656 	    {
657 	      tmp_then = make_ssa_name (tmp_var);
658 	      tmp_else = make_ssa_name (tmp_var);
659 	      tmp_join = make_ssa_name (tmp_var);
660 	    }
661 	  else
662 	    {
663 	      tmp_then = tmp_var;
664 	      tmp_else = tmp_var;
665 	      tmp_join = tmp_var;
666 	    }
667 
668 	  e = split_block_after_labels (bb);
669 	  cond_bb = e->src;
670 	  bb = e->dest;
671 	  remove_edge (e);
672 
673 	  then_bb = create_empty_bb (cond_bb);
674 	  else_bb = create_empty_bb (then_bb);
675 	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
676 	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
677 
678 	  stmt = gimple_build_cond_empty (cond);
679 	  gsi = gsi_start_bb (cond_bb);
680 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
681 
682 	  gsi = gsi_start_bb (then_bb);
683 	  expand_omp_build_assign (&gsi, tmp_then, val, true);
684 
685 	  gsi = gsi_start_bb (else_bb);
686 	  expand_omp_build_assign (&gsi, tmp_else,
687 				   build_int_cst (unsigned_type_node, 1),
688 				   true);
689 
690 	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
691 	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
692 	  add_bb_to_loop (then_bb, cond_bb->loop_father);
693 	  add_bb_to_loop (else_bb, cond_bb->loop_father);
694 	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
695 	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
696 
697 	  if (gimple_in_ssa_p (cfun))
698 	    {
699 	      gphi *phi = create_phi_node (tmp_join, bb);
700 	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
701 	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
702 	    }
703 
704 	  val = tmp_join;
705 	}
706 
707       gsi = gsi_start_bb (bb);
708       val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
709 				      false, GSI_CONTINUE_LINKING);
710     }
711 
712   gsi = gsi_last_nondebug_bb (bb);
713   t = gimple_omp_parallel_data_arg (entry_stmt);
714   if (t == NULL)
715     t1 = null_pointer_node;
716   else
717     t1 = build_fold_addr_expr (t);
718   tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
719   t2 = build_fold_addr_expr (child_fndecl);
720 
721   vec_alloc (args, 4 + vec_safe_length (ws_args));
722   args->quick_push (t2);
723   args->quick_push (t1);
724   args->quick_push (val);
725   if (ws_args)
726     args->splice (*ws_args);
727   args->quick_push (flags);
728 
729   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
730 			       builtin_decl_explicit (start_ix), args);
731 
732   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
733 			    false, GSI_CONTINUE_LINKING);
734 
735   if (hsa_gen_requested_p ()
736       && parallel_needs_hsa_kernel_p (region))
737     {
738       cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
739       hsa_register_kernel (child_cnode);
740     }
741 }
742 
743 /* Build the call to GOMP_task (or GOMP_taskloop) that actually
744    generates the task operation.  BB is the block where the code is inserted.  */
745 
746 static void
747 expand_task_call (struct omp_region *region, basic_block bb,
748 		  gomp_task *entry_stmt)
749 {
750   tree t1, t2, t3;
751   gimple_stmt_iterator gsi;
752   location_t loc = gimple_location (entry_stmt);
753 
754   tree clauses = gimple_omp_task_clauses (entry_stmt);
755 
756   tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
757   tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
758   tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
759   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
760   tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
761   tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
762 
763   unsigned int iflags
764     = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
765       | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
766       | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
767 
768   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
769   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
770   tree num_tasks = NULL_TREE;
771   bool ull = false;
772   if (taskloop_p)
773     {
774       gimple *g = last_stmt (region->outer->entry);
775       gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
776 		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
777       struct omp_for_data fd;
778       omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
779       startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
780       endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
781 				OMP_CLAUSE__LOOPTEMP_);
782       startvar = OMP_CLAUSE_DECL (startvar);
783       endvar = OMP_CLAUSE_DECL (endvar);
784       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
785       if (fd.loop.cond_code == LT_EXPR)
786 	iflags |= GOMP_TASK_FLAG_UP;
787       tree tclauses = gimple_omp_for_clauses (g);
788       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
789       if (num_tasks)
790 	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
791       else
792 	{
793 	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
794 	  if (num_tasks)
795 	    {
796 	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
797 	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
798 	    }
799 	  else
800 	    num_tasks = integer_zero_node;
801 	}
802       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
803       if (ifc == NULL_TREE)
804 	iflags |= GOMP_TASK_FLAG_IF;
805       if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
806 	iflags |= GOMP_TASK_FLAG_NOGROUP;
807       ull = fd.iter_type == long_long_unsigned_type_node;
808     }
809   else if (priority)
810     iflags |= GOMP_TASK_FLAG_PRIORITY;
811 
812   tree flags = build_int_cst (unsigned_type_node, iflags);
813 
814   tree cond = boolean_true_node;
815   if (ifc)
816     {
817       if (taskloop_p)
818 	{
819 	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
820 	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
821 			       build_int_cst (unsigned_type_node,
822 					      GOMP_TASK_FLAG_IF),
823 			       build_int_cst (unsigned_type_node, 0));
824 	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
825 				   flags, t);
826 	}
827       else
828 	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
829     }
830 
831   if (finalc)
832     {
833       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
834       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
835 			   build_int_cst (unsigned_type_node,
836 					  GOMP_TASK_FLAG_FINAL),
837 			   build_int_cst (unsigned_type_node, 0));
838       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
839     }
840   if (depend)
841     depend = OMP_CLAUSE_DECL (depend);
842   else
843     depend = build_int_cst (ptr_type_node, 0);
844   if (priority)
845     priority = fold_convert (integer_type_node,
846 			     OMP_CLAUSE_PRIORITY_EXPR (priority));
847   else
848     priority = integer_zero_node;
849 
850   gsi = gsi_last_nondebug_bb (bb);
851   tree t = gimple_omp_task_data_arg (entry_stmt);
852   if (t == NULL)
853     t2 = null_pointer_node;
854   else
855     t2 = build_fold_addr_expr_loc (loc, t);
856   t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
857   t = gimple_omp_task_copy_fn (entry_stmt);
858   if (t == NULL)
859     t3 = null_pointer_node;
860   else
861     t3 = build_fold_addr_expr_loc (loc, t);
862 
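  /* A taskloop is lowered to GOMP_taskloop{,_ull} with the number of tasks,
     priority and the loop bounds and step appended; a plain task is lowered
     to GOMP_task.  */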
863   if (taskloop_p)
864     t = build_call_expr (ull
865 			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
866 			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
867 			 11, t1, t2, t3,
868 			 gimple_omp_task_arg_size (entry_stmt),
869 			 gimple_omp_task_arg_align (entry_stmt), flags,
870 			 num_tasks, priority, startvar, endvar, step);
871   else
872     t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
873 			 9, t1, t2, t3,
874 			 gimple_omp_task_arg_size (entry_stmt),
875 			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
876 			 depend, priority);
877 
878   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
879 			    false, GSI_CONTINUE_LINKING);
880 }
881 
882 /* Chain together all the DECLs in vector V by their DECL_CHAIN fields.  */
883 
884 static tree
885 vec2chain (vec<tree, va_gc> *v)
886 {
887   tree chain = NULL_TREE, t;
888   unsigned ix;
889 
890   FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
891     {
892       DECL_CHAIN (t) = chain;
893       chain = t;
894     }
895 
896   return chain;
897 }
898 
899 /* Remove barriers in REGION->EXIT's block.  Note that this is only
900    valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
901    is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
902    is an implicit barrier, any barrier that a workshare inside the
903    GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
904    can now be removed.  */
905 static void
906 remove_exit_barrier (struct omp_region *region)
907 {
908   gimple_stmt_iterator gsi;
909   basic_block exit_bb;
910   edge_iterator ei;
911   edge e;
912   gimple *stmt;
913   int any_addressable_vars = -1;
914 
915   exit_bb = region->exit;
916 
917   /* If the parallel region doesn't return, we don't have REGION->EXIT
918      block at all.  */
919   if (! exit_bb)
920     return;
921 
922   /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
923      workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
924      statements that can appear in between are extremely limited -- no
925      memory operations at all.  Here, we allow nothing at all, so the
926      only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
927   gsi = gsi_last_nondebug_bb (exit_bb);
928   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
929   gsi_prev_nondebug (&gsi);
930   if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
931     return;
932 
933   FOR_EACH_EDGE (e, ei, exit_bb->preds)
934     {
935       gsi = gsi_last_nondebug_bb (e->src);
936       if (gsi_end_p (gsi))
937 	continue;
938       stmt = gsi_stmt (gsi);
939       if (gimple_code (stmt) == GIMPLE_OMP_RETURN
940 	  && !gimple_omp_return_nowait_p (stmt))
941 	{
942 	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
943 	     in many cases.  If there could be tasks queued, the barrier
944 	     might be needed to let the tasks run before some local
945 	     variable of the parallel that the task uses as shared
946 	     runs out of scope.  The task can be spawned either
947 	     from within current function (this would be easy to check)
948 	     or from some function it calls and gets passed an address
949 	     of such a variable.  */
950 	  if (any_addressable_vars < 0)
951 	    {
952 	      gomp_parallel *parallel_stmt
953 		= as_a <gomp_parallel *> (last_stmt (region->entry));
954 	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
955 	      tree local_decls, block, decl;
956 	      unsigned ix;
957 
958 	      any_addressable_vars = 0;
959 	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
960 		if (TREE_ADDRESSABLE (decl))
961 		  {
962 		    any_addressable_vars = 1;
963 		    break;
964 		  }
965 	      for (block = gimple_block (stmt);
966 		   !any_addressable_vars
967 		   && block
968 		   && TREE_CODE (block) == BLOCK;
969 		   block = BLOCK_SUPERCONTEXT (block))
970 		{
971 		  for (local_decls = BLOCK_VARS (block);
972 		       local_decls;
973 		       local_decls = DECL_CHAIN (local_decls))
974 		    if (TREE_ADDRESSABLE (local_decls))
975 		      {
976 			any_addressable_vars = 1;
977 			break;
978 		      }
979 		  if (block == gimple_block (parallel_stmt))
980 		    break;
981 		}
982 	    }
983 	  if (!any_addressable_vars)
984 	    gimple_omp_return_set_nowait (stmt);
985 	}
986     }
987 }
988 
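/* Recursively walk the region tree rooted at REGION, removing redundant
   exit barriers from every parallel region in it.  */
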
989 static void
990 remove_exit_barriers (struct omp_region *region)
991 {
992   if (region->type == GIMPLE_OMP_PARALLEL)
993     remove_exit_barrier (region);
994 
995   if (region->inner)
996     {
997       region = region->inner;
998       remove_exit_barriers (region);
999       while (region->next)
1000 	{
1001 	  region = region->next;
1002 	  remove_exit_barriers (region);
1003 	}
1004     }
1005 }
1006 
1007 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1008    calls.  These can't be declared as const functions, but
1009    within one parallel body they are constant, so they can be
1010    transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1011    which are declared const.  Similarly for a task body, except
1012    that in an untied task omp_get_thread_num () can change at any task
1013    scheduling point.  */
1014 
1015 static void
1016 optimize_omp_library_calls (gimple *entry_stmt)
1017 {
1018   basic_block bb;
1019   gimple_stmt_iterator gsi;
1020   tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1021   tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1022   tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1023   tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1024   bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1025 		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1026 					  OMP_CLAUSE_UNTIED) != NULL);
1027 
1028   FOR_EACH_BB_FN (bb, cfun)
1029     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1030       {
1031 	gimple *call = gsi_stmt (gsi);
1032 	tree decl;
1033 
1034 	if (is_gimple_call (call)
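	/* Only direct calls to external, public functions without a body in
	   this translation unit can be the omp_get_* library functions.  */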
1035 	    && (decl = gimple_call_fndecl (call))
1036 	    && DECL_EXTERNAL (decl)
1037 	    && TREE_PUBLIC (decl)
1038 	    && DECL_INITIAL (decl) == NULL)
1039 	  {
1040 	    tree built_in;
1041 
1042 	    if (DECL_NAME (decl) == thr_num_id)
1043 	      {
1044 		/* In #pragma omp task untied omp_get_thread_num () can change
1045 		   during the execution of the task region.  */
1046 		if (untied_task)
1047 		  continue;
1048 		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1049 	      }
1050 	    else if (DECL_NAME (decl) == num_thr_id)
1051 	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1052 	    else
1053 	      continue;
1054 
1055 	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1056 		|| gimple_call_num_args (call) != 0)
1057 	      continue;
1058 
1059 	    if (flag_exceptions && !TREE_NOTHROW (decl))
1060 	      continue;
1061 
1062 	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1063 		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1064 					TREE_TYPE (TREE_TYPE (built_in))))
1065 	      continue;
1066 
1067 	    gimple_call_set_fndecl (call, built_in);
1068 	  }
1069       }
1070 }
1071 
1072 /* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1073    regimplified.  */
1074 
1075 static tree
1076 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1077 {
1078   tree t = *tp;
1079 
1080   /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1081   if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1082     return t;
1083 
1084   if (TREE_CODE (t) == ADDR_EXPR)
1085     recompute_tree_invariant_for_addr_expr (t);
1086 
1087   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1088   return NULL_TREE;
1089 }
1090 
1091 /* Prepend or append TO = FROM assignment before or after *GSI_P.  */
1092 
1093 static void
1094 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1095 			 bool after)
1096 {
1097   bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1098   from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1099 				   !after, after ? GSI_CONTINUE_LINKING
1100 						 : GSI_SAME_STMT);
1101   gimple *stmt = gimple_build_assign (to, from);
1102   if (after)
1103     gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1104   else
1105     gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1106   if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1107       || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1108     {
1109       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1110       gimple_regimplify_operands (stmt, &gsi);
1111     }
1112 }
1113 
1114 /* Expand the OpenMP parallel or task directive starting at REGION.  */
1115 
1116 static void
1117 expand_omp_taskreg (struct omp_region *region)
1118 {
1119   basic_block entry_bb, exit_bb, new_bb;
1120   struct function *child_cfun;
1121   tree child_fn, block, t;
1122   gimple_stmt_iterator gsi;
1123   gimple *entry_stmt, *stmt;
1124   edge e;
1125   vec<tree, va_gc> *ws_args;
1126 
1127   entry_stmt = last_stmt (region->entry);
1128   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1129   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1130 
1131   entry_bb = region->entry;
1132   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1133     exit_bb = region->cont;
1134   else
1135     exit_bb = region->exit;
1136 
1137   if (is_combined_parallel (region))
1138     ws_args = region->ws_args;
1139   else
1140     ws_args = NULL;
1141 
1142   if (child_cfun->cfg)
1143     {
1144       /* Due to inlining, it may happen that we have already outlined
1145 	 the region, in which case all we need to do is make the
1146 	 sub-graph unreachable and emit the parallel call.  */
1147       edge entry_succ_e, exit_succ_e;
1148 
1149       entry_succ_e = single_succ_edge (entry_bb);
1150 
1151       gsi = gsi_last_nondebug_bb (entry_bb);
1152       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1153 		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1154       gsi_remove (&gsi, true);
1155 
1156       new_bb = entry_bb;
1157       if (exit_bb)
1158 	{
1159 	  exit_succ_e = single_succ_edge (exit_bb);
1160 	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1161 	}
1162       remove_edge_and_dominated_blocks (entry_succ_e);
1163     }
1164   else
1165     {
1166       unsigned srcidx, dstidx, num;
1167 
1168       /* If the parallel region needs data sent from the parent
1169 	 function, then the very first statement (except possible
1170 	 tree profile counter updates) of the parallel body
1171 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1172 	 &.OMP_DATA_O is passed as an argument to the child function,
1173 	 we need to replace it with the argument as seen by the child
1174 	 function.
1175 
1176 	 In most cases, this will end up being the identity assignment
1177 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1178 	 a function call that has been inlined, the original PARM_DECL
1179 	 .OMP_DATA_I may have been converted into a different local
1180 	 variable.  In that case, we need to keep the assignment.  */
1181       if (gimple_omp_taskreg_data_arg (entry_stmt))
1182 	{
1183 	  basic_block entry_succ_bb
1184 	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
1185 				       : FALLTHRU_EDGE (entry_bb)->dest;
1186 	  tree arg;
1187 	  gimple *parcopy_stmt = NULL;
1188 
1189 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1190 	    {
1191 	      gimple *stmt;
1192 
1193 	      gcc_assert (!gsi_end_p (gsi));
1194 	      stmt = gsi_stmt (gsi);
1195 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
1196 		continue;
1197 
1198 	      if (gimple_num_ops (stmt) == 2)
1199 		{
1200 		  tree arg = gimple_assign_rhs1 (stmt);
1201 
1202 		  /* We're ignoring the subcode because we're
1203 		     effectively doing a STRIP_NOPS.  */
1204 
1205 		  if (TREE_CODE (arg) == ADDR_EXPR
1206 		      && TREE_OPERAND (arg, 0)
1207 			== gimple_omp_taskreg_data_arg (entry_stmt))
1208 		    {
1209 		      parcopy_stmt = stmt;
1210 		      break;
1211 		    }
1212 		}
1213 	    }
1214 
1215 	  gcc_assert (parcopy_stmt != NULL);
1216 	  arg = DECL_ARGUMENTS (child_fn);
1217 
1218 	  if (!gimple_in_ssa_p (cfun))
1219 	    {
1220 	      if (gimple_assign_lhs (parcopy_stmt) == arg)
1221 		gsi_remove (&gsi, true);
1222 	      else
1223 		{
1224 		  /* ?? Is setting the subcode really necessary ??  */
1225 		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1226 		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 		}
1228 	    }
1229 	  else
1230 	    {
1231 	      tree lhs = gimple_assign_lhs (parcopy_stmt);
1232 	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
1233 	      /* We'd like to set the rhs to the default def in the child_fn,
1234 		 but it's too early to create ssa names in the child_fn.
1235 		 Instead, we set the rhs to the parm.  In
1236 		 move_sese_region_to_fn, we introduce a default def for the
1237 		 parm, map the parm to its default def, and once we encounter
1238 		 this stmt, replace the parm with the default def.  */
1239 	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
1240 	      update_stmt (parcopy_stmt);
1241 	    }
1242 	}
1243 
1244       /* Declare local variables needed in CHILD_CFUN.  */
1245       block = DECL_INITIAL (child_fn);
1246       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1247       /* The gimplifier could record temporaries in parallel/task block
1248 	 rather than in the containing function's local_decls chain,
1249 	 which would mean cgraph missed finalizing them.  Do it now.  */
1250       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1251 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1252 	  varpool_node::finalize_decl (t);
1253       DECL_SAVED_TREE (child_fn) = NULL;
1254       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1255       gimple_set_body (child_fn, NULL);
1256       TREE_USED (block) = 1;
1257 
1258       /* Reset DECL_CONTEXT on function arguments.  */
1259       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1260 	DECL_CONTEXT (t) = child_fn;
1261 
1262       /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1263 	 so that it can be moved to the child function.  */
1264       gsi = gsi_last_nondebug_bb (entry_bb);
1265       stmt = gsi_stmt (gsi);
1266       gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1267 			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
1268       e = split_block (entry_bb, stmt);
1269       gsi_remove (&gsi, true);
1270       entry_bb = e->dest;
1271       edge e2 = NULL;
1272       if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1273 	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1274       else
1275 	{
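	  /* For a task, wire the block before the task statement directly to
	     the region exit with an abnormal edge, drop the branch edge out
	     of the body entry, and delete the GIMPLE_OMP_RETURN.  */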
1276 	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1277 	  gcc_assert (e2->dest == region->exit);
1278 	  remove_edge (BRANCH_EDGE (entry_bb));
1279 	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1280 	  gsi = gsi_last_nondebug_bb (region->exit);
1281 	  gcc_assert (!gsi_end_p (gsi)
1282 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1283 	  gsi_remove (&gsi, true);
1284 	}
1285 
1286       /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1287       if (exit_bb)
1288 	{
1289 	  gsi = gsi_last_nondebug_bb (exit_bb);
1290 	  gcc_assert (!gsi_end_p (gsi)
1291 		      && (gimple_code (gsi_stmt (gsi))
1292 			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1293 	  stmt = gimple_build_return (NULL);
1294 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1295 	  gsi_remove (&gsi, true);
1296 	}
1297 
1298       /* Move the parallel region into CHILD_CFUN.  */
1299 
1300       if (gimple_in_ssa_p (cfun))
1301 	{
1302 	  init_tree_ssa (child_cfun);
1303 	  init_ssa_operands (child_cfun);
1304 	  child_cfun->gimple_df->in_ssa_p = true;
1305 	  block = NULL_TREE;
1306 	}
1307       else
1308 	block = gimple_block (entry_stmt);
1309 
1310       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1311       if (exit_bb)
1312 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1313       if (e2)
1314 	{
1315 	  basic_block dest_bb = e2->dest;
1316 	  if (!exit_bb)
1317 	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1318 	  remove_edge (e2);
1319 	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1320 	}
1321       /* When the OMP expansion process cannot guarantee an up-to-date
1322 	 loop tree, arrange for the child function to fix up loops.  */
1323       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1324 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1325 
1326       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1327       num = vec_safe_length (child_cfun->local_decls);
1328       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1329 	{
1330 	  t = (*child_cfun->local_decls)[srcidx];
1331 	  if (DECL_CONTEXT (t) == cfun->decl)
1332 	    continue;
1333 	  if (srcidx != dstidx)
1334 	    (*child_cfun->local_decls)[dstidx] = t;
1335 	  dstidx++;
1336 	}
1337       if (dstidx != num)
1338 	vec_safe_truncate (child_cfun->local_decls, dstidx);
1339 
1340       /* Inform the callgraph about the new function.  */
1341       child_cfun->curr_properties = cfun->curr_properties;
1342       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1343       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1344       cgraph_node *node = cgraph_node::get_create (child_fn);
1345       node->parallelized_function = 1;
1346       cgraph_node::add_new_function (child_fn, true);
1347 
1348       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
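      /* Record whether the parent already has an assembler name but the child
	 does not; if so, one is assigned to the child below after its cfun
	 has been pushed.  */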
1349 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1350 
1351       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1352 	 fixed in a following pass.  */
1353       push_cfun (child_cfun);
1354       if (need_asm)
1355 	assign_assembler_name_if_needed (child_fn);
1356 
1357       if (optimize)
1358 	optimize_omp_library_calls (entry_stmt);
1359       update_max_bb_count ();
1360       cgraph_edge::rebuild_edges ();
1361 
1362       /* Some EH regions might become dead, see PR34608.  If
1363 	 pass_cleanup_cfg isn't the first pass to happen with the
1364 	 new child, these dead EH edges might cause problems.
1365 	 Clean them up now.  */
1366       if (flag_exceptions)
1367 	{
1368 	  basic_block bb;
1369 	  bool changed = false;
1370 
1371 	  FOR_EACH_BB_FN (bb, cfun)
1372 	    changed |= gimple_purge_dead_eh_edges (bb);
1373 	  if (changed)
1374 	    cleanup_tree_cfg ();
1375 	}
1376       if (gimple_in_ssa_p (cfun))
1377 	update_ssa (TODO_update_ssa);
1378       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1379 	verify_loop_structure ();
1380       pop_cfun ();
1381 
1382       if (dump_file && !gimple_in_ssa_p (cfun))
1383 	{
1384 	  omp_any_child_fn_dumped = true;
1385 	  dump_function_header (dump_file, child_fn, dump_flags);
1386 	  dump_function_to_file (child_fn, dump_file, dump_flags);
1387 	}
1388     }
1389 
1390   adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1391 
1392   if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1393     expand_parallel_call (region, new_bb,
1394 			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1395   else
1396     expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1397   if (gimple_in_ssa_p (cfun))
1398     update_ssa (TODO_update_ssa_only_virtuals);
1399 }
1400 
1401 /* Information about members of an OpenACC collapsed loop nest.  */
1402 
1403 struct oacc_collapse
1404 {
1405   tree base;  /* Base value.  */
1406   tree iters; /* Number of steps.  */
1407   tree step;  /* Step size.  */
1408   tree tile;  /* Tile increment (if tiled).  */
1409   tree outer; /* Tile iterator var. */
1410 };
1411 
1412 /* Helper for expand_oacc_for.  Determine collapsed loop information.
1413    Fill in COUNTS array.  Emit any initialization code before GSI.
1414    Return the calculated outer loop bound of BOUND_TYPE.  */
1415 
1416 static tree
1417 expand_oacc_collapse_init (const struct omp_for_data *fd,
1418 			   gimple_stmt_iterator *gsi,
1419 			   oacc_collapse *counts, tree bound_type,
1420 			   location_t loc)
1421 {
1422   tree tiling = fd->tiling;
1423   tree total = build_int_cst (bound_type, 1);
1424   int ix;
1425 
1426   gcc_assert (integer_onep (fd->loop.step));
1427   gcc_assert (integer_zerop (fd->loop.n1));
1428 
1429   /* When tiling, the first operand of the tile clause applies to the
1430      innermost loop, and we work outwards from there.  Seems
1431      backwards, but whatever.  */
1432   for (ix = fd->collapse; ix--;)
1433     {
1434       const omp_for_data_loop *loop = &fd->loops[ix];
1435 
1436       tree iter_type = TREE_TYPE (loop->v);
1437       tree diff_type = iter_type;
1438       tree plus_type = iter_type;
1439 
1440       gcc_assert (loop->cond_code == fd->loop.cond_code);
1441 
1442       if (POINTER_TYPE_P (iter_type))
1443 	plus_type = sizetype;
1444       if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1445 	diff_type = signed_type_for (diff_type);
1446       if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1447 	diff_type = integer_type_node;
1448 
1449       if (tiling)
1450 	{
1451 	  tree num = build_int_cst (integer_type_node, fd->collapse);
1452 	  tree loop_no = build_int_cst (integer_type_node, ix);
1453 	  tree tile = TREE_VALUE (tiling);
1454 	  gcall *call
1455 	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1456 					  /* gwv-outer=*/integer_zero_node,
1457 					  /* gwv-inner=*/integer_zero_node);
1458 
1459 	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1460 	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1461 	  gimple_call_set_lhs (call, counts[ix].tile);
1462 	  gimple_set_location (call, loc);
1463 	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1464 
1465 	  tiling = TREE_CHAIN (tiling);
1466 	}
1467       else
1468 	{
1469 	  counts[ix].tile = NULL;
1470 	  counts[ix].outer = loop->v;
1471 	}
1472 
1473       tree b = loop->n1;
1474       tree e = loop->n2;
1475       tree s = loop->step;
1476       bool up = loop->cond_code == LT_EXPR;
1477       tree dir = build_int_cst (diff_type, up ? +1 : -1);
1478       bool negating;
1479       tree expr;
1480 
1481       b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1482 				    true, GSI_SAME_STMT);
1483       e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1484 				    true, GSI_SAME_STMT);
1485 
1486       /* Convert the step, avoiding possible unsigned->signed overflow.  */
1487       negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1488       if (negating)
1489 	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1490       s = fold_convert (diff_type, s);
1491       if (negating)
1492 	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1493       s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1494 				    true, GSI_SAME_STMT);
1495 
1496       /* Determine the range, avoiding possible unsigned->signed overflow.  */
1497       negating = !up && TYPE_UNSIGNED (iter_type);
1498       expr = fold_build2 (MINUS_EXPR, plus_type,
1499 			  fold_convert (plus_type, negating ? b : e),
1500 			  fold_convert (plus_type, negating ? e : b));
1501       expr = fold_convert (diff_type, expr);
1502       if (negating)
1503 	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1504       tree range = force_gimple_operand_gsi
1505 	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1506 
1507       /* Determine number of iterations.  */
1508       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1509       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1510       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1511 
1512       tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1513 					     true, GSI_SAME_STMT);
1514 
1515       counts[ix].base = b;
1516       counts[ix].iters = iters;
1517       counts[ix].step = s;
1518 
1519       total = fold_build2 (MULT_EXPR, bound_type, total,
1520 			   fold_convert (bound_type, iters));
1521     }
1522 
1523   return total;
1524 }
1525 
1526 /* Emit initializers for collapsed loop members.  INNER is true if
1527    this is for the element loop of a TILE.  IVAR is the outer
1528    loop iteration variable, from which collapsed loop iteration values
1529    are  calculated.  COUNTS array has been initialized by
1530    are calculated.  The COUNTS array has been initialized by
1531    expand_oacc_collapse_init.  */
1532 static void
1533 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1534 			   gimple_stmt_iterator *gsi,
1535 			   const oacc_collapse *counts, tree ivar)
1536 {
1537   tree ivar_type = TREE_TYPE (ivar);
1538 
1539   /*  The most rapidly changing iteration variable is the innermost
1540       one.  */
1541   for (int ix = fd->collapse; ix--;)
1542     {
1543       const omp_for_data_loop *loop = &fd->loops[ix];
1544       const oacc_collapse *collapse = &counts[ix];
1545       tree v = inner ? loop->v : collapse->outer;
1546       tree iter_type = TREE_TYPE (v);
1547       tree diff_type = TREE_TYPE (collapse->step);
1548       tree plus_type = iter_type;
1549       enum tree_code plus_code = PLUS_EXPR;
1550       tree expr;
1551 
1552       if (POINTER_TYPE_P (iter_type))
1553 	{
1554 	  plus_code = POINTER_PLUS_EXPR;
1555 	  plus_type = sizetype;
1556 	}
1557 
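      /* Peel this loop's index off the collapsed iteration number: for all
	 but the outermost loop the index is IVAR % ITERS, and IVAR / ITERS
	 is what remains for the loops further out.  */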
1558       expr = ivar;
1559       if (ix)
1560 	{
1561 	  tree mod = fold_convert (ivar_type, collapse->iters);
1562 	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1563 	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1564 	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1565 					   true, GSI_SAME_STMT);
1566 	}
1567 
1568       expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1569 			  collapse->step);
1570       expr = fold_build2 (plus_code, iter_type,
1571 			  inner ? collapse->outer : collapse->base,
1572 			  fold_convert (plus_type, expr));
1573       expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1574 				       true, GSI_SAME_STMT);
1575       gassign *ass = gimple_build_assign (v, expr);
1576       gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1577     }
1578 }
1579 
1580 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1581    of the combined collapse > 1 loop constructs, generate code like:
1582 	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1583 	if (cond3 is <)
1584 	  adj = STEP3 - 1;
1585 	else
1586 	  adj = STEP3 + 1;
1587 	count3 = (adj + N32 - N31) / STEP3;
1588 	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1589 	if (cond2 is <)
1590 	  adj = STEP2 - 1;
1591 	else
1592 	  adj = STEP2 + 1;
1593 	count2 = (adj + N22 - N21) / STEP2;
1594 	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1595 	if (cond1 is <)
1596 	  adj = STEP1 - 1;
1597 	else
1598 	  adj = STEP1 + 1;
1599 	count1 = (adj + N12 - N11) / STEP1;
1600 	count = count1 * count2 * count3;
1601    Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1602 	count = 0;
1603    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1604    of the combined loop constructs, just initialize COUNTS array
1605    from the _looptemp_ clauses.  */
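
/* Editor's note: a minimal, self-contained sketch (not compiled as part of
   this pass) of the iteration-count formula in the pseudocode above.  The
   helper name and the sample bounds are illustrative only.  */
#if 0
#include <stdio.h>

/* Iterations of "for (i = n1; i cond n2; i += step)", with adj = step - 1
   for cond "<" and step + 1 for cond ">", exactly as above.  */
static long
iteration_count (long n1, long n2, long step, int cond_is_lt)
{
  long adj = cond_is_lt ? step - 1 : step + 1;
  return (adj + n2 - n1) / step;
}

int
main (void)
{
  /* for (i = 0; i < 10; i += 3) iterates over 0, 3, 6, 9 -> 4.  */
  printf ("%ld\n", iteration_count (0, 10, 3, 1));
  /* for (i = 10; i > 0; i -= 4) iterates over 10, 6, 2 -> 3.  */
  printf ("%ld\n", iteration_count (10, 0, -4, 0));
  return 0;
}
#endif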
1606 
1607 /* NOTE: It *could* be better to moosh all of the BBs together,
1608    creating one larger BB with all the computation and the unexpected
1609    jump at the end.  I.e.
1610 
1611    bool zero3, zero2, zero1, zero;
1612 
1613    zero3 = N32 c3 N31;
1614    count3 = (N32 - N31) /[cl] STEP3;
1615    zero2 = N22 c2 N21;
1616    count2 = (N22 - N21) /[cl] STEP2;
1617    zero1 = N12 c1 N11;
1618    count1 = (N12 - N11) /[cl] STEP1;
1619    zero = zero3 || zero2 || zero1;
1620    count = count1 * count2 * count3;
1621    if (__builtin_expect(zero, false)) goto zero_iter_bb;
1622 
1623    After all, we expect the zero=false, and thus we expect to have to
1624    evaluate all of the comparison expressions, so short-circuiting
1625    oughtn't be a win.  Since the condition isn't protecting a
1626    denominator, we're not concerned about divide-by-zero, so we can
1627    fully evaluate count even if a numerator turned out to be wrong.
1628 
1629    It seems like putting this all together would create much better
1630    scheduling opportunities, and less pressure on the chip's branch
1631    predictor.  */
1632 
1633 static void
1634 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1635 			    basic_block &entry_bb, tree *counts,
1636 			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1637 			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1638 			    basic_block &l2_dom_bb)
1639 {
1640   tree t, type = TREE_TYPE (fd->loop.v);
1641   edge e, ne;
1642   int i;
1643 
1644   /* Collapsed loops need work for expansion into SSA form.  */
1645   gcc_assert (!gimple_in_ssa_p (cfun));
1646 
1647   if (gimple_omp_for_combined_into_p (fd->for_stmt)
1648       && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1649     {
1650       gcc_assert (fd->ordered == 0);
1651       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1652 	 isn't supposed to be handled, as the inner loop doesn't
1653 	 use it.  */
1654       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1655 				     OMP_CLAUSE__LOOPTEMP_);
1656       gcc_assert (innerc);
1657       for (i = 0; i < fd->collapse; i++)
1658 	{
1659 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1660 				    OMP_CLAUSE__LOOPTEMP_);
1661 	  gcc_assert (innerc);
1662 	  if (i)
1663 	    counts[i] = OMP_CLAUSE_DECL (innerc);
1664 	  else
1665 	    counts[0] = NULL_TREE;
1666 	}
1667       return;
1668     }
1669 
1670   for (i = fd->collapse; i < fd->ordered; i++)
1671     {
1672       tree itype = TREE_TYPE (fd->loops[i].v);
1673       counts[i] = NULL_TREE;
1674       t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1675 		       fold_convert (itype, fd->loops[i].n1),
1676 		       fold_convert (itype, fd->loops[i].n2));
1677       if (t && integer_zerop (t))
1678 	{
1679 	  for (i = fd->collapse; i < fd->ordered; i++)
1680 	    counts[i] = build_int_cst (type, 0);
1681 	  break;
1682 	}
1683     }
1684   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1685     {
1686       tree itype = TREE_TYPE (fd->loops[i].v);
1687 
1688       if (i >= fd->collapse && counts[i])
1689 	continue;
1690       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1691 	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1692 				fold_convert (itype, fd->loops[i].n1),
1693 				fold_convert (itype, fd->loops[i].n2)))
1694 	      == NULL_TREE || !integer_onep (t)))
1695 	{
1696 	  gcond *cond_stmt;
1697 	  tree n1, n2;
1698 	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1699 	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1700 					 true, GSI_SAME_STMT);
1701 	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1702 	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1703 					 true, GSI_SAME_STMT);
1704 	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1705 					 NULL_TREE, NULL_TREE);
1706 	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1707 	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1708 			 expand_omp_regimplify_p, NULL, NULL)
1709 	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1710 			    expand_omp_regimplify_p, NULL, NULL))
1711 	    {
1712 	      *gsi = gsi_for_stmt (cond_stmt);
1713 	      gimple_regimplify_operands (cond_stmt, gsi);
1714 	    }
1715 	  e = split_block (entry_bb, cond_stmt);
1716 	  basic_block &zero_iter_bb
1717 	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1718 	  int &first_zero_iter
1719 	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1720 	  if (zero_iter_bb == NULL)
1721 	    {
1722 	      gassign *assign_stmt;
1723 	      first_zero_iter = i;
1724 	      zero_iter_bb = create_empty_bb (entry_bb);
1725 	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1726 	      *gsi = gsi_after_labels (zero_iter_bb);
1727 	      if (i < fd->collapse)
1728 		assign_stmt = gimple_build_assign (fd->loop.n2,
1729 						   build_zero_cst (type));
1730 	      else
1731 		{
1732 		  counts[i] = create_tmp_reg (type, ".count");
1733 		  assign_stmt
1734 		    = gimple_build_assign (counts[i], build_zero_cst (type));
1735 		}
1736 	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1737 	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1738 				       entry_bb);
1739 	    }
1740 	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1741 	  ne->probability = profile_probability::very_unlikely ();
1742 	  e->flags = EDGE_TRUE_VALUE;
1743 	  e->probability = ne->probability.invert ();
1744 	  if (l2_dom_bb == NULL)
1745 	    l2_dom_bb = entry_bb;
1746 	  entry_bb = e->dest;
1747 	  *gsi = gsi_last_nondebug_bb (entry_bb);
1748 	}
1749 
1750       if (POINTER_TYPE_P (itype))
1751 	itype = signed_type_for (itype);
1752       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1753 				 ? -1 : 1));
1754       t = fold_build2 (PLUS_EXPR, itype,
1755 		       fold_convert (itype, fd->loops[i].step), t);
1756       t = fold_build2 (PLUS_EXPR, itype, t,
1757 		       fold_convert (itype, fd->loops[i].n2));
1758       t = fold_build2 (MINUS_EXPR, itype, t,
1759 		       fold_convert (itype, fd->loops[i].n1));
1760       /* ?? We could probably use CEIL_DIV_EXPR instead of
1761 	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
1762 	 generate the same code in the end because generically we
1763 	 don't know that the values involved must be negative for
1764 	 GT??  */
1765       if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1766 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1767 			 fold_build1 (NEGATE_EXPR, itype, t),
1768 			 fold_build1 (NEGATE_EXPR, itype,
1769 				      fold_convert (itype,
1770 						    fd->loops[i].step)));
1771       else
1772 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1773 			 fold_convert (itype, fd->loops[i].step));
1774       t = fold_convert (type, t);
1775       if (TREE_CODE (t) == INTEGER_CST)
1776 	counts[i] = t;
1777       else
1778 	{
1779 	  if (i < fd->collapse || i != first_zero_iter2)
1780 	    counts[i] = create_tmp_reg (type, ".count");
1781 	  expand_omp_build_assign (gsi, counts[i], t);
1782 	}
1783       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1784 	{
1785 	  if (i == 0)
1786 	    t = counts[0];
1787 	  else
1788 	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1789 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1790 	}
1791     }
1792 }
1793 
1794 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1795 	T = V;
1796 	V3 = N31 + (T % count3) * STEP3;
1797 	T = T / count3;
1798 	V2 = N21 + (T % count2) * STEP2;
1799 	T = T / count2;
1800 	V1 = N11 + T * STEP1;
1801    if this loop doesn't have an inner loop construct combined with it.
1802    If it does have an inner loop construct combined with it and the
1803    iteration count isn't known constant, store values from counts array
1804    into its _looptemp_ temporaries instead.  */
1805 
1806 static void
1807 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1808 			  tree *counts, gimple *inner_stmt, tree startvar)
1809 {
1810   int i;
1811   if (gimple_omp_for_combined_p (fd->for_stmt))
1812     {
1813       /* If fd->loop.n2 is constant, then no propagation of the counts
1814 	 is needed, they are constant.  */
1815       if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1816 	return;
1817 
1818       tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1819 		     ? gimple_omp_taskreg_clauses (inner_stmt)
1820 		     : gimple_omp_for_clauses (inner_stmt);
1821       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1822 	 isn't supposed to be handled, as the inner loop doesn't
1823 	 use it.  */
1824       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1825       gcc_assert (innerc);
1826       for (i = 0; i < fd->collapse; i++)
1827 	{
1828 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1829 				    OMP_CLAUSE__LOOPTEMP_);
1830 	  gcc_assert (innerc);
1831 	  if (i)
1832 	    {
1833 	      tree tem = OMP_CLAUSE_DECL (innerc);
1834 	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1835 	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1836 					    false, GSI_CONTINUE_LINKING);
1837 	      gassign *stmt = gimple_build_assign (tem, t);
1838 	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1839 	    }
1840 	}
1841       return;
1842     }
1843 
1844   tree type = TREE_TYPE (fd->loop.v);
1845   tree tem = create_tmp_reg (type, ".tem");
1846   gassign *stmt = gimple_build_assign (tem, startvar);
1847   gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1848 
1849   for (i = fd->collapse - 1; i >= 0; i--)
1850     {
1851       tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1852       itype = vtype;
1853       if (POINTER_TYPE_P (vtype))
1854 	itype = signed_type_for (vtype);
1855       if (i != 0)
1856 	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1857       else
1858 	t = tem;
1859       t = fold_convert (itype, t);
1860       t = fold_build2 (MULT_EXPR, itype, t,
1861 		       fold_convert (itype, fd->loops[i].step));
1862       if (POINTER_TYPE_P (vtype))
1863 	t = fold_build_pointer_plus (fd->loops[i].n1, t);
1864       else
1865 	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1866       t = force_gimple_operand_gsi (gsi, t,
1867 				    DECL_P (fd->loops[i].v)
1868 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1869 				    NULL_TREE, false,
1870 				    GSI_CONTINUE_LINKING);
1871       stmt = gimple_build_assign (fd->loops[i].v, t);
1872       gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1873       if (i != 0)
1874 	{
1875 	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1876 	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1877 					false, GSI_CONTINUE_LINKING);
1878 	  stmt = gimple_build_assign (tem, t);
1879 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1880 	}
1881     }
1882 }
1883 
1884 /* Helper function for expand_omp_for_*.  Generate code like:
1885     L10:
1886 	V3 += STEP3;
1887 	if (V3 cond3 N32) goto BODY_BB; else goto L11;
1888     L11:
1889 	V3 = N31;
1890 	V2 += STEP2;
1891 	if (V2 cond2 N22) goto BODY_BB; else goto L12;
1892     L12:
1893 	V2 = N21;
1894 	V1 += STEP1;
1895 	goto BODY_BB;  */
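
/* Editor's note: a minimal, self-contained sketch (not compiled as part of
   this pass) of the "odometer" update above, for a collapse(2) nest with
   zero-based unit-stride loops.  The outermost bound check is added only so
   the sketch terminates; in the generated code the enclosing chunk counter
   (V < iend) decides when to stop.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const long n12 = 2, n22 = 3;		/* upper bounds N12, N22  */
  long v1 = 0, v2 = 0;			/* V1 = N11, V2 = N21  */
  for (;;)
    {
      printf ("(%ld, %ld)\n", v1, v2);	/* BODY_BB  */
      v2 += 1;				/* V2 += STEP2  */
      if (v2 < n22)			/* if (V2 cond2 N22) goto BODY_BB  */
	continue;
      v2 = 0;				/* V2 = N21  */
      v1 += 1;				/* V1 += STEP1  */
      if (!(v1 < n12))			/* termination for the sketch only  */
	break;
    }
  return 0;
}
#endif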
1896 
1897 static basic_block
1898 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1899 			     basic_block body_bb)
1900 {
1901   basic_block last_bb, bb, collapse_bb = NULL;
1902   int i;
1903   gimple_stmt_iterator gsi;
1904   edge e;
1905   tree t;
1906   gimple *stmt;
1907 
1908   last_bb = cont_bb;
1909   for (i = fd->collapse - 1; i >= 0; i--)
1910     {
1911       tree vtype = TREE_TYPE (fd->loops[i].v);
1912 
1913       bb = create_empty_bb (last_bb);
1914       add_bb_to_loop (bb, last_bb->loop_father);
1915       gsi = gsi_start_bb (bb);
1916 
1917       if (i < fd->collapse - 1)
1918 	{
1919 	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1920 	  e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1921 
1922 	  t = fd->loops[i + 1].n1;
1923 	  t = force_gimple_operand_gsi (&gsi, t,
1924 					DECL_P (fd->loops[i + 1].v)
1925 					&& TREE_ADDRESSABLE (fd->loops[i
1926 								       + 1].v),
1927 					NULL_TREE, false,
1928 					GSI_CONTINUE_LINKING);
1929 	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1930 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1931 	}
1932       else
1933 	collapse_bb = bb;
1934 
1935       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1936 
1937       if (POINTER_TYPE_P (vtype))
1938 	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1939       else
1940 	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1941       t = force_gimple_operand_gsi (&gsi, t,
1942 				    DECL_P (fd->loops[i].v)
1943 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1944 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
1945       stmt = gimple_build_assign (fd->loops[i].v, t);
1946       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1947 
1948       if (i > 0)
1949 	{
1950 	  t = fd->loops[i].n2;
1951 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1952 					false, GSI_CONTINUE_LINKING);
1953 	  tree v = fd->loops[i].v;
1954 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
1955 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1956 					  false, GSI_CONTINUE_LINKING);
1957 	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1958 	  stmt = gimple_build_cond_empty (t);
1959 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1960 	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
1961 			 expand_omp_regimplify_p, NULL, NULL)
1962 	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
1963 			    expand_omp_regimplify_p, NULL, NULL))
1964 	    gimple_regimplify_operands (stmt, &gsi);
1965 	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1966 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1967 	}
1968       else
1969 	make_edge (bb, body_bb, EDGE_FALLTHRU);
1970       last_bb = bb;
1971     }
1972 
1973   return collapse_bb;
1974 }
1975 
1976 /* Expand #pragma omp ordered depend(source).  */
1977 
1978 static void
1979 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1980 			   tree *counts, location_t loc)
1981 {
1982   enum built_in_function source_ix
1983     = fd->iter_type == long_integer_type_node
1984       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1985   gimple *g
1986     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1987 			 build_fold_addr_expr (counts[fd->ordered]));
1988   gimple_set_location (g, loc);
1989   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1990 }
1991 
1992 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
1993 
1994 static void
1995 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1996 			 tree *counts, tree c, location_t loc)
1997 {
1998   auto_vec<tree, 10> args;
1999   enum built_in_function sink_ix
2000     = fd->iter_type == long_integer_type_node
2001       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2002   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2003   int i;
2004   gimple_stmt_iterator gsi2 = *gsi;
2005   bool warned_step = false;
2006 
2007   for (i = 0; i < fd->ordered; i++)
2008     {
2009       tree step = NULL_TREE;
2010       off = TREE_PURPOSE (deps);
2011       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2012 	{
2013 	  step = TREE_OPERAND (off, 1);
2014 	  off = TREE_OPERAND (off, 0);
2015 	}
2016       if (!integer_zerop (off))
2017 	{
2018 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2019 		      || fd->loops[i].cond_code == GT_EXPR);
2020 	  bool forward = fd->loops[i].cond_code == LT_EXPR;
2021 	  if (step)
2022 	    {
2023 	      /* Non-simple Fortran DO loops.  If step is variable,
2024 		 we don't know even the direction at compile time, so can't
2025 		 warn.  */
2026 	      if (TREE_CODE (step) != INTEGER_CST)
2027 		break;
2028 	      forward = tree_int_cst_sgn (step) != -1;
2029 	    }
2030 	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2031 	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2032 				"lexically later iteration");
2033 	  break;
2034 	}
2035       deps = TREE_CHAIN (deps);
2036     }
2037   /* If all offsets corresponding to the collapsed loops are zero,
2038      this depend clause can be ignored.  FIXME: but there is still a
2039      flush needed.  We need to emit one __sync_synchronize () for it
2040      though (perhaps conditionally)?  Solve this together with the
2041      conservative dependence folding optimization.
2042   if (i >= fd->collapse)
2043     return;  */
2044 
2045   deps = OMP_CLAUSE_DECL (c);
2046   gsi_prev (&gsi2);
2047   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2048   edge e2 = split_block_after_labels (e1->dest);
2049 
2050   gsi2 = gsi_after_labels (e1->dest);
2051   *gsi = gsi_last_bb (e1->src);
2052   for (i = 0; i < fd->ordered; i++)
2053     {
2054       tree itype = TREE_TYPE (fd->loops[i].v);
2055       tree step = NULL_TREE;
2056       tree orig_off = NULL_TREE;
2057       if (POINTER_TYPE_P (itype))
2058 	itype = sizetype;
2059       if (i)
2060 	deps = TREE_CHAIN (deps);
2061       off = TREE_PURPOSE (deps);
2062       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2063 	{
2064 	  step = TREE_OPERAND (off, 1);
2065 	  off = TREE_OPERAND (off, 0);
2066 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2067 		      && integer_onep (fd->loops[i].step)
2068 		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2069 	}
2070       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2071       if (step)
2072 	{
2073 	  off = fold_convert_loc (loc, itype, off);
2074 	  orig_off = off;
2075 	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2076 	}
2077 
2078       if (integer_zerop (off))
2079 	t = boolean_true_node;
2080       else
2081 	{
2082 	  tree a;
2083 	  tree co = fold_convert_loc (loc, itype, off);
2084 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2085 	    {
2086 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2087 		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2088 	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2089 				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2090 				   co);
2091 	    }
2092 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2093 	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2094 				 fd->loops[i].v, co);
2095 	  else
2096 	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2097 				 fd->loops[i].v, co);
2098 	  if (step)
2099 	    {
2100 	      tree t1, t2;
2101 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2102 		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2103 				      fd->loops[i].n1);
2104 	      else
2105 		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2106 				      fd->loops[i].n2);
2107 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2108 		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2109 				      fd->loops[i].n2);
2110 	      else
2111 		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2112 				      fd->loops[i].n1);
2113 	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2114 				   step, build_int_cst (TREE_TYPE (step), 0));
2115 	      if (TREE_CODE (step) != INTEGER_CST)
2116 		{
2117 		  t1 = unshare_expr (t1);
2118 		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2119 						 false, GSI_CONTINUE_LINKING);
2120 		  t2 = unshare_expr (t2);
2121 		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2122 						 false, GSI_CONTINUE_LINKING);
2123 		}
2124 	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2125 				   t, t2, t1);
2126 	    }
2127 	  else if (fd->loops[i].cond_code == LT_EXPR)
2128 	    {
2129 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2130 		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2131 				     fd->loops[i].n1);
2132 	      else
2133 		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2134 				     fd->loops[i].n2);
2135 	    }
2136 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2137 	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2138 				 fd->loops[i].n2);
2139 	  else
2140 	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2141 				 fd->loops[i].n1);
2142 	}
2143       if (cond)
2144 	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2145       else
2146 	cond = t;
2147 
2148       off = fold_convert_loc (loc, itype, off);
2149 
2150       if (step
2151 	  || (fd->loops[i].cond_code == LT_EXPR
2152 	      ? !integer_onep (fd->loops[i].step)
2153 	      : !integer_minus_onep (fd->loops[i].step)))
2154 	{
2155 	  if (step == NULL_TREE
2156 	      && TYPE_UNSIGNED (itype)
2157 	      && fd->loops[i].cond_code == GT_EXPR)
2158 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2159 				 fold_build1_loc (loc, NEGATE_EXPR, itype,
2160 						  s));
2161 	  else
2162 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2163 				 orig_off ? orig_off : off, s);
2164 	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2165 			       build_int_cst (itype, 0));
2166 	  if (integer_zerop (t) && !warned_step)
2167 	    {
2168 	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2169 				  "in the iteration space");
2170 	      warned_step = true;
2171 	    }
2172 	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2173 				  cond, t);
2174 	}
2175 
2176       if (i <= fd->collapse - 1 && fd->collapse > 1)
2177 	t = fd->loop.v;
2178       else if (counts[i])
2179 	t = counts[i];
2180       else
2181 	{
2182 	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2183 			       fd->loops[i].v, fd->loops[i].n1);
2184 	  t = fold_convert_loc (loc, fd->iter_type, t);
2185 	}
2186       if (step)
2187 	/* We have divided off by step already earlier.  */;
2188       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2189 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2190 			       fold_build1_loc (loc, NEGATE_EXPR, itype,
2191 						s));
2192       else
2193 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2194       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2195 	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2196       off = fold_convert_loc (loc, fd->iter_type, off);
2197       if (i <= fd->collapse - 1 && fd->collapse > 1)
2198 	{
2199 	  if (i)
2200 	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2201 				   off);
2202 	  if (i < fd->collapse - 1)
2203 	    {
2204 	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2205 				      counts[i]);
2206 	      continue;
2207 	    }
2208 	}
2209       off = unshare_expr (off);
2210       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2211       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2212 				    true, GSI_SAME_STMT);
2213       args.safe_push (t);
2214     }
2215   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2216   gimple_set_location (g, loc);
2217   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2218 
2219   cond = unshare_expr (cond);
2220   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2221 				   GSI_CONTINUE_LINKING);
2222   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2223   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2224   e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2225   e1->probability = e3->probability.invert ();
2226   e1->flags = EDGE_TRUE_VALUE;
2227   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2228 
2229   *gsi = gsi_after_labels (e2->dest);
2230 }
2231 
2232 /* Expand all #pragma omp ordered depend(source) and
2233    #pragma omp ordered depend(sink:...) constructs in the current
2234    #pragma omp for ordered(n) region.  */
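
/* Editor's note: illustrative user-level code (not compiled as part of this
   pass) for the kind of region handled here.  With -fopenmp, the sink clause
   below is expanded by expand_omp_ordered_sink into a call to the
   BUILT_IN_GOMP_DOACROSS{,_ULL}_WAIT builtin and the source clause by
   expand_omp_ordered_source into a BUILT_IN_GOMP_DOACROSS{,_ULL}_POST call;
   the function and array names are made up.  */
#if 0
void
doacross_example (int *a, int n)
{
  int i;
#pragma omp parallel for ordered(1)
  for (i = 1; i < n; i++)
    {
#pragma omp ordered depend(sink: i - 1)
      a[i] += a[i - 1];
#pragma omp ordered depend(source)
    }
}
#endif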
2235 
2236 static void
2237 expand_omp_ordered_source_sink (struct omp_region *region,
2238 				struct omp_for_data *fd, tree *counts,
2239 				basic_block cont_bb)
2240 {
2241   struct omp_region *inner;
2242   int i;
2243   for (i = fd->collapse - 1; i < fd->ordered; i++)
2244     if (i == fd->collapse - 1 && fd->collapse > 1)
2245       counts[i] = NULL_TREE;
2246     else if (i >= fd->collapse && !cont_bb)
2247       counts[i] = build_zero_cst (fd->iter_type);
2248     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2249 	     && integer_onep (fd->loops[i].step))
2250       counts[i] = NULL_TREE;
2251     else
2252       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2253   tree atype
2254     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2255   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2256   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2257 
2258   for (inner = region->inner; inner; inner = inner->next)
2259     if (inner->type == GIMPLE_OMP_ORDERED)
2260       {
2261 	gomp_ordered *ord_stmt = inner->ord_stmt;
2262 	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2263 	location_t loc = gimple_location (ord_stmt);
2264 	tree c;
2265 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2266 	     c; c = OMP_CLAUSE_CHAIN (c))
2267 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2268 	    break;
2269 	if (c)
2270 	  expand_omp_ordered_source (&gsi, fd, counts, loc);
2271 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2272 	     c; c = OMP_CLAUSE_CHAIN (c))
2273 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2274 	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2275 	gsi_remove (&gsi, true);
2276       }
2277 }
2278 
2279 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2280    collapsed.  */
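
/* Editor's note: illustrative user-level code (not compiled as part of this
   pass), meant to be invoked from within a parallel region.  With ordered(2)
   and no collapse clause, only the i loop below is workshared; the j loop is
   one of the fd->ordered - fd->collapse loops this function recreates around
   the body.  Names and bounds are made up.  */
#if 0
void
ordered2_example (int (*a)[64], int n, int m)
{
  int i, j;
#pragma omp for ordered(2)
  for (i = 1; i < n; i++)
    for (j = 0; j < m; j++)
      {
#pragma omp ordered depend(sink: i - 1, j)
	a[i][j] += a[i - 1][j];
#pragma omp ordered depend(source)
      }
}
#endif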
2281 
2282 static basic_block
2283 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2284 			      basic_block cont_bb, basic_block body_bb,
2285 			      bool ordered_lastprivate)
2286 {
2287   if (fd->ordered == fd->collapse)
2288     return cont_bb;
2289 
2290   if (!cont_bb)
2291     {
2292       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2293       for (int i = fd->collapse; i < fd->ordered; i++)
2294 	{
2295 	  tree type = TREE_TYPE (fd->loops[i].v);
2296 	  tree n1 = fold_convert (type, fd->loops[i].n1);
2297 	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2298 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2299 			      size_int (i - fd->collapse + 1),
2300 			      NULL_TREE, NULL_TREE);
2301 	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2302 	}
2303       return NULL;
2304     }
2305 
2306   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2307     {
2308       tree t, type = TREE_TYPE (fd->loops[i].v);
2309       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2310       expand_omp_build_assign (&gsi, fd->loops[i].v,
2311 			       fold_convert (type, fd->loops[i].n1));
2312       if (counts[i])
2313 	expand_omp_build_assign (&gsi, counts[i],
2314 				 build_zero_cst (fd->iter_type));
2315       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2316 			  size_int (i - fd->collapse + 1),
2317 			  NULL_TREE, NULL_TREE);
2318       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2319       if (!gsi_end_p (gsi))
2320 	gsi_prev (&gsi);
2321       else
2322 	gsi = gsi_last_bb (body_bb);
2323       edge e1 = split_block (body_bb, gsi_stmt (gsi));
2324       basic_block new_body = e1->dest;
2325       if (body_bb == cont_bb)
2326 	cont_bb = new_body;
2327       edge e2 = NULL;
2328       basic_block new_header;
2329       if (EDGE_COUNT (cont_bb->preds) > 0)
2330 	{
2331 	  gsi = gsi_last_bb (cont_bb);
2332 	  if (POINTER_TYPE_P (type))
2333 	    t = fold_build_pointer_plus (fd->loops[i].v,
2334 					 fold_convert (sizetype,
2335 						       fd->loops[i].step));
2336 	  else
2337 	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2338 			     fold_convert (type, fd->loops[i].step));
2339 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2340 	  if (counts[i])
2341 	    {
2342 	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2343 			       build_int_cst (fd->iter_type, 1));
2344 	      expand_omp_build_assign (&gsi, counts[i], t);
2345 	      t = counts[i];
2346 	    }
2347 	  else
2348 	    {
2349 	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2350 			       fd->loops[i].v, fd->loops[i].n1);
2351 	      t = fold_convert (fd->iter_type, t);
2352 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2353 					    true, GSI_SAME_STMT);
2354 	    }
2355 	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2356 			 size_int (i - fd->collapse + 1),
2357 			 NULL_TREE, NULL_TREE);
2358 	  expand_omp_build_assign (&gsi, aref, t);
2359 	  gsi_prev (&gsi);
2360 	  e2 = split_block (cont_bb, gsi_stmt (gsi));
2361 	  new_header = e2->dest;
2362 	}
2363       else
2364 	new_header = cont_bb;
2365       gsi = gsi_after_labels (new_header);
2366       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2367 					 true, GSI_SAME_STMT);
2368       tree n2
2369 	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2370 				    true, NULL_TREE, true, GSI_SAME_STMT);
2371       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2372       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2373       edge e3 = split_block (new_header, gsi_stmt (gsi));
2374       cont_bb = e3->dest;
2375       remove_edge (e1);
2376       make_edge (body_bb, new_header, EDGE_FALLTHRU);
2377       e3->flags = EDGE_FALSE_VALUE;
2378       e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2379       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2380       e1->probability = e3->probability.invert ();
2381 
2382       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2383       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2384 
2385       if (e2)
2386 	{
2387 	  struct loop *loop = alloc_loop ();
2388 	  loop->header = new_header;
2389 	  loop->latch = e2->src;
2390 	  add_loop (loop, body_bb->loop_father);
2391 	}
2392     }
2393 
2394   /* If there are any lastprivate clauses and it is possible some loops
2395      might have zero iterations, ensure all the decls are initialized,
2396      otherwise we could crash evaluating C++ class iterators with lastprivate
2397      clauses.  */
2398   bool need_inits = false;
2399   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2400     if (need_inits)
2401       {
2402 	tree type = TREE_TYPE (fd->loops[i].v);
2403 	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2404 	expand_omp_build_assign (&gsi, fd->loops[i].v,
2405 				 fold_convert (type, fd->loops[i].n1));
2406       }
2407     else
2408       {
2409 	tree type = TREE_TYPE (fd->loops[i].v);
2410 	tree this_cond = fold_build2 (fd->loops[i].cond_code,
2411 				      boolean_type_node,
2412 				      fold_convert (type, fd->loops[i].n1),
2413 				      fold_convert (type, fd->loops[i].n2));
2414 	if (!integer_onep (this_cond))
2415 	  need_inits = true;
2416       }
2417 
2418   return cont_bb;
2419 }
2420 
2421 /* A subroutine of expand_omp_for.  Generate code for a parallel
2422    loop with any schedule.  Given parameters:
2423 
2424 	for (V = N1; V cond N2; V += STEP) BODY;
2425 
2426    where COND is "<" or ">", we generate pseudocode
2427 
2428 	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2429 	if (more) goto L0; else goto L3;
2430     L0:
2431 	V = istart0;
2432 	iend = iend0;
2433     L1:
2434 	BODY;
2435 	V += STEP;
2436 	if (V cond iend) goto L1; else goto L2;
2437     L2:
2438 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2439     L3:
2440 
2441     If this is a combined omp parallel loop, instead of the call to
2442     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2443     If this is gimple_omp_for_combined_p loop, then instead of assigning
2444     V and iend in L0 we assign the first two _looptemp_ clause decls of the
2445     inner GIMPLE_OMP_FOR and V += STEP; and
2446     if (V cond iend) goto L1; else goto L2; are removed.
2447 
2448     For collapsed loops, given parameters:
2449       collapse(3)
2450       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2451 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2452 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2453 	    BODY;
2454 
2455     we generate pseudocode
2456 
2457 	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2458 	if (cond3 is <)
2459 	  adj = STEP3 - 1;
2460 	else
2461 	  adj = STEP3 + 1;
2462 	count3 = (adj + N32 - N31) / STEP3;
2463 	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2464 	if (cond2 is <)
2465 	  adj = STEP2 - 1;
2466 	else
2467 	  adj = STEP2 + 1;
2468 	count2 = (adj + N22 - N21) / STEP2;
2469 	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2470 	if (cond1 is <)
2471 	  adj = STEP1 - 1;
2472 	else
2473 	  adj = STEP1 + 1;
2474 	count1 = (adj + N12 - N11) / STEP1;
2475 	count = count1 * count2 * count3;
2476 	goto Z1;
2477     Z0:
2478 	count = 0;
2479     Z1:
2480 	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2481 	if (more) goto L0; else goto L3;
2482     L0:
2483 	V = istart0;
2484 	T = V;
2485 	V3 = N31 + (T % count3) * STEP3;
2486 	T = T / count3;
2487 	V2 = N21 + (T % count2) * STEP2;
2488 	T = T / count2;
2489 	V1 = N11 + T * STEP1;
2490 	iend = iend0;
2491     L1:
2492 	BODY;
2493 	V += 1;
2494 	if (V < iend) goto L10; else goto L2;
2495     L10:
2496 	V3 += STEP3;
2497 	if (V3 cond3 N32) goto L1; else goto L11;
2498     L11:
2499 	V3 = N31;
2500 	V2 += STEP2;
2501 	if (V2 cond2 N22) goto L1; else goto L12;
2502     L12:
2503 	V2 = N21;
2504 	V1 += STEP1;
2505 	goto L1;
2506     L2:
2507 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2508     L3:
2509 
2510       */
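
/* Editor's note: a minimal, self-contained sketch (not compiled as part of
   this pass) of the control flow generated above for a plain, non-combined,
   non-collapsed loop.  loop_start / loop_next are hypothetical stand-ins for
   the GOMP_loop_*_start / _next entry points chosen by the caller; their
   real libgomp signatures are not reproduced here.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static long pos, limit = 10, chunk = 3;

/* Hypothetical stand-in: hand out the next chunk of [0, limit).  */
static bool
loop_start (long *istart, long *iend)
{
  if (pos >= limit)
    return false;
  *istart = pos;
  *iend = pos + chunk < limit ? pos + chunk : limit;
  pos = *iend;
  return true;
}

#define loop_next loop_start		/* same hand-out logic in this sketch  */

int
main (void)
{
  long istart0, iend0;
  if (!loop_start (&istart0, &iend0))	/* more = GOMP_loop_foo_start (...)  */
    return 0;				/* L3  */
  do
    {
      long v = istart0;			/* L0: V = istart0  */
      long iend = iend0;		/*     iend = iend0  */
      do
	{
	  printf ("%ld\n", v);		/* L1: BODY  */
	  v += 1;			/*     V += STEP  */
	}
      while (v < iend);			/*     if (V cond iend) goto L1  */
    }
  while (loop_next (&istart0, &iend0));	/* L2: GOMP_loop_foo_next (...)  */
  return 0;				/* L3  */
}
#endif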
2511 
2512 static void
2513 expand_omp_for_generic (struct omp_region *region,
2514 			struct omp_for_data *fd,
2515 			enum built_in_function start_fn,
2516 			enum built_in_function next_fn,
2517 			gimple *inner_stmt)
2518 {
2519   tree type, istart0, iend0, iend;
2520   tree t, vmain, vback, bias = NULL_TREE;
2521   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2522   basic_block l2_bb = NULL, l3_bb = NULL;
2523   gimple_stmt_iterator gsi;
2524   gassign *assign_stmt;
2525   bool in_combined_parallel = is_combined_parallel (region);
2526   bool broken_loop = region->cont == NULL;
2527   edge e, ne;
2528   tree *counts = NULL;
2529   int i;
2530   bool ordered_lastprivate = false;
2531 
2532   gcc_assert (!broken_loop || !in_combined_parallel);
2533   gcc_assert (fd->iter_type == long_integer_type_node
2534 	      || !in_combined_parallel);
2535 
2536   entry_bb = region->entry;
2537   cont_bb = region->cont;
2538   collapse_bb = NULL;
2539   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2540   gcc_assert (broken_loop
2541 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2542   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2543   l1_bb = single_succ (l0_bb);
2544   if (!broken_loop)
2545     {
2546       l2_bb = create_empty_bb (cont_bb);
2547       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2548 		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2549 		      == l1_bb));
2550       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2551     }
2552   else
2553     l2_bb = NULL;
2554   l3_bb = BRANCH_EDGE (entry_bb)->dest;
2555   exit_bb = region->exit;
2556 
2557   gsi = gsi_last_nondebug_bb (entry_bb);
2558 
2559   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2560   if (fd->ordered
2561       && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2562 			  OMP_CLAUSE_LASTPRIVATE))
2563     ordered_lastprivate = true;
2564   if (fd->collapse > 1 || fd->ordered)
2565     {
2566       int first_zero_iter1 = -1, first_zero_iter2 = -1;
2567       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2568 
2569       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2570       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2571 				  zero_iter1_bb, first_zero_iter1,
2572 				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2573 
2574       if (zero_iter1_bb)
2575 	{
2576 	  /* Some counts[i] vars might be uninitialized if
2577 	     some loop has zero iterations.  But the body shouldn't
2578 	     be executed in that case, so just avoid uninit warnings.  */
2579 	  for (i = first_zero_iter1;
2580 	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2581 	    if (SSA_VAR_P (counts[i]))
2582 	      TREE_NO_WARNING (counts[i]) = 1;
2583 	  gsi_prev (&gsi);
2584 	  e = split_block (entry_bb, gsi_stmt (gsi));
2585 	  entry_bb = e->dest;
2586 	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2587 	  gsi = gsi_last_nondebug_bb (entry_bb);
2588 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2589 				   get_immediate_dominator (CDI_DOMINATORS,
2590 							    zero_iter1_bb));
2591 	}
2592       if (zero_iter2_bb)
2593 	{
2594 	  /* Some counts[i] vars might be uninitialized if
2595 	     some loop has zero iterations.  But the body shouldn't
2596 	     be executed in that case, so just avoid uninit warnings.  */
2597 	  for (i = first_zero_iter2; i < fd->ordered; i++)
2598 	    if (SSA_VAR_P (counts[i]))
2599 	      TREE_NO_WARNING (counts[i]) = 1;
2600 	  if (zero_iter1_bb)
2601 	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2602 	  else
2603 	    {
2604 	      gsi_prev (&gsi);
2605 	      e = split_block (entry_bb, gsi_stmt (gsi));
2606 	      entry_bb = e->dest;
2607 	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2608 	      gsi = gsi_last_nondebug_bb (entry_bb);
2609 	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2610 				       get_immediate_dominator
2611 					 (CDI_DOMINATORS, zero_iter2_bb));
2612 	    }
2613 	}
2614       if (fd->collapse == 1)
2615 	{
2616 	  counts[0] = fd->loop.n2;
2617 	  fd->loop = fd->loops[0];
2618 	}
2619     }
2620 
2621   type = TREE_TYPE (fd->loop.v);
2622   istart0 = create_tmp_var (fd->iter_type, ".istart0");
2623   iend0 = create_tmp_var (fd->iter_type, ".iend0");
2624   TREE_ADDRESSABLE (istart0) = 1;
2625   TREE_ADDRESSABLE (iend0) = 1;
2626 
2627   /* See if we need to bias by LLONG_MIN.  */
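  /* Editor's note: the GOMP_loop_ull_* interface iterates over an unsigned
     domain.  Adding TYPE_MIN_VALUE (i.e. flipping the sign bit once the
     operands are viewed in the unsigned iterator type) maps a signed range
     whose bounds may straddle zero monotonically into that domain; e.g. the
     signed values -3, 0, 3 become 0x7fff...fffd, 0x8000...0000 and
     0x8000...0003, preserving their order.  The bias is subtracted again
     below when istart0/iend0 are converted back.  */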
2628   if (fd->iter_type == long_long_unsigned_type_node
2629       && TREE_CODE (type) == INTEGER_TYPE
2630       && !TYPE_UNSIGNED (type)
2631       && fd->ordered == 0)
2632     {
2633       tree n1, n2;
2634 
2635       if (fd->loop.cond_code == LT_EXPR)
2636 	{
2637 	  n1 = fd->loop.n1;
2638 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2639 	}
2640       else
2641 	{
2642 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2643 	  n2 = fd->loop.n1;
2644 	}
2645       if (TREE_CODE (n1) != INTEGER_CST
2646 	  || TREE_CODE (n2) != INTEGER_CST
2647 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2648 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2649     }
2650 
2651   gimple_stmt_iterator gsif = gsi;
2652   gsi_prev (&gsif);
2653 
2654   tree arr = NULL_TREE;
2655   if (in_combined_parallel)
2656     {
2657       gcc_assert (fd->ordered == 0);
2658       /* In a combined parallel loop, emit a call to
2659 	 GOMP_loop_foo_next.  */
2660       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2661 			   build_fold_addr_expr (istart0),
2662 			   build_fold_addr_expr (iend0));
2663     }
2664   else
2665     {
2666       tree t0, t1, t2, t3, t4;
2667       /* If this is not a combined parallel loop, emit a call to
2668 	 GOMP_loop_foo_start in ENTRY_BB.  */
2669       t4 = build_fold_addr_expr (iend0);
2670       t3 = build_fold_addr_expr (istart0);
2671       if (fd->ordered)
2672 	{
2673 	  t0 = build_int_cst (unsigned_type_node,
2674 			      fd->ordered - fd->collapse + 1);
2675 	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2676 							fd->ordered
2677 							- fd->collapse + 1),
2678 				".omp_counts");
2679 	  DECL_NAMELESS (arr) = 1;
2680 	  TREE_ADDRESSABLE (arr) = 1;
2681 	  TREE_STATIC (arr) = 1;
2682 	  vec<constructor_elt, va_gc> *v;
2683 	  vec_alloc (v, fd->ordered - fd->collapse + 1);
2684 	  int idx;
2685 
2686 	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2687 	    {
2688 	      tree c;
2689 	      if (idx == 0 && fd->collapse > 1)
2690 		c = fd->loop.n2;
2691 	      else
2692 		c = counts[idx + fd->collapse - 1];
2693 	      tree purpose = size_int (idx);
2694 	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2695 	      if (TREE_CODE (c) != INTEGER_CST)
2696 		TREE_STATIC (arr) = 0;
2697 	    }
2698 
2699 	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2700 	  if (!TREE_STATIC (arr))
2701 	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2702 						    void_type_node, arr),
2703 				      true, NULL_TREE, true, GSI_SAME_STMT);
2704 	  t1 = build_fold_addr_expr (arr);
2705 	  t2 = NULL_TREE;
2706 	}
2707       else
2708 	{
2709 	  t2 = fold_convert (fd->iter_type, fd->loop.step);
2710 	  t1 = fd->loop.n2;
2711 	  t0 = fd->loop.n1;
2712 	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
2713 	    {
2714 	      tree innerc
2715 		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2716 				   OMP_CLAUSE__LOOPTEMP_);
2717 	      gcc_assert (innerc);
2718 	      t0 = OMP_CLAUSE_DECL (innerc);
2719 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2720 					OMP_CLAUSE__LOOPTEMP_);
2721 	      gcc_assert (innerc);
2722 	      t1 = OMP_CLAUSE_DECL (innerc);
2723 	    }
2724 	  if (POINTER_TYPE_P (TREE_TYPE (t0))
2725 	      && TYPE_PRECISION (TREE_TYPE (t0))
2726 		 != TYPE_PRECISION (fd->iter_type))
2727 	    {
2728 	      /* Avoid casting pointers to integer of a different size.  */
2729 	      tree itype = signed_type_for (type);
2730 	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2731 	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2732 	    }
2733 	  else
2734 	    {
2735 	      t1 = fold_convert (fd->iter_type, t1);
2736 	      t0 = fold_convert (fd->iter_type, t0);
2737 	    }
2738 	  if (bias)
2739 	    {
2740 	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2741 	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2742 	    }
2743 	}
2744       if (fd->iter_type == long_integer_type_node || fd->ordered)
2745 	{
2746 	  if (fd->chunk_size)
2747 	    {
2748 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2749 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2750 	      if (fd->ordered)
2751 		t = build_call_expr (builtin_decl_explicit (start_fn),
2752 				     5, t0, t1, t, t3, t4);
2753 	      else
2754 		t = build_call_expr (builtin_decl_explicit (start_fn),
2755 				     6, t0, t1, t2, t, t3, t4);
2756 	    }
2757 	  else if (fd->ordered)
2758 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2759 				 4, t0, t1, t3, t4);
2760 	  else
2761 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2762 				 5, t0, t1, t2, t3, t4);
2763 	}
2764       else
2765 	{
2766 	  tree t5;
2767 	  tree c_bool_type;
2768 	  tree bfn_decl;
2769 
2770 	  /* The GOMP_loop_ull_*start functions have additional boolean
2771 	     argument, true for < loops and false for > loops.
2772 	     In Fortran, the C bool type can be different from
2773 	     boolean_type_node.  */
2774 	  bfn_decl = builtin_decl_explicit (start_fn);
2775 	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2776 	  t5 = build_int_cst (c_bool_type,
2777 			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
2778 	  if (fd->chunk_size)
2779 	    {
2780 	      tree bfn_decl = builtin_decl_explicit (start_fn);
2781 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2782 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2783 	      t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2784 	    }
2785 	  else
2786 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2787 				 6, t5, t0, t1, t2, t3, t4);
2788 	}
2789     }
2790   if (TREE_TYPE (t) != boolean_type_node)
2791     t = fold_build2 (NE_EXPR, boolean_type_node,
2792 		     t, build_int_cst (TREE_TYPE (t), 0));
2793   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2794 				true, GSI_SAME_STMT);
2795   if (arr && !TREE_STATIC (arr))
2796     {
2797       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2798       TREE_THIS_VOLATILE (clobber) = 1;
2799       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2800 			 GSI_SAME_STMT);
2801     }
2802   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2803 
2804   /* Remove the GIMPLE_OMP_FOR statement.  */
2805   gsi_remove (&gsi, true);
2806 
2807   if (gsi_end_p (gsif))
2808     gsif = gsi_after_labels (gsi_bb (gsif));
2809   gsi_next (&gsif);
2810 
2811   /* Iteration setup for sequential loop goes in L0_BB.  */
2812   tree startvar = fd->loop.v;
2813   tree endvar = NULL_TREE;
2814 
2815   if (gimple_omp_for_combined_p (fd->for_stmt))
2816     {
2817       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2818 		  && gimple_omp_for_kind (inner_stmt)
2819 		     == GF_OMP_FOR_KIND_SIMD);
2820       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2821 				     OMP_CLAUSE__LOOPTEMP_);
2822       gcc_assert (innerc);
2823       startvar = OMP_CLAUSE_DECL (innerc);
2824       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2825 				OMP_CLAUSE__LOOPTEMP_);
2826       gcc_assert (innerc);
2827       endvar = OMP_CLAUSE_DECL (innerc);
2828     }
2829 
2830   gsi = gsi_start_bb (l0_bb);
2831   t = istart0;
2832   if (fd->ordered && fd->collapse == 1)
2833     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2834 		     fold_convert (fd->iter_type, fd->loop.step));
2835   else if (bias)
2836     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2837   if (fd->ordered && fd->collapse == 1)
2838     {
2839       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2840 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2841 			 fd->loop.n1, fold_convert (sizetype, t));
2842       else
2843 	{
2844 	  t = fold_convert (TREE_TYPE (startvar), t);
2845 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2846 			   fd->loop.n1, t);
2847 	}
2848     }
2849   else
2850     {
2851       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2852 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2853       t = fold_convert (TREE_TYPE (startvar), t);
2854     }
2855   t = force_gimple_operand_gsi (&gsi, t,
2856 				DECL_P (startvar)
2857 				&& TREE_ADDRESSABLE (startvar),
2858 				NULL_TREE, false, GSI_CONTINUE_LINKING);
2859   assign_stmt = gimple_build_assign (startvar, t);
2860   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2861 
2862   t = iend0;
2863   if (fd->ordered && fd->collapse == 1)
2864     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2865 		     fold_convert (fd->iter_type, fd->loop.step));
2866   else if (bias)
2867     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2868   if (fd->ordered && fd->collapse == 1)
2869     {
2870       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2871 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2872 			 fd->loop.n1, fold_convert (sizetype, t));
2873       else
2874 	{
2875 	  t = fold_convert (TREE_TYPE (startvar), t);
2876 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2877 			   fd->loop.n1, t);
2878 	}
2879     }
2880   else
2881     {
2882       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2883 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2884       t = fold_convert (TREE_TYPE (startvar), t);
2885     }
2886   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2887 				   false, GSI_CONTINUE_LINKING);
2888   if (endvar)
2889     {
2890       assign_stmt = gimple_build_assign (endvar, iend);
2891       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2892       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2893 	assign_stmt = gimple_build_assign (fd->loop.v, iend);
2894       else
2895 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2896       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2897     }
2898   /* Handle linear clause adjustments.  */
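  /* Editor's note: for a clause such as linear(x:STEP_X), the code below
     assigns, at the start of each chunk, x = x_entry + I * STEP_X, where
     x_entry is the value captured before the loop and I is the logical
     iteration number (startvar - N1) / STEP of the workshared loop.  */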
2899   tree itercnt = NULL_TREE;
2900   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2901     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2902 	 c; c = OMP_CLAUSE_CHAIN (c))
2903       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2904 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2905 	{
2906 	  tree d = OMP_CLAUSE_DECL (c);
2907 	  bool is_ref = omp_is_reference (d);
2908 	  tree t = d, a, dest;
2909 	  if (is_ref)
2910 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2911 	  tree type = TREE_TYPE (t);
2912 	  if (POINTER_TYPE_P (type))
2913 	    type = sizetype;
2914 	  dest = unshare_expr (t);
2915 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
2916 	  expand_omp_build_assign (&gsif, v, t);
2917 	  if (itercnt == NULL_TREE)
2918 	    {
2919 	      itercnt = startvar;
2920 	      tree n1 = fd->loop.n1;
2921 	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2922 		{
2923 		  itercnt
2924 		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2925 				    itercnt);
2926 		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
2927 		}
2928 	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2929 				     itercnt, n1);
2930 	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2931 				     itercnt, fd->loop.step);
2932 	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2933 						  NULL_TREE, false,
2934 						  GSI_CONTINUE_LINKING);
2935 	    }
2936 	  a = fold_build2 (MULT_EXPR, type,
2937 			   fold_convert (type, itercnt),
2938 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2939 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2940 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2941 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2942 					false, GSI_CONTINUE_LINKING);
2943 	  assign_stmt = gimple_build_assign (dest, t);
2944 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2945 	}
2946   if (fd->collapse > 1)
2947     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2948 
2949   if (fd->ordered)
2950     {
2951       /* Until now, the counts array contained the number of iterations
2952 	 (or a variable holding it) for the ith loop.  From now on, we need
2953 	 those counts only for the collapsed loops, and only for the 2nd
2954 	 through the last collapsed one.  Move them one element earlier;
2955 	 we'll use counts[fd->collapse - 1] for the first source/sink
2956 	 iteration counter and so on, and counts[fd->ordered]
2957 	 as the array holding the current counter values for
2958 	 depend(source).  */
2959       if (fd->collapse > 1)
2960 	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2961       if (broken_loop)
2962 	{
2963 	  int i;
2964 	  for (i = fd->collapse; i < fd->ordered; i++)
2965 	    {
2966 	      tree type = TREE_TYPE (fd->loops[i].v);
2967 	      tree this_cond
2968 		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2969 			       fold_convert (type, fd->loops[i].n1),
2970 			       fold_convert (type, fd->loops[i].n2));
2971 	      if (!integer_onep (this_cond))
2972 		break;
2973 	    }
2974 	  if (i < fd->ordered)
2975 	    {
2976 	      cont_bb
2977 		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2978 	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
2979 	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2980 	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2981 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2982 	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2983 	      make_edge (cont_bb, l1_bb, 0);
2984 	      l2_bb = create_empty_bb (cont_bb);
2985 	      broken_loop = false;
2986 	    }
2987 	}
2988       expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2989       cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2990 					      ordered_lastprivate);
2991       if (counts[fd->collapse - 1])
2992 	{
2993 	  gcc_assert (fd->collapse == 1);
2994 	  gsi = gsi_last_bb (l0_bb);
2995 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2996 				   istart0, true);
2997 	  gsi = gsi_last_bb (cont_bb);
2998 	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2999 			   build_int_cst (fd->iter_type, 1));
3000 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3001 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3002 			      size_zero_node, NULL_TREE, NULL_TREE);
3003 	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3004 	  t = counts[fd->collapse - 1];
3005 	}
3006       else if (fd->collapse > 1)
3007 	t = fd->loop.v;
3008       else
3009 	{
3010 	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3011 			   fd->loops[0].v, fd->loops[0].n1);
3012 	  t = fold_convert (fd->iter_type, t);
3013 	}
3014       gsi = gsi_last_bb (l0_bb);
3015       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3016 			  size_zero_node, NULL_TREE, NULL_TREE);
3017       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3018 				    false, GSI_CONTINUE_LINKING);
3019       expand_omp_build_assign (&gsi, aref, t, true);
3020     }
3021 
3022   if (!broken_loop)
3023     {
3024       /* Code to control the increment and predicate for the sequential
3025 	 loop goes in the CONT_BB.  */
3026       gsi = gsi_last_nondebug_bb (cont_bb);
3027       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3028       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3029       vmain = gimple_omp_continue_control_use (cont_stmt);
3030       vback = gimple_omp_continue_control_def (cont_stmt);
3031 
3032       if (!gimple_omp_for_combined_p (fd->for_stmt))
3033 	{
3034 	  if (POINTER_TYPE_P (type))
3035 	    t = fold_build_pointer_plus (vmain, fd->loop.step);
3036 	  else
3037 	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3038 	  t = force_gimple_operand_gsi (&gsi, t,
3039 					DECL_P (vback)
3040 					&& TREE_ADDRESSABLE (vback),
3041 					NULL_TREE, true, GSI_SAME_STMT);
3042 	  assign_stmt = gimple_build_assign (vback, t);
3043 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3044 
3045 	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3046 	    {
3047 	      tree tem;
3048 	      if (fd->collapse > 1)
3049 		tem = fd->loop.v;
3050 	      else
3051 		{
3052 		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3053 				     fd->loops[0].v, fd->loops[0].n1);
3054 		  tem = fold_convert (fd->iter_type, tem);
3055 		}
3056 	      tree aref = build4 (ARRAY_REF, fd->iter_type,
3057 				  counts[fd->ordered], size_zero_node,
3058 				  NULL_TREE, NULL_TREE);
3059 	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3060 					      true, GSI_SAME_STMT);
3061 	      expand_omp_build_assign (&gsi, aref, tem);
3062 	    }
3063 
3064 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3065 		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3066 		      iend);
3067 	  gcond *cond_stmt = gimple_build_cond_empty (t);
3068 	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3069 	}
3070 
3071       /* Remove GIMPLE_OMP_CONTINUE.  */
3072       gsi_remove (&gsi, true);
3073 
3074       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3075 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3076 
3077       /* Emit code to get the next parallel iteration in L2_BB.  */
3078       gsi = gsi_start_bb (l2_bb);
3079 
3080       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3081 			   build_fold_addr_expr (istart0),
3082 			   build_fold_addr_expr (iend0));
3083       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3084 				    false, GSI_CONTINUE_LINKING);
3085       if (TREE_TYPE (t) != boolean_type_node)
3086 	t = fold_build2 (NE_EXPR, boolean_type_node,
3087 			 t, build_int_cst (TREE_TYPE (t), 0));
3088       gcond *cond_stmt = gimple_build_cond_empty (t);
3089       gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3090     }
3091 
3092   /* Add the loop cleanup function.  */
3093   gsi = gsi_last_nondebug_bb (exit_bb);
3094   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3095     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3096   else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3097     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3098   else
3099     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3100   gcall *call_stmt = gimple_build_call (t, 0);
3101   if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3102     gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3103   gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3104   if (fd->ordered)
3105     {
3106       tree arr = counts[fd->ordered];
3107       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3108       TREE_THIS_VOLATILE (clobber) = 1;
3109       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3110 			GSI_SAME_STMT);
3111     }
3112   gsi_remove (&gsi, true);
3113 
3114   /* Connect the new blocks.  */
3115   find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3116   find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3117 
3118   if (!broken_loop)
3119     {
3120       gimple_seq phis;
3121 
3122       e = find_edge (cont_bb, l3_bb);
3123       ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3124 
3125       phis = phi_nodes (l3_bb);
3126       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3127 	{
3128 	  gimple *phi = gsi_stmt (gsi);
3129 	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3130 		   PHI_ARG_DEF_FROM_EDGE (phi, e));
3131 	}
3132       remove_edge (e);
3133 
3134       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3135       e = find_edge (cont_bb, l1_bb);
3136       if (e == NULL)
3137 	{
3138 	  e = BRANCH_EDGE (cont_bb);
3139 	  gcc_assert (single_succ (e->dest) == l1_bb);
3140 	}
3141       if (gimple_omp_for_combined_p (fd->for_stmt))
3142 	{
3143 	  remove_edge (e);
3144 	  e = NULL;
3145 	}
3146       else if (fd->collapse > 1)
3147 	{
3148 	  remove_edge (e);
3149 	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3150 	}
3151       else
3152 	e->flags = EDGE_TRUE_VALUE;
3153       if (e)
3154 	{
3155 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3156 	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3157 	}
3158       else
3159 	{
3160 	  e = find_edge (cont_bb, l2_bb);
3161 	  e->flags = EDGE_FALLTHRU;
3162 	}
3163       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3164 
3165       if (gimple_in_ssa_p (cfun))
3166 	{
3167 	  /* Add phis to the outer loop that connect to the phis in the inner,
3168 	     original loop, and move the loop entry value of the inner phi to
3169 	     the loop entry value of the outer phi.  */
3170 	  gphi_iterator psi;
3171 	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3172 	    {
3173 	      source_location locus;
3174 	      gphi *nphi;
3175 	      gphi *exit_phi = psi.phi ();
3176 
3177 	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
3178 		continue;
3179 
3180 	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3181 	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3182 
3183 	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3184 	      edge latch_to_l1 = find_edge (latch, l1_bb);
3185 	      gphi *inner_phi
3186 		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3187 
3188 	      tree t = gimple_phi_result (exit_phi);
3189 	      tree new_res = copy_ssa_name (t, NULL);
3190 	      nphi = create_phi_node (new_res, l0_bb);
3191 
3192 	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3193 	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3194 	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3195 	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3196 	      add_phi_arg (nphi, t, entry_to_l0, locus);
3197 
3198 	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3199 	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3200 
3201 	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3202 	    }
3203 	}
3204 
3205       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3206 			       recompute_dominator (CDI_DOMINATORS, l2_bb));
3207       set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3208 			       recompute_dominator (CDI_DOMINATORS, l3_bb));
3209       set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3210 			       recompute_dominator (CDI_DOMINATORS, l0_bb));
3211       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3212 			       recompute_dominator (CDI_DOMINATORS, l1_bb));
3213 
3214       /* We enter expand_omp_for_generic with a loop.  This original loop may
3215 	 have its own loop struct, or it may be part of an outer loop struct
3216 	 (which may be the fake loop).  */
3217       struct loop *outer_loop = entry_bb->loop_father;
3218       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3219 
3220       add_bb_to_loop (l2_bb, outer_loop);
3221 
3222       /* We've added a new loop around the original loop.  Allocate the
3223 	 corresponding loop struct.  */
3224       struct loop *new_loop = alloc_loop ();
3225       new_loop->header = l0_bb;
3226       new_loop->latch = l2_bb;
3227       add_loop (new_loop, outer_loop);
3228 
3229       /* Allocate a loop structure for the original loop unless we already
3230 	 had one.  */
3231       if (!orig_loop_has_loop_struct
3232 	  && !gimple_omp_for_combined_p (fd->for_stmt))
3233 	{
3234 	  struct loop *orig_loop = alloc_loop ();
3235 	  orig_loop->header = l1_bb;
3236 	  /* The loop may have multiple latches.  */
3237 	  add_loop (orig_loop, new_loop);
3238 	}
3239     }
3240 }
3241 
3242 /* A subroutine of expand_omp_for.  Generate code for a parallel
3243    loop with static schedule and no specified chunk size.  Given
3244    parameters:
3245 
3246 	for (V = N1; V cond N2; V += STEP) BODY;
3247 
3248    where COND is "<" or ">", we generate pseudocode
3249 
3250 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3251 	if (cond is <)
3252 	  adj = STEP - 1;
3253 	else
3254 	  adj = STEP + 1;
3255 	if ((__typeof (V)) -1 > 0 && cond is >)
3256 	  n = -(adj + N2 - N1) / -STEP;
3257 	else
3258 	  n = (adj + N2 - N1) / STEP;
3259 	q = n / nthreads;
3260 	tt = n % nthreads;
3261 	if (threadid < tt) goto L3; else goto L4;
3262     L3:
3263 	tt = 0;
3264 	q = q + 1;
3265     L4:
3266 	s0 = q * threadid + tt;
3267 	e0 = s0 + q;
3268 	V = s0 * STEP + N1;
3269 	if (s0 >= e0) goto L2; else goto L0;
3270     L0:
3271 	e = e0 * STEP + N1;
3272     L1:
3273 	BODY;
3274 	V += STEP;
3275 	if (V cond e) goto L1;
3276     L2:
3277 */
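
/* A minimal standalone sketch (illustrative only, not used anywhere in this
   file) of the iteration-space split described by the pseudocode above:
   given the total logical iteration count N, the team size NTHREADS and the
   calling thread's THREADID, it stores the half-open range [*S0, *E0) that
   thread should execute.  The name example_static_nochunk_range is purely
   hypothetical.  */

static inline void
example_static_nochunk_range (unsigned long n, unsigned long nthreads,
			      unsigned long threadid,
			      unsigned long *s0, unsigned long *e0)
{
  unsigned long q = n / nthreads;	/* Base share per thread.  */
  unsigned long tt = n % nthreads;	/* Leftover iterations.  */
  if (threadid < tt)
    {
      /* The first TT threads each take one extra iteration (L3 above).  */
      tt = 0;
      q = q + 1;
    }
  *s0 = q * threadid + tt;
  *e0 = *s0 + q;
  /* E.g. with n == 10 and nthreads == 4 the threads get the ranges
     [0,3), [3,6), [6,8) and [8,10).  */
}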
3278 
3279 static void
3280 expand_omp_for_static_nochunk (struct omp_region *region,
3281 			       struct omp_for_data *fd,
3282 			       gimple *inner_stmt)
3283 {
3284   tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3285   tree type, itype, vmain, vback;
3286   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3287   basic_block body_bb, cont_bb, collapse_bb = NULL;
3288   basic_block fin_bb;
3289   gimple_stmt_iterator gsi;
3290   edge ep;
3291   bool broken_loop = region->cont == NULL;
3292   tree *counts = NULL;
3293   tree n1, n2, step;
3294 
3295   itype = type = TREE_TYPE (fd->loop.v);
3296   if (POINTER_TYPE_P (type))
3297     itype = signed_type_for (type);
3298 
3299   entry_bb = region->entry;
3300   cont_bb = region->cont;
3301   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3302   fin_bb = BRANCH_EDGE (entry_bb)->dest;
3303   gcc_assert (broken_loop
3304 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3305   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3306   body_bb = single_succ (seq_start_bb);
3307   if (!broken_loop)
3308     {
3309       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3310 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3311       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3312     }
3313   exit_bb = region->exit;
3314 
3315   /* Iteration space partitioning goes in ENTRY_BB.  */
3316   gsi = gsi_last_nondebug_bb (entry_bb);
3317   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3318 
3319   if (fd->collapse > 1)
3320     {
3321       int first_zero_iter = -1, dummy = -1;
3322       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3323 
3324       counts = XALLOCAVEC (tree, fd->collapse);
3325       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3326 				  fin_bb, first_zero_iter,
3327 				  dummy_bb, dummy, l2_dom_bb);
3328       t = NULL_TREE;
3329     }
3330   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3331     t = integer_one_node;
3332   else
3333     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3334 		     fold_convert (type, fd->loop.n1),
3335 		     fold_convert (type, fd->loop.n2));
3336   if (fd->collapse == 1
3337       && TYPE_UNSIGNED (type)
3338       && (t == NULL_TREE || !integer_onep (t)))
3339     {
3340       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3341       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3342 				     true, GSI_SAME_STMT);
3343       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3344       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3345 				     true, GSI_SAME_STMT);
3346       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3347 						 NULL_TREE, NULL_TREE);
3348       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3349       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3350 		     expand_omp_regimplify_p, NULL, NULL)
3351 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3352 			expand_omp_regimplify_p, NULL, NULL))
3353 	{
3354 	  gsi = gsi_for_stmt (cond_stmt);
3355 	  gimple_regimplify_operands (cond_stmt, &gsi);
3356 	}
3357       ep = split_block (entry_bb, cond_stmt);
3358       ep->flags = EDGE_TRUE_VALUE;
3359       entry_bb = ep->dest;
3360       ep->probability = profile_probability::very_likely ();
3361       ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3362       ep->probability = profile_probability::very_unlikely ();
3363       if (gimple_in_ssa_p (cfun))
3364 	{
3365 	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3366 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3367 	       !gsi_end_p (gpi); gsi_next (&gpi))
3368 	    {
3369 	      gphi *phi = gpi.phi ();
3370 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3371 			   ep, UNKNOWN_LOCATION);
3372 	    }
3373 	}
3374       gsi = gsi_last_bb (entry_bb);
3375     }
3376 
3377   switch (gimple_omp_for_kind (fd->for_stmt))
3378     {
3379     case GF_OMP_FOR_KIND_FOR:
3380       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3381       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3382       break;
3383     case GF_OMP_FOR_KIND_DISTRIBUTE:
3384       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3385       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3386       break;
3387     default:
3388       gcc_unreachable ();
3389     }
3390   nthreads = build_call_expr (nthreads, 0);
3391   nthreads = fold_convert (itype, nthreads);
3392   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3393 				       true, GSI_SAME_STMT);
3394   threadid = build_call_expr (threadid, 0);
3395   threadid = fold_convert (itype, threadid);
3396   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3397 				       true, GSI_SAME_STMT);
3398 
3399   n1 = fd->loop.n1;
3400   n2 = fd->loop.n2;
3401   step = fd->loop.step;
3402   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3403     {
3404       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3405 				     OMP_CLAUSE__LOOPTEMP_);
3406       gcc_assert (innerc);
3407       n1 = OMP_CLAUSE_DECL (innerc);
3408       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3409 				OMP_CLAUSE__LOOPTEMP_);
3410       gcc_assert (innerc);
3411       n2 = OMP_CLAUSE_DECL (innerc);
3412     }
3413   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3414 				 true, NULL_TREE, true, GSI_SAME_STMT);
3415   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3416 				 true, NULL_TREE, true, GSI_SAME_STMT);
3417   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3418 				   true, NULL_TREE, true, GSI_SAME_STMT);
3419 
3420   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3421   t = fold_build2 (PLUS_EXPR, itype, step, t);
3422   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3423   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3424   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3425     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3426 		     fold_build1 (NEGATE_EXPR, itype, t),
3427 		     fold_build1 (NEGATE_EXPR, itype, step));
3428   else
3429     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3430   t = fold_convert (itype, t);
3431   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3432 
3433   q = create_tmp_reg (itype, "q");
3434   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3435   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3436   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3437 
3438   tt = create_tmp_reg (itype, "tt");
3439   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3440   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3441   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3442 
3443   t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3444   gcond *cond_stmt = gimple_build_cond_empty (t);
3445   gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3446 
3447   second_bb = split_block (entry_bb, cond_stmt)->dest;
3448   gsi = gsi_last_nondebug_bb (second_bb);
3449   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3450 
3451   gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3452 		     GSI_SAME_STMT);
3453   gassign *assign_stmt
3454     = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3455   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3456 
3457   third_bb = split_block (second_bb, assign_stmt)->dest;
3458   gsi = gsi_last_nondebug_bb (third_bb);
3459   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3460 
3461   t = build2 (MULT_EXPR, itype, q, threadid);
3462   t = build2 (PLUS_EXPR, itype, t, tt);
3463   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3464 
3465   t = fold_build2 (PLUS_EXPR, itype, s0, q);
3466   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3467 
3468   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3469   gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3470 
3471   /* Remove the GIMPLE_OMP_FOR statement.  */
3472   gsi_remove (&gsi, true);
3473 
3474   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3475   gsi = gsi_start_bb (seq_start_bb);
3476 
3477   tree startvar = fd->loop.v;
3478   tree endvar = NULL_TREE;
3479 
3480   if (gimple_omp_for_combined_p (fd->for_stmt))
3481     {
3482       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3483 		     ? gimple_omp_parallel_clauses (inner_stmt)
3484 		     : gimple_omp_for_clauses (inner_stmt);
3485       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3486       gcc_assert (innerc);
3487       startvar = OMP_CLAUSE_DECL (innerc);
3488       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3489 				OMP_CLAUSE__LOOPTEMP_);
3490       gcc_assert (innerc);
3491       endvar = OMP_CLAUSE_DECL (innerc);
3492       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3493 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3494 	{
3495 	  int i;
3496 	  for (i = 1; i < fd->collapse; i++)
3497 	    {
3498 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3499 					OMP_CLAUSE__LOOPTEMP_);
3500 	      gcc_assert (innerc);
3501 	    }
3502 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3503 				    OMP_CLAUSE__LOOPTEMP_);
3504 	  if (innerc)
3505 	    {
3506 	      /* If needed (distribute parallel for with lastprivate),
3507 		 propagate down the total number of iterations.  */
3508 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3509 				     fd->loop.n2);
3510 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3511 					    GSI_CONTINUE_LINKING);
3512 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3513 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3514 	    }
3515 	}
3516     }
3517   t = fold_convert (itype, s0);
3518   t = fold_build2 (MULT_EXPR, itype, t, step);
3519   if (POINTER_TYPE_P (type))
3520     t = fold_build_pointer_plus (n1, t);
3521   else
3522     t = fold_build2 (PLUS_EXPR, type, t, n1);
3523   t = fold_convert (TREE_TYPE (startvar), t);
3524   t = force_gimple_operand_gsi (&gsi, t,
3525 				DECL_P (startvar)
3526 				&& TREE_ADDRESSABLE (startvar),
3527 				NULL_TREE, false, GSI_CONTINUE_LINKING);
3528   assign_stmt = gimple_build_assign (startvar, t);
3529   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3530 
3531   t = fold_convert (itype, e0);
3532   t = fold_build2 (MULT_EXPR, itype, t, step);
3533   if (POINTER_TYPE_P (type))
3534     t = fold_build_pointer_plus (n1, t);
3535   else
3536     t = fold_build2 (PLUS_EXPR, type, t, n1);
3537   t = fold_convert (TREE_TYPE (startvar), t);
3538   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3539 				false, GSI_CONTINUE_LINKING);
3540   if (endvar)
3541     {
3542       assign_stmt = gimple_build_assign (endvar, e);
3543       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3544       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3545 	assign_stmt = gimple_build_assign (fd->loop.v, e);
3546       else
3547 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3548       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3549     }
3550   /* Handle linear clause adjustments.  */
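  /* For each LINEAR clause without no-copyin, the privatized copy is set to
     the variable's value on entry to the construct plus the linear step
     multiplied by the number of logical iterations preceding this thread's
     first one: S0, or, for a combined construct, S0 biased by the offset
     between the inner and outer loop bounds computed below.  */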
3551   tree itercnt = NULL_TREE;
3552   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3553     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3554 	 c; c = OMP_CLAUSE_CHAIN (c))
3555       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3556 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3557 	{
3558 	  tree d = OMP_CLAUSE_DECL (c);
3559 	  bool is_ref = omp_is_reference (d);
3560 	  tree t = d, a, dest;
3561 	  if (is_ref)
3562 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3563 	  if (itercnt == NULL_TREE)
3564 	    {
3565 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
3566 		{
3567 		  itercnt = fold_build2 (MINUS_EXPR, itype,
3568 					 fold_convert (itype, n1),
3569 					 fold_convert (itype, fd->loop.n1));
3570 		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3571 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3572 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3573 						      NULL_TREE, false,
3574 						      GSI_CONTINUE_LINKING);
3575 		}
3576 	      else
3577 		itercnt = s0;
3578 	    }
3579 	  tree type = TREE_TYPE (t);
3580 	  if (POINTER_TYPE_P (type))
3581 	    type = sizetype;
3582 	  a = fold_build2 (MULT_EXPR, type,
3583 			   fold_convert (type, itercnt),
3584 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3585 	  dest = unshare_expr (t);
3586 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3587 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3588 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3589 					false, GSI_CONTINUE_LINKING);
3590 	  assign_stmt = gimple_build_assign (dest, t);
3591 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3592 	}
3593   if (fd->collapse > 1)
3594     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3595 
3596   if (!broken_loop)
3597     {
3598       /* The code controlling the sequential loop replaces the
3599 	 GIMPLE_OMP_CONTINUE.  */
3600       gsi = gsi_last_nondebug_bb (cont_bb);
3601       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3602       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3603       vmain = gimple_omp_continue_control_use (cont_stmt);
3604       vback = gimple_omp_continue_control_def (cont_stmt);
3605 
3606       if (!gimple_omp_for_combined_p (fd->for_stmt))
3607 	{
3608 	  if (POINTER_TYPE_P (type))
3609 	    t = fold_build_pointer_plus (vmain, step);
3610 	  else
3611 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
3612 	  t = force_gimple_operand_gsi (&gsi, t,
3613 					DECL_P (vback)
3614 					&& TREE_ADDRESSABLE (vback),
3615 					NULL_TREE, true, GSI_SAME_STMT);
3616 	  assign_stmt = gimple_build_assign (vback, t);
3617 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3618 
3619 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3620 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
3621 		      ? t : vback, e);
3622 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3623 	}
3624 
3625       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
3626       gsi_remove (&gsi, true);
3627 
3628       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3629 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3630     }
3631 
3632   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
3633   gsi = gsi_last_nondebug_bb (exit_bb);
3634   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3635     {
3636       t = gimple_omp_return_lhs (gsi_stmt (gsi));
3637       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3638     }
3639   gsi_remove (&gsi, true);
3640 
3641   /* Connect all the blocks.  */
3642   ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3643   ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3644   ep = find_edge (entry_bb, second_bb);
3645   ep->flags = EDGE_TRUE_VALUE;
3646   ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3647   find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3648   find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3649 
3650   if (!broken_loop)
3651     {
3652       ep = find_edge (cont_bb, body_bb);
3653       if (ep == NULL)
3654 	{
3655 	  ep = BRANCH_EDGE (cont_bb);
3656 	  gcc_assert (single_succ (ep->dest) == body_bb);
3657 	}
3658       if (gimple_omp_for_combined_p (fd->for_stmt))
3659 	{
3660 	  remove_edge (ep);
3661 	  ep = NULL;
3662 	}
3663       else if (fd->collapse > 1)
3664 	{
3665 	  remove_edge (ep);
3666 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3667 	}
3668       else
3669 	ep->flags = EDGE_TRUE_VALUE;
3670       find_edge (cont_bb, fin_bb)->flags
3671 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3672     }
3673 
3674   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3675   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3676   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3677 
3678   set_immediate_dominator (CDI_DOMINATORS, body_bb,
3679 			   recompute_dominator (CDI_DOMINATORS, body_bb));
3680   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3681 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
3682 
3683   struct loop *loop = body_bb->loop_father;
3684   if (loop != entry_bb->loop_father)
3685     {
3686       gcc_assert (broken_loop || loop->header == body_bb);
3687       gcc_assert (broken_loop
3688 		  || loop->latch == region->cont
3689 		  || single_pred (loop->latch) == region->cont);
3690       return;
3691     }
3692 
3693   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3694     {
3695       loop = alloc_loop ();
3696       loop->header = body_bb;
3697       if (collapse_bb == NULL)
3698 	loop->latch = cont_bb;
3699       add_loop (loop, body_bb->loop_father);
3700     }
3701 }
3702 
3703 /* Return phi in E->DEST with ARG on edge E.  */
3704 
3705 static gphi *
3706 find_phi_with_arg_on_edge (tree arg, edge e)
3707 {
3708   basic_block bb = e->dest;
3709 
3710   for (gphi_iterator gpi = gsi_start_phis (bb);
3711        !gsi_end_p (gpi);
3712        gsi_next (&gpi))
3713     {
3714       gphi *phi = gpi.phi ();
3715       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3716 	return phi;
3717     }
3718 
3719   return NULL;
3720 }
3721 
3722 /* A subroutine of expand_omp_for.  Generate code for a parallel
3723    loop with static schedule and a specified chunk size.  Given
3724    parameters:
3725 
3726 	for (V = N1; V cond N2; V += STEP) BODY;
3727 
3728    where COND is "<" or ">", we generate pseudocode
3729 
3730 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3731 	if (cond is <)
3732 	  adj = STEP - 1;
3733 	else
3734 	  adj = STEP + 1;
3735 	if ((__typeof (V)) -1 > 0 && cond is >)
3736 	  n = -(adj + N2 - N1) / -STEP;
3737 	else
3738 	  n = (adj + N2 - N1) / STEP;
3739 	trip = 0;
3740 	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
3741 					      here so that V is defined
3742 					      if the loop is not entered
3743     L0:
3744 	s0 = (trip * nthreads + threadid) * CHUNK;
3745 	e0 = min (s0 + CHUNK, n);
3746 	if (s0 < n) goto L1; else goto L4;
3747     L1:
3748 	V = s0 * STEP + N1;
3749 	e = e0 * STEP + N1;
3750     L2:
3751 	BODY;
3752 	V += STEP;
3753 	if (V cond e) goto L2; else goto L3;
3754     L3:
3755 	trip += 1;
3756 	goto L0;
3757     L4:
3758 */
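
/* A minimal standalone sketch (illustrative only, not used anywhere in this
   file) of one trip of the chunked partitioning above: for trip number TRIP
   it stores the half-open range [*S0, *E0) assigned to THREADID, clamped to
   the total iteration count N, and returns nonzero while that trip still has
   iterations.  The name example_static_chunk_range is purely hypothetical.  */

static inline int
example_static_chunk_range (unsigned long n, unsigned long nthreads,
			    unsigned long threadid, unsigned long chunk,
			    unsigned long trip,
			    unsigned long *s0, unsigned long *e0)
{
  *s0 = (trip * nthreads + threadid) * chunk;
  *e0 = *s0 + chunk;
  if (*e0 > n)
    *e0 = n;
  /* The generated code loops back to L0 with TRIP incremented as long as
     s0 < n; e.g. n == 10, nthreads == 2, chunk == 3 gives thread 0 the
     ranges [0,3) and [6,9) and thread 1 the ranges [3,6) and [9,10).  */
  return *s0 < n;
}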
3759 
3760 static void
3761 expand_omp_for_static_chunk (struct omp_region *region,
3762 			     struct omp_for_data *fd, gimple *inner_stmt)
3763 {
3764   tree n, s0, e0, e, t;
3765   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3766   tree type, itype, vmain, vback, vextra;
3767   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3768   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3769   gimple_stmt_iterator gsi;
3770   edge se;
3771   bool broken_loop = region->cont == NULL;
3772   tree *counts = NULL;
3773   tree n1, n2, step;
3774 
3775   itype = type = TREE_TYPE (fd->loop.v);
3776   if (POINTER_TYPE_P (type))
3777     itype = signed_type_for (type);
3778 
3779   entry_bb = region->entry;
3780   se = split_block (entry_bb, last_stmt (entry_bb));
3781   entry_bb = se->src;
3782   iter_part_bb = se->dest;
3783   cont_bb = region->cont;
3784   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3785   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3786   gcc_assert (broken_loop
3787 	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3788   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3789   body_bb = single_succ (seq_start_bb);
3790   if (!broken_loop)
3791     {
3792       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3793 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3794       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3795       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3796     }
3797   exit_bb = region->exit;
3798 
3799   /* Trip and adjustment setup goes in ENTRY_BB.  */
3800   gsi = gsi_last_nondebug_bb (entry_bb);
3801   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3802 
3803   if (fd->collapse > 1)
3804     {
3805       int first_zero_iter = -1, dummy = -1;
3806       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3807 
3808       counts = XALLOCAVEC (tree, fd->collapse);
3809       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3810 				  fin_bb, first_zero_iter,
3811 				  dummy_bb, dummy, l2_dom_bb);
3812       t = NULL_TREE;
3813     }
3814   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3815     t = integer_one_node;
3816   else
3817     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3818 		     fold_convert (type, fd->loop.n1),
3819 		     fold_convert (type, fd->loop.n2));
3820   if (fd->collapse == 1
3821       && TYPE_UNSIGNED (type)
3822       && (t == NULL_TREE || !integer_onep (t)))
3823     {
3824       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3825       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3826 				     true, GSI_SAME_STMT);
3827       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3828       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3829 				     true, GSI_SAME_STMT);
3830       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3831 						 NULL_TREE, NULL_TREE);
3832       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3833       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3834 		     expand_omp_regimplify_p, NULL, NULL)
3835 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3836 			expand_omp_regimplify_p, NULL, NULL))
3837 	{
3838 	  gsi = gsi_for_stmt (cond_stmt);
3839 	  gimple_regimplify_operands (cond_stmt, &gsi);
3840 	}
3841       se = split_block (entry_bb, cond_stmt);
3842       se->flags = EDGE_TRUE_VALUE;
3843       entry_bb = se->dest;
3844       se->probability = profile_probability::very_likely ();
3845       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3846       se->probability = profile_probability::very_unlikely ();
3847       if (gimple_in_ssa_p (cfun))
3848 	{
3849 	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3850 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3851 	       !gsi_end_p (gpi); gsi_next (&gpi))
3852 	    {
3853 	      gphi *phi = gpi.phi ();
3854 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3855 			   se, UNKNOWN_LOCATION);
3856 	    }
3857 	}
3858       gsi = gsi_last_bb (entry_bb);
3859     }
3860 
3861   switch (gimple_omp_for_kind (fd->for_stmt))
3862     {
3863     case GF_OMP_FOR_KIND_FOR:
3864       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3865       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3866       break;
3867     case GF_OMP_FOR_KIND_DISTRIBUTE:
3868       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3869       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3870       break;
3871     default:
3872       gcc_unreachable ();
3873     }
3874   nthreads = build_call_expr (nthreads, 0);
3875   nthreads = fold_convert (itype, nthreads);
3876   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3877 				       true, GSI_SAME_STMT);
3878   threadid = build_call_expr (threadid, 0);
3879   threadid = fold_convert (itype, threadid);
3880   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3881 				       true, GSI_SAME_STMT);
3882 
3883   n1 = fd->loop.n1;
3884   n2 = fd->loop.n2;
3885   step = fd->loop.step;
3886   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3887     {
3888       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3889 				     OMP_CLAUSE__LOOPTEMP_);
3890       gcc_assert (innerc);
3891       n1 = OMP_CLAUSE_DECL (innerc);
3892       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3893 				OMP_CLAUSE__LOOPTEMP_);
3894       gcc_assert (innerc);
3895       n2 = OMP_CLAUSE_DECL (innerc);
3896     }
3897   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3898 				 true, NULL_TREE, true, GSI_SAME_STMT);
3899   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3900 				 true, NULL_TREE, true, GSI_SAME_STMT);
3901   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3902 				   true, NULL_TREE, true, GSI_SAME_STMT);
3903   tree chunk_size = fold_convert (itype, fd->chunk_size);
3904   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3905   chunk_size
3906     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3907 				GSI_SAME_STMT);
3908 
3909   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3910   t = fold_build2 (PLUS_EXPR, itype, step, t);
3911   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3912   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3913   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3914     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3915 		     fold_build1 (NEGATE_EXPR, itype, t),
3916 		     fold_build1 (NEGATE_EXPR, itype, step));
3917   else
3918     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3919   t = fold_convert (itype, t);
3920   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921 				true, GSI_SAME_STMT);
3922 
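  /* TRIP counts how many chunks this thread has already taken.  In SSA form
     it needs three names: TRIP_INIT is the zero stored in the entry block,
     TRIP_MAIN the value used inside the trip loop, and TRIP_BACK the
     incremented value on the back edge; the phi node merging TRIP_INIT and
     TRIP_BACK into TRIP_MAIN is built near the end of this function, once
     the trip-loop edges exist.  */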
3923   trip_var = create_tmp_reg (itype, ".trip");
3924   if (gimple_in_ssa_p (cfun))
3925     {
3926       trip_init = make_ssa_name (trip_var);
3927       trip_main = make_ssa_name (trip_var);
3928       trip_back = make_ssa_name (trip_var);
3929     }
3930   else
3931     {
3932       trip_init = trip_var;
3933       trip_main = trip_var;
3934       trip_back = trip_var;
3935     }
3936 
3937   gassign *assign_stmt
3938     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3939   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3940 
3941   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3942   t = fold_build2 (MULT_EXPR, itype, t, step);
3943   if (POINTER_TYPE_P (type))
3944     t = fold_build_pointer_plus (n1, t);
3945   else
3946     t = fold_build2 (PLUS_EXPR, type, t, n1);
3947   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3948 				     true, GSI_SAME_STMT);
3949 
3950   /* Remove the GIMPLE_OMP_FOR.  */
3951   gsi_remove (&gsi, true);
3952 
3953   gimple_stmt_iterator gsif = gsi;
3954 
3955   /* Iteration space partitioning goes in ITER_PART_BB.  */
3956   gsi = gsi_last_bb (iter_part_bb);
3957 
3958   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3959   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3960   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3961   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3962 				 false, GSI_CONTINUE_LINKING);
3963 
3964   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3965   t = fold_build2 (MIN_EXPR, itype, t, n);
3966   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3967 				 false, GSI_CONTINUE_LINKING);
3968 
3969   t = build2 (LT_EXPR, boolean_type_node, s0, n);
3970   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3971 
3972   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3973   gsi = gsi_start_bb (seq_start_bb);
3974 
3975   tree startvar = fd->loop.v;
3976   tree endvar = NULL_TREE;
3977 
3978   if (gimple_omp_for_combined_p (fd->for_stmt))
3979     {
3980       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3981 		     ? gimple_omp_parallel_clauses (inner_stmt)
3982 		     : gimple_omp_for_clauses (inner_stmt);
3983       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3984       gcc_assert (innerc);
3985       startvar = OMP_CLAUSE_DECL (innerc);
3986       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3987 				OMP_CLAUSE__LOOPTEMP_);
3988       gcc_assert (innerc);
3989       endvar = OMP_CLAUSE_DECL (innerc);
3990       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3991 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3992 	{
3993 	  int i;
3994 	  for (i = 1; i < fd->collapse; i++)
3995 	    {
3996 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3997 					OMP_CLAUSE__LOOPTEMP_);
3998 	      gcc_assert (innerc);
3999 	    }
4000 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4001 				    OMP_CLAUSE__LOOPTEMP_);
4002 	  if (innerc)
4003 	    {
4004 	      /* If needed (distribute parallel for with lastprivate),
4005 		 propagate down the total number of iterations.  */
4006 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4007 				     fd->loop.n2);
4008 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4009 					    GSI_CONTINUE_LINKING);
4010 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4011 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4012 	    }
4013 	}
4014     }
4015 
4016   t = fold_convert (itype, s0);
4017   t = fold_build2 (MULT_EXPR, itype, t, step);
4018   if (POINTER_TYPE_P (type))
4019     t = fold_build_pointer_plus (n1, t);
4020   else
4021     t = fold_build2 (PLUS_EXPR, type, t, n1);
4022   t = fold_convert (TREE_TYPE (startvar), t);
4023   t = force_gimple_operand_gsi (&gsi, t,
4024 				DECL_P (startvar)
4025 				&& TREE_ADDRESSABLE (startvar),
4026 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4027   assign_stmt = gimple_build_assign (startvar, t);
4028   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4029 
4030   t = fold_convert (itype, e0);
4031   t = fold_build2 (MULT_EXPR, itype, t, step);
4032   if (POINTER_TYPE_P (type))
4033     t = fold_build_pointer_plus (n1, t);
4034   else
4035     t = fold_build2 (PLUS_EXPR, type, t, n1);
4036   t = fold_convert (TREE_TYPE (startvar), t);
4037   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4038 				false, GSI_CONTINUE_LINKING);
4039   if (endvar)
4040     {
4041       assign_stmt = gimple_build_assign (endvar, e);
4042       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4043       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4044 	assign_stmt = gimple_build_assign (fd->loop.v, e);
4045       else
4046 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4047       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4048     }
4049   /* Handle linear clause adjustments.  */
4050   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4051   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4052     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4053 	 c; c = OMP_CLAUSE_CHAIN (c))
4054       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4055 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4056 	{
4057 	  tree d = OMP_CLAUSE_DECL (c);
4058 	  bool is_ref = omp_is_reference (d);
4059 	  tree t = d, a, dest;
4060 	  if (is_ref)
4061 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4062 	  tree type = TREE_TYPE (t);
4063 	  if (POINTER_TYPE_P (type))
4064 	    type = sizetype;
4065 	  dest = unshare_expr (t);
4066 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4067 	  expand_omp_build_assign (&gsif, v, t);
4068 	  if (itercnt == NULL_TREE)
4069 	    {
4070 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071 		{
4072 		  itercntbias
4073 		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4074 				   fold_convert (itype, fd->loop.n1));
4075 		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4076 					     itercntbias, step);
4077 		  itercntbias
4078 		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
4079 						NULL_TREE, true,
4080 						GSI_SAME_STMT);
4081 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4082 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4083 						      NULL_TREE, false,
4084 						      GSI_CONTINUE_LINKING);
4085 		}
4086 	      else
4087 		itercnt = s0;
4088 	    }
4089 	  a = fold_build2 (MULT_EXPR, type,
4090 			   fold_convert (type, itercnt),
4091 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4092 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4093 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4094 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4095 					false, GSI_CONTINUE_LINKING);
4096 	  assign_stmt = gimple_build_assign (dest, t);
4097 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4098 	}
4099   if (fd->collapse > 1)
4100     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4101 
4102   if (!broken_loop)
4103     {
4104       /* The code controlling the sequential loop goes in CONT_BB,
4105 	 replacing the GIMPLE_OMP_CONTINUE.  */
4106       gsi = gsi_last_nondebug_bb (cont_bb);
4107       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4108       vmain = gimple_omp_continue_control_use (cont_stmt);
4109       vback = gimple_omp_continue_control_def (cont_stmt);
4110 
4111       if (!gimple_omp_for_combined_p (fd->for_stmt))
4112 	{
4113 	  if (POINTER_TYPE_P (type))
4114 	    t = fold_build_pointer_plus (vmain, step);
4115 	  else
4116 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4117 	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4118 	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4119 					  true, GSI_SAME_STMT);
4120 	  assign_stmt = gimple_build_assign (vback, t);
4121 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4122 
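	  /* With a chunk size of one the body executes exactly once per
	     trip, so the sequential loop's exit test can simply be a
	     constant-false condition.  */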
4123 	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4124 	    t = build2 (EQ_EXPR, boolean_type_node,
4125 			build_int_cst (itype, 0),
4126 			build_int_cst (itype, 1));
4127 	  else
4128 	    t = build2 (fd->loop.cond_code, boolean_type_node,
4129 			DECL_P (vback) && TREE_ADDRESSABLE (vback)
4130 			? t : vback, e);
4131 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4132 	}
4133 
4134       /* Remove GIMPLE_OMP_CONTINUE.  */
4135       gsi_remove (&gsi, true);
4136 
4137       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4138 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4139 
4140       /* Trip update code goes into TRIP_UPDATE_BB.  */
4141       gsi = gsi_start_bb (trip_update_bb);
4142 
4143       t = build_int_cst (itype, 1);
4144       t = build2 (PLUS_EXPR, itype, trip_main, t);
4145       assign_stmt = gimple_build_assign (trip_back, t);
4146       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4147     }
4148 
4149   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4150   gsi = gsi_last_nondebug_bb (exit_bb);
4151   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4152     {
4153       t = gimple_omp_return_lhs (gsi_stmt (gsi));
4154       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4155     }
4156   gsi_remove (&gsi, true);
4157 
4158   /* Connect the new blocks.  */
4159   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4160   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4161 
4162   if (!broken_loop)
4163     {
4164       se = find_edge (cont_bb, body_bb);
4165       if (se == NULL)
4166 	{
4167 	  se = BRANCH_EDGE (cont_bb);
4168 	  gcc_assert (single_succ (se->dest) == body_bb);
4169 	}
4170       if (gimple_omp_for_combined_p (fd->for_stmt))
4171 	{
4172 	  remove_edge (se);
4173 	  se = NULL;
4174 	}
4175       else if (fd->collapse > 1)
4176 	{
4177 	  remove_edge (se);
4178 	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4179 	}
4180       else
4181 	se->flags = EDGE_TRUE_VALUE;
4182       find_edge (cont_bb, trip_update_bb)->flags
4183 	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4184 
4185       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4186 				iter_part_bb);
4187     }
4188 
4189   if (gimple_in_ssa_p (cfun))
4190     {
4191       gphi_iterator psi;
4192       gphi *phi;
4193       edge re, ene;
4194       edge_var_map *vm;
4195       size_t i;
4196 
4197       gcc_assert (fd->collapse == 1 && !broken_loop);
4198 
4199       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4200 	 remove arguments of the phi nodes in fin_bb.  We need to create
4201 	 appropriate phi nodes in iter_part_bb instead.  */
4202       se = find_edge (iter_part_bb, fin_bb);
4203       re = single_succ_edge (trip_update_bb);
4204       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4205       ene = single_succ_edge (entry_bb);
4206 
4207       psi = gsi_start_phis (fin_bb);
4208       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4209 	   gsi_next (&psi), ++i)
4210 	{
4211 	  gphi *nphi;
4212 	  source_location locus;
4213 
4214 	  phi = psi.phi ();
4215 	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4216 			       redirect_edge_var_map_def (vm), 0))
4217 	    continue;
4218 
4219 	  t = gimple_phi_result (phi);
4220 	  gcc_assert (t == redirect_edge_var_map_result (vm));
4221 
4222 	  if (!single_pred_p (fin_bb))
4223 	    t = copy_ssa_name (t, phi);
4224 
4225 	  nphi = create_phi_node (t, iter_part_bb);
4226 
4227 	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4228 	  locus = gimple_phi_arg_location_from_edge (phi, se);
4229 
4230 	  /* A special case -- fd->loop.v is not yet computed in
4231 	     iter_part_bb, so we need to use vextra instead.  */
4232 	  if (t == fd->loop.v)
4233 	    t = vextra;
4234 	  add_phi_arg (nphi, t, ene, locus);
4235 	  locus = redirect_edge_var_map_location (vm);
4236 	  tree back_arg = redirect_edge_var_map_def (vm);
4237 	  add_phi_arg (nphi, back_arg, re, locus);
4238 	  edge ce = find_edge (cont_bb, body_bb);
4239 	  if (ce == NULL)
4240 	    {
4241 	      ce = BRANCH_EDGE (cont_bb);
4242 	      gcc_assert (single_succ (ce->dest) == body_bb);
4243 	      ce = single_succ_edge (ce->dest);
4244 	    }
4245 	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4246 	  gcc_assert (inner_loop_phi != NULL);
4247 	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4248 		       find_edge (seq_start_bb, body_bb), locus);
4249 
4250 	  if (!single_pred_p (fin_bb))
4251 	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4252 	}
4253       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4254       redirect_edge_var_map_clear (re);
4255       if (single_pred_p (fin_bb))
4256 	while (1)
4257 	  {
4258 	    psi = gsi_start_phis (fin_bb);
4259 	    if (gsi_end_p (psi))
4260 	      break;
4261 	    remove_phi_node (&psi, false);
4262 	  }
4263 
4264       /* Make phi node for trip.  */
4265       phi = create_phi_node (trip_main, iter_part_bb);
4266       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4267 		   UNKNOWN_LOCATION);
4268       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4269 		   UNKNOWN_LOCATION);
4270     }
4271 
4272   if (!broken_loop)
4273     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4274   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4275 			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4276   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4277 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
4278   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4279 			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4280   set_immediate_dominator (CDI_DOMINATORS, body_bb,
4281 			   recompute_dominator (CDI_DOMINATORS, body_bb));
4282 
4283   if (!broken_loop)
4284     {
4285       struct loop *loop = body_bb->loop_father;
4286       struct loop *trip_loop = alloc_loop ();
4287       trip_loop->header = iter_part_bb;
4288       trip_loop->latch = trip_update_bb;
4289       add_loop (trip_loop, iter_part_bb->loop_father);
4290 
4291       if (loop != entry_bb->loop_father)
4292 	{
4293 	  gcc_assert (loop->header == body_bb);
4294 	  gcc_assert (loop->latch == region->cont
4295 		      || single_pred (loop->latch) == region->cont);
4296 	  trip_loop->inner = loop;
4297 	  return;
4298 	}
4299 
4300       if (!gimple_omp_for_combined_p (fd->for_stmt))
4301 	{
4302 	  loop = alloc_loop ();
4303 	  loop->header = body_bb;
4304 	  if (collapse_bb == NULL)
4305 	    loop->latch = cont_bb;
4306 	  add_loop (loop, trip_loop);
4307 	}
4308     }
4309 }
4310 
4311 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
4312    loop.  Given parameters:
4313 
4314 	for (V = N1; V cond N2; V += STEP) BODY;
4315 
4316    where COND is "<" or ">", we generate pseudocode
4317 
4318 	V = N1;
4319 	goto L1;
4320     L0:
4321 	BODY;
4322 	V += STEP;
4323     L1:
4324 	if (V cond N2) goto L0; else goto L2;
4325     L2:
4326 
4327     For collapsed loops, given parameters:
4328       collapse(3)
4329       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4330 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4331 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4332 	    BODY;
4333 
4334     we generate pseudocode
4335 
4336 	if (cond3 is <)
4337 	  adj = STEP3 - 1;
4338 	else
4339 	  adj = STEP3 + 1;
4340 	count3 = (adj + N32 - N31) / STEP3;
4341 	if (cond2 is <)
4342 	  adj = STEP2 - 1;
4343 	else
4344 	  adj = STEP2 + 1;
4345 	count2 = (adj + N22 - N21) / STEP2;
4346 	if (cond1 is <)
4347 	  adj = STEP1 - 1;
4348 	else
4349 	  adj = STEP1 + 1;
4350 	count1 = (adj + N12 - N11) / STEP1;
4351 	count = count1 * count2 * count3;
4352 	V = 0;
4353 	V1 = N11;
4354 	V2 = N21;
4355 	V3 = N31;
4356 	goto L1;
4357     L0:
4358 	BODY;
4359 	V += 1;
4360 	V3 += STEP3;
4361 	V2 += (V3 cond3 N32) ? 0 : STEP2;
4362 	V3 = (V3 cond3 N32) ? V3 : N31;
4363 	V1 += (V2 cond2 N22) ? 0 : STEP1;
4364 	V2 = (V2 cond2 N22) ? V2 : N21;
4365     L1:
4366 	if (V < count) goto L0; else goto L2;
4367     L2:
4368 
4369       */
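
/* A minimal standalone sketch (illustrative only, not used anywhere in this
   file) of the collapsed iteration-variable update shown above, specialized
   to collapse(2) with both conditions being "<": after the innermost
   variable is bumped, the outer one advances only when the inner one has
   wrapped.  All names are hypothetical.  */

static inline void
example_collapse2_bump (long *v1, long *v2, long n21, long n22,
			long step1, long step2)
{
  *v2 += step2;
  if (!(*v2 < n22))
    {
      /* Inner loop finished: reset it and step the outer variable.  */
      *v2 = n21;
      *v1 += step1;
    }
}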
4370 
4371 static void
4372 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4373 {
4374   tree type, t;
4375   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4376   gimple_stmt_iterator gsi;
4377   gimple *stmt;
4378   gcond *cond_stmt;
4379   bool broken_loop = region->cont == NULL;
4380   edge e, ne;
4381   tree *counts = NULL;
4382   int i;
4383   int safelen_int = INT_MAX;
4384   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4385 				  OMP_CLAUSE_SAFELEN);
4386   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4387 				  OMP_CLAUSE__SIMDUID_);
4388   tree n1, n2;
4389 
4390   if (safelen)
4391     {
4392       poly_uint64 val;
4393       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4394       if (!poly_int_tree_p (safelen, &val))
4395 	safelen_int = 0;
4396       else
4397 	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4398       if (safelen_int == 1)
4399 	safelen_int = 0;
4400     }
4401   type = TREE_TYPE (fd->loop.v);
4402   entry_bb = region->entry;
4403   cont_bb = region->cont;
4404   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4405   gcc_assert (broken_loop
4406 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4407   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4408   if (!broken_loop)
4409     {
4410       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4411       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4412       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4413       l2_bb = BRANCH_EDGE (entry_bb)->dest;
4414     }
4415   else
4416     {
4417       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4418       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4419       l2_bb = single_succ (l1_bb);
4420     }
4421   exit_bb = region->exit;
4422   l2_dom_bb = NULL;
4423 
4424   gsi = gsi_last_nondebug_bb (entry_bb);
4425 
4426   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4427   /* Not needed in SSA form right now.  */
4428   gcc_assert (!gimple_in_ssa_p (cfun));
4429   if (fd->collapse > 1)
4430     {
4431       int first_zero_iter = -1, dummy = -1;
4432       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4433 
4434       counts = XALLOCAVEC (tree, fd->collapse);
4435       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4436 				  zero_iter_bb, first_zero_iter,
4437 				  dummy_bb, dummy, l2_dom_bb);
4438     }
4439   if (l2_dom_bb == NULL)
4440     l2_dom_bb = l1_bb;
4441 
4442   n1 = fd->loop.n1;
4443   n2 = fd->loop.n2;
4444   if (gimple_omp_for_combined_into_p (fd->for_stmt))
4445     {
4446       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4447 				     OMP_CLAUSE__LOOPTEMP_);
4448       gcc_assert (innerc);
4449       n1 = OMP_CLAUSE_DECL (innerc);
4450       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4451 				OMP_CLAUSE__LOOPTEMP_);
4452       gcc_assert (innerc);
4453       n2 = OMP_CLAUSE_DECL (innerc);
4454     }
4455   tree step = fd->loop.step;
4456 
4457   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4458 				  OMP_CLAUSE__SIMT_);
4459   if (is_simt)
4460     {
4461       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4462       is_simt = safelen_int > 1;
4463     }
4464   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4465   if (is_simt)
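  /* For SIMT execution each hardware lane starts at a different offset
     (N1 + STEP * lane) and advances by STEP scaled by the SIMT
     vectorization factor, effectively strip-mining the loop across lanes;
     a compensating adjustment of the iteration variable after the loop is
     emitted further below.  */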
4466     {
4467       simt_lane = create_tmp_var (unsigned_type_node);
4468       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4469       gimple_call_set_lhs (g, simt_lane);
4470       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4471       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4472 				 fold_convert (TREE_TYPE (step), simt_lane));
4473       n1 = fold_convert (type, n1);
4474       if (POINTER_TYPE_P (type))
4475 	n1 = fold_build_pointer_plus (n1, offset);
4476       else
4477 	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4478 
4479       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
4480       if (fd->collapse > 1)
4481 	simt_maxlane = build_one_cst (unsigned_type_node);
4482       else if (safelen_int < omp_max_simt_vf ())
4483 	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4484       tree vf
4485 	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4486 					unsigned_type_node, 0);
4487       if (simt_maxlane)
4488 	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4489       vf = fold_convert (TREE_TYPE (step), vf);
4490       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4491     }
4492 
4493   expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4494   if (fd->collapse > 1)
4495     {
4496       if (gimple_omp_for_combined_into_p (fd->for_stmt))
4497 	{
4498 	  gsi_prev (&gsi);
4499 	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4500 	  gsi_next (&gsi);
4501 	}
4502       else
4503 	for (i = 0; i < fd->collapse; i++)
4504 	  {
4505 	    tree itype = TREE_TYPE (fd->loops[i].v);
4506 	    if (POINTER_TYPE_P (itype))
4507 	      itype = signed_type_for (itype);
4508 	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4509 	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4510 	  }
4511     }
4512 
4513   /* Remove the GIMPLE_OMP_FOR statement.  */
4514   gsi_remove (&gsi, true);
4515 
4516   if (!broken_loop)
4517     {
4518       /* Code to control the increment goes in the CONT_BB.  */
4519       gsi = gsi_last_nondebug_bb (cont_bb);
4520       stmt = gsi_stmt (gsi);
4521       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4522 
4523       if (POINTER_TYPE_P (type))
4524 	t = fold_build_pointer_plus (fd->loop.v, step);
4525       else
4526 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4527       expand_omp_build_assign (&gsi, fd->loop.v, t);
4528 
4529       if (fd->collapse > 1)
4530 	{
4531 	  i = fd->collapse - 1;
4532 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4533 	    {
4534 	      t = fold_convert (sizetype, fd->loops[i].step);
4535 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
4536 	    }
4537 	  else
4538 	    {
4539 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
4540 				fd->loops[i].step);
4541 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4542 			       fd->loops[i].v, t);
4543 	    }
4544 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4545 
4546 	  for (i = fd->collapse - 1; i > 0; i--)
4547 	    {
4548 	      tree itype = TREE_TYPE (fd->loops[i].v);
4549 	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4550 	      if (POINTER_TYPE_P (itype2))
4551 		itype2 = signed_type_for (itype2);
4552 	      t = fold_convert (itype2, fd->loops[i - 1].step);
4553 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4554 					    GSI_SAME_STMT);
4555 	      t = build3 (COND_EXPR, itype2,
4556 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4557 				  fd->loops[i].v,
4558 				  fold_convert (itype, fd->loops[i].n2)),
4559 			  build_int_cst (itype2, 0), t);
4560 	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4561 		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4562 	      else
4563 		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4564 	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4565 
4566 	      t = fold_convert (itype, fd->loops[i].n1);
4567 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4568 					    GSI_SAME_STMT);
4569 	      t = build3 (COND_EXPR, itype,
4570 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4571 				  fd->loops[i].v,
4572 				  fold_convert (itype, fd->loops[i].n2)),
4573 			  fd->loops[i].v, t);
4574 	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4575 	    }
4576 	}
4577 
4578       /* Remove GIMPLE_OMP_CONTINUE.  */
4579       gsi_remove (&gsi, true);
4580     }
4581 
4582   /* Emit the condition in L1_BB.  */
4583   gsi = gsi_start_bb (l1_bb);
4584 
4585   t = fold_convert (type, n2);
4586   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4587 				false, GSI_CONTINUE_LINKING);
4588   tree v = fd->loop.v;
4589   if (DECL_P (v) && TREE_ADDRESSABLE (v))
4590     v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4591 				  false, GSI_CONTINUE_LINKING);
4592   t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4593   cond_stmt = gimple_build_cond_empty (t);
4594   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4595   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4596 		 NULL, NULL)
4597       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4598 		    NULL, NULL))
4599     {
4600       gsi = gsi_for_stmt (cond_stmt);
4601       gimple_regimplify_operands (cond_stmt, &gsi);
4602     }
4603 
4604   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
4605   if (is_simt)
4606     {
4607       gsi = gsi_start_bb (l2_bb);
4608       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4609       if (POINTER_TYPE_P (type))
4610 	t = fold_build_pointer_plus (fd->loop.v, step);
4611       else
4612 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4613       expand_omp_build_assign (&gsi, fd->loop.v, t);
4614     }
4615 
4616   /* Remove GIMPLE_OMP_RETURN.  */
4617   gsi = gsi_last_nondebug_bb (exit_bb);
4618   gsi_remove (&gsi, true);
4619 
4620   /* Connect the new blocks.  */
4621   remove_edge (FALLTHRU_EDGE (entry_bb));
4622 
4623   if (!broken_loop)
4624     {
4625       remove_edge (BRANCH_EDGE (entry_bb));
4626       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4627 
4628       e = BRANCH_EDGE (l1_bb);
4629       ne = FALLTHRU_EDGE (l1_bb);
4630       e->flags = EDGE_TRUE_VALUE;
4631     }
4632   else
4633     {
4634       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4635 
4636       ne = single_succ_edge (l1_bb);
4637       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4638 
4639     }
4640   ne->flags = EDGE_FALSE_VALUE;
4641   e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4642   ne->probability = e->probability.invert ();
4643 
4644   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4645   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4646 
4647   if (simt_maxlane)
4648     {
4649       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4650 				     NULL_TREE, NULL_TREE);
4651       gsi = gsi_last_bb (entry_bb);
4652       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4653       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4654       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4655       FALLTHRU_EDGE (entry_bb)->probability
4656 	 = profile_probability::guessed_always ().apply_scale (7, 8);
4657       BRANCH_EDGE (entry_bb)->probability
4658 	 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4659       l2_dom_bb = entry_bb;
4660     }
4661   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4662 
4663   if (!broken_loop)
4664     {
4665       struct loop *loop = alloc_loop ();
4666       loop->header = l1_bb;
4667       loop->latch = cont_bb;
4668       add_loop (loop, l1_bb->loop_father);
4669       loop->safelen = safelen_int;
4670       if (simduid)
4671 	{
4672 	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4673 	  cfun->has_simduid_loops = true;
4674 	}
4675       /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4676 	 the loop.  */
4677       if ((flag_tree_loop_vectorize
4678 	   || !global_options_set.x_flag_tree_loop_vectorize)
4679 	  && flag_tree_loop_optimize
4680 	  && loop->safelen > 1)
4681 	{
4682 	  loop->force_vectorize = true;
4683 	  cfun->has_force_vectorize_loops = true;
4684 	}
4685     }
4686   else if (simduid)
4687     cfun->has_simduid_loops = true;
4688 }
4689 
4690 /* Taskloop construct is represented after gimplification with
4691    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4692    in between them.  This routine expands the outer GIMPLE_OMP_FOR,
4693    which should just compute all the needed loop temporaries
4694    for GIMPLE_OMP_TASK.  */
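
/* For illustration (an example added for exposition, not taken from the
   original sources): a source-level

       #pragma omp taskloop
       for (i = a; i < b; i++)
	 body (i);

   arrives here as the outer GIMPLE_OMP_FOR; the expansion below only
   evaluates the bounds (plus the collapsed iteration counts, if any) into
   the _looptemp_ clause temporaries of the enclosed GIMPLE_OMP_TASK, which
   the GOMP_taskloop{,_ull} runtime entry point later uses to hand each
   task its own subrange.  No loop body is emitted here.  */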
4695 
4696 static void
4697 expand_omp_taskloop_for_outer (struct omp_region *region,
4698 			       struct omp_for_data *fd,
4699 			       gimple *inner_stmt)
4700 {
4701   tree type, bias = NULL_TREE;
4702   basic_block entry_bb, cont_bb, exit_bb;
4703   gimple_stmt_iterator gsi;
4704   gassign *assign_stmt;
4705   tree *counts = NULL;
4706   int i;
4707 
4708   gcc_assert (inner_stmt);
4709   gcc_assert (region->cont);
4710   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4711 	      && gimple_omp_task_taskloop_p (inner_stmt));
4712   type = TREE_TYPE (fd->loop.v);
4713 
4714   /* See if we need to bias by LLONG_MIN.  */
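  /* Worked example for intuition (added for exposition): GOMP_taskloop_ull
     works on unsigned long long iterators, so signed bounds that may
     straddle zero are biased by LLONG_MIN to keep their ordering under
     unsigned comparison; e.g. -3 maps to 0x7ffffffffffffffd and +3 to
     0x8000000000000003, so the smaller signed value stays the smaller
     unsigned value.  */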
4715   if (fd->iter_type == long_long_unsigned_type_node
4716       && TREE_CODE (type) == INTEGER_TYPE
4717       && !TYPE_UNSIGNED (type))
4718     {
4719       tree n1, n2;
4720 
4721       if (fd->loop.cond_code == LT_EXPR)
4722 	{
4723 	  n1 = fd->loop.n1;
4724 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4725 	}
4726       else
4727 	{
4728 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4729 	  n2 = fd->loop.n1;
4730 	}
4731       if (TREE_CODE (n1) != INTEGER_CST
4732 	  || TREE_CODE (n2) != INTEGER_CST
4733 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4734 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4735     }
4736 
4737   entry_bb = region->entry;
4738   cont_bb = region->cont;
4739   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4740   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4741   exit_bb = region->exit;
4742 
4743   gsi = gsi_last_nondebug_bb (entry_bb);
4744   gimple *for_stmt = gsi_stmt (gsi);
4745   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4746   if (fd->collapse > 1)
4747     {
4748       int first_zero_iter = -1, dummy = -1;
4749       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4750 
4751       counts = XALLOCAVEC (tree, fd->collapse);
4752       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4753 				  zero_iter_bb, first_zero_iter,
4754 				  dummy_bb, dummy, l2_dom_bb);
4755 
4756       if (zero_iter_bb)
4757 	{
4758 	  /* Some counts[i] vars might be uninitialized if
4759 	     some loop has zero iterations.  But the body shouldn't
4760 	     be executed in that case, so just avoid uninit warnings.  */
4761 	  for (i = first_zero_iter; i < fd->collapse; i++)
4762 	    if (SSA_VAR_P (counts[i]))
4763 	      TREE_NO_WARNING (counts[i]) = 1;
4764 	  gsi_prev (&gsi);
4765 	  edge e = split_block (entry_bb, gsi_stmt (gsi));
4766 	  entry_bb = e->dest;
4767 	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4768 	  gsi = gsi_last_bb (entry_bb);
4769 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4770 				   get_immediate_dominator (CDI_DOMINATORS,
4771 							    zero_iter_bb));
4772 	}
4773     }
4774 
4775   tree t0, t1;
4776   t1 = fd->loop.n2;
4777   t0 = fd->loop.n1;
4778   if (POINTER_TYPE_P (TREE_TYPE (t0))
4779       && TYPE_PRECISION (TREE_TYPE (t0))
4780 	 != TYPE_PRECISION (fd->iter_type))
4781     {
4782       /* Avoid casting pointers to integer of a different size.  */
4783       /* Avoid casting pointers to an integer of a different size.  */
4784       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4785       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4786     }
4787   else
4788     {
4789       t1 = fold_convert (fd->iter_type, t1);
4790       t0 = fold_convert (fd->iter_type, t0);
4791     }
4792   if (bias)
4793     {
4794       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4795       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4796     }
4797 
4798   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4799 				 OMP_CLAUSE__LOOPTEMP_);
4800   gcc_assert (innerc);
4801   tree startvar = OMP_CLAUSE_DECL (innerc);
4802   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4803   gcc_assert (innerc);
4804   tree endvar = OMP_CLAUSE_DECL (innerc);
4805   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4806     {
4807       gcc_assert (innerc);
4808       for (i = 1; i < fd->collapse; i++)
4809 	{
4810 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4811 				    OMP_CLAUSE__LOOPTEMP_);
4812 	  gcc_assert (innerc);
4813 	}
4814       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4815 				OMP_CLAUSE__LOOPTEMP_);
4816       if (innerc)
4817 	{
4818 	  /* If needed (inner taskloop has lastprivate clause), propagate
4819 	     down the total number of iterations.  */
4820 	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4821 					     NULL_TREE, false,
4822 					     GSI_CONTINUE_LINKING);
4823 	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4824 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4825 	}
4826     }
4827 
4828   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4829 				 GSI_CONTINUE_LINKING);
4830   assign_stmt = gimple_build_assign (startvar, t0);
4831   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4832 
4833   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4834 				 GSI_CONTINUE_LINKING);
4835   assign_stmt = gimple_build_assign (endvar, t1);
4836   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4837   if (fd->collapse > 1)
4838     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4839 
4840   /* Remove the GIMPLE_OMP_FOR statement.  */
4841   gsi = gsi_for_stmt (for_stmt);
4842   gsi_remove (&gsi, true);
4843 
4844   gsi = gsi_last_nondebug_bb (cont_bb);
4845   gsi_remove (&gsi, true);
4846 
4847   gsi = gsi_last_nondebug_bb (exit_bb);
4848   gsi_remove (&gsi, true);
4849 
4850   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4851   remove_edge (BRANCH_EDGE (entry_bb));
4852   FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4853   remove_edge (BRANCH_EDGE (cont_bb));
4854   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4855   set_immediate_dominator (CDI_DOMINATORS, region->entry,
4856 			   recompute_dominator (CDI_DOMINATORS, region->entry));
4857 }
4858 
4859 /* Taskloop construct is represented after gimplification with
4860    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4861    in between them.  This routine expands the inner GIMPLE_OMP_FOR.
4862    GOMP_taskloop{,_ull} function arranges for each task to be given just
4863    a single range of iterations.  */
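
/* Illustrative sketch (added for exposition): each generated task finds
   its private subrange in the first two _looptemp_ clauses of this inner
   GIMPLE_OMP_FOR, so the code built below is conceptually

       V = task_n1;
       while (V cond task_n2)
	 {
	   BODY;
	   V += STEP;
	 }

   with no further calls into the libgomp runtime.  */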
4864 
4865 static void
4866 expand_omp_taskloop_for_inner (struct omp_region *region,
4867 			       struct omp_for_data *fd,
4868 			       gimple *inner_stmt)
4869 {
4870   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4871   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4872   basic_block fin_bb;
4873   gimple_stmt_iterator gsi;
4874   edge ep;
4875   bool broken_loop = region->cont == NULL;
4876   tree *counts = NULL;
4877   tree n1, n2, step;
4878 
4879   itype = type = TREE_TYPE (fd->loop.v);
4880   if (POINTER_TYPE_P (type))
4881     itype = signed_type_for (type);
4882 
4883   /* See if we need to bias by LLONG_MIN.  */
4884   if (fd->iter_type == long_long_unsigned_type_node
4885       && TREE_CODE (type) == INTEGER_TYPE
4886       && !TYPE_UNSIGNED (type))
4887     {
4888       tree n1, n2;
4889 
4890       if (fd->loop.cond_code == LT_EXPR)
4891 	{
4892 	  n1 = fd->loop.n1;
4893 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4894 	}
4895       else
4896 	{
4897 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4898 	  n2 = fd->loop.n1;
4899 	}
4900       if (TREE_CODE (n1) != INTEGER_CST
4901 	  || TREE_CODE (n2) != INTEGER_CST
4902 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4903 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4904     }
4905 
4906   entry_bb = region->entry;
4907   cont_bb = region->cont;
4908   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4909   fin_bb = BRANCH_EDGE (entry_bb)->dest;
4910   gcc_assert (broken_loop
4911 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4912   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4913   if (!broken_loop)
4914     {
4915       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4916       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4917     }
4918   exit_bb = region->exit;
4919 
4920   /* Iteration space partitioning goes in ENTRY_BB.  */
4921   gsi = gsi_last_nondebug_bb (entry_bb);
4922   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4923 
4924   if (fd->collapse > 1)
4925     {
4926       int first_zero_iter = -1, dummy = -1;
4927       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4928 
4929       counts = XALLOCAVEC (tree, fd->collapse);
4930       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4931 				  fin_bb, first_zero_iter,
4932 				  dummy_bb, dummy, l2_dom_bb);
4933       t = NULL_TREE;
4934     }
4935   else
4936     t = integer_one_node;
4937 
4938   step = fd->loop.step;
4939   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4940 				 OMP_CLAUSE__LOOPTEMP_);
4941   gcc_assert (innerc);
4942   n1 = OMP_CLAUSE_DECL (innerc);
4943   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4944   gcc_assert (innerc);
4945   n2 = OMP_CLAUSE_DECL (innerc);
4946   if (bias)
4947     {
4948       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4949       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4950     }
4951   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4952 				 true, NULL_TREE, true, GSI_SAME_STMT);
4953   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4954 				 true, NULL_TREE, true, GSI_SAME_STMT);
4955   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4956 				   true, NULL_TREE, true, GSI_SAME_STMT);
4957 
4958   tree startvar = fd->loop.v;
4959   tree endvar = NULL_TREE;
4960 
4961   if (gimple_omp_for_combined_p (fd->for_stmt))
4962     {
4963       tree clauses = gimple_omp_for_clauses (inner_stmt);
4964       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4965       gcc_assert (innerc);
4966       startvar = OMP_CLAUSE_DECL (innerc);
4967       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4968 				OMP_CLAUSE__LOOPTEMP_);
4969       gcc_assert (innerc);
4970       endvar = OMP_CLAUSE_DECL (innerc);
4971     }
4972   t = fold_convert (TREE_TYPE (startvar), n1);
4973   t = force_gimple_operand_gsi (&gsi, t,
4974 				DECL_P (startvar)
4975 				&& TREE_ADDRESSABLE (startvar),
4976 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4977   gimple *assign_stmt = gimple_build_assign (startvar, t);
4978   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4979 
4980   t = fold_convert (TREE_TYPE (startvar), n2);
4981   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4982 				false, GSI_CONTINUE_LINKING);
4983   if (endvar)
4984     {
4985       assign_stmt = gimple_build_assign (endvar, e);
4986       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4987       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4988 	assign_stmt = gimple_build_assign (fd->loop.v, e);
4989       else
4990 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4991       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4992     }
4993   if (fd->collapse > 1)
4994     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4995 
4996   if (!broken_loop)
4997     {
4998       /* The code controlling the sequential loop replaces the
4999 	 GIMPLE_OMP_CONTINUE.  */
5000       gsi = gsi_last_nondebug_bb (cont_bb);
5001       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5002       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5003       vmain = gimple_omp_continue_control_use (cont_stmt);
5004       vback = gimple_omp_continue_control_def (cont_stmt);
5005 
5006       if (!gimple_omp_for_combined_p (fd->for_stmt))
5007 	{
5008 	  if (POINTER_TYPE_P (type))
5009 	    t = fold_build_pointer_plus (vmain, step);
5010 	  else
5011 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5012 	  t = force_gimple_operand_gsi (&gsi, t,
5013 					DECL_P (vback)
5014 					&& TREE_ADDRESSABLE (vback),
5015 					NULL_TREE, true, GSI_SAME_STMT);
5016 	  assign_stmt = gimple_build_assign (vback, t);
5017 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5018 
5019 	  t = build2 (fd->loop.cond_code, boolean_type_node,
5020 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5021 		      ? t : vback, e);
5022 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5023 	}
5024 
5025       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5026       gsi_remove (&gsi, true);
5027 
5028       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5029 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5030     }
5031 
5032   /* Remove the GIMPLE_OMP_FOR statement.  */
5033   gsi = gsi_for_stmt (fd->for_stmt);
5034   gsi_remove (&gsi, true);
5035 
5036   /* Remove the GIMPLE_OMP_RETURN statement.  */
5037   gsi = gsi_last_nondebug_bb (exit_bb);
5038   gsi_remove (&gsi, true);
5039 
5040   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5041   if (!broken_loop)
5042     remove_edge (BRANCH_EDGE (entry_bb));
5043   else
5044     {
5045       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5046       region->outer->cont = NULL;
5047     }
5048 
5049   /* Connect all the blocks.  */
5050   if (!broken_loop)
5051     {
5052       ep = find_edge (cont_bb, body_bb);
5053       if (gimple_omp_for_combined_p (fd->for_stmt))
5054 	{
5055 	  remove_edge (ep);
5056 	  ep = NULL;
5057 	}
5058       else if (fd->collapse > 1)
5059 	{
5060 	  remove_edge (ep);
5061 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5062 	}
5063       else
5064 	ep->flags = EDGE_TRUE_VALUE;
5065       find_edge (cont_bb, fin_bb)->flags
5066 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5067     }
5068 
5069   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5070 			   recompute_dominator (CDI_DOMINATORS, body_bb));
5071   if (!broken_loop)
5072     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5073 			     recompute_dominator (CDI_DOMINATORS, fin_bb));
5074 
5075   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5076     {
5077       struct loop *loop = alloc_loop ();
5078       loop->header = body_bb;
5079       if (collapse_bb == NULL)
5080 	loop->latch = cont_bb;
5081       add_loop (loop, body_bb->loop_father);
5082     }
5083 }
5084 
5085 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
5086    partitioned loop.  The lowering here is abstracted, in that the
5087    loop parameters are passed through internal functions, which are
5088    further lowered by oacc_device_lower, once we get to the target
5089    compiler.  The loop is of the form:
5090 
5091    for (V = B; V LTGT E; V += S) {BODY}
5092 
5093    where LTGT is < or >.  We may have a specified chunking size, CHUNK_SIZE
5094    (constant 0 for no chunking) and we will have a GWV partitioning
5095    mask, specifying dimensions over which the loop is to be
5096    partitioned (see note below).  We generate code that looks like
5097    (this ignores tiling):
5098 
5099    <entry_bb> [incoming FALL->body, BRANCH->exit]
5100      typedef signedintify (typeof (V)) T;  // underlying signed integral type
5101      T range = E - B;
5102      T chunk_no = 0;
5103      T DIR = LTGT == '<' ? +1 : -1;
5104      T chunk_max = GOACC_LOOP_CHUNKS (dir, range, S, CHUNK_SIZE, GWV);
5105      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5106 
5107    <head_bb> [created by splitting end of entry_bb]
5108      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5109      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5110      if (!(offset LTGT bound)) goto bottom_bb;
5111 
5112    <body_bb> [incoming]
5113      V = B + offset;
5114      {BODY}
5115 
5116    <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5117      offset += step;
5118      if (offset LTGT bound) goto body_bb; [*]
5119 
5120    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5121      chunk_no++;
5122      if (chunk_no < chunk_max) goto head_bb;
5123 
5124    <exit_bb> [incoming]
5125      V = B + ((range -/+ 1) / S +/- 1) * S [*]
5126 
5127    [*] Needed if V live at end of loop.  */
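
/* Worked example for the final assignment above (added for exposition,
   assuming LTGT is '<', B = 0, E = 10, S = 3): range = 10, so
   V = 0 + ((10 - 1) / 3 + 1) * 3 = 12, the value the iteration variable
   would have after the equivalent sequential loop finished.  */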
5128 
5129 static void
5130 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5131 {
5132   tree v = fd->loop.v;
5133   enum tree_code cond_code = fd->loop.cond_code;
5134   enum tree_code plus_code = PLUS_EXPR;
5135 
5136   tree chunk_size = integer_minus_one_node;
5137   tree gwv = integer_zero_node;
5138   tree iter_type = TREE_TYPE (v);
5139   tree diff_type = iter_type;
5140   tree plus_type = iter_type;
5141   struct oacc_collapse *counts = NULL;
5142 
5143   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5144 		       == GF_OMP_FOR_KIND_OACC_LOOP);
5145   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5146   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5147 
5148   if (POINTER_TYPE_P (iter_type))
5149     {
5150       plus_code = POINTER_PLUS_EXPR;
5151       plus_type = sizetype;
5152     }
5153   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5154     diff_type = signed_type_for (diff_type);
5155   if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5156     diff_type = integer_type_node;
5157 
5158   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5159   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5160   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
5161   basic_block bottom_bb = NULL;
5162 
5163   /* entry_bb has two successors; the branch edge is to the exit
5164      block, the fallthrough edge to the body.  */
5165   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5166 	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5167 
5168   /* If cont_bb is non-NULL, it has 2 successors.  The branch successor is
5169      body_bb, or a block whose only successor is body_bb.  Its
5170      fallthrough successor is the final block (same as the branch
5171      successor of the entry_bb).  */
5172   if (cont_bb)
5173     {
5174       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5175       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5176 
5177       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5178       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5179     }
5180   else
5181     gcc_assert (!gimple_in_ssa_p (cfun));
5182 
5183   /* The exit block only has entry_bb and cont_bb as predecessors.  */
5184   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5185 
5186   tree chunk_no;
5187   tree chunk_max = NULL_TREE;
5188   tree bound, offset;
5189   tree step = create_tmp_var (diff_type, ".step");
5190   bool up = cond_code == LT_EXPR;
5191   tree dir = build_int_cst (diff_type, up ? +1 : -1);
5192   bool chunking = !gimple_in_ssa_p (cfun);
5193   bool negating;
5194 
5195   /* Tiling vars.  */
5196   tree tile_size = NULL_TREE;
5197   tree element_s = NULL_TREE;
5198   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5199   basic_block elem_body_bb = NULL;
5200   basic_block elem_cont_bb = NULL;
5201 
5202   /* SSA instances.  */
5203   tree offset_incr = NULL_TREE;
5204   tree offset_init = NULL_TREE;
5205 
5206   gimple_stmt_iterator gsi;
5207   gassign *ass;
5208   gcall *call;
5209   gimple *stmt;
5210   tree expr;
5211   location_t loc;
5212   edge split, be, fte;
5213 
5214   /* Split the end of entry_bb to create head_bb.  */
5215   split = split_block (entry_bb, last_stmt (entry_bb));
5216   basic_block head_bb = split->dest;
5217   entry_bb = split->src;
5218 
5219   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
5220   gsi = gsi_last_nondebug_bb (entry_bb);
5221   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5222   loc = gimple_location (for_stmt);
5223 
5224   if (gimple_in_ssa_p (cfun))
5225     {
5226       offset_init = gimple_omp_for_index (for_stmt, 0);
5227       gcc_assert (integer_zerop (fd->loop.n1));
5228       /* The SSA parallelizer does gang parallelism.  */
5229       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5230     }
5231 
5232   if (fd->collapse > 1 || fd->tiling)
5233     {
5234       gcc_assert (!gimple_in_ssa_p (cfun) && up);
5235       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5236       tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5237 					      TREE_TYPE (fd->loop.n2), loc);
5238 
5239       if (SSA_VAR_P (fd->loop.n2))
5240 	{
5241 	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5242 					    true, GSI_SAME_STMT);
5243 	  ass = gimple_build_assign (fd->loop.n2, total);
5244 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5245 	}
5246     }
5247 
5248   tree b = fd->loop.n1;
5249   tree e = fd->loop.n2;
5250   tree s = fd->loop.step;
5251 
5252   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5253   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5254 
5255   /* Convert the step, avoiding possible unsigned->signed overflow.  */
5256   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5257   if (negating)
5258     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5259   s = fold_convert (diff_type, s);
5260   if (negating)
5261     s = fold_build1 (NEGATE_EXPR, diff_type, s);
5262   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5263 
5264   if (!chunking)
5265     chunk_size = integer_zero_node;
5266   expr = fold_convert (diff_type, chunk_size);
5267   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5268 					 NULL_TREE, true, GSI_SAME_STMT);
5269 
5270   if (fd->tiling)
5271     {
5272       /* Determine the tile size and element step,
5273 	 modify the outer loop step size.  */
5274       tile_size = create_tmp_var (diff_type, ".tile_size");
5275       expr = build_int_cst (diff_type, 1);
5276       for (int ix = 0; ix < fd->collapse; ix++)
5277 	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5278       expr = force_gimple_operand_gsi (&gsi, expr, true,
5279 				       NULL_TREE, true, GSI_SAME_STMT);
5280       ass = gimple_build_assign (tile_size, expr);
5281       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5282 
5283       element_s = create_tmp_var (diff_type, ".element_s");
5284       ass = gimple_build_assign (element_s, s);
5285       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5286 
5287       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5288       s = force_gimple_operand_gsi (&gsi, expr, true,
5289 				    NULL_TREE, true, GSI_SAME_STMT);
5290     }
5291 
5292   /* Determine the range, avoiding possible unsigned->signed overflow.  */
5293   negating = !up && TYPE_UNSIGNED (iter_type);
5294   expr = fold_build2 (MINUS_EXPR, plus_type,
5295 		      fold_convert (plus_type, negating ? b : e),
5296 		      fold_convert (plus_type, negating ? e : b));
5297   expr = fold_convert (diff_type, expr);
5298   if (negating)
5299     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5300   tree range = force_gimple_operand_gsi (&gsi, expr, true,
5301 					 NULL_TREE, true, GSI_SAME_STMT);
5302 
5303   chunk_no = build_int_cst (diff_type, 0);
5304   if (chunking)
5305     {
5306       gcc_assert (!gimple_in_ssa_p (cfun));
5307 
5308       expr = chunk_no;
5309       chunk_max = create_tmp_var (diff_type, ".chunk_max");
5310       chunk_no = create_tmp_var (diff_type, ".chunk_no");
5311 
5312       ass = gimple_build_assign (chunk_no, expr);
5313       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5314 
5315       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5316 					 build_int_cst (integer_type_node,
5317 							IFN_GOACC_LOOP_CHUNKS),
5318 					 dir, range, s, chunk_size, gwv);
5319       gimple_call_set_lhs (call, chunk_max);
5320       gimple_set_location (call, loc);
5321       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5322     }
5323   else
5324     chunk_size = chunk_no;
5325 
5326   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5327 				     build_int_cst (integer_type_node,
5328 						    IFN_GOACC_LOOP_STEP),
5329 				     dir, range, s, chunk_size, gwv);
5330   gimple_call_set_lhs (call, step);
5331   gimple_set_location (call, loc);
5332   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5333 
5334   /* Remove the GIMPLE_OMP_FOR.  */
5335   gsi_remove (&gsi, true);
5336 
5337   /* Fixup edges from head_bb.  */
5338   be = BRANCH_EDGE (head_bb);
5339   fte = FALLTHRU_EDGE (head_bb);
5340   be->flags |= EDGE_FALSE_VALUE;
5341   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5342 
5343   basic_block body_bb = fte->dest;
5344 
5345   if (gimple_in_ssa_p (cfun))
5346     {
5347       gsi = gsi_last_nondebug_bb (cont_bb);
5348       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5349 
5350       offset = gimple_omp_continue_control_use (cont_stmt);
5351       offset_incr = gimple_omp_continue_control_def (cont_stmt);
5352     }
5353   else
5354     {
5355       offset = create_tmp_var (diff_type, ".offset");
5356       offset_init = offset_incr = offset;
5357     }
5358   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5359 
5360   /* Loop offset & bound go into head_bb.  */
5361   gsi = gsi_start_bb (head_bb);
5362 
5363   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5364 				     build_int_cst (integer_type_node,
5365 						    IFN_GOACC_LOOP_OFFSET),
5366 				     dir, range, s,
5367 				     chunk_size, gwv, chunk_no);
5368   gimple_call_set_lhs (call, offset_init);
5369   gimple_set_location (call, loc);
5370   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5371 
5372   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5373 				     build_int_cst (integer_type_node,
5374 						    IFN_GOACC_LOOP_BOUND),
5375 				     dir, range, s,
5376 				     chunk_size, gwv, offset_init);
5377   gimple_call_set_lhs (call, bound);
5378   gimple_set_location (call, loc);
5379   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5380 
5381   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5382   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5383 		    GSI_CONTINUE_LINKING);
5384 
5385   /* V assignment goes into body_bb.  */
5386   if (!gimple_in_ssa_p (cfun))
5387     {
5388       gsi = gsi_start_bb (body_bb);
5389 
5390       expr = build2 (plus_code, iter_type, b,
5391 		     fold_convert (plus_type, offset));
5392       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5393 				       true, GSI_SAME_STMT);
5394       ass = gimple_build_assign (v, expr);
5395       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5396 
5397       if (fd->collapse > 1 || fd->tiling)
5398 	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5399 
5400       if (fd->tiling)
5401 	{
5402 	  /* Determine the range of the element loop -- usually simply
5403 	     the tile_size, but could be smaller if the final
5404 	     iteration of the outer loop is a partial tile.  */
5405 	  tree e_range = create_tmp_var (diff_type, ".e_range");
5406 
5407 	  expr = build2 (MIN_EXPR, diff_type,
5408 			 build2 (MINUS_EXPR, diff_type, bound, offset),
5409 			 build2 (MULT_EXPR, diff_type, tile_size,
5410 				 element_s));
5411 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5412 					   true, GSI_SAME_STMT);
5413 	  ass = gimple_build_assign (e_range, expr);
5414 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5415 
5416 	  /* Determine bound, offset & step of inner loop. */
5417 	  e_bound = create_tmp_var (diff_type, ".e_bound");
5418 	  e_offset = create_tmp_var (diff_type, ".e_offset");
5419 	  e_step = create_tmp_var (diff_type, ".e_step");
5420 
5421 	  /* Mark these as element loops.  */
5422 	  tree t, e_gwv = integer_minus_one_node;
5423 	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
5424 
5425 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5426 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5427 					     element_s, chunk, e_gwv, chunk);
5428 	  gimple_call_set_lhs (call, e_offset);
5429 	  gimple_set_location (call, loc);
5430 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5431 
5432 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5433 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5434 					     element_s, chunk, e_gwv, e_offset);
5435 	  gimple_call_set_lhs (call, e_bound);
5436 	  gimple_set_location (call, loc);
5437 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5438 
5439 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5440 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5441 					     element_s, chunk, e_gwv);
5442 	  gimple_call_set_lhs (call, e_step);
5443 	  gimple_set_location (call, loc);
5444 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5445 
5446 	  /* Add test and split block.  */
5447 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5448 	  stmt = gimple_build_cond_empty (expr);
5449 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5450 	  split = split_block (body_bb, stmt);
5451 	  elem_body_bb = split->dest;
5452 	  if (cont_bb == body_bb)
5453 	    cont_bb = elem_body_bb;
5454 	  body_bb = split->src;
5455 
5456 	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5457 
5458 	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
5459 	  if (cont_bb == NULL)
5460 	    {
5461 	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5462 	      e->probability = profile_probability::even ();
5463 	      split->probability = profile_probability::even ();
5464 	    }
5465 
5466 	  /* Initialize the user's loop vars.  */
5467 	  gsi = gsi_start_bb (elem_body_bb);
5468 	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5469 	}
5470     }
5471 
5472   /* Loop increment goes into cont_bb.  If this is not a loop, we
5473      will have spawned threads as if it was, and each one will
5474      execute one iteration.  The specification is not explicit about
5475      whether such constructs are ill-formed or not, and they can
5476      occur, especially when noreturn routines are involved.  */
5477   if (cont_bb)
5478     {
5479       gsi = gsi_last_nondebug_bb (cont_bb);
5480       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5481       loc = gimple_location (cont_stmt);
5482 
5483       if (fd->tiling)
5484 	{
5485 	  /* Insert element loop increment and test.  */
5486 	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5487 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5488 					   true, GSI_SAME_STMT);
5489 	  ass = gimple_build_assign (e_offset, expr);
5490 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5491 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5492 
5493 	  stmt = gimple_build_cond_empty (expr);
5494 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5495 	  split = split_block (cont_bb, stmt);
5496 	  elem_cont_bb = split->src;
5497 	  cont_bb = split->dest;
5498 
5499 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5500 	  split->probability = profile_probability::unlikely ().guessed ();
5501 	  edge latch_edge
5502 	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5503 	  latch_edge->probability = profile_probability::likely ().guessed ();
5504 
5505 	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5506 	  skip_edge->probability = profile_probability::unlikely ().guessed ();
5507 	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5508 	  loop_entry_edge->probability
5509 	    = profile_probability::likely ().guessed ();
5510 
5511 	  gsi = gsi_for_stmt (cont_stmt);
5512 	}
5513 
5514       /* Increment offset.  */
5515       if (gimple_in_ssa_p (cfun))
5516 	expr = build2 (plus_code, iter_type, offset,
5517 		       fold_convert (plus_type, step));
5518       else
5519 	expr = build2 (PLUS_EXPR, diff_type, offset, step);
5520       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5521 				       true, GSI_SAME_STMT);
5522       ass = gimple_build_assign (offset_incr, expr);
5523       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5524       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5525       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5526 
5527       /* Remove the GIMPLE_OMP_CONTINUE.  */
5528       gsi_remove (&gsi, true);
5529 
5530       /* Fixup edges from cont_bb.  */
5531       be = BRANCH_EDGE (cont_bb);
5532       fte = FALLTHRU_EDGE (cont_bb);
5533       be->flags |= EDGE_TRUE_VALUE;
5534       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5535 
5536       if (chunking)
5537 	{
5538 	  /* Split the beginning of exit_bb to make bottom_bb.  We
5539 	     need to insert a nop at the start, because splitting is
5540 	     after a stmt, not before.  */
5541 	  gsi = gsi_start_bb (exit_bb);
5542 	  stmt = gimple_build_nop ();
5543 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5544 	  split = split_block (exit_bb, stmt);
5545 	  bottom_bb = split->src;
5546 	  exit_bb = split->dest;
5547 	  gsi = gsi_last_bb (bottom_bb);
5548 
5549 	  /* Chunk increment and test goes into bottom_bb.  */
5550 	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5551 			 build_int_cst (diff_type, 1));
5552 	  ass = gimple_build_assign (chunk_no, expr);
5553 	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5554 
5555 	  /* Chunk test at end of bottom_bb.  */
5556 	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5557 	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5558 			    GSI_CONTINUE_LINKING);
5559 
5560 	  /* Fixup edges from bottom_bb.  */
5561 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5562 	  split->probability = profile_probability::unlikely ().guessed ();
5563 	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5564 	  latch_edge->probability = profile_probability::likely ().guessed ();
5565 	}
5566     }
5567 
5568   gsi = gsi_last_nondebug_bb (exit_bb);
5569   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5570   loc = gimple_location (gsi_stmt (gsi));
5571 
5572   if (!gimple_in_ssa_p (cfun))
5573     {
5574       /* Insert the final value of V, in case it is live.  This is the
5575 	 value for the only thread that survives past the join.  */
5576       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5577       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5578       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5579       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5580       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5581       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5582 				       true, GSI_SAME_STMT);
5583       ass = gimple_build_assign (v, expr);
5584       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5585     }
5586 
5587   /* Remove the OMP_RETURN.  */
5588   gsi_remove (&gsi, true);
5589 
5590   if (cont_bb)
5591     {
5592       /* We now have one, two or three nested loops.  Update the loop
5593 	 structures.  */
5594       struct loop *parent = entry_bb->loop_father;
5595       struct loop *body = body_bb->loop_father;
5596 
5597       if (chunking)
5598 	{
5599 	  struct loop *chunk_loop = alloc_loop ();
5600 	  chunk_loop->header = head_bb;
5601 	  chunk_loop->latch = bottom_bb;
5602 	  add_loop (chunk_loop, parent);
5603 	  parent = chunk_loop;
5604 	}
5605       else if (parent != body)
5606 	{
5607 	  gcc_assert (body->header == body_bb);
5608 	  gcc_assert (body->latch == cont_bb
5609 		      || single_pred (body->latch) == cont_bb);
5610 	  parent = NULL;
5611 	}
5612 
5613       if (parent)
5614 	{
5615 	  struct loop *body_loop = alloc_loop ();
5616 	  body_loop->header = body_bb;
5617 	  body_loop->latch = cont_bb;
5618 	  add_loop (body_loop, parent);
5619 
5620 	  if (fd->tiling)
5621 	    {
5622 	      /* Insert tiling's element loop.  */
5623 	      struct loop *inner_loop = alloc_loop ();
5624 	      inner_loop->header = elem_body_bb;
5625 	      inner_loop->latch = elem_cont_bb;
5626 	      add_loop (inner_loop, body_loop);
5627 	    }
5628 	}
5629     }
5630 }
5631 
5632 /* Expand the OMP loop defined by REGION.  */
5633 
5634 static void
5635 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5636 {
5637   struct omp_for_data fd;
5638   struct omp_for_data_loop *loops;
5639 
5640   loops
5641     = (struct omp_for_data_loop *)
5642       alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5643 	      * sizeof (struct omp_for_data_loop));
5644   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5645 			&fd, loops);
5646   region->sched_kind = fd.sched_kind;
5647   region->sched_modifiers = fd.sched_modifiers;
5648 
5649   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5650   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5651   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5652   if (region->cont)
5653     {
5654       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5655       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5656       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5657     }
5658   else
5659     /* If there isn't a continue then this is a degenerate case where
5660        the introduction of abnormal edges during lowering will prevent
5661        original loops from being detected.  Fix that up.  */
5662     loops_state_set (LOOPS_NEED_FIXUP);
5663 
5664   if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5665     expand_omp_simd (region, &fd);
5666   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5667     {
5668       gcc_assert (!inner_stmt);
5669       expand_oacc_for (region, &fd);
5670     }
5671   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5672     {
5673       if (gimple_omp_for_combined_into_p (fd.for_stmt))
5674 	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5675       else
5676 	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5677     }
5678   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5679 	   && !fd.have_ordered)
5680     {
5681       if (fd.chunk_size == NULL)
5682 	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5683       else
5684 	expand_omp_for_static_chunk (region, &fd, inner_stmt);
5685     }
5686   else
5687     {
5688       int fn_index, start_ix, next_ix;
5689 
5690       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5691 		  == GF_OMP_FOR_KIND_FOR);
5692       if (fd.chunk_size == NULL
5693 	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5694 	fd.chunk_size = integer_zero_node;
5695       gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5696       switch (fd.sched_kind)
5697 	{
5698 	case OMP_CLAUSE_SCHEDULE_RUNTIME:
5699 	  fn_index = 3;
5700 	  break;
5701 	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5702 	case OMP_CLAUSE_SCHEDULE_GUIDED:
5703 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5704 	      && !fd.ordered
5705 	      && !fd.have_ordered)
5706 	    {
5707 	      fn_index = 3 + fd.sched_kind;
5708 	      break;
5709 	    }
5710 	  /* FALLTHRU */
5711 	default:
5712 	  fn_index = fd.sched_kind;
5713 	  break;
5714 	}
5715       if (!fd.ordered)
5716 	fn_index += fd.have_ordered * 6;
5717       if (fd.ordered)
5718 	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5719       else
5720 	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5721       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
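      /* For instance (assuming the usual ordering of the
	 BUILT_IN_GOMP_LOOP_* enumerators), a plain schedule(dynamic) loop
	 ends up calling GOMP_loop_dynamic_start / GOMP_loop_dynamic_next,
	 while the nonmonotonic, ordered and doacross variants are reached
	 through the fn_index / start_ix adjustments computed above.  */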
5722       if (fd.iter_type == long_long_unsigned_type_node)
5723 	{
5724 	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5725 			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5726 	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5727 		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5728 	}
5729       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5730 			      (enum built_in_function) next_ix, inner_stmt);
5731     }
5732 
5733   if (gimple_in_ssa_p (cfun))
5734     update_ssa (TODO_update_ssa_only_virtuals);
5735 }
5736 
5737 /* Expand code for an OpenMP sections directive.  In pseudo code, we generate
5738 
5739 	v = GOMP_sections_start (n);
5740     L0:
5741 	switch (v)
5742 	  {
5743 	  case 0:
5744 	    goto L2;
5745 	  case 1:
5746 	    section 1;
5747 	    goto L1;
5748 	  case 2:
5749 	    ...
5750 	  case n:
5751 	    ...
5752 	  default:
5753 	    abort ();
5754 	  }
5755     L1:
5756 	v = GOMP_sections_next ();
5757 	goto L0;
5758     L2:
5759 	reduction;
5760 
5761     If this is a combined parallel sections, replace the call to
5762     GOMP_sections_start with call to GOMP_sections_next.  */
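
/* Illustrative example (added for exposition): for

	#pragma omp sections
	{
	  #pragma omp section
	    foo ();
	  #pragma omp section
	    bar ();
	}

   the switch above sends case 1 to the foo () section, case 2 to the
   bar () section, and case 0 to L2 once GOMP_sections_next () reports
   that no work is left.  */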
5763 
5764 static void
5765 expand_omp_sections (struct omp_region *region)
5766 {
5767   tree t, u, vin = NULL, vmain, vnext, l2;
5768   unsigned len;
5769   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5770   gimple_stmt_iterator si, switch_si;
5771   gomp_sections *sections_stmt;
5772   gimple *stmt;
5773   gomp_continue *cont;
5774   edge_iterator ei;
5775   edge e;
5776   struct omp_region *inner;
5777   unsigned i, casei;
5778   bool exit_reachable = region->cont != NULL;
5779 
5780   gcc_assert (region->exit != NULL);
5781   entry_bb = region->entry;
5782   l0_bb = single_succ (entry_bb);
5783   l1_bb = region->cont;
5784   l2_bb = region->exit;
5785   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5786     l2 = gimple_block_label (l2_bb);
5787   else
5788     {
5789       /* This can happen if there are reductions.  */
5790       len = EDGE_COUNT (l0_bb->succs);
5791       gcc_assert (len > 0);
5792       e = EDGE_SUCC (l0_bb, len - 1);
5793       si = gsi_last_nondebug_bb (e->dest);
5794       l2 = NULL_TREE;
5795       if (gsi_end_p (si)
5796 	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5797 	l2 = gimple_block_label (e->dest);
5798       else
5799 	FOR_EACH_EDGE (e, ei, l0_bb->succs)
5800 	  {
5801 	    si = gsi_last_nondebug_bb (e->dest);
5802 	    if (gsi_end_p (si)
5803 		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5804 	      {
5805 		l2 = gimple_block_label (e->dest);
5806 		break;
5807 	      }
5808 	  }
5809     }
5810   if (exit_reachable)
5811     default_bb = create_empty_bb (l1_bb->prev_bb);
5812   else
5813     default_bb = create_empty_bb (l0_bb);
5814 
5815   /* We will build a switch() with enough cases for all the
5816      GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work
5817      and a default case to abort if something goes wrong.  */
5818   len = EDGE_COUNT (l0_bb->succs);
5819 
5820   /* Use vec::quick_push on label_vec throughout, since we know the size
5821      in advance.  */
5822   auto_vec<tree> label_vec (len);
5823 
5824   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5825      GIMPLE_OMP_SECTIONS statement.  */
5826   si = gsi_last_nondebug_bb (entry_bb);
5827   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5828   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5829   vin = gimple_omp_sections_control (sections_stmt);
5830   if (!is_combined_parallel (region))
5831     {
5832       /* If we are not inside a combined parallel+sections region,
5833 	 call GOMP_sections_start.  */
5834       t = build_int_cst (unsigned_type_node, len - 1);
5835       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5836       stmt = gimple_build_call (u, 1, t);
5837     }
5838   else
5839     {
5840       /* Otherwise, call GOMP_sections_next.  */
5841       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5842       stmt = gimple_build_call (u, 0);
5843     }
5844   gimple_call_set_lhs (stmt, vin);
5845   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5846   gsi_remove (&si, true);
5847 
5848   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5849      L0_BB.  */
5850   switch_si = gsi_last_nondebug_bb (l0_bb);
5851   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5852   if (exit_reachable)
5853     {
5854       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5855       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5856       vmain = gimple_omp_continue_control_use (cont);
5857       vnext = gimple_omp_continue_control_def (cont);
5858     }
5859   else
5860     {
5861       vmain = vin;
5862       vnext = NULL_TREE;
5863     }
5864 
5865   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5866   label_vec.quick_push (t);
5867   i = 1;
5868 
5869   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
5870   for (inner = region->inner, casei = 1;
5871        inner;
5872        inner = inner->next, i++, casei++)
5873     {
5874       basic_block s_entry_bb, s_exit_bb;
5875 
5876       /* Skip optional reduction region.  */
5877       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5878 	{
5879 	  --i;
5880 	  --casei;
5881 	  continue;
5882 	}
5883 
5884       s_entry_bb = inner->entry;
5885       s_exit_bb = inner->exit;
5886 
5887       t = gimple_block_label (s_entry_bb);
5888       u = build_int_cst (unsigned_type_node, casei);
5889       u = build_case_label (u, NULL, t);
5890       label_vec.quick_push (u);
5891 
5892       si = gsi_last_nondebug_bb (s_entry_bb);
5893       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5894       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5895       gsi_remove (&si, true);
5896       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5897 
5898       if (s_exit_bb == NULL)
5899 	continue;
5900 
5901       si = gsi_last_nondebug_bb (s_exit_bb);
5902       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5903       gsi_remove (&si, true);
5904 
5905       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5906     }
5907 
5908   /* Error handling code goes in DEFAULT_BB.  */
5909   t = gimple_block_label (default_bb);
5910   u = build_case_label (NULL, NULL, t);
5911   make_edge (l0_bb, default_bb, 0);
5912   add_bb_to_loop (default_bb, current_loops->tree_root);
5913 
5914   stmt = gimple_build_switch (vmain, u, label_vec);
5915   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5916   gsi_remove (&switch_si, true);
5917 
5918   si = gsi_start_bb (default_bb);
5919   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5920   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5921 
5922   if (exit_reachable)
5923     {
5924       tree bfn_decl;
5925 
5926       /* Code to get the next section goes in L1_BB.  */
5927       si = gsi_last_nondebug_bb (l1_bb);
5928       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5929 
5930       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5931       stmt = gimple_build_call (bfn_decl, 0);
5932       gimple_call_set_lhs (stmt, vnext);
5933       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5934       gsi_remove (&si, true);
5935 
5936       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5937     }
5938 
5939   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
5940   si = gsi_last_nondebug_bb (l2_bb);
5941   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5942     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5943   else if (gimple_omp_return_lhs (gsi_stmt (si)))
5944     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5945   else
5946     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5947   stmt = gimple_build_call (t, 0);
5948   if (gimple_omp_return_lhs (gsi_stmt (si)))
5949     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5950   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5951   gsi_remove (&si, true);
5952 
5953   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5954 }
5955 
5956 /* Expand code for an OpenMP single directive.  We've already expanded
5957    much of the code; here we simply place the GOMP_barrier call.  */
5958 
5959 static void
5960 expand_omp_single (struct omp_region *region)
5961 {
5962   basic_block entry_bb, exit_bb;
5963   gimple_stmt_iterator si;
5964 
5965   entry_bb = region->entry;
5966   exit_bb = region->exit;
5967 
5968   si = gsi_last_nondebug_bb (entry_bb);
5969   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5970   gsi_remove (&si, true);
5971   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5972 
5973   si = gsi_last_nondebug_bb (exit_bb);
5974   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5975     {
5976       tree t = gimple_omp_return_lhs (gsi_stmt (si));
5977       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5978     }
5979   gsi_remove (&si, true);
5980   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5981 }
5982 
5983 /* Generic expansion for OpenMP synchronization directives: master,
5984    ordered and critical.  All we need to do here is remove the entry
5985    and exit markers for REGION.  */
5986 
5987 static void
5988 expand_omp_synch (struct omp_region *region)
5989 {
5990   basic_block entry_bb, exit_bb;
5991   gimple_stmt_iterator si;
5992 
5993   entry_bb = region->entry;
5994   exit_bb = region->exit;
5995 
5996   si = gsi_last_nondebug_bb (entry_bb);
5997   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5998 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5999 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6000 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6001 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6002 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6003   gsi_remove (&si, true);
6004   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6005 
6006   if (exit_bb)
6007     {
6008       si = gsi_last_nondebug_bb (exit_bb);
6009       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6010       gsi_remove (&si, true);
6011       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6012     }
6013 }
6014 
6015 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6016    operation as a normal volatile load.  */
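
/* Rough sketch of the intended result (added for exposition): for an
   '#pragma omp atomic read'  v = *addr;  the code below emits roughly

       v = __atomic_load_N (addr, MEMMODEL_RELAXED);

   (MEMMODEL_SEQ_CST when the construct is seq_cst), where N is the access
   size implied by INDEX.  */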
6017 
6018 static bool
6019 expand_omp_atomic_load (basic_block load_bb, tree addr,
6020 			tree loaded_val, int index)
6021 {
6022   enum built_in_function tmpbase;
6023   gimple_stmt_iterator gsi;
6024   basic_block store_bb;
6025   location_t loc;
6026   gimple *stmt;
6027   tree decl, call, type, itype;
6028 
6029   gsi = gsi_last_nondebug_bb (load_bb);
6030   stmt = gsi_stmt (gsi);
6031   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6032   loc = gimple_location (stmt);
6033 
6034   /* ??? If the target does not implement atomic_load_optab[mode], and mode
6035      is smaller than word size, then expand_atomic_load assumes that the load
6036      is atomic.  We could avoid the builtin entirely in this case.  */
6037 
6038   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6039   decl = builtin_decl_explicit (tmpbase);
6040   if (decl == NULL_TREE)
6041     return false;
6042 
6043   type = TREE_TYPE (loaded_val);
6044   itype = TREE_TYPE (TREE_TYPE (decl));
6045 
6046   call = build_call_expr_loc (loc, decl, 2, addr,
6047 			      build_int_cst (NULL,
6048 					     gimple_omp_atomic_seq_cst_p (stmt)
6049 					     ? MEMMODEL_SEQ_CST
6050 					     : MEMMODEL_RELAXED));
6051   if (!useless_type_conversion_p (type, itype))
6052     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6053   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6054 
6055   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6056   gsi_remove (&gsi, true);
6057 
6058   store_bb = single_succ (load_bb);
6059   gsi = gsi_last_nondebug_bb (store_bb);
6060   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6061   gsi_remove (&gsi, true);
6062 
6063   if (gimple_in_ssa_p (cfun))
6064     update_ssa (TODO_update_ssa_no_phi);
6065 
6066   return true;
6067 }
6068 
6069 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6070    operation as a normal volatile store.  */
6071 
6072 static bool
6073 expand_omp_atomic_store (basic_block load_bb, tree addr,
6074 			 tree loaded_val, tree stored_val, int index)
6075 {
6076   enum built_in_function tmpbase;
6077   gimple_stmt_iterator gsi;
6078   basic_block store_bb = single_succ (load_bb);
6079   location_t loc;
6080   gimple *stmt;
6081   tree decl, call, type, itype;
6082   machine_mode imode;
6083   bool exchange;
6084 
6085   gsi = gsi_last_nondebug_bb (load_bb);
6086   stmt = gsi_stmt (gsi);
6087   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6088 
6089   /* If the load value is needed, then this isn't a store but an exchange.  */
6090   exchange = gimple_omp_atomic_need_value_p (stmt);
6091 
6092   gsi = gsi_last_nondebug_bb (store_bb);
6093   stmt = gsi_stmt (gsi);
6094   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6095   loc = gimple_location (stmt);
6096 
6097   /* ??? If the target does not implement atomic_store_optab[mode], and mode
6098      is smaller than word size, then expand_atomic_store assumes that the store
6099      is atomic.  We could avoid the builtin entirely in this case.  */
6100 
6101   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6102   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6103   decl = builtin_decl_explicit (tmpbase);
6104   if (decl == NULL_TREE)
6105     return false;
6106 
6107   type = TREE_TYPE (stored_val);
6108 
6109   /* Dig out the type of the function's second argument.  */
6110   itype = TREE_TYPE (decl);
6111   itype = TYPE_ARG_TYPES (itype);
6112   itype = TREE_CHAIN (itype);
6113   itype = TREE_VALUE (itype);
6114   imode = TYPE_MODE (itype);
6115 
6116   if (exchange && !can_atomic_exchange_p (imode, true))
6117     return false;
6118 
6119   if (!useless_type_conversion_p (itype, type))
6120     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6121   call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6122 			      build_int_cst (NULL,
6123 					     gimple_omp_atomic_seq_cst_p (stmt)
6124 					     ? MEMMODEL_SEQ_CST
6125 					     : MEMMODEL_RELAXED));
6126   if (exchange)
6127     {
6128       if (!useless_type_conversion_p (type, itype))
6129 	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6130       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6131     }
6132 
6133   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6134   gsi_remove (&gsi, true);
6135 
6136   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
6137   gsi = gsi_last_nondebug_bb (load_bb);
6138   gsi_remove (&gsi, true);
6139 
6140   if (gimple_in_ssa_p (cfun))
6141     update_ssa (TODO_update_ssa_no_phi);
6142 
6143   return true;
6144 }
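
/* Illustrative sketch, not part of the original sources: for a 4-byte int X,
   "#pragma omp atomic write" of the form "x = expr;" is rewritten by the
   function above into roughly

     __atomic_store_4 (&x, expr, MEMMODEL_RELAXED);

   while a capture that needs the previous value uses __atomic_exchange_4 and
   assigns its result to the loaded value instead.  */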
6145 
6146 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6147    operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
6148    size of the data type, and thus usable to find the index of the builtin
6149    decl.  Returns false if the expression is not of the proper form.  */
6150 
6151 static bool
6152 expand_omp_atomic_fetch_op (basic_block load_bb,
6153 			    tree addr, tree loaded_val,
6154 			    tree stored_val, int index)
6155 {
6156   enum built_in_function oldbase, newbase, tmpbase;
6157   tree decl, itype, call;
6158   tree lhs, rhs;
6159   basic_block store_bb = single_succ (load_bb);
6160   gimple_stmt_iterator gsi;
6161   gimple *stmt;
6162   location_t loc;
6163   enum tree_code code;
6164   bool need_old, need_new;
6165   machine_mode imode;
6166   bool seq_cst;
6167 
6168   /* We expect to find the following sequences:
6169 
6170    load_bb:
6171        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6172 
6173    store_bb:
6174        val = tmp OP something; (or: something OP tmp)
6175        GIMPLE_OMP_ATOMIC_STORE (val)
6176 
6177   ???FIXME: Allow a more flexible sequence.
6178   Perhaps use data flow to pick the statements.
6179 
6180   */
6181 
6182   gsi = gsi_after_labels (store_bb);
6183   stmt = gsi_stmt (gsi);
6184   if (is_gimple_debug (stmt))
6185     {
6186       gsi_next_nondebug (&gsi);
6187       if (gsi_end_p (gsi))
6188 	return false;
6189       stmt = gsi_stmt (gsi);
6190     }
6191   loc = gimple_location (stmt);
6192   if (!is_gimple_assign (stmt))
6193     return false;
6194   gsi_next_nondebug (&gsi);
6195   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6196     return false;
6197   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6198   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6199   seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6200   gcc_checking_assert (!need_old || !need_new);
6201 
6202   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6203     return false;
6204 
6205   /* Check for one of the supported fetch-op operations.  */
6206   code = gimple_assign_rhs_code (stmt);
6207   switch (code)
6208     {
6209     case PLUS_EXPR:
6210     case POINTER_PLUS_EXPR:
6211       oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6212       newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6213       break;
6214     case MINUS_EXPR:
6215       oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6216       newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6217       break;
6218     case BIT_AND_EXPR:
6219       oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6220       newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6221       break;
6222     case BIT_IOR_EXPR:
6223       oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6224       newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6225       break;
6226     case BIT_XOR_EXPR:
6227       oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6228       newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6229       break;
6230     default:
6231       return false;
6232     }
6233 
6234   /* Make sure the expression is of the proper form.  */
6235   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6236     rhs = gimple_assign_rhs2 (stmt);
6237   else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6238 	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6239     rhs = gimple_assign_rhs1 (stmt);
6240   else
6241     return false;
6242 
6243   tmpbase = ((enum built_in_function)
6244 	     ((need_new ? newbase : oldbase) + index + 1));
6245   decl = builtin_decl_explicit (tmpbase);
6246   if (decl == NULL_TREE)
6247     return false;
6248   itype = TREE_TYPE (TREE_TYPE (decl));
6249   imode = TYPE_MODE (itype);
6250 
6251   /* We could test all of the various optabs involved, but the fact of the
6252      matter is that (with the exception of i486 vs i586 and xadd) all targets
6253      that support any atomic operation optab also implement compare-and-swap.
6254      Let optabs.c take care of expanding any compare-and-swap loop.  */
6255   if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6256     return false;
6257 
6258   gsi = gsi_last_nondebug_bb (load_bb);
6259   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6260 
6261   /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6262      It only requires that the operation happen atomically.  Thus we can
6263      use the RELAXED memory model.  */
6264   call = build_call_expr_loc (loc, decl, 3, addr,
6265 			      fold_convert_loc (loc, itype, rhs),
6266 			      build_int_cst (NULL,
6267 					     seq_cst ? MEMMODEL_SEQ_CST
6268 						     : MEMMODEL_RELAXED));
6269 
6270   if (need_old || need_new)
6271     {
6272       lhs = need_old ? loaded_val : stored_val;
6273       call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6274       call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6275     }
6276   else
6277     call = fold_convert_loc (loc, void_type_node, call);
6278   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6279   gsi_remove (&gsi, true);
6280 
6281   gsi = gsi_last_nondebug_bb (store_bb);
6282   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6283   gsi_remove (&gsi, true);
6284   gsi = gsi_last_nondebug_bb (store_bb);
6285   stmt = gsi_stmt (gsi);
6286   gsi_remove (&gsi, true);
6287 
6288   if (gimple_in_ssa_p (cfun))
6289     {
6290       release_defs (stmt);
6291       update_ssa (TODO_update_ssa_no_phi);
6292     }
6293 
6294   return true;
6295 }
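
/* Illustrative sketch, not part of the original sources: for a 4-byte int X,
   "#pragma omp atomic" around "x += n;" matches the PLUS_EXPR case above and
   the whole load/update/store sequence collapses into roughly

     __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED);

   with the __atomic_add_fetch_4 form chosen when the updated value is
   captured.  */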
6296 
6297 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6298 
6299       oldval = *addr;
6300       repeat:
6301 	newval = rhs;	 // with oldval replacing *addr in rhs
6302 	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6303 	if (oldval != newval)
6304 	  goto repeat;
6305 
6306    INDEX is log2 of the size of the data type, and thus usable to find the
6307    index of the builtin decl.  */
6308 
6309 static bool
6310 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6311 			    tree addr, tree loaded_val, tree stored_val,
6312 			    int index)
6313 {
6314   tree loadedi, storedi, initial, new_storedi, old_vali;
6315   tree type, itype, cmpxchg, iaddr, atype;
6316   gimple_stmt_iterator si;
6317   basic_block loop_header = single_succ (load_bb);
6318   gimple *phi, *stmt;
6319   edge e;
6320   enum built_in_function fncode;
6321 
6322   /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6323      order to use the RELAXED memory model effectively.  */
6324   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6325 				    + index + 1);
6326   cmpxchg = builtin_decl_explicit (fncode);
6327   if (cmpxchg == NULL_TREE)
6328     return false;
6329   type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6330   atype = type;
6331   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6332 
6333   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6334       || !can_atomic_load_p (TYPE_MODE (itype)))
6335     return false;
6336 
6337   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
6338   si = gsi_last_nondebug_bb (load_bb);
6339   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6340 
6341   /* For floating-point values, we'll need to view-convert them to integers
6342      so that we can perform the atomic compare and swap.  Simplify the
6343      following code by always setting up the "i"ntegral variables.  */
6344   if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6345     {
6346       tree iaddr_val;
6347 
6348       iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6349 							   true));
6350       atype = itype;
6351       iaddr_val
6352 	= force_gimple_operand_gsi (&si,
6353 				    fold_convert (TREE_TYPE (iaddr), addr),
6354 				    false, NULL_TREE, true, GSI_SAME_STMT);
6355       stmt = gimple_build_assign (iaddr, iaddr_val);
6356       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6357       loadedi = create_tmp_var (itype);
6358       if (gimple_in_ssa_p (cfun))
6359 	loadedi = make_ssa_name (loadedi);
6360     }
6361   else
6362     {
6363       iaddr = addr;
6364       loadedi = loaded_val;
6365     }
6366 
6367   fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6368   tree loaddecl = builtin_decl_explicit (fncode);
6369   if (loaddecl)
6370     initial
6371       = fold_convert (atype,
6372 		      build_call_expr (loaddecl, 2, iaddr,
6373 				       build_int_cst (NULL_TREE,
6374 						      MEMMODEL_RELAXED)));
6375   else
6376     {
6377       tree off
6378 	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6379 						      true), 0);
6380       initial = build2 (MEM_REF, atype, iaddr, off);
6381     }
6382 
6383   initial
6384     = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6385 				GSI_SAME_STMT);
6386 
6387   /* Move the value to the LOADEDI temporary.  */
6388   if (gimple_in_ssa_p (cfun))
6389     {
6390       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6391       phi = create_phi_node (loadedi, loop_header);
6392       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6393 	       initial);
6394     }
6395   else
6396     gsi_insert_before (&si,
6397 		       gimple_build_assign (loadedi, initial),
6398 		       GSI_SAME_STMT);
6399   if (loadedi != loaded_val)
6400     {
6401       gimple_stmt_iterator gsi2;
6402       tree x;
6403 
6404       x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6405       gsi2 = gsi_start_bb (loop_header);
6406       if (gimple_in_ssa_p (cfun))
6407 	{
6408 	  gassign *stmt;
6409 	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6410 					true, GSI_SAME_STMT);
6411 	  stmt = gimple_build_assign (loaded_val, x);
6412 	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6413 	}
6414       else
6415 	{
6416 	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6417 	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6418 				    true, GSI_SAME_STMT);
6419 	}
6420     }
6421   gsi_remove (&si, true);
6422 
6423   si = gsi_last_nondebug_bb (store_bb);
6424   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6425 
6426   if (iaddr == addr)
6427     storedi = stored_val;
6428   else
6429     storedi
6430       = force_gimple_operand_gsi (&si,
6431 				  build1 (VIEW_CONVERT_EXPR, itype,
6432 					  stored_val), true, NULL_TREE, true,
6433 				  GSI_SAME_STMT);
6434 
6435   /* Build the compare&swap statement.  */
6436   new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6437   new_storedi = force_gimple_operand_gsi (&si,
6438 					  fold_convert (TREE_TYPE (loadedi),
6439 							new_storedi),
6440 					  true, NULL_TREE,
6441 					  true, GSI_SAME_STMT);
6442 
6443   if (gimple_in_ssa_p (cfun))
6444     old_vali = loadedi;
6445   else
6446     {
6447       old_vali = create_tmp_var (TREE_TYPE (loadedi));
6448       stmt = gimple_build_assign (old_vali, loadedi);
6449       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6450 
6451       stmt = gimple_build_assign (loadedi, new_storedi);
6452       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6453     }
6454 
6455   /* Note that we always perform the comparison as an integer, even for
6456      floating point.  This allows the atomic operation to properly
6457      succeed even with NaNs and -0.0.  */
6458   tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6459   stmt = gimple_build_cond_empty (ne);
6460   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6461 
6462   /* Update cfg.  */
6463   e = single_succ_edge (store_bb);
6464   e->flags &= ~EDGE_FALLTHRU;
6465   e->flags |= EDGE_FALSE_VALUE;
6466   /* Expect no looping.  */
6467   e->probability = profile_probability::guessed_always ();
6468 
6469   e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6470   e->probability = profile_probability::guessed_never ();
6471 
6472   /* Copy the new value to loadedi (we already did that before the condition
6473      if we are not in SSA).  */
6474   if (gimple_in_ssa_p (cfun))
6475     {
6476       phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6477       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6478     }
6479 
6480   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
6481   gsi_remove (&si, true);
6482 
6483   struct loop *loop = alloc_loop ();
6484   loop->header = loop_header;
6485   loop->latch = store_bb;
6486   add_loop (loop, loop_header->loop_father);
6487 
6488   if (gimple_in_ssa_p (cfun))
6489     update_ssa (TODO_update_ssa_no_phi);
6490 
6491   return true;
6492 }
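
/* Illustrative sketch, not part of the original sources: for a 4-byte float F
   there is no fetch-op builtin, so "#pragma omp atomic" around "f += 1.0f;"
   is expanded by the function above into a loop of the shape

     old = VIEW_CONVERT_EXPR<unsigned int> (f);
   retry:
     new = VIEW_CONVERT_EXPR<unsigned int> (old-as-float + 1.0f);
     cur = __sync_val_compare_and_swap_4 ((unsigned int *) &f, old, new);
     if (cur != old) { old = cur; goto retry; }

   where the retry comparison is done on the integer view, as noted above.  */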
6493 
6494 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6495 
6496 				  GOMP_atomic_start ();
6497 				  *addr = rhs;
6498 				  GOMP_atomic_end ();
6499 
6500    The result is not globally atomic, but works so long as all parallel
6501    references are within #pragma omp atomic directives.  According to
6502    responses received from omp@openmp.org, this appears to be within spec,
6503    which makes sense, since that's how several other compilers handle
6504    this situation as well.
6505    LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6506    expanding.  STORED_VAL is the operand of the matching
6507    GIMPLE_OMP_ATOMIC_STORE.
6508 
6509    We replace
6510    GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6511    loaded_val = *addr;
6512 
6513    and replace
6514    GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
6515    *addr = stored_val;
6516 */
6517 
6518 static bool
6519 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6520 			 tree addr, tree loaded_val, tree stored_val)
6521 {
6522   gimple_stmt_iterator si;
6523   gassign *stmt;
6524   tree t;
6525 
6526   si = gsi_last_nondebug_bb (load_bb);
6527   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6528 
6529   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6530   t = build_call_expr (t, 0);
6531   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6532 
6533   tree mem = build_simple_mem_ref (addr);
6534   TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6535   TREE_OPERAND (mem, 1)
6536     = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6537 						 true),
6538 		    TREE_OPERAND (mem, 1));
6539   stmt = gimple_build_assign (loaded_val, mem);
6540   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6541   gsi_remove (&si, true);
6542 
6543   si = gsi_last_nondebug_bb (store_bb);
6544   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6545 
6546   stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6547   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6548 
6549   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6550   t = build_call_expr (t, 0);
6551   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6552   gsi_remove (&si, true);
6553 
6554   if (gimple_in_ssa_p (cfun))
6555     update_ssa (TODO_update_ssa_no_phi);
6556   return true;
6557 }
6558 
6559 /* Expand an GIMPLE_OMP_ATOMIC statement.  We try to expand
6560    using expand_omp_atomic_fetch_op.  If it failed, we try to
6561    call expand_omp_atomic_pipeline, and if it fails too, the
6562    ultimate fallback is wrapping the operation in a mutex
6563    (expand_omp_atomic_mutex).  REGION is the atomic region built
6564    by build_omp_regions_1().  */
6565 
6566 static void
6567 expand_omp_atomic (struct omp_region *region)
6568 {
6569   basic_block load_bb = region->entry, store_bb = region->exit;
6570   gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6571   gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6572   tree loaded_val = gimple_omp_atomic_load_lhs (load);
6573   tree addr = gimple_omp_atomic_load_rhs (load);
6574   tree stored_val = gimple_omp_atomic_store_val (store);
6575   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6576   HOST_WIDE_INT index;
6577 
6578   /* Make sure the type is one of the supported sizes.  */
6579   index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6580   index = exact_log2 (index);
6581   if (index >= 0 && index <= 4)
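  /* Illustrative note, not in the original sources: a 4-byte type gives
     index == 2 here, which the helpers above turn into the "_4" variants of
     the __atomic/__sync builtins; index <= 4 limits us to types of at most
     16 bytes.  */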
6582     {
6583       unsigned int align = TYPE_ALIGN_UNIT (type);
6584 
6585       /* __sync builtins require strict data alignment.  */
6586       if (exact_log2 (align) >= index)
6587 	{
6588 	  /* Atomic load.  */
6589 	  scalar_mode smode;
6590 	  if (loaded_val == stored_val
6591 	      && (is_int_mode (TYPE_MODE (type), &smode)
6592 		  || is_float_mode (TYPE_MODE (type), &smode))
6593 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6594 	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6595 	    return;
6596 
6597 	  /* Atomic store.  */
6598 	  if ((is_int_mode (TYPE_MODE (type), &smode)
6599 	       || is_float_mode (TYPE_MODE (type), &smode))
6600 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6601 	      && store_bb == single_succ (load_bb)
6602 	      && first_stmt (store_bb) == store
6603 	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
6604 					  stored_val, index))
6605 	    return;
6606 
6607 	  /* When possible, use specialized atomic update functions.  */
6608 	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6609 	      && store_bb == single_succ (load_bb)
6610 	      && expand_omp_atomic_fetch_op (load_bb, addr,
6611 					     loaded_val, stored_val, index))
6612 	    return;
6613 
6614 	  /* If we don't have specialized __sync builtins, try and implement
6615 	     as a compare and swap loop.  */
6616 	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6617 					  loaded_val, stored_val, index))
6618 	    return;
6619 	}
6620     }
6621 
6622   /* The ultimate fallback is wrapping the operation in a mutex.  */
6623   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6624 }
6625 
6626 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6627    at REGION_EXIT.  */
6628 
6629 static void
6630 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6631 				   basic_block region_exit)
6632 {
6633   struct loop *outer = region_entry->loop_father;
6634   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6635 
6636   /* Don't parallelize the kernels region if it contains more than one outer
6637      loop.  */
6638   unsigned int nr_outer_loops = 0;
6639   struct loop *single_outer = NULL;
6640   for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6641     {
6642       gcc_assert (loop_outer (loop) == outer);
6643 
6644       if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6645 	continue;
6646 
6647       if (region_exit != NULL
6648 	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6649 	continue;
6650 
6651       nr_outer_loops++;
6652       single_outer = loop;
6653     }
6654   if (nr_outer_loops != 1)
6655     return;
6656 
6657   for (struct loop *loop = single_outer->inner;
6658        loop != NULL;
6659        loop = loop->inner)
6660     if (loop->next)
6661       return;
6662 
6663   /* Mark the loops in the region.  */
6664   for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6665     loop->in_oacc_kernels_region = true;
6666 }
6667 
6668 /* Types used to pass grid and work-group sizes to a kernel invocation.  */
6669 
6670 struct GTY(()) grid_launch_attributes_trees
6671 {
6672   tree kernel_dim_array_type;
6673   tree kernel_lattrs_dimnum_decl;
6674   tree kernel_lattrs_grid_decl;
6675   tree kernel_lattrs_group_decl;
6676   tree kernel_launch_attributes_type;
6677 };
6678 
6679 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6680 
6681 /* Create types used to pass kernel launch attributes to target.  */
6682 
6683 static void
6684 grid_create_kernel_launch_attr_types (void)
6685 {
6686   if (grid_attr_trees)
6687     return;
6688   grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6689 
6690   tree dim_arr_index_type
6691     = build_index_type (build_int_cst (integer_type_node, 2));
6692   grid_attr_trees->kernel_dim_array_type
6693     = build_array_type (uint32_type_node, dim_arr_index_type);
6694 
6695   grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6696   grid_attr_trees->kernel_lattrs_dimnum_decl
6697     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6698 		  uint32_type_node);
6699   DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6700 
6701   grid_attr_trees->kernel_lattrs_grid_decl
6702     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6703 		  grid_attr_trees->kernel_dim_array_type);
6704   DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6705     = grid_attr_trees->kernel_lattrs_dimnum_decl;
6706   grid_attr_trees->kernel_lattrs_group_decl
6707     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6708 		  grid_attr_trees->kernel_dim_array_type);
6709   DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6710     = grid_attr_trees->kernel_lattrs_grid_decl;
6711   finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6712 			 "__gomp_kernel_launch_attributes",
6713 			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6714 }
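
/* Informal sketch of the record built above (an illustration, not part of
   the original sources); it corresponds roughly to

     struct __gomp_kernel_launch_attributes {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   since the dimension array type is indexed 0..2.  */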
6715 
6716 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6717    array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
6718    of type uint32_type_node.  */
6719 
6720 static void
6721 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6722 			     tree fld_decl, int index, tree value)
6723 {
6724   tree ref = build4 (ARRAY_REF, uint32_type_node,
6725 		     build3 (COMPONENT_REF,
6726 			     grid_attr_trees->kernel_dim_array_type,
6727 			     range_var, fld_decl, NULL_TREE),
6728 		     build_int_cst (integer_type_node, index),
6729 		     NULL_TREE, NULL_TREE);
6730   gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6731 }
6732 
6733 /* Return a tree representation of a pointer to a structure with grid and
6734    work-group size information.  Statements filling that information will be
6735    inserted before GSI; TGT_STMT is the target statement which has the
6736    necessary information in it.  */
6737 
6738 static tree
6739 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6740 				       gomp_target *tgt_stmt)
6741 {
6742   grid_create_kernel_launch_attr_types ();
6743   tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6744 				"__kernel_launch_attrs");
6745 
6746   unsigned max_dim = 0;
6747   for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6748        clause;
6749        clause = OMP_CLAUSE_CHAIN (clause))
6750     {
6751       if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6752 	continue;
6753 
6754       unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6755       max_dim = MAX (dim, max_dim);
6756 
6757       grid_insert_store_range_dim (gsi, lattrs,
6758 				   grid_attr_trees->kernel_lattrs_grid_decl,
6759 				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6760       grid_insert_store_range_dim (gsi, lattrs,
6761 				   grid_attr_trees->kernel_lattrs_group_decl,
6762 				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6763     }
6764 
6765   tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6766 			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6767   gcc_checking_assert (max_dim <= 2);
6768   tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6769   gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6770 		     GSI_SAME_STMT);
6771   TREE_ADDRESSABLE (lattrs) = 1;
6772   return build_fold_addr_expr (lattrs);
6773 }
6774 
6775 /* Build target argument identifier from the DEVICE identifier, value
6776    identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
6777 
6778 static tree
6779 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6780 {
6781   tree t = build_int_cst (integer_type_node, device);
6782   if (subsequent_param)
6783     t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6784 		     build_int_cst (integer_type_node,
6785 				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6786   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6787 		   build_int_cst (integer_type_node, id));
6788   return t;
6789 }
6790 
6791 /* Like above, but return it in a type that can be directly stored as an
6792    element of the argument array.  */
6793 
6794 static tree
6795 get_target_argument_identifier (int device, bool subsequent_param, int id)
6796 {
6797   tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
6798   return fold_convert (ptr_type_node, t);
6799 }
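
/* Informal note, not part of the original sources: the identifier built above
   is essentially

     device | (subsequent_param ? GOMP_TARGET_ARG_SUBSEQUENT_PARAM : 0) | id

   converted to ptr_type_node so it can be stored directly in the void *
   argument array assembled by get_target_arguments below.  */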
6800 
6801 /* Return a target argument consisting of DEVICE identifier, value identifier
6802    ID, and the actual VALUE.  */
6803 
6804 static tree
6805 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6806 			   tree value)
6807 {
6808   tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6809 			fold_convert (integer_type_node, value),
6810 			build_int_cst (unsigned_type_node,
6811 				       GOMP_TARGET_ARG_VALUE_SHIFT));
6812   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6813 		   get_target_argument_identifier_1 (device, false, id));
6814   t = fold_convert (ptr_type_node, t);
6815   return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6816 }
6817 
6818 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6819    push one argument to ARGS with the DEVICE, ID and VALUE embedded in it;
6820    otherwise push an identifier (with DEVICE and ID) and the VALUE as two
6821    separate arguments.  */
6822 
6823 static void
6824 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6825 					 int id, tree value, vec <tree> *args)
6826 {
6827   if (tree_fits_shwi_p (value)
6828       && tree_to_shwi (value) > -(1 << 15)
6829       && tree_to_shwi (value) < (1 << 15))
6830     args->quick_push (get_target_argument_value (gsi, device, id, value));
6831   else
6832     {
6833       args->quick_push (get_target_argument_identifier (device, true, id));
6834       value = fold_convert (ptr_type_node, value);
6835       value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6836 					GSI_SAME_STMT);
6837       args->quick_push (value);
6838     }
6839 }
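
/* Informal note, not part of the original sources: the +-2^15 bound exists
   because get_target_argument_value shifts VALUE left by
   GOMP_TARGET_ARG_VALUE_SHIFT before OR-ing in the identifier bits, so only
   values that survive that shift in an int can be embedded in a single
   element; everything else travels as a separate array entry.  */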
6840 
6841 /* Create an array of arguments that is then passed to GOMP_target.  */
6842 
6843 static tree
6844 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6845 {
6846   auto_vec <tree, 6> args;
6847   tree clauses = gimple_omp_target_clauses (tgt_stmt);
6848   tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6849   if (c)
6850     t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6851   else
6852     t = integer_minus_one_node;
6853   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6854 					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6855 
6856   c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6857   if (c)
6858     t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6859   else
6860     t = integer_minus_one_node;
6861   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6862 					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
6863 					   &args);
6864 
6865   /* Add HSA-specific grid sizes, if available.  */
6866   if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6867 		       OMP_CLAUSE__GRIDDIM_))
6868     {
6869       int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6870       t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6871       args.quick_push (t);
6872       args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6873     }
6874 
6875   /* Produce more, perhaps device specific, arguments here.  */
6876 
6877   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6878 							  args.length () + 1),
6879 				  ".omp_target_args");
6880   for (unsigned i = 0; i < args.length (); i++)
6881     {
6882       tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6883 			 build_int_cst (integer_type_node, i),
6884 			 NULL_TREE, NULL_TREE);
6885       gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6886 			 GSI_SAME_STMT);
6887     }
6888   tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6889 		     build_int_cst (integer_type_node, args.length ()),
6890 		     NULL_TREE, NULL_TREE);
6891   gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6892 		     GSI_SAME_STMT);
6893   TREE_ADDRESSABLE (argarray) = 1;
6894   return build_fold_addr_expr (argarray);
6895 }
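
/* Illustrative sketch, not part of the original sources: for a plain
   "#pragma omp target" without clauses the code above produces roughly

     .omp_target_args = { <num-teams id with value -1 embedded>,
                          <thread-limit id with value -1 embedded>,
                          NULL };

   with an HSA identifier and a pointer to the kernel launch attributes
   appended before the terminating NULL when _griddim_ clauses are present.  */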
6896 
6897 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
6898 
6899 static void
6900 expand_omp_target (struct omp_region *region)
6901 {
6902   basic_block entry_bb, exit_bb, new_bb;
6903   struct function *child_cfun;
6904   tree child_fn, block, t;
6905   gimple_stmt_iterator gsi;
6906   gomp_target *entry_stmt;
6907   gimple *stmt;
6908   edge e;
6909   bool offloaded, data_region;
6910 
6911   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6912   new_bb = region->entry;
6913 
6914   offloaded = is_gimple_omp_offloaded (entry_stmt);
6915   switch (gimple_omp_target_kind (entry_stmt))
6916     {
6917     case GF_OMP_TARGET_KIND_REGION:
6918     case GF_OMP_TARGET_KIND_UPDATE:
6919     case GF_OMP_TARGET_KIND_ENTER_DATA:
6920     case GF_OMP_TARGET_KIND_EXIT_DATA:
6921     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6922     case GF_OMP_TARGET_KIND_OACC_KERNELS:
6923     case GF_OMP_TARGET_KIND_OACC_UPDATE:
6924     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6925     case GF_OMP_TARGET_KIND_OACC_DECLARE:
6926       data_region = false;
6927       break;
6928     case GF_OMP_TARGET_KIND_DATA:
6929     case GF_OMP_TARGET_KIND_OACC_DATA:
6930     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6931       data_region = true;
6932       break;
6933     default:
6934       gcc_unreachable ();
6935     }
6936 
6937   child_fn = NULL_TREE;
6938   child_cfun = NULL;
6939   if (offloaded)
6940     {
6941       child_fn = gimple_omp_target_child_fn (entry_stmt);
6942       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6943     }
6944 
6945   /* Supported by expand_omp_taskreg, but not here.  */
6946   if (child_cfun != NULL)
6947     gcc_checking_assert (!child_cfun->cfg);
6948   gcc_checking_assert (!gimple_in_ssa_p (cfun));
6949 
6950   entry_bb = region->entry;
6951   exit_bb = region->exit;
6952 
6953   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6954     {
6955       mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6956 
6957       /* Further down, both OpenACC kernels and OpenACC parallel constructs
6958 	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6959 	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
6960       DECL_ATTRIBUTES (child_fn)
6961 	= tree_cons (get_identifier ("oacc kernels"),
6962 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
6963     }
6964 
6965   if (offloaded)
6966     {
6967       unsigned srcidx, dstidx, num;
6968 
6969       /* If the offloading region needs data sent from the parent
6970 	 function, then the very first statement (except possible
6971 	 tree profile counter updates) of the offloading body
6972 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
6973 	 &.OMP_DATA_O is passed as an argument to the child function,
6974 	 we need to replace it with the argument as seen by the child
6975 	 function.
6976 
6977 	 In most cases, this will end up being the identity assignment
6978 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
6979 	 a function call that has been inlined, the original PARM_DECL
6980 	 .OMP_DATA_I may have been converted into a different local
6981 	 variable.  In which case, we need to keep the assignment.  */
6982       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6983       if (data_arg)
6984 	{
6985 	  basic_block entry_succ_bb = single_succ (entry_bb);
6986 	  gimple_stmt_iterator gsi;
6987 	  tree arg;
6988 	  gimple *tgtcopy_stmt = NULL;
6989 	  tree sender = TREE_VEC_ELT (data_arg, 0);
6990 
6991 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6992 	    {
6993 	      gcc_assert (!gsi_end_p (gsi));
6994 	      stmt = gsi_stmt (gsi);
6995 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
6996 		continue;
6997 
6998 	      if (gimple_num_ops (stmt) == 2)
6999 		{
7000 		  tree arg = gimple_assign_rhs1 (stmt);
7001 
7002 		  /* We're ignoring the subcode because we're
7003 		     effectively doing a STRIP_NOPS.  */
7004 
7005 		  if (TREE_CODE (arg) == ADDR_EXPR
7006 		      && TREE_OPERAND (arg, 0) == sender)
7007 		    {
7008 		      tgtcopy_stmt = stmt;
7009 		      break;
7010 		    }
7011 		}
7012 	    }
7013 
7014 	  gcc_assert (tgtcopy_stmt != NULL);
7015 	  arg = DECL_ARGUMENTS (child_fn);
7016 
7017 	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7018 	  gsi_remove (&gsi, true);
7019 	}
7020 
7021       /* Declare local variables needed in CHILD_CFUN.  */
7022       block = DECL_INITIAL (child_fn);
7023       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7024       /* The gimplifier could record temporaries in the offloading block
7025 	 rather than in containing function's local_decls chain,
7026 	 which would mean cgraph missed finalizing them.  Do it now.  */
7027       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7028 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7029 	  varpool_node::finalize_decl (t);
7030       DECL_SAVED_TREE (child_fn) = NULL;
7031       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
7032       gimple_set_body (child_fn, NULL);
7033       TREE_USED (block) = 1;
7034 
7035       /* Reset DECL_CONTEXT on function arguments.  */
7036       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7037 	DECL_CONTEXT (t) = child_fn;
7038 
7039       /* Split ENTRY_BB at GIMPLE_*,
7040 	 so that it can be moved to the child function.  */
7041       gsi = gsi_last_nondebug_bb (entry_bb);
7042       stmt = gsi_stmt (gsi);
7043       gcc_assert (stmt
7044 		  && gimple_code (stmt) == gimple_code (entry_stmt));
7045       e = split_block (entry_bb, stmt);
7046       gsi_remove (&gsi, true);
7047       entry_bb = e->dest;
7048       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7049 
7050       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
7051       if (exit_bb)
7052 	{
7053 	  gsi = gsi_last_nondebug_bb (exit_bb);
7054 	  gcc_assert (!gsi_end_p (gsi)
7055 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7056 	  stmt = gimple_build_return (NULL);
7057 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7058 	  gsi_remove (&gsi, true);
7059 	}
7060 
7061       /* Move the offloading region into CHILD_CFUN.  */
7062 
7063       block = gimple_block (entry_stmt);
7064 
7065       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7066       if (exit_bb)
7067 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7068       /* When the OMP expansion process cannot guarantee an up-to-date
7069 	 loop tree, arrange for the child function to fix up loops.  */
7070       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7071 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7072 
7073       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
7074       num = vec_safe_length (child_cfun->local_decls);
7075       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7076 	{
7077 	  t = (*child_cfun->local_decls)[srcidx];
7078 	  if (DECL_CONTEXT (t) == cfun->decl)
7079 	    continue;
7080 	  if (srcidx != dstidx)
7081 	    (*child_cfun->local_decls)[dstidx] = t;
7082 	  dstidx++;
7083 	}
7084       if (dstidx != num)
7085 	vec_safe_truncate (child_cfun->local_decls, dstidx);
7086 
7087       /* Inform the callgraph about the new function.  */
7088       child_cfun->curr_properties = cfun->curr_properties;
7089       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7090       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7091       cgraph_node *node = cgraph_node::get_create (child_fn);
7092       node->parallelized_function = 1;
7093       cgraph_node::add_new_function (child_fn, true);
7094 
7095       /* Add the new function to the offload table.  */
7096       if (ENABLE_OFFLOADING)
7097 	{
7098 	  if (in_lto_p)
7099 	    DECL_PRESERVE_P (child_fn) = 1;
7100 	  vec_safe_push (offload_funcs, child_fn);
7101 	}
7102 
7103       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7104 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7105 
7106       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
7107 	 fixed in a following pass.  */
7108       push_cfun (child_cfun);
7109       if (need_asm)
7110 	assign_assembler_name_if_needed (child_fn);
7111       cgraph_edge::rebuild_edges ();
7112 
7113       /* Some EH regions might become dead, see PR34608.  If
7114 	 pass_cleanup_cfg isn't the first pass to happen with the
7115 	 new child, these dead EH edges might cause problems.
7116 	 Clean them up now.  */
7117       if (flag_exceptions)
7118 	{
7119 	  basic_block bb;
7120 	  bool changed = false;
7121 
7122 	  FOR_EACH_BB_FN (bb, cfun)
7123 	    changed |= gimple_purge_dead_eh_edges (bb);
7124 	  if (changed)
7125 	    cleanup_tree_cfg ();
7126 	}
7127       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7128 	verify_loop_structure ();
7129       pop_cfun ();
7130 
7131       if (dump_file && !gimple_in_ssa_p (cfun))
7132 	{
7133 	  omp_any_child_fn_dumped = true;
7134 	  dump_function_header (dump_file, child_fn, dump_flags);
7135 	  dump_function_to_file (child_fn, dump_file, dump_flags);
7136 	}
7137 
7138       adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7139     }
7140 
7141   /* Emit a library call to launch the offloading region, or do data
7142      transfers.  */
7143   tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7144   enum built_in_function start_ix;
7145   location_t clause_loc;
7146   unsigned int flags_i = 0;
7147 
7148   switch (gimple_omp_target_kind (entry_stmt))
7149     {
7150     case GF_OMP_TARGET_KIND_REGION:
7151       start_ix = BUILT_IN_GOMP_TARGET;
7152       break;
7153     case GF_OMP_TARGET_KIND_DATA:
7154       start_ix = BUILT_IN_GOMP_TARGET_DATA;
7155       break;
7156     case GF_OMP_TARGET_KIND_UPDATE:
7157       start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7158       break;
7159     case GF_OMP_TARGET_KIND_ENTER_DATA:
7160       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7161       break;
7162     case GF_OMP_TARGET_KIND_EXIT_DATA:
7163       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7164       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7165       break;
7166     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7167     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7168       start_ix = BUILT_IN_GOACC_PARALLEL;
7169       break;
7170     case GF_OMP_TARGET_KIND_OACC_DATA:
7171     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7172       start_ix = BUILT_IN_GOACC_DATA_START;
7173       break;
7174     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7175       start_ix = BUILT_IN_GOACC_UPDATE;
7176       break;
7177     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7178       start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7179       break;
7180     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7181       start_ix = BUILT_IN_GOACC_DECLARE;
7182       break;
7183     default:
7184       gcc_unreachable ();
7185     }
7186 
7187   clauses = gimple_omp_target_clauses (entry_stmt);
7188 
7189   /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7190      library choose) and there is no conditional.  */
7191   cond = NULL_TREE;
7192   device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7193 
7194   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7195   if (c)
7196     cond = OMP_CLAUSE_IF_EXPR (c);
7197 
7198   c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7199   if (c)
7200     {
7201       /* Even if we pass it to all library function calls, it is currently only
7202 	 defined/used for the OpenMP target ones.  */
7203       gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7204 			   || start_ix == BUILT_IN_GOMP_TARGET_DATA
7205 			   || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7206 			   || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7207 
7208       device = OMP_CLAUSE_DEVICE_ID (c);
7209       clause_loc = OMP_CLAUSE_LOCATION (c);
7210     }
7211   else
7212     clause_loc = gimple_location (entry_stmt);
7213 
7214   c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7215   if (c)
7216     flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7217 
7218   /* Ensure 'device' is of the correct type.  */
7219   device = fold_convert_loc (clause_loc, integer_type_node, device);
7220 
7221   /* If we found the clause 'if (cond)', build
7222      (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
7223   if (cond)
7224     {
7225       cond = gimple_boolify (cond);
7226 
7227       basic_block cond_bb, then_bb, else_bb;
7228       edge e;
7229       tree tmp_var;
7230 
7231       tmp_var = create_tmp_var (TREE_TYPE (device));
7232       if (offloaded)
7233 	e = split_block_after_labels (new_bb);
7234       else
7235 	{
7236 	  gsi = gsi_last_nondebug_bb (new_bb);
7237 	  gsi_prev (&gsi);
7238 	  e = split_block (new_bb, gsi_stmt (gsi));
7239 	}
7240       cond_bb = e->src;
7241       new_bb = e->dest;
7242       remove_edge (e);
7243 
7244       then_bb = create_empty_bb (cond_bb);
7245       else_bb = create_empty_bb (then_bb);
7246       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7247       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7248 
7249       stmt = gimple_build_cond_empty (cond);
7250       gsi = gsi_last_bb (cond_bb);
7251       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7252 
7253       gsi = gsi_start_bb (then_bb);
7254       stmt = gimple_build_assign (tmp_var, device);
7255       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7256 
7257       gsi = gsi_start_bb (else_bb);
7258       stmt = gimple_build_assign (tmp_var,
7259 				  build_int_cst (integer_type_node,
7260 						 GOMP_DEVICE_HOST_FALLBACK));
7261       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7262 
7263       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7264       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7265       add_bb_to_loop (then_bb, cond_bb->loop_father);
7266       add_bb_to_loop (else_bb, cond_bb->loop_father);
7267       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7268       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7269 
7270       device = tmp_var;
7271       gsi = gsi_last_nondebug_bb (new_bb);
7272     }
7273   else
7274     {
7275       gsi = gsi_last_nondebug_bb (new_bb);
7276       device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7277 					 true, GSI_SAME_STMT);
7278     }
7279 
7280   t = gimple_omp_target_data_arg (entry_stmt);
7281   if (t == NULL)
7282     {
7283       t1 = size_zero_node;
7284       t2 = build_zero_cst (ptr_type_node);
7285       t3 = t2;
7286       t4 = t2;
7287     }
7288   else
7289     {
7290       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7291       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7292       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7293       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7294       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7295     }
7296 
7297   gimple *g;
7298   bool tagging = false;
7299   /* The maximum argument count used by any start_ix, without varargs.  */
7300   auto_vec<tree, 11> args;
7301   args.quick_push (device);
7302   if (offloaded)
7303     args.quick_push (build_fold_addr_expr (child_fn));
7304   args.quick_push (t1);
7305   args.quick_push (t2);
7306   args.quick_push (t3);
7307   args.quick_push (t4);
7308   switch (start_ix)
7309     {
7310     case BUILT_IN_GOACC_DATA_START:
7311     case BUILT_IN_GOACC_DECLARE:
7312     case BUILT_IN_GOMP_TARGET_DATA:
7313       break;
7314     case BUILT_IN_GOMP_TARGET:
7315     case BUILT_IN_GOMP_TARGET_UPDATE:
7316     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7317       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7318       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7319       if (c)
7320 	depend = OMP_CLAUSE_DECL (c);
7321       else
7322 	depend = build_int_cst (ptr_type_node, 0);
7323       args.quick_push (depend);
7324       if (start_ix == BUILT_IN_GOMP_TARGET)
7325 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
7326       break;
7327     case BUILT_IN_GOACC_PARALLEL:
7328       oacc_set_fn_attrib (child_fn, clauses, &args);
7329       tagging = true;
7330       /* FALLTHRU */
7331     case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7332     case BUILT_IN_GOACC_UPDATE:
7333       {
7334 	tree t_async = NULL_TREE;
7335 
7336 	/* If present, use the value specified by the respective
7337 	   clause, making sure that is of the correct type.  */
7338 	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7339 	if (c)
7340 	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7341 				      integer_type_node,
7342 				      OMP_CLAUSE_ASYNC_EXPR (c));
7343 	else if (!tagging)
7344 	  /* Default values for t_async.  */
7345 	  t_async = fold_convert_loc (gimple_location (entry_stmt),
7346 				      integer_type_node,
7347 				      build_int_cst (integer_type_node,
7348 						     GOMP_ASYNC_SYNC));
7349 	if (tagging && t_async)
7350 	  {
7351 	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7352 
7353 	    if (TREE_CODE (t_async) == INTEGER_CST)
7354 	      {
7355 		/* See if we can pack the async arg into the tag's
7356 		   operand.  */
7357 		i_async = TREE_INT_CST_LOW (t_async);
7358 		if (i_async < GOMP_LAUNCH_OP_MAX)
7359 		  t_async = NULL_TREE;
7360 		else
7361 		  i_async = GOMP_LAUNCH_OP_MAX;
7362 	      }
7363 	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7364 					      i_async));
7365 	  }
7366 	if (t_async)
7367 	  args.safe_push (t_async);
7368 
7369 	/* Save the argument index, and ... */
7370 	unsigned t_wait_idx = args.length ();
7371 	unsigned num_waits = 0;
7372 	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7373 	if (!tagging || c)
7374 	  /* ... push a placeholder.  */
7375 	  args.safe_push (integer_zero_node);
7376 
7377 	for (; c; c = OMP_CLAUSE_CHAIN (c))
7378 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7379 	    {
7380 	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7381 						integer_type_node,
7382 						OMP_CLAUSE_WAIT_EXPR (c)));
7383 	      num_waits++;
7384 	    }
7385 
7386 	if (!tagging || num_waits)
7387 	  {
7388 	    tree len;
7389 
7390 	    /* Now that we know the number, update the placeholder.  */
7391 	    if (tagging)
7392 	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7393 	    else
7394 	      len = build_int_cst (integer_type_node, num_waits);
7395 	    len = fold_convert_loc (gimple_location (entry_stmt),
7396 				    unsigned_type_node, len);
7397 	    args[t_wait_idx] = len;
7398 	  }
7399       }
7400       break;
7401     default:
7402       gcc_unreachable ();
7403     }
7404   if (tagging)
7405     /*  Push terminal marker - zero.  */
7406     args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7407 
7408   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7409   gimple_set_location (g, gimple_location (entry_stmt));
7410   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7411   if (!offloaded)
7412     {
7413       g = gsi_stmt (gsi);
7414       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7415       gsi_remove (&gsi, true);
7416     }
7417   if (data_region && region->exit)
7418     {
7419       gsi = gsi_last_nondebug_bb (region->exit);
7420       g = gsi_stmt (gsi);
7421       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7422       gsi_remove (&gsi, true);
7423     }
7424 }
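
/* Illustrative sketch (an assumption about the typical outcome, not taken
   from this file): for a plain offloaded "#pragma omp target" region the
   call emitted above looks roughly like

     GOMP_target_ext (device, child_fn, map_count, &.omp_data_arr,
                      &.omp_data_sizes, &.omp_data_kinds, flags, depend,
                      &.omp_target_args);

   while the OpenACC kinds go through GOACC_parallel_keyed, GOACC_data_start
   and related entry points instead.  */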
7425 
7426 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7427    the iteration variable derived from the thread number.  INTRA_GROUP means
7428    this is an expansion of a loop iterating over work-items within a separate
7429    iteration over groups.  */
7430 
7431 static void
7432 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7433 {
7434   gimple_stmt_iterator gsi;
7435   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7436   gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7437 		       == GF_OMP_FOR_KIND_GRID_LOOP);
7438   size_t collapse = gimple_omp_for_collapse (for_stmt);
7439   struct omp_for_data_loop *loops
7440     = XALLOCAVEC (struct omp_for_data_loop,
7441 		  gimple_omp_for_collapse (for_stmt));
7442   struct omp_for_data fd;
7443 
7444   remove_edge (BRANCH_EDGE (kfor->entry));
7445   basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7446 
7447   gcc_assert (kfor->cont);
7448   omp_extract_for_data (for_stmt, &fd, loops);
7449 
7450   gsi = gsi_start_bb (body_bb);
7451 
7452   for (size_t dim = 0; dim < collapse; dim++)
7453     {
7454       tree type, itype;
7455       itype = type = TREE_TYPE (fd.loops[dim].v);
7456       if (POINTER_TYPE_P (type))
7457 	itype = signed_type_for (type);
7458 
7459       tree n1 = fd.loops[dim].n1;
7460       tree step = fd.loops[dim].step;
7461       n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7462 				     true, NULL_TREE, true, GSI_SAME_STMT);
7463       step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7464 				       true, NULL_TREE, true, GSI_SAME_STMT);
7465       tree threadid;
7466       if (gimple_omp_for_grid_group_iter (for_stmt))
7467 	{
7468 	  gcc_checking_assert (!intra_group);
7469 	  threadid = build_call_expr (builtin_decl_explicit
7470 				      (BUILT_IN_HSA_WORKGROUPID), 1,
7471 				      build_int_cstu (unsigned_type_node, dim));
7472 	}
7473       else if (intra_group)
7474 	threadid = build_call_expr (builtin_decl_explicit
7475 				    (BUILT_IN_HSA_WORKITEMID), 1,
7476 				    build_int_cstu (unsigned_type_node, dim));
7477       else
7478 	threadid = build_call_expr (builtin_decl_explicit
7479 				    (BUILT_IN_HSA_WORKITEMABSID), 1,
7480 				    build_int_cstu (unsigned_type_node, dim));
7481       threadid = fold_convert (itype, threadid);
7482       threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7483 					   true, GSI_SAME_STMT);
7484 
7485       tree startvar = fd.loops[dim].v;
7486       tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7487       if (POINTER_TYPE_P (type))
7488 	t = fold_build_pointer_plus (n1, t);
7489       else
7490 	t = fold_build2 (PLUS_EXPR, type, t, n1);
7491       t = fold_convert (type, t);
7492       t = force_gimple_operand_gsi (&gsi, t,
7493 				    DECL_P (startvar)
7494 				    && TREE_ADDRESSABLE (startvar),
7495 				    NULL_TREE, true, GSI_SAME_STMT);
7496       gassign *assign_stmt = gimple_build_assign (startvar, t);
7497       gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7498     }
7499   /* Remove the omp for statement.  */
7500   gsi = gsi_last_nondebug_bb (kfor->entry);
7501   gsi_remove (&gsi, true);
7502 
7503   /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7504   gsi = gsi_last_nondebug_bb (kfor->cont);
7505   gcc_assert (!gsi_end_p (gsi)
7506 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7507   gsi_remove (&gsi, true);
7508 
7509   /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
7510   gsi = gsi_last_nondebug_bb (kfor->exit);
7511   gcc_assert (!gsi_end_p (gsi)
7512 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7513   if (intra_group)
7514     gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7515   gsi_remove (&gsi, true);
7516 
7517   /* Fixup the much simpler CFG.  */
7518   remove_edge (find_edge (kfor->cont, body_bb));
7519 
7520   if (kfor->cont != body_bb)
7521     set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7522   set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7523 }
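
/* Illustrative note, not part of the original sources: after the rewrite
   above a gridified loop such as

     for (i = n1; i cond n2; i += step) body;

   no longer iterates; each HSA work-item (or work-group, for the group
   iteration) computes

     i = n1 + thread_or_group_id (dim) * step;

   once and then executes the body, the bounds having been encoded in the
   launch grid instead.  */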
7524 
7525 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7526    argument_decls.  */
7527 
7528 struct grid_arg_decl_map
7529 {
7530   tree old_arg;
7531   tree new_arg;
7532 };
7533 
7534 /* Invoked through walk_gimple_op; remaps all PARM_DECLs to the ones
7535    pertaining to the kernel function.  */
7536 
7537 static tree
7538 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7539 {
7540   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7541   struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7542   tree t = *tp;
7543 
7544   if (t == adm->old_arg)
7545     *tp = adm->new_arg;
7546   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7547   return NULL_TREE;
7548 }
7549 
7550 /* If the TARGET region contains a kernel body for-loop, remove its region
7551    from the TARGET and expand it in HSA gridified kernel fashion.  */
7552 
7553 static void
7554 grid_expand_target_grid_body (struct omp_region *target)
7555 {
7556   if (!hsa_gen_requested_p ())
7557     return;
7558 
7559   gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7560   struct omp_region **pp;
7561 
7562   for (pp = &target->inner; *pp; pp = &(*pp)->next)
7563     if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7564       break;
7565 
7566   struct omp_region *gpukernel = *pp;
7567 
7568   tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7569   if (!gpukernel)
7570     {
7571       /* HSA cannot handle OACC stuff.  */
7572       if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7573 	return;
7574       gcc_checking_assert (orig_child_fndecl);
7575       gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7576 				    OMP_CLAUSE__GRIDDIM_));
7577       cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7578 
7579       hsa_register_kernel (n);
7580       return;
7581     }
7582 
7583   gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7584 			       OMP_CLAUSE__GRIDDIM_));
7585   tree inside_block
7586     = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7587   *pp = gpukernel->next;
7588   for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7589     if ((*pp)->type == GIMPLE_OMP_FOR)
7590       break;
7591 
7592   struct omp_region *kfor = *pp;
7593   gcc_assert (kfor);
7594   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7595   gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7596   *pp = kfor->next;
7597   if (kfor->inner)
7598     {
7599       if (gimple_omp_for_grid_group_iter (for_stmt))
7600 	{
7601 	  struct omp_region **next_pp;
7602 	  for (pp = &kfor->inner; *pp; pp = next_pp)
7603 	    {
7604 	      next_pp = &(*pp)->next;
7605 	      if ((*pp)->type != GIMPLE_OMP_FOR)
7606 		continue;
7607 	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7608 	      gcc_assert (gimple_omp_for_kind (inner)
7609 			  == GF_OMP_FOR_KIND_GRID_LOOP);
7610 	      grid_expand_omp_for_loop (*pp, true);
7611 	      *pp = (*pp)->next;
7612 	      next_pp = pp;
7613 	    }
7614 	}
7615       expand_omp (kfor->inner);
7616     }
7617   if (gpukernel->inner)
7618     expand_omp (gpukernel->inner);
7619 
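  /* Create the kernel function as a copy of the original target child
     function: give it a name derived from the original with a "kernel"
     suffix and an initial BLOCK that inherits the source locations of the
     target construct's block.  */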
7620   tree kern_fndecl = copy_node (orig_child_fndecl);
7621   DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7622   SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7623   tree tgtblock = gimple_block (tgt_stmt);
7624   tree fniniblock = make_node (BLOCK);
7625   BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7626   BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7627   BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7628   BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7629   DECL_INITIAL (kern_fndecl) = fniniblock;
7630   push_struct_function (kern_fndecl);
7631   cfun->function_end_locus = gimple_location (tgt_stmt);
7632   init_tree_ssa (cfun);
7633   pop_cfun ();
7634 
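  /* Give the kernel private copies of its single PARM_DECL and of its (void)
     RESULT_DECL so that nothing is shared with the original child
     function.  */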
7635   tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7636   gcc_assert (!DECL_CHAIN (old_parm_decl));
7637   tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7638   DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7639   DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7640   gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7641   DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7642   DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7643   struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7644   kern_cfun->curr_properties = cfun->curr_properties;
7645 
7646   grid_expand_omp_for_loop (kfor, false);
7647 
7648   /* Remove the omp for statement.  */
7649   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7650   gsi_remove (&gsi, true);
7651   /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7652      return.  */
7653   gsi = gsi_last_nondebug_bb (gpukernel->exit);
7654   gcc_assert (!gsi_end_p (gsi)
7655 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7656   gimple *ret_stmt = gimple_build_return (NULL);
7657   gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7658   gsi_remove (&gsi, true);
7659 
7660   /* Statements in the first BB of the target construct have been produced by
7661      target lowering and must be copied into the GPUKERNEL, with two
7662      exceptions: the first OMP statement and the OMP_DATA assignment
7663      statement.  */
7664   gsi = gsi_start_bb (single_succ (gpukernel->entry));
7665   tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7666   tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7667   for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7668        !gsi_end_p (tsi); gsi_next (&tsi))
7669     {
7670       gimple *stmt = gsi_stmt (tsi);
7671       if (is_gimple_omp (stmt))
7672 	break;
7673       if (sender
7674 	  && is_gimple_assign (stmt)
7675 	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7676 	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7677 	continue;
7678       gimple *copy = gimple_copy (stmt);
7679       gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7680       gimple_set_block (copy, fniniblock);
7681     }
7682 
7683   move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7684 			  gpukernel->exit, inside_block);
7685 
7686   cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7687   kcn->mark_force_output ();
7688   cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7689 
7690   hsa_register_kernel (kcn, orig_child);
7691 
7692   cgraph_node::add_new_function (kern_fndecl, true);
7693   push_cfun (kern_cfun);
7694   cgraph_edge::rebuild_edges ();
7695 
7696   /* Re-map any mention of the PARM_DECL of the original function to the
7697      PARM_DECL of the new one.
7698 
7699      TODO: It would be great if lowering produced references into the GPU
7700      kernel decl straight away and we did not have to do this.  */
7701   struct grid_arg_decl_map adm;
7702   adm.old_arg = old_parm_decl;
7703   adm.new_arg = new_parm_decl;
7704   basic_block bb;
7705   FOR_EACH_BB_FN (bb, kern_cfun)
7706     {
7707       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7708 	{
7709 	  gimple *stmt = gsi_stmt (gsi);
7710 	  struct walk_stmt_info wi;
7711 	  memset (&wi, 0, sizeof (wi));
7712 	  wi.info = &adm;
7713 	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7714 	}
7715     }
7716   pop_cfun ();
7717 
7718   return;
7719 }
7720 
7721 /* Expand the parallel region tree rooted at REGION.  Expansion
7722    proceeds in depth-first order.  Innermost regions are expanded
7723    first.  This way, parallel regions that require a new function to
7724    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7725    internal dependencies in their body.  */
7726 
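/* For instance, given

	#pragma omp parallel
	  #pragma omp for
	    ...

   the inner GIMPLE_OMP_FOR region is expanded before the enclosing
   GIMPLE_OMP_PARALLEL region, so by the time the parallel body is outlined
   into its child function it contains no unexpanded OMP directives.  */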
7727 static void
7728 expand_omp (struct omp_region *region)
7729 {
7730   omp_any_child_fn_dumped = false;
7731   while (region)
7732     {
7733       location_t saved_location;
7734       gimple *inner_stmt = NULL;
7735 
7736       /* First, determine whether this is a combined parallel+workshare
7737 	 region.  */
7738       if (region->type == GIMPLE_OMP_PARALLEL)
7739 	determine_parallel_type (region);
7740       else if (region->type == GIMPLE_OMP_TARGET)
7741 	grid_expand_target_grid_body (region);
7742 
7743       if (region->type == GIMPLE_OMP_FOR
7744 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
7745 	inner_stmt = last_stmt (region->inner->entry);
7746 
7747       if (region->inner)
7748 	expand_omp (region->inner);
7749 
7750       saved_location = input_location;
7751       if (gimple_has_location (last_stmt (region->entry)))
7752 	input_location = gimple_location (last_stmt (region->entry));
7753 
7754       switch (region->type)
7755 	{
7756 	case GIMPLE_OMP_PARALLEL:
7757 	case GIMPLE_OMP_TASK:
7758 	  expand_omp_taskreg (region);
7759 	  break;
7760 
7761 	case GIMPLE_OMP_FOR:
7762 	  expand_omp_for (region, inner_stmt);
7763 	  break;
7764 
7765 	case GIMPLE_OMP_SECTIONS:
7766 	  expand_omp_sections (region);
7767 	  break;
7768 
7769 	case GIMPLE_OMP_SECTION:
7770 	  /* Individual omp sections are handled together with their
7771 	     parent GIMPLE_OMP_SECTIONS region.  */
7772 	  break;
7773 
7774 	case GIMPLE_OMP_SINGLE:
7775 	  expand_omp_single (region);
7776 	  break;
7777 
7778 	case GIMPLE_OMP_ORDERED:
7779 	  {
7780 	    gomp_ordered *ord_stmt
7781 	      = as_a <gomp_ordered *> (last_stmt (region->entry));
7782 	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7783 				 OMP_CLAUSE_DEPEND))
7784 	      {
7785 		/* We'll expand these when expanding the corresponding
7786 		   worksharing region with an ordered(n) clause.  */
7787 		gcc_assert (region->outer
7788 			    && region->outer->type == GIMPLE_OMP_FOR);
7789 		region->ord_stmt = ord_stmt;
7790 		break;
7791 	      }
7792 	  }
7793 	  /* FALLTHRU */
7794 	case GIMPLE_OMP_MASTER:
7795 	case GIMPLE_OMP_TASKGROUP:
7796 	case GIMPLE_OMP_CRITICAL:
7797 	case GIMPLE_OMP_TEAMS:
7798 	  expand_omp_synch (region);
7799 	  break;
7800 
7801 	case GIMPLE_OMP_ATOMIC_LOAD:
7802 	  expand_omp_atomic (region);
7803 	  break;
7804 
7805 	case GIMPLE_OMP_TARGET:
7806 	  expand_omp_target (region);
7807 	  break;
7808 
7809 	default:
7810 	  gcc_unreachable ();
7811 	}
7812 
7813       input_location = saved_location;
7814       region = region->next;
7815     }
7816   if (omp_any_child_fn_dumped)
7817     {
7818       if (dump_file)
7819 	dump_function_header (dump_file, current_function_decl, dump_flags);
7820       omp_any_child_fn_dumped = false;
7821     }
7822 }
7823 
7824 /* Helper for build_omp_regions.  Scan the dominator tree starting at
7825    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
7826    true, the function ends once a single tree is built (otherwise, a whole
7827    forest of OMP constructs may be built).  */
7828 
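/* As an example, a parallel construct containing a worksharing loop gives
   rise to a GIMPLE_OMP_PARALLEL region whose only child is a GIMPLE_OMP_FOR
   region; each GIMPLE_OMP_RETURN encountered while walking the dominator
   tree closes the innermost open region, as handled below.  */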
7829 static void
7830 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7831 		     bool single_tree)
7832 {
7833   gimple_stmt_iterator gsi;
7834   gimple *stmt;
7835   basic_block son;
7836 
7837   gsi = gsi_last_nondebug_bb (bb);
7838   if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7839     {
7840       struct omp_region *region;
7841       enum gimple_code code;
7842 
7843       stmt = gsi_stmt (gsi);
7844       code = gimple_code (stmt);
7845       if (code == GIMPLE_OMP_RETURN)
7846 	{
7847 	  /* STMT is the return point out of region PARENT.  Mark it
7848 	     as the exit point and make PARENT the immediately
7849 	     enclosing region.  */
7850 	  gcc_assert (parent);
7851 	  region = parent;
7852 	  region->exit = bb;
7853 	  parent = parent->outer;
7854 	}
7855       else if (code == GIMPLE_OMP_ATOMIC_STORE)
7856 	{
7857 	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7858 	     GIMPLE_OMP_RETURN, but matches with
7859 	     GIMPLE_OMP_ATOMIC_LOAD.  */
7860 	  gcc_assert (parent);
7861 	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7862 	  region = parent;
7863 	  region->exit = bb;
7864 	  parent = parent->outer;
7865 	}
7866       else if (code == GIMPLE_OMP_CONTINUE)
7867 	{
7868 	  gcc_assert (parent);
7869 	  parent->cont = bb;
7870 	}
7871       else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7872 	{
7873 	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7874 	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
7875 	}
7876       else
7877 	{
7878 	  region = new_omp_region (bb, code, parent);
7879 	  /* Otherwise...  */
7880 	  if (code == GIMPLE_OMP_TARGET)
7881 	    {
7882 	      switch (gimple_omp_target_kind (stmt))
7883 		{
7884 		case GF_OMP_TARGET_KIND_REGION:
7885 		case GF_OMP_TARGET_KIND_DATA:
7886 		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7887 		case GF_OMP_TARGET_KIND_OACC_KERNELS:
7888 		case GF_OMP_TARGET_KIND_OACC_DATA:
7889 		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7890 		  break;
7891 		case GF_OMP_TARGET_KIND_UPDATE:
7892 		case GF_OMP_TARGET_KIND_ENTER_DATA:
7893 		case GF_OMP_TARGET_KIND_EXIT_DATA:
7894 		case GF_OMP_TARGET_KIND_OACC_UPDATE:
7895 		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7896 		case GF_OMP_TARGET_KIND_OACC_DECLARE:
7897 		  /* ..., other than for those stand-alone directives...  */
7898 		  region = NULL;
7899 		  break;
7900 		default:
7901 		  gcc_unreachable ();
7902 		}
7903 	    }
7904 	  else if (code == GIMPLE_OMP_ORDERED
7905 		   && omp_find_clause (gimple_omp_ordered_clauses
7906 					 (as_a <gomp_ordered *> (stmt)),
7907 				       OMP_CLAUSE_DEPEND))
7908 	    /* #pragma omp ordered depend is also just a stand-alone
7909 	       directive.  */
7910 	    region = NULL;
7911 	  /* ..., this directive becomes the parent for a new region.  */
7912 	  if (region)
7913 	    parent = region;
7914 	}
7915     }
7916 
7917   if (single_tree && !parent)
7918     return;
7919 
7920   for (son = first_dom_son (CDI_DOMINATORS, bb);
7921        son;
7922        son = next_dom_son (CDI_DOMINATORS, son))
7923     build_omp_regions_1 (son, parent, single_tree);
7924 }
7925 
7926 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7927    root_omp_region.  */
7928 
7929 static void
7930 build_omp_regions_root (basic_block root)
7931 {
7932   gcc_assert (root_omp_region == NULL);
7933   build_omp_regions_1 (root, NULL, true);
7934   gcc_assert (root_omp_region != NULL);
7935 }
7936 
7937 /* Expands the OMP construct (and its subconstructs) starting in HEAD.  */
7938 
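/* This entry point is used when OMP regions are created outside the normal
   OMP lowering pipeline, e.g. by the loop auto-parallelization pass, and
   therefore need to be expanded on their own.  */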
7939 void
7940 omp_expand_local (basic_block head)
7941 {
7942   build_omp_regions_root (head);
7943   if (dump_file && (dump_flags & TDF_DETAILS))
7944     {
7945       fprintf (dump_file, "\nOMP region tree\n\n");
7946       dump_omp_region (dump_file, root_omp_region, 0);
7947       fprintf (dump_file, "\n");
7948     }
7949 
7950   remove_exit_barriers (root_omp_region);
7951   expand_omp (root_omp_region);
7952 
7953   omp_free_regions ();
7954 }
7955 
7956 /* Scan the CFG and build the forest of OMP regions, storing it in
7957    root_omp_region.  */
7958 
7959 static void
7960 build_omp_regions (void)
7961 {
7962   gcc_assert (root_omp_region == NULL);
7963   calculate_dominance_info (CDI_DOMINATORS);
7964   build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7965 }
7966 
7967 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
7968 
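/* Illustrative sketch only (the child-function and data-record names below
   are made up): after this pass a construct such as

	#pragma omp parallel
	  body;

   has had its body outlined into a child function and the directive itself
   replaced by a call into libgomp, roughly

	__builtin_GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 0, 0);  */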
7969 static unsigned int
7970 execute_expand_omp (void)
7971 {
7972   build_omp_regions ();
7973 
7974   if (!root_omp_region)
7975     return 0;
7976 
7977   if (dump_file)
7978     {
7979       fprintf (dump_file, "\nOMP region tree\n\n");
7980       dump_omp_region (dump_file, root_omp_region, 0);
7981       fprintf (dump_file, "\n");
7982     }
7983 
7984   remove_exit_barriers (root_omp_region);
7985 
7986   expand_omp (root_omp_region);
7987 
7988   if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7989     verify_loop_structure ();
7990   cleanup_tree_cfg ();
7991 
7992   omp_free_regions ();
7993 
7994   return 0;
7995 }
7996 
7997 /* OMP expansion -- the default pass, run before creation of SSA form.  */
7998 
7999 namespace {
8000 
8001 const pass_data pass_data_expand_omp =
8002 {
8003   GIMPLE_PASS, /* type */
8004   "ompexp", /* name */
8005   OPTGROUP_OMP, /* optinfo_flags */
8006   TV_NONE, /* tv_id */
8007   PROP_gimple_any, /* properties_required */
8008   PROP_gimple_eomp, /* properties_provided */
8009   0, /* properties_destroyed */
8010   0, /* todo_flags_start */
8011   0, /* todo_flags_finish */
8012 };
8013 
8014 class pass_expand_omp : public gimple_opt_pass
8015 {
8016 public:
8017   pass_expand_omp (gcc::context *ctxt)
8018     : gimple_opt_pass (pass_data_expand_omp, ctxt)
8019   {}
8020 
8021   /* opt_pass methods: */
8022   virtual unsigned int execute (function *)
8023     {
8024       bool gate = ((flag_openacc != 0 || flag_openmp != 0
8025 		    || flag_openmp_simd != 0)
8026 		   && !seen_error ());
8027 
8028       /* This pass always runs, to provide PROP_gimple_eomp.
8029 	 But often, there is nothing to do.  */
8030       if (!gate)
8031 	return 0;
8032 
8033       return execute_expand_omp ();
8034     }
8035 
8036 }; // class pass_expand_omp
8037 
8038 } // anon namespace
8039 
8040 gimple_opt_pass *
8041 make_pass_expand_omp (gcc::context *ctxt)
8042 {
8043   return new pass_expand_omp (ctxt);
8044 }
8045 
8046 namespace {
8047 
8048 const pass_data pass_data_expand_omp_ssa =
8049 {
8050   GIMPLE_PASS, /* type */
8051   "ompexpssa", /* name */
8052   OPTGROUP_OMP, /* optinfo_flags */
8053   TV_NONE, /* tv_id */
8054   PROP_cfg | PROP_ssa, /* properties_required */
8055   PROP_gimple_eomp, /* properties_provided */
8056   0, /* properties_destroyed */
8057   0, /* todo_flags_start */
8058   TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8059 };
8060 
8061 class pass_expand_omp_ssa : public gimple_opt_pass
8062 {
8063 public:
8064   pass_expand_omp_ssa (gcc::context *ctxt)
8065     : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8066   {}
8067 
8068   /* opt_pass methods: */
8069   virtual bool gate (function *fun)
8070     {
8071       return !(fun->curr_properties & PROP_gimple_eomp);
8072     }
8073   virtual unsigned int execute (function *) { return execute_expand_omp (); }
8074   opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8075 
8076 }; // class pass_expand_omp_ssa
8077 
8078 } // anon namespace
8079 
8080 gimple_opt_pass *
8081 make_pass_expand_omp_ssa (gcc::context *ctxt)
8082 {
8083   return new pass_expand_omp_ssa (ctxt);
8084 }
8085 
8086 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8087    GIMPLE_* codes.  */
8088 
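/* For example, for a GIMPLE_OMP_FOR region the code below adds, besides the
   ordinary fallthru edges, an abnormal loopback edge from the
   GIMPLE_OMP_CONTINUE block back to the start of the loop body and an
   abnormal edge from the GIMPLE_OMP_FOR block past the body, covering the
   case where the body is not executed at all.  */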
8089 bool
8090 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8091 		       int *region_idx)
8092 {
8093   gimple *last = last_stmt (bb);
8094   enum gimple_code code = gimple_code (last);
8095   struct omp_region *cur_region = *region;
8096   bool fallthru = false;
8097 
8098   switch (code)
8099     {
8100     case GIMPLE_OMP_PARALLEL:
8101     case GIMPLE_OMP_TASK:
8102     case GIMPLE_OMP_FOR:
8103     case GIMPLE_OMP_SINGLE:
8104     case GIMPLE_OMP_TEAMS:
8105     case GIMPLE_OMP_MASTER:
8106     case GIMPLE_OMP_TASKGROUP:
8107     case GIMPLE_OMP_CRITICAL:
8108     case GIMPLE_OMP_SECTION:
8109     case GIMPLE_OMP_GRID_BODY:
8110       cur_region = new_omp_region (bb, code, cur_region);
8111       fallthru = true;
8112       break;
8113 
8114     case GIMPLE_OMP_ORDERED:
8115       cur_region = new_omp_region (bb, code, cur_region);
8116       fallthru = true;
8117       if (omp_find_clause (gimple_omp_ordered_clauses
8118 			     (as_a <gomp_ordered *> (last)),
8119 			   OMP_CLAUSE_DEPEND))
8120 	cur_region = cur_region->outer;
8121       break;
8122 
8123     case GIMPLE_OMP_TARGET:
8124       cur_region = new_omp_region (bb, code, cur_region);
8125       fallthru = true;
8126       switch (gimple_omp_target_kind (last))
8127 	{
8128 	case GF_OMP_TARGET_KIND_REGION:
8129 	case GF_OMP_TARGET_KIND_DATA:
8130 	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8131 	case GF_OMP_TARGET_KIND_OACC_KERNELS:
8132 	case GF_OMP_TARGET_KIND_OACC_DATA:
8133 	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8134 	  break;
8135 	case GF_OMP_TARGET_KIND_UPDATE:
8136 	case GF_OMP_TARGET_KIND_ENTER_DATA:
8137 	case GF_OMP_TARGET_KIND_EXIT_DATA:
8138 	case GF_OMP_TARGET_KIND_OACC_UPDATE:
8139 	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8140 	case GF_OMP_TARGET_KIND_OACC_DECLARE:
8141 	  cur_region = cur_region->outer;
8142 	  break;
8143 	default:
8144 	  gcc_unreachable ();
8145 	}
8146       break;
8147 
8148     case GIMPLE_OMP_SECTIONS:
8149       cur_region = new_omp_region (bb, code, cur_region);
8150       fallthru = true;
8151       break;
8152 
8153     case GIMPLE_OMP_SECTIONS_SWITCH:
8154       fallthru = false;
8155       break;
8156 
8157     case GIMPLE_OMP_ATOMIC_LOAD:
8158     case GIMPLE_OMP_ATOMIC_STORE:
8159       fallthru = true;
8160       break;
8161 
8162     case GIMPLE_OMP_RETURN:
8163       /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8164 	 somewhere other than the next block.  This will be
8165 	 created later.  */
8166       cur_region->exit = bb;
8167       if (cur_region->type == GIMPLE_OMP_TASK)
8168 	/* Add an edge corresponding to not scheduling the task
8169 	   immediately.  */
8170 	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8171       fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8172       cur_region = cur_region->outer;
8173       break;
8174 
8175     case GIMPLE_OMP_CONTINUE:
8176       cur_region->cont = bb;
8177       switch (cur_region->type)
8178 	{
8179 	case GIMPLE_OMP_FOR:
8180 	  /* Mark the successor edges of the GIMPLE_OMP_FOR and
8181 	     GIMPLE_OMP_CONTINUE blocks as abnormal to prevent
8182 	     splitting them.  */
8183 	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8184 	  /* Make the loopback edge.  */
8185 	  make_edge (bb, single_succ (cur_region->entry),
8186 		     EDGE_ABNORMAL);
8187 
8188 	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
8189 	     corresponds to the case that the body of the loop
8190 	     is not executed at all.  */
8191 	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8192 	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8193 	  fallthru = false;
8194 	  break;
8195 
8196 	case GIMPLE_OMP_SECTIONS:
8197 	  /* Wire up the edges into and out of the nested sections.  */
8198 	  {
8199 	    basic_block switch_bb = single_succ (cur_region->entry);
8200 
8201 	    struct omp_region *i;
8202 	    for (i = cur_region->inner; i ; i = i->next)
8203 	      {
8204 		gcc_assert (i->type == GIMPLE_OMP_SECTION);
8205 		make_edge (switch_bb, i->entry, 0);
8206 		make_edge (i->exit, bb, EDGE_FALLTHRU);
8207 	      }
8208 
8209 	    /* Make the loopback edge to the block with
8210 	       GIMPLE_OMP_SECTIONS_SWITCH.  */
8211 	    make_edge (bb, switch_bb, 0);
8212 
8213 	    /* Make the edge from the switch to exit.  */
8214 	    make_edge (switch_bb, bb->next_bb, 0);
8215 	    fallthru = false;
8216 	  }
8217 	  break;
8218 
8219 	case GIMPLE_OMP_TASK:
8220 	  fallthru = true;
8221 	  break;
8222 
8223 	default:
8224 	  gcc_unreachable ();
8225 	}
8226       break;
8227 
8228     default:
8229       gcc_unreachable ();
8230     }
8231 
8232   if (*region != cur_region)
8233     {
8234       *region = cur_region;
8235       if (cur_region)
8236 	*region_idx = cur_region->entry->index;
8237       else
8238 	*region_idx = 0;
8239     }
8240 
8241   return fallthru;
8242 }
8243 
8244 #include "gt-omp-expand.h"
8245