xref: /dragonfly/contrib/gcc-8.0/gcc/omp-expand.c (revision 97fa55c4)
1 /* Expansion pass for OMP directives.  Outlines regions of certain OMP
2    directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp), and so forth.
4 
5 Copyright (C) 2005-2018 Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "debug.h"
60 #include "stringpool.h"
61 #include "attribs.h"
62 
63 /* OMP region information.  Every parallel and workshare
64    directive is enclosed between two markers, the OMP_* directive
65    and a corresponding GIMPLE_OMP_RETURN statement.  */
66 
67 struct omp_region
68 {
69   /* The enclosing region.  */
70   struct omp_region *outer;
71 
72   /* First child region.  */
73   struct omp_region *inner;
74 
75   /* Next peer region.  */
76   struct omp_region *next;
77 
78   /* Block containing the omp directive as its last stmt.  */
79   basic_block entry;
80 
81   /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
82   basic_block exit;
83 
84   /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
85   basic_block cont;
86 
87   /* If this is a combined parallel+workshare region, this is a list
88      of additional arguments needed by the combined parallel+workshare
89      library call.  */
90   vec<tree, va_gc> *ws_args;
91 
92   /* The code for the omp directive of this region.  */
93   enum gimple_code type;
94 
95   /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
96   enum omp_clause_schedule_kind sched_kind;
97 
98   /* Schedule modifiers.  */
99   unsigned char sched_modifiers;
100 
101   /* True if this is a combined parallel+workshare region.  */
102   bool is_combined_parallel;
103 
104   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105      a depend clause.  */
106   gomp_ordered *ord_stmt;
107 };
108 
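/* For illustration only (block layout and numbering vary): a body such as

	#pragma omp parallel
	  {
	    #pragma omp for
	    for (i = 0; i < n; i++)
	      ...
	  }

   produces a GIMPLE_OMP_PARALLEL region whose INNER field points at a
   GIMPLE_OMP_FOR region; directives at the same nesting level are
   chained through NEXT, and the outermost regions hang off
   root_omp_region below.  */
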
109 static struct omp_region *root_omp_region;
110 static bool omp_any_child_fn_dumped;
111 
112 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 				     bool = false);
114 static gphi *find_phi_with_arg_on_edge (tree, edge);
115 static void expand_omp (struct omp_region *region);
116 
117 /* Return true if REGION is a combined parallel+workshare region.  */
118 
119 static inline bool
120 is_combined_parallel (struct omp_region *region)
121 {
122   return region->is_combined_parallel;
123 }
124 
125 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126    is the immediate dominator of PAR_ENTRY_BB, return true if there
127    are no data dependencies that would prevent expanding the parallel
128    directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129 
130    When expanding a combined parallel+workshare region, the call to
131    the child function may need additional arguments in the case of
132    GIMPLE_OMP_FOR regions.  In some cases, these arguments are
133    computed out of variables passed in from the parent to the child
134    via 'struct .omp_data_s'.  For instance:
135 
136 	#pragma omp parallel for schedule (guided, i * 4)
137 	for (j ...)
138 
139    Is lowered into:
140 
141 	# BLOCK 2 (PAR_ENTRY_BB)
142 	.omp_data_o.i = i;
143 	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
144 
145 	# BLOCK 3 (WS_ENTRY_BB)
146 	.omp_data_i = &.omp_data_o;
147 	D.1667 = .omp_data_i->i;
148 	D.1598 = D.1667 * 4;
149 	#pragma omp for schedule (guided, D.1598)
150 
151    When we outline the parallel region, the call to the child function
152    'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153    that value is computed *after* the call site.  So, in principle we
154    cannot do the transformation.
155 
156    To see whether the code in WS_ENTRY_BB blocks the combined
157    parallel+workshare call, we collect all the variables used in the
158    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159    statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
160    call.
161 
162    FIXME.  If we had the SSA form built at this point, we could merely
163    hoist the code in block 3 into block 2 and be done with it.  But at
164    this point we don't have dataflow information and though we could
165    hack something up here, it is really not worth the aggravation.  */
166 
167 static bool
168 workshare_safe_to_combine_p (basic_block ws_entry_bb)
169 {
170   struct omp_for_data fd;
171   gimple *ws_stmt = last_stmt (ws_entry_bb);
172 
173   if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174     return true;
175 
176   gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177 
178   omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179 
180   if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181     return false;
182   if (fd.iter_type != long_integer_type_node)
183     return false;
184 
185   /* FIXME.  We give up too easily here.  If any of these arguments
186      are not constants, they will likely involve variables that have
187      been mapped into fields of .omp_data_s for sharing with the child
188      function.  With appropriate data flow, it would be possible to
189      see through this.  */
190   if (!is_gimple_min_invariant (fd.loop.n1)
191       || !is_gimple_min_invariant (fd.loop.n2)
192       || !is_gimple_min_invariant (fd.loop.step)
193       || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194     return false;
195 
196   return true;
197 }
198 
199 /* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on whether the
200    simd modifier is present (SIMD_SCHEDULE).  */
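/* For example (with illustrative numbers): with a vectorization factor
   of 8, a requested chunk size of 13 becomes (13 + 8 - 1) & -8 == 16,
   i.e. the chunk size is rounded up to a whole multiple of the vector
   factor so that each dispatched chunk covers complete SIMD vectors.  */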
201 
202 static tree
203 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204 {
205   if (!simd_schedule)
206     return chunk_size;
207 
208   poly_uint64 vf = omp_max_vf ();
209   if (known_eq (vf, 1U))
210     return chunk_size;
211 
212   tree type = TREE_TYPE (chunk_size);
213   chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 			    build_int_cst (type, vf - 1));
215   return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 		      build_int_cst (type, -vf));
217 }
218 
219 /* Collect additional arguments needed to emit a combined
220    parallel+workshare call.  PAR_STMT is the enclosing parallel directive
221    and WS_STMT is the workshare directive being expanded.  */
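/* For instance (illustrative bounds), for

	#pragma omp parallel for schedule (dynamic, 4)
	for (i = 0; i < n; i++)
	  ...

   the vector built below holds roughly { (long) 0, (long) n, (long) 1,
   (long) 4 }, i.e. the loop bounds, step and chunk size that the
   combined GOMP_parallel_loop_* entry point expects in addition to the
   usual outlined-function arguments.  */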
222 
223 static vec<tree, va_gc> *
224 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225 {
226   tree t;
227   location_t loc = gimple_location (ws_stmt);
228   vec<tree, va_gc> *ws_args;
229 
230   if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231     {
232       struct omp_for_data fd;
233       tree n1, n2;
234 
235       omp_extract_for_data (for_stmt, &fd, NULL);
236       n1 = fd.loop.n1;
237       n2 = fd.loop.n2;
238 
239       if (gimple_omp_for_combined_into_p (for_stmt))
240 	{
241 	  tree innerc
242 	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 			       OMP_CLAUSE__LOOPTEMP_);
244 	  gcc_assert (innerc);
245 	  n1 = OMP_CLAUSE_DECL (innerc);
246 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 				    OMP_CLAUSE__LOOPTEMP_);
248 	  gcc_assert (innerc);
249 	  n2 = OMP_CLAUSE_DECL (innerc);
250 	}
251 
252       vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253 
254       t = fold_convert_loc (loc, long_integer_type_node, n1);
255       ws_args->quick_push (t);
256 
257       t = fold_convert_loc (loc, long_integer_type_node, n2);
258       ws_args->quick_push (t);
259 
260       t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261       ws_args->quick_push (t);
262 
263       if (fd.chunk_size)
264 	{
265 	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 	  ws_args->quick_push (t);
268 	}
269 
270       return ws_args;
271     }
272   else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273     {
274       /* Number of sections is equal to the number of edges from the
275 	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 	 the exit of the sections region.  */
277       basic_block bb = single_succ (gimple_bb (ws_stmt));
278       t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279       vec_alloc (ws_args, 1);
280       ws_args->quick_push (t);
281       return ws_args;
282     }
283 
284   gcc_unreachable ();
285 }
286 
287 /* Discover whether REGION is a combined parallel+workshare region.  */
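/* For example, a perfectly nested

	#pragma omp parallel for schedule (dynamic)

   can be emitted as a single GOMP_parallel_loop_dynamic call, whereas
   schedule (static) loops (already open coded) and ordered loops
   (which would still need their own synchronization) are left as
   separate parallel and workshare regions.  */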
288 
289 static void
290 determine_parallel_type (struct omp_region *region)
291 {
292   basic_block par_entry_bb, par_exit_bb;
293   basic_block ws_entry_bb, ws_exit_bb;
294 
295   if (region == NULL || region->inner == NULL
296       || region->exit == NULL || region->inner->exit == NULL
297       || region->inner->cont == NULL)
298     return;
299 
300   /* We only support parallel+for and parallel+sections.  */
301   if (region->type != GIMPLE_OMP_PARALLEL
302       || (region->inner->type != GIMPLE_OMP_FOR
303 	  && region->inner->type != GIMPLE_OMP_SECTIONS))
304     return;
305 
306   /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307      WS_EXIT_BB -> PAR_EXIT_BB.  */
308   par_entry_bb = region->entry;
309   par_exit_bb = region->exit;
310   ws_entry_bb = region->inner->entry;
311   ws_exit_bb = region->inner->exit;
312 
313   if (single_succ (par_entry_bb) == ws_entry_bb
314       && single_succ (ws_exit_bb) == par_exit_bb
315       && workshare_safe_to_combine_p (ws_entry_bb)
316       && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 	  || (last_and_only_stmt (ws_entry_bb)
318 	      && last_and_only_stmt (par_exit_bb))))
319     {
320       gimple *par_stmt = last_stmt (par_entry_bb);
321       gimple *ws_stmt = last_stmt (ws_entry_bb);
322 
323       if (region->inner->type == GIMPLE_OMP_FOR)
324 	{
325 	  /* If this is a combined parallel loop, we need to determine
326 	     whether or not to use the combined library calls.  There
327 	     are two cases where we do not apply the transformation:
328 	     static loops and any kind of ordered loop.  In the first
329 	     case, we already open code the loop so there is no need
330 	     to do anything else.  In the latter case, the combined
331 	     parallel loop call would still need extra synchronization
332 	     to implement ordered semantics, so there would not be any
333 	     gain in using the combined call.  */
334 	  tree clauses = gimple_omp_for_clauses (ws_stmt);
335 	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 	  if (c == NULL
337 	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 		  == OMP_CLAUSE_SCHEDULE_STATIC)
339 	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340 	    {
341 	      region->is_combined_parallel = false;
342 	      region->inner->is_combined_parallel = false;
343 	      return;
344 	    }
345 	}
346 
347       region->is_combined_parallel = true;
348       region->inner->is_combined_parallel = true;
349       region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
350     }
351 }
352 
353 /* Debugging dumps for parallel regions.  */
354 void dump_omp_region (FILE *, struct omp_region *, int);
355 void debug_omp_region (struct omp_region *);
356 void debug_all_omp_regions (void);
357 
358 /* Dump the parallel region tree rooted at REGION.  */
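/* The output looks roughly like this (block numbers are illustrative):

	bb 4: gimple_omp_parallel
	    bb 5: gimple_omp_for
	    bb 7: GIMPLE_OMP_CONTINUE
	    bb 8: GIMPLE_OMP_RETURN
	bb 10: GIMPLE_OMP_RETURN

   with inner regions indented four extra columns.  */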
359 
360 void
361 dump_omp_region (FILE *file, struct omp_region *region, int indent)
362 {
363   fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 	   gimple_code_name[region->type]);
365 
366   if (region->inner)
367     dump_omp_region (file, region->inner, indent + 4);
368 
369   if (region->cont)
370     {
371       fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 	       region->cont->index);
373     }
374 
375   if (region->exit)
376     fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 	     region->exit->index);
378   else
379     fprintf (file, "%*s[no exit marker]\n", indent, "");
380 
381   if (region->next)
382     dump_omp_region (file, region->next, indent);
383 }
384 
385 DEBUG_FUNCTION void
386 debug_omp_region (struct omp_region *region)
387 {
388   dump_omp_region (stderr, region, 0);
389 }
390 
391 DEBUG_FUNCTION void
392 debug_all_omp_regions (void)
393 {
394   dump_omp_region (stderr, root_omp_region, 0);
395 }
396 
397 /* Create a new parallel region starting at STMT inside region PARENT.  */
398 
399 static struct omp_region *
400 new_omp_region (basic_block bb, enum gimple_code type,
401 		struct omp_region *parent)
402 {
403   struct omp_region *region = XCNEW (struct omp_region);
404 
405   region->outer = parent;
406   region->entry = bb;
407   region->type = type;
408 
409   if (parent)
410     {
411       /* This is a nested region.  Add it to the list of inner
412 	 regions in PARENT.  */
413       region->next = parent->inner;
414       parent->inner = region;
415     }
416   else
417     {
418       /* This is a toplevel region.  Add it to the list of toplevel
419 	 regions in ROOT_OMP_REGION.  */
420       region->next = root_omp_region;
421       root_omp_region = region;
422     }
423 
424   return region;
425 }
426 
427 /* Release the memory associated with the region tree rooted at REGION.  */
428 
429 static void
430 free_omp_region_1 (struct omp_region *region)
431 {
432   struct omp_region *i, *n;
433 
434   for (i = region->inner; i ; i = n)
435     {
436       n = i->next;
437       free_omp_region_1 (i);
438     }
439 
440   free (region);
441 }
442 
443 /* Release the memory for the entire omp region tree.  */
444 
445 void
446 omp_free_regions (void)
447 {
448   struct omp_region *r, *n;
449   for (r = root_omp_region; r ; r = n)
450     {
451       n = r->next;
452       free_omp_region_1 (r);
453     }
454   root_omp_region = NULL;
455 }
456 
457 /* A convenience function to build an empty GIMPLE_COND with just the
458    condition.  */
459 
460 static gcond *
461 gimple_build_cond_empty (tree cond)
462 {
463   enum tree_code pred_code;
464   tree lhs, rhs;
465 
466   gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467   return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
468 }
469 
470 /* Return true if a parallel REGION is within a declare target function or
471    within a target region and is not a part of a gridified target.  */
472 
473 static bool
474 parallel_needs_hsa_kernel_p (struct omp_region *region)
475 {
476   bool indirect = false;
477   for (region = region->outer; region; region = region->outer)
478     {
479       if (region->type == GIMPLE_OMP_PARALLEL)
480 	indirect = true;
481       else if (region->type == GIMPLE_OMP_TARGET)
482 	{
483 	  gomp_target *tgt_stmt
484 	    = as_a <gomp_target *> (last_stmt (region->entry));
485 
486 	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 			       OMP_CLAUSE__GRIDDIM_))
488 	    return indirect;
489 	  else
490 	    return true;
491 	}
492     }
493 
494   if (lookup_attribute ("omp declare target",
495 			DECL_ATTRIBUTES (current_function_decl)))
496     return true;
497 
498   return false;
499 }
500 
501 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
502    Add CHILD_FNDECL to decl chain of the supercontext of the block
503    ENTRY_BLOCK - this is the block which originally contained the
504    code from which CHILD_FNDECL was created.
505 
506    Together, these actions ensure that the debug info for the outlined
507    function will be emitted with the correct lexical scope.  */
508 
509 static void
510 adjust_context_and_scope (tree entry_block, tree child_fndecl)
511 {
512   if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
513     {
514       tree b = BLOCK_SUPERCONTEXT (entry_block);
515 
516       if (TREE_CODE (b) == BLOCK)
517         {
518 	  tree parent_fndecl;
519 
520 	  /* Follow supercontext chain until the parent fndecl
521 	     is found.  */
522 	  for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
523 	       TREE_CODE (parent_fndecl) == BLOCK;
524 	       parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
525 	    ;
526 
527 	  gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
528 
529 	  DECL_CONTEXT (child_fndecl) = parent_fndecl;
530 
531 	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
532 	  BLOCK_VARS (b) = child_fndecl;
533 	}
534     }
535 }
536 
537 /* Build the call to GOMP_parallel (or one of its combined variants) to
538    generate the parallel operation.  REGION is the parallel region
539    being expanded.  BB is the block where to insert the code.  WS_ARGS
540    will be set if this is a call to a combined parallel+workshare
541    construct, it contains the list of additional arguments needed by
542    the workshare construct.  */
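/* As a rough illustration (names and temporaries are made up),

	#pragma omp parallel num_threads (4)
	  { body; }

   ends up emitting

	GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);

   while a combined parallel+workshare region calls one of the
   GOMP_parallel_loop_* or GOMP_parallel_sections entry points with the
   WS_ARGS spliced in between the thread count and the flags.  */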
543 
544 static void
545 expand_parallel_call (struct omp_region *region, basic_block bb,
546 		      gomp_parallel *entry_stmt,
547 		      vec<tree, va_gc> *ws_args)
548 {
549   tree t, t1, t2, val, cond, c, clauses, flags;
550   gimple_stmt_iterator gsi;
551   gimple *stmt;
552   enum built_in_function start_ix;
553   int start_ix2;
554   location_t clause_loc;
555   vec<tree, va_gc> *args;
556 
557   clauses = gimple_omp_parallel_clauses (entry_stmt);
558 
559   /* Determine what flavor of GOMP_parallel we will be
560      emitting.  */
561   start_ix = BUILT_IN_GOMP_PARALLEL;
562   if (is_combined_parallel (region))
563     {
564       switch (region->inner->type)
565 	{
566 	case GIMPLE_OMP_FOR:
567 	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
568 	  switch (region->inner->sched_kind)
569 	    {
570 	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
571 	      start_ix2 = 3;
572 	      break;
573 	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
574 	    case OMP_CLAUSE_SCHEDULE_GUIDED:
575 	      if (region->inner->sched_modifiers
576 		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
577 		{
578 		  start_ix2 = 3 + region->inner->sched_kind;
579 		  break;
580 		}
581 	      /* FALLTHRU */
582 	    default:
583 	      start_ix2 = region->inner->sched_kind;
584 	      break;
585 	    }
586 	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
587 	  start_ix = (enum built_in_function) start_ix2;
588 	  break;
589 	case GIMPLE_OMP_SECTIONS:
590 	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
591 	  break;
592 	default:
593 	  gcc_unreachable ();
594 	}
595     }
596 
597   /* By default, the value of NUM_THREADS is zero (selected at run time)
598      and there is no conditional.  */
599   cond = NULL_TREE;
600   val = build_int_cst (unsigned_type_node, 0);
601   flags = build_int_cst (unsigned_type_node, 0);
602 
603   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
604   if (c)
605     cond = OMP_CLAUSE_IF_EXPR (c);
606 
607   c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
608   if (c)
609     {
610       val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
611       clause_loc = OMP_CLAUSE_LOCATION (c);
612     }
613   else
614     clause_loc = gimple_location (entry_stmt);
615 
616   c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
617   if (c)
618     flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
619 
620   /* Ensure 'val' is of the correct type.  */
621   val = fold_convert_loc (clause_loc, unsigned_type_node, val);
622 
623   /* If we found the clause 'if (cond)', build either
624      (cond != 0) or (cond ? val : 1u).  */
625   if (cond)
626     {
627       cond = gimple_boolify (cond);
628 
629       if (integer_zerop (val))
630 	val = fold_build2_loc (clause_loc,
631 			   EQ_EXPR, unsigned_type_node, cond,
632 			   build_int_cst (TREE_TYPE (cond), 0));
633       else
634 	{
635 	  basic_block cond_bb, then_bb, else_bb;
636 	  edge e, e_then, e_else;
637 	  tree tmp_then, tmp_else, tmp_join, tmp_var;
638 
639 	  tmp_var = create_tmp_var (TREE_TYPE (val));
640 	  if (gimple_in_ssa_p (cfun))
641 	    {
642 	      tmp_then = make_ssa_name (tmp_var);
643 	      tmp_else = make_ssa_name (tmp_var);
644 	      tmp_join = make_ssa_name (tmp_var);
645 	    }
646 	  else
647 	    {
648 	      tmp_then = tmp_var;
649 	      tmp_else = tmp_var;
650 	      tmp_join = tmp_var;
651 	    }
652 
653 	  e = split_block_after_labels (bb);
654 	  cond_bb = e->src;
655 	  bb = e->dest;
656 	  remove_edge (e);
657 
658 	  then_bb = create_empty_bb (cond_bb);
659 	  else_bb = create_empty_bb (then_bb);
660 	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
661 	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
662 
663 	  stmt = gimple_build_cond_empty (cond);
664 	  gsi = gsi_start_bb (cond_bb);
665 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
666 
667 	  gsi = gsi_start_bb (then_bb);
668 	  expand_omp_build_assign (&gsi, tmp_then, val, true);
669 
670 	  gsi = gsi_start_bb (else_bb);
671 	  expand_omp_build_assign (&gsi, tmp_else,
672 				   build_int_cst (unsigned_type_node, 1),
673 				   true);
674 
675 	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
676 	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
677 	  add_bb_to_loop (then_bb, cond_bb->loop_father);
678 	  add_bb_to_loop (else_bb, cond_bb->loop_father);
679 	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
680 	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
681 
682 	  if (gimple_in_ssa_p (cfun))
683 	    {
684 	      gphi *phi = create_phi_node (tmp_join, bb);
685 	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
686 	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
687 	    }
688 
689 	  val = tmp_join;
690 	}
691 
692       gsi = gsi_start_bb (bb);
693       val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
694 				      false, GSI_CONTINUE_LINKING);
695     }
696 
697   gsi = gsi_last_nondebug_bb (bb);
698   t = gimple_omp_parallel_data_arg (entry_stmt);
699   if (t == NULL)
700     t1 = null_pointer_node;
701   else
702     t1 = build_fold_addr_expr (t);
703   tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
704   t2 = build_fold_addr_expr (child_fndecl);
705 
706   adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
707 
708   vec_alloc (args, 4 + vec_safe_length (ws_args));
709   args->quick_push (t2);
710   args->quick_push (t1);
711   args->quick_push (val);
712   if (ws_args)
713     args->splice (*ws_args);
714   args->quick_push (flags);
715 
716   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
717 			       builtin_decl_explicit (start_ix), args);
718 
719   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
720 			    false, GSI_CONTINUE_LINKING);
721 
722   if (hsa_gen_requested_p ()
723       && parallel_needs_hsa_kernel_p (region))
724     {
725       cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
726       hsa_register_kernel (child_cnode);
727     }
728 }
729 
730 /* Build the function call to GOMP_task to actually
731    generate the task operation.  BB is the block where to insert the code.  */
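/* For an ordinary (non-taskloop) task this boils down to, roughly,

	GOMP_task (fn, data, cpyfn, arg_size, arg_align, if_cond, flags,
		   depend, priority);

   whereas a taskloop construct calls GOMP_taskloop or GOMP_taskloop_ull
   instead, passing num_tasks, priority and the iteration bounds and
   step in place of the if and depend arguments.  */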
732 
733 static void
734 expand_task_call (struct omp_region *region, basic_block bb,
735 		  gomp_task *entry_stmt)
736 {
737   tree t1, t2, t3;
738   gimple_stmt_iterator gsi;
739   location_t loc = gimple_location (entry_stmt);
740 
741   tree clauses = gimple_omp_task_clauses (entry_stmt);
742 
743   tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744   tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745   tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747   tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748   tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
749 
750   unsigned int iflags
751     = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752       | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753       | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
754 
755   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757   tree num_tasks = NULL_TREE;
758   bool ull = false;
759   if (taskloop_p)
760     {
761       gimple *g = last_stmt (region->outer->entry);
762       gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764       struct omp_for_data fd;
765       omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766       startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767       endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 				OMP_CLAUSE__LOOPTEMP_);
769       startvar = OMP_CLAUSE_DECL (startvar);
770       endvar = OMP_CLAUSE_DECL (endvar);
771       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772       if (fd.loop.cond_code == LT_EXPR)
773 	iflags |= GOMP_TASK_FLAG_UP;
774       tree tclauses = gimple_omp_for_clauses (g);
775       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776       if (num_tasks)
777 	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778       else
779 	{
780 	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 	  if (num_tasks)
782 	    {
783 	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
785 	    }
786 	  else
787 	    num_tasks = integer_zero_node;
788 	}
789       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790       if (ifc == NULL_TREE)
791 	iflags |= GOMP_TASK_FLAG_IF;
792       if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 	iflags |= GOMP_TASK_FLAG_NOGROUP;
794       ull = fd.iter_type == long_long_unsigned_type_node;
795     }
796   else if (priority)
797     iflags |= GOMP_TASK_FLAG_PRIORITY;
798 
799   tree flags = build_int_cst (unsigned_type_node, iflags);
800 
801   tree cond = boolean_true_node;
802   if (ifc)
803     {
804       if (taskloop_p)
805 	{
806 	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 			       build_int_cst (unsigned_type_node,
809 					      GOMP_TASK_FLAG_IF),
810 			       build_int_cst (unsigned_type_node, 0));
811 	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 				   flags, t);
813 	}
814       else
815 	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
816     }
817 
818   if (finalc)
819     {
820       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 			   build_int_cst (unsigned_type_node,
823 					  GOMP_TASK_FLAG_FINAL),
824 			   build_int_cst (unsigned_type_node, 0));
825       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
826     }
827   if (depend)
828     depend = OMP_CLAUSE_DECL (depend);
829   else
830     depend = build_int_cst (ptr_type_node, 0);
831   if (priority)
832     priority = fold_convert (integer_type_node,
833 			     OMP_CLAUSE_PRIORITY_EXPR (priority));
834   else
835     priority = integer_zero_node;
836 
837   gsi = gsi_last_nondebug_bb (bb);
838   tree t = gimple_omp_task_data_arg (entry_stmt);
839   if (t == NULL)
840     t2 = null_pointer_node;
841   else
842     t2 = build_fold_addr_expr_loc (loc, t);
843   t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844   t = gimple_omp_task_copy_fn (entry_stmt);
845   if (t == NULL)
846     t3 = null_pointer_node;
847   else
848     t3 = build_fold_addr_expr_loc (loc, t);
849 
850   if (taskloop_p)
851     t = build_call_expr (ull
852 			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 			 11, t1, t2, t3,
855 			 gimple_omp_task_arg_size (entry_stmt),
856 			 gimple_omp_task_arg_align (entry_stmt), flags,
857 			 num_tasks, priority, startvar, endvar, step);
858   else
859     t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 			 9, t1, t2, t3,
861 			 gimple_omp_task_arg_size (entry_stmt),
862 			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 			 depend, priority);
864 
865   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 			    false, GSI_CONTINUE_LINKING);
867 }
868 
869 /* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */
870 
871 static tree
872 vec2chain (vec<tree, va_gc> *v)
873 {
874   tree chain = NULL_TREE, t;
875   unsigned ix;
876 
877   FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
878     {
879       DECL_CHAIN (t) = chain;
880       chain = t;
881     }
882 
883   return chain;
884 }
885 
886 /* Remove barriers in REGION->EXIT's block.  Note that this is only
887    valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
888    is an implicit barrier, any barrier that a workshare inside the
889    GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
890    can now be removed.  */
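/* For instance, in

	#pragma omp parallel
	  {
	    #pragma omp for
	    for (...)
	      ...
	  }

   the barrier implied by the end of the loop is immediately followed
   by the barrier implied by the end of the parallel region, so the
   former is redundant and its GIMPLE_OMP_RETURN can be marked nowait.
   The exception is when queued tasks might still reference addressable
   locals of the parallel, which is what the code below checks for.  */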
891 
892 static void
893 remove_exit_barrier (struct omp_region *region)
894 {
895   gimple_stmt_iterator gsi;
896   basic_block exit_bb;
897   edge_iterator ei;
898   edge e;
899   gimple *stmt;
900   int any_addressable_vars = -1;
901 
902   exit_bb = region->exit;
903 
904   /* If the parallel region doesn't return, we don't have REGION->EXIT
905      block at all.  */
906   if (! exit_bb)
907     return;
908 
909   /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
910      workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
911      statements that can appear in between are extremely limited -- no
912      memory operations at all.  Here, we allow nothing at all, so the
913      only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
914   gsi = gsi_last_nondebug_bb (exit_bb);
915   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916   gsi_prev_nondebug (&gsi);
917   if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918     return;
919 
920   FOR_EACH_EDGE (e, ei, exit_bb->preds)
921     {
922       gsi = gsi_last_nondebug_bb (e->src);
923       if (gsi_end_p (gsi))
924 	continue;
925       stmt = gsi_stmt (gsi);
926       if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 	  && !gimple_omp_return_nowait_p (stmt))
928 	{
929 	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 	     in many cases.  If there could be tasks queued, the barrier
931 	     might be needed to let the tasks run before some local
932 	     variable of the parallel that the task uses as shared
933 	     runs out of scope.  The task can be spawned either
934 	     from within current function (this would be easy to check)
935 	     or from some function it calls and gets passed an address
936 	     of such a variable.  */
937 	  if (any_addressable_vars < 0)
938 	    {
939 	      gomp_parallel *parallel_stmt
940 		= as_a <gomp_parallel *> (last_stmt (region->entry));
941 	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 	      tree local_decls, block, decl;
943 	      unsigned ix;
944 
945 	      any_addressable_vars = 0;
946 	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 		if (TREE_ADDRESSABLE (decl))
948 		  {
949 		    any_addressable_vars = 1;
950 		    break;
951 		  }
952 	      for (block = gimple_block (stmt);
953 		   !any_addressable_vars
954 		   && block
955 		   && TREE_CODE (block) == BLOCK;
956 		   block = BLOCK_SUPERCONTEXT (block))
957 		{
958 		  for (local_decls = BLOCK_VARS (block);
959 		       local_decls;
960 		       local_decls = DECL_CHAIN (local_decls))
961 		    if (TREE_ADDRESSABLE (local_decls))
962 		      {
963 			any_addressable_vars = 1;
964 			break;
965 		      }
966 		  if (block == gimple_block (parallel_stmt))
967 		    break;
968 		}
969 	    }
970 	  if (!any_addressable_vars)
971 	    gimple_omp_return_set_nowait (stmt);
972 	}
973     }
974 }
975 
976 static void
977 remove_exit_barriers (struct omp_region *region)
978 {
979   if (region->type == GIMPLE_OMP_PARALLEL)
980     remove_exit_barrier (region);
981 
982   if (region->inner)
983     {
984       region = region->inner;
985       remove_exit_barriers (region);
986       while (region->next)
987 	{
988 	  region = region->next;
989 	  remove_exit_barriers (region);
990 	}
991     }
992 }
993 
994 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
995    calls.  These can't be declared as const functions, but
996    within one parallel body they are constant, so they can be
997    transformed there into __builtin_omp_get_{thread_num,num_threads} ()
998    which are declared const.  Similarly for the task body, except
999    that in an untied task omp_get_thread_num () can change at any task
1000    scheduling point.  */
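/* E.g. within one parallel body

	tid = omp_get_thread_num ();

   is redirected to __builtin_omp_get_thread_num (), whose const
   attribute then allows later passes to CSE repeated uses of the
   thread number.  */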
1001 
1002 static void
1003 optimize_omp_library_calls (gimple *entry_stmt)
1004 {
1005   basic_block bb;
1006   gimple_stmt_iterator gsi;
1007   tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008   tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009   tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010   tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011   bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 					  OMP_CLAUSE_UNTIED) != NULL);
1014 
1015   FOR_EACH_BB_FN (bb, cfun)
1016     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1017       {
1018 	gimple *call = gsi_stmt (gsi);
1019 	tree decl;
1020 
1021 	if (is_gimple_call (call)
1022 	    && (decl = gimple_call_fndecl (call))
1023 	    && DECL_EXTERNAL (decl)
1024 	    && TREE_PUBLIC (decl)
1025 	    && DECL_INITIAL (decl) == NULL)
1026 	  {
1027 	    tree built_in;
1028 
1029 	    if (DECL_NAME (decl) == thr_num_id)
1030 	      {
1031 		/* In #pragma omp task untied omp_get_thread_num () can change
1032 		   during the execution of the task region.  */
1033 		if (untied_task)
1034 		  continue;
1035 		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1036 	      }
1037 	    else if (DECL_NAME (decl) == num_thr_id)
1038 	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 	    else
1040 	      continue;
1041 
1042 	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 		|| gimple_call_num_args (call) != 0)
1044 	      continue;
1045 
1046 	    if (flag_exceptions && !TREE_NOTHROW (decl))
1047 	      continue;
1048 
1049 	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 					TREE_TYPE (TREE_TYPE (built_in))))
1052 	      continue;
1053 
1054 	    gimple_call_set_fndecl (call, built_in);
1055 	  }
1056       }
1057 }
1058 
1059 /* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1060    regimplified.  */
1061 
1062 static tree
1063 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1064 {
1065   tree t = *tp;
1066 
1067   /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1068   if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069     return t;
1070 
1071   if (TREE_CODE (t) == ADDR_EXPR)
1072     recompute_tree_invariant_for_addr_expr (t);
1073 
1074   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075   return NULL_TREE;
1076 }
1077 
1078 /* Insert a TO = FROM assignment before *GSI_P, or after it if AFTER.  */
1079 
1080 static void
1081 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 			 bool after)
1083 {
1084   bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085   from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 				   !after, after ? GSI_CONTINUE_LINKING
1087 						 : GSI_SAME_STMT);
1088   gimple *stmt = gimple_build_assign (to, from);
1089   if (after)
1090     gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091   else
1092     gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093   if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094       || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1095     {
1096       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097       gimple_regimplify_operands (stmt, &gsi);
1098     }
1099 }
1100 
1101 /* Expand the OpenMP parallel or task directive starting at REGION.  */
1102 
1103 static void
1104 expand_omp_taskreg (struct omp_region *region)
1105 {
1106   basic_block entry_bb, exit_bb, new_bb;
1107   struct function *child_cfun;
1108   tree child_fn, block, t;
1109   gimple_stmt_iterator gsi;
1110   gimple *entry_stmt, *stmt;
1111   edge e;
1112   vec<tree, va_gc> *ws_args;
1113 
1114   entry_stmt = last_stmt (region->entry);
1115   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1117 
1118   entry_bb = region->entry;
1119   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120     exit_bb = region->cont;
1121   else
1122     exit_bb = region->exit;
1123 
1124   if (is_combined_parallel (region))
1125     ws_args = region->ws_args;
1126   else
1127     ws_args = NULL;
1128 
1129   if (child_cfun->cfg)
1130     {
1131       /* Due to inlining, it may happen that we have already outlined
1132 	 the region, in which case all we need to do is make the
1133 	 sub-graph unreachable and emit the parallel call.  */
1134       edge entry_succ_e, exit_succ_e;
1135 
1136       entry_succ_e = single_succ_edge (entry_bb);
1137 
1138       gsi = gsi_last_nondebug_bb (entry_bb);
1139       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1140 		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1141       gsi_remove (&gsi, true);
1142 
1143       new_bb = entry_bb;
1144       if (exit_bb)
1145 	{
1146 	  exit_succ_e = single_succ_edge (exit_bb);
1147 	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1148 	}
1149       remove_edge_and_dominated_blocks (entry_succ_e);
1150     }
1151   else
1152     {
1153       unsigned srcidx, dstidx, num;
1154 
1155       /* If the parallel region needs data sent from the parent
1156 	 function, then the very first statement (except possible
1157 	 tree profile counter updates) of the parallel body
1158 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1159 	 &.OMP_DATA_O is passed as an argument to the child function,
1160 	 we need to replace it with the argument as seen by the child
1161 	 function.
1162 
1163 	 In most cases, this will end up being the identity assignment
1164 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1165 	 a function call that has been inlined, the original PARM_DECL
1166 	 .OMP_DATA_I may have been converted into a different local
1167 	 variable, in which case we need to keep the assignment.  */
1168       if (gimple_omp_taskreg_data_arg (entry_stmt))
1169 	{
1170 	  basic_block entry_succ_bb
1171 	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
1172 				       : FALLTHRU_EDGE (entry_bb)->dest;
1173 	  tree arg;
1174 	  gimple *parcopy_stmt = NULL;
1175 
1176 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1177 	    {
1178 	      gimple *stmt;
1179 
1180 	      gcc_assert (!gsi_end_p (gsi));
1181 	      stmt = gsi_stmt (gsi);
1182 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
1183 		continue;
1184 
1185 	      if (gimple_num_ops (stmt) == 2)
1186 		{
1187 		  tree arg = gimple_assign_rhs1 (stmt);
1188 
1189 		  /* We're ignoring the subcode because we're
1190 		     effectively doing a STRIP_NOPS.  */
1191 
1192 		  if (TREE_CODE (arg) == ADDR_EXPR
1193 		      && TREE_OPERAND (arg, 0)
1194 			== gimple_omp_taskreg_data_arg (entry_stmt))
1195 		    {
1196 		      parcopy_stmt = stmt;
1197 		      break;
1198 		    }
1199 		}
1200 	    }
1201 
1202 	  gcc_assert (parcopy_stmt != NULL);
1203 	  arg = DECL_ARGUMENTS (child_fn);
1204 
1205 	  if (!gimple_in_ssa_p (cfun))
1206 	    {
1207 	      if (gimple_assign_lhs (parcopy_stmt) == arg)
1208 		gsi_remove (&gsi, true);
1209 	      else
1210 		{
1211 		  /* ?? Is setting the subcode really necessary ??  */
1212 		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1213 		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
1214 		}
1215 	    }
1216 	  else
1217 	    {
1218 	      tree lhs = gimple_assign_lhs (parcopy_stmt);
1219 	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
1220 	      /* We'd like to set the rhs to the default def in the child_fn,
1221 		 but it's too early to create ssa names in the child_fn.
1222 		 Instead, we set the rhs to the parm.  In
1223 		 move_sese_region_to_fn, we introduce a default def for the
1224 		 parm, map the parm to its default def, and once we encounter
1225 		 this stmt, replace the parm with the default def.  */
1226 	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 	      update_stmt (parcopy_stmt);
1228 	    }
1229 	}
1230 
1231       /* Declare local variables needed in CHILD_CFUN.  */
1232       block = DECL_INITIAL (child_fn);
1233       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1234       /* The gimplifier could record temporaries in parallel/task block
1235 	 rather than in containing function's local_decls chain,
1236 	 which would mean cgraph missed finalizing them.  Do it now.  */
1237       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1238 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1239 	  varpool_node::finalize_decl (t);
1240       DECL_SAVED_TREE (child_fn) = NULL;
1241       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1242       gimple_set_body (child_fn, NULL);
1243       TREE_USED (block) = 1;
1244 
1245       /* Reset DECL_CONTEXT on function arguments.  */
1246       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1247 	DECL_CONTEXT (t) = child_fn;
1248 
1249       /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1250 	 so that it can be moved to the child function.  */
1251       gsi = gsi_last_nondebug_bb (entry_bb);
1252       stmt = gsi_stmt (gsi);
1253       gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1254 			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
1255       e = split_block (entry_bb, stmt);
1256       gsi_remove (&gsi, true);
1257       entry_bb = e->dest;
1258       edge e2 = NULL;
1259       if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1260 	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1261       else
1262 	{
1263 	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1264 	  gcc_assert (e2->dest == region->exit);
1265 	  remove_edge (BRANCH_EDGE (entry_bb));
1266 	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1267 	  gsi = gsi_last_nondebug_bb (region->exit);
1268 	  gcc_assert (!gsi_end_p (gsi)
1269 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1270 	  gsi_remove (&gsi, true);
1271 	}
1272 
1273       /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1274       if (exit_bb)
1275 	{
1276 	  gsi = gsi_last_nondebug_bb (exit_bb);
1277 	  gcc_assert (!gsi_end_p (gsi)
1278 		      && (gimple_code (gsi_stmt (gsi))
1279 			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1280 	  stmt = gimple_build_return (NULL);
1281 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1282 	  gsi_remove (&gsi, true);
1283 	}
1284 
1285       /* Move the parallel region into CHILD_CFUN.  */
1286 
1287       if (gimple_in_ssa_p (cfun))
1288 	{
1289 	  init_tree_ssa (child_cfun);
1290 	  init_ssa_operands (child_cfun);
1291 	  child_cfun->gimple_df->in_ssa_p = true;
1292 	  block = NULL_TREE;
1293 	}
1294       else
1295 	block = gimple_block (entry_stmt);
1296 
1297       /* Make sure to generate early debug for the function before
1298          outlining anything.  */
1299       if (! gimple_in_ssa_p (cfun))
1300 	(*debug_hooks->early_global_decl) (cfun->decl);
1301 
1302       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1303       if (exit_bb)
1304 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1305       if (e2)
1306 	{
1307 	  basic_block dest_bb = e2->dest;
1308 	  if (!exit_bb)
1309 	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1310 	  remove_edge (e2);
1311 	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1312 	}
1313       /* When the OMP expansion process cannot guarantee an up-to-date
1314 	 loop tree, arrange for the child function to fix up loops.  */
1315       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1316 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1317 
1318       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1319       num = vec_safe_length (child_cfun->local_decls);
1320       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1321 	{
1322 	  t = (*child_cfun->local_decls)[srcidx];
1323 	  if (DECL_CONTEXT (t) == cfun->decl)
1324 	    continue;
1325 	  if (srcidx != dstidx)
1326 	    (*child_cfun->local_decls)[dstidx] = t;
1327 	  dstidx++;
1328 	}
1329       if (dstidx != num)
1330 	vec_safe_truncate (child_cfun->local_decls, dstidx);
1331 
1332       /* Inform the callgraph about the new function.  */
1333       child_cfun->curr_properties = cfun->curr_properties;
1334       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1335       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1336       cgraph_node *node = cgraph_node::get_create (child_fn);
1337       node->parallelized_function = 1;
1338       cgraph_node::add_new_function (child_fn, true);
1339 
1340       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1341 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1342 
1343       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1344 	 fixed in a following pass.  */
1345       push_cfun (child_cfun);
1346       if (need_asm)
1347 	assign_assembler_name_if_needed (child_fn);
1348 
1349       if (optimize)
1350 	optimize_omp_library_calls (entry_stmt);
1351       update_max_bb_count ();
1352       cgraph_edge::rebuild_edges ();
1353 
1354       /* Some EH regions might become dead, see PR34608.  If
1355 	 pass_cleanup_cfg isn't the first pass to happen with the
1356 	 new child, these dead EH edges might cause problems.
1357 	 Clean them up now.  */
1358       if (flag_exceptions)
1359 	{
1360 	  basic_block bb;
1361 	  bool changed = false;
1362 
1363 	  FOR_EACH_BB_FN (bb, cfun)
1364 	    changed |= gimple_purge_dead_eh_edges (bb);
1365 	  if (changed)
1366 	    cleanup_tree_cfg ();
1367 	}
1368       if (gimple_in_ssa_p (cfun))
1369 	update_ssa (TODO_update_ssa);
1370       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1371 	verify_loop_structure ();
1372       pop_cfun ();
1373 
1374       if (dump_file && !gimple_in_ssa_p (cfun))
1375 	{
1376 	  omp_any_child_fn_dumped = true;
1377 	  dump_function_header (dump_file, child_fn, dump_flags);
1378 	  dump_function_to_file (child_fn, dump_file, dump_flags);
1379 	}
1380     }
1381 
1382   if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1383     expand_parallel_call (region, new_bb,
1384 			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1385   else
1386     expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1387   if (gimple_in_ssa_p (cfun))
1388     update_ssa (TODO_update_ssa_only_virtuals);
1389 }
1390 
1391 /* Information about members of an OpenACC collapsed loop nest.  */
1392 
1393 struct oacc_collapse
1394 {
1395   tree base;  /* Base value.  */
1396   tree iters; /* Number of steps.  */
1397   tree step;  /* Step size.  */
1398   tree tile;  /* Tile increment (if tiled).  */
1399   tree outer; /* Tile iterator var. */
1400 };
1401 
1402 /* Helper for expand_oacc_for.  Determine collapsed loop information.
1403    Fill in COUNTS array.  Emit any initialization code before GSI.
1404    Return the calculated outer loop bound of BOUND_TYPE.  */
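/* For an (illustrative) collapsed nest such as

	for (i = 0; i < n; i++)
	  for (j = 0; j < m; j++)
	    ...

   each COUNTS entry records the base (0), step (1) and iteration count
   (n resp. m) of one loop, and the returned outer bound is the product
   n * m.  */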
1405 
1406 static tree
1407 expand_oacc_collapse_init (const struct omp_for_data *fd,
1408 			   gimple_stmt_iterator *gsi,
1409 			   oacc_collapse *counts, tree bound_type,
1410 			   location_t loc)
1411 {
1412   tree tiling = fd->tiling;
1413   tree total = build_int_cst (bound_type, 1);
1414   int ix;
1415 
1416   gcc_assert (integer_onep (fd->loop.step));
1417   gcc_assert (integer_zerop (fd->loop.n1));
1418 
1419   /* When tiling, the first operand of the tile clause applies to the
1420      innermost loop, and we work outwards from there.  Seems
1421      backwards, but whatever.  */
1422   for (ix = fd->collapse; ix--;)
1423     {
1424       const omp_for_data_loop *loop = &fd->loops[ix];
1425 
1426       tree iter_type = TREE_TYPE (loop->v);
1427       tree diff_type = iter_type;
1428       tree plus_type = iter_type;
1429 
1430       gcc_assert (loop->cond_code == fd->loop.cond_code);
1431 
1432       if (POINTER_TYPE_P (iter_type))
1433 	plus_type = sizetype;
1434       if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1435 	diff_type = signed_type_for (diff_type);
1436       if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1437 	diff_type = integer_type_node;
1438 
1439       if (tiling)
1440 	{
1441 	  tree num = build_int_cst (integer_type_node, fd->collapse);
1442 	  tree loop_no = build_int_cst (integer_type_node, ix);
1443 	  tree tile = TREE_VALUE (tiling);
1444 	  gcall *call
1445 	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1446 					  /* gwv-outer=*/integer_zero_node,
1447 					  /* gwv-inner=*/integer_zero_node);
1448 
1449 	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1450 	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1451 	  gimple_call_set_lhs (call, counts[ix].tile);
1452 	  gimple_set_location (call, loc);
1453 	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1454 
1455 	  tiling = TREE_CHAIN (tiling);
1456 	}
1457       else
1458 	{
1459 	  counts[ix].tile = NULL;
1460 	  counts[ix].outer = loop->v;
1461 	}
1462 
1463       tree b = loop->n1;
1464       tree e = loop->n2;
1465       tree s = loop->step;
1466       bool up = loop->cond_code == LT_EXPR;
1467       tree dir = build_int_cst (diff_type, up ? +1 : -1);
1468       bool negating;
1469       tree expr;
1470 
1471       b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1472 				    true, GSI_SAME_STMT);
1473       e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1474 				    true, GSI_SAME_STMT);
1475 
1476       /* Convert the step, avoiding possible unsigned->signed overflow.  */
1477       negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1478       if (negating)
1479 	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1480       s = fold_convert (diff_type, s);
1481       if (negating)
1482 	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1483       s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1484 				    true, GSI_SAME_STMT);
1485 
1486       /* Determine the range, avoiding possible unsigned->signed overflow.  */
1487       negating = !up && TYPE_UNSIGNED (iter_type);
1488       expr = fold_build2 (MINUS_EXPR, plus_type,
1489 			  fold_convert (plus_type, negating ? b : e),
1490 			  fold_convert (plus_type, negating ? e : b));
1491       expr = fold_convert (diff_type, expr);
1492       if (negating)
1493 	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1494       tree range = force_gimple_operand_gsi
1495 	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1496 
1497       /* Determine number of iterations.  */
1498       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1499       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1500       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1501 
1502       tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1503 					     true, GSI_SAME_STMT);
1504 
1505       counts[ix].base = b;
1506       counts[ix].iters = iters;
1507       counts[ix].step = s;
1508 
1509       total = fold_build2 (MULT_EXPR, bound_type, total,
1510 			   fold_convert (bound_type, iters));
1511     }
1512 
1513   return total;
1514 }
1515 
1516 /* Emit initializers for collapsed loop members.  INNER is true if
1517    this is for the element loop of a TILE.  IVAR is the outer
1518    loop iteration variable, from which collapsed loop iteration values
1519    are calculated.  The COUNTS array has been initialized by
1520    expand_oacc_collapse_init.  */
1521 
1522 static void
1523 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1524 			   gimple_stmt_iterator *gsi,
1525 			   const oacc_collapse *counts, tree ivar)
1526 {
1527   tree ivar_type = TREE_TYPE (ivar);
1528 
1529   /*  The most rapidly changing iteration variable is the innermost
1530       one.  */
1531   for (int ix = fd->collapse; ix--;)
1532     {
1533       const omp_for_data_loop *loop = &fd->loops[ix];
1534       const oacc_collapse *collapse = &counts[ix];
1535       tree v = inner ? loop->v : collapse->outer;
1536       tree iter_type = TREE_TYPE (v);
1537       tree diff_type = TREE_TYPE (collapse->step);
1538       tree plus_type = iter_type;
1539       enum tree_code plus_code = PLUS_EXPR;
1540       tree expr;
1541 
1542       if (POINTER_TYPE_P (iter_type))
1543 	{
1544 	  plus_code = POINTER_PLUS_EXPR;
1545 	  plus_type = sizetype;
1546 	}
1547 
1548       expr = ivar;
1549       if (ix)
1550 	{
1551 	  tree mod = fold_convert (ivar_type, collapse->iters);
1552 	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1553 	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1554 	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1555 					   true, GSI_SAME_STMT);
1556 	}
1557 
1558       expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1559 			  collapse->step);
1560       expr = fold_build2 (plus_code, iter_type,
1561 			  inner ? collapse->outer : collapse->base,
1562 			  fold_convert (plus_type, expr));
1563       expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1564 				       true, GSI_SAME_STMT);
1565       gassign *ass = gimple_build_assign (v, expr);
1566       gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1567     }
1568 }
1569 
1570 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1571    of the combined collapse > 1 loop constructs, generate code like:
1572 	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1573 	if (cond3 is <)
1574 	  adj = STEP3 - 1;
1575 	else
1576 	  adj = STEP3 + 1;
1577 	count3 = (adj + N32 - N31) / STEP3;
1578 	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1579 	if (cond2 is <)
1580 	  adj = STEP2 - 1;
1581 	else
1582 	  adj = STEP2 + 1;
1583 	count2 = (adj + N22 - N21) / STEP2;
1584 	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1585 	if (cond1 is <)
1586 	  adj = STEP1 - 1;
1587 	else
1588 	  adj = STEP1 + 1;
1589 	count1 = (adj + N12 - N11) / STEP1;
1590 	count = count1 * count2 * count3;
1591    Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1592 	count = 0;
1593    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1594    of the combined loop constructs, just initialize COUNTS array
1595    from the _looptemp_ clauses.  */
1596 
1597 /* NOTE: It *could* be better to moosh all of the BBs together,
1598    creating one larger BB with all the computation and the unexpected
1599    jump at the end.  I.e.
1600 
1601    bool zero3, zero2, zero1, zero;
1602 
1603    zero3 = N32 c3 N31;
1604    count3 = (N32 - N31) /[cl] STEP3;
1605    zero2 = N22 c2 N21;
1606    count2 = (N22 - N21) /[cl] STEP2;
1607    zero1 = N12 c1 N11;
1608    count1 = (N12 - N11) /[cl] STEP1;
1609    zero = zero3 || zero2 || zero1;
1610    count = count1 * count2 * count3;
1611    if (__builtin_expect(zero, false)) goto zero_iter_bb;
1612 
1613    After all, we expect the zero=false, and thus we expect to have to
1614    evaluate all of the comparison expressions, so short-circuiting
1615    oughtn't be a win.  Since the condition isn't protecting a
1616    denominator, we're not concerned about divide-by-zero, so we can
1617    fully evaluate count even if a numerator turned out to be wrong.
1618 
1619    It seems like putting this all together would create much better
1620    scheduling opportunities, and less pressure on the chip's branch
1621    predictor.  */
1622 
1623 static void
1624 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1625 			    basic_block &entry_bb, tree *counts,
1626 			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1627 			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1628 			    basic_block &l2_dom_bb)
1629 {
1630   tree t, type = TREE_TYPE (fd->loop.v);
1631   edge e, ne;
1632   int i;
1633 
1634   /* Collapsed loops need work for expansion into SSA form.  */
1635   gcc_assert (!gimple_in_ssa_p (cfun));
1636 
1637   if (gimple_omp_for_combined_into_p (fd->for_stmt)
1638       && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1639     {
1640       gcc_assert (fd->ordered == 0);
1641       /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1642 	 isn't supposed to be handled, as the inner loop doesn't
1643 	 use it.  */
1644       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1645 				     OMP_CLAUSE__LOOPTEMP_);
1646       gcc_assert (innerc);
1647       for (i = 0; i < fd->collapse; i++)
1648 	{
1649 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1650 				    OMP_CLAUSE__LOOPTEMP_);
1651 	  gcc_assert (innerc);
1652 	  if (i)
1653 	    counts[i] = OMP_CLAUSE_DECL (innerc);
1654 	  else
1655 	    counts[0] = NULL_TREE;
1656 	}
1657       return;
1658     }
1659 
1660   for (i = fd->collapse; i < fd->ordered; i++)
1661     {
1662       tree itype = TREE_TYPE (fd->loops[i].v);
1663       counts[i] = NULL_TREE;
1664       t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1665 		       fold_convert (itype, fd->loops[i].n1),
1666 		       fold_convert (itype, fd->loops[i].n2));
1667       if (t && integer_zerop (t))
1668 	{
1669 	  for (i = fd->collapse; i < fd->ordered; i++)
1670 	    counts[i] = build_int_cst (type, 0);
1671 	  break;
1672 	}
1673     }
1674   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1675     {
1676       tree itype = TREE_TYPE (fd->loops[i].v);
1677 
1678       if (i >= fd->collapse && counts[i])
1679 	continue;
1680       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1681 	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1682 				fold_convert (itype, fd->loops[i].n1),
1683 				fold_convert (itype, fd->loops[i].n2)))
1684 	      == NULL_TREE || !integer_onep (t)))
1685 	{
1686 	  gcond *cond_stmt;
1687 	  tree n1, n2;
1688 	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1689 	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1690 					 true, GSI_SAME_STMT);
1691 	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1692 	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1693 					 true, GSI_SAME_STMT);
1694 	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1695 					 NULL_TREE, NULL_TREE);
1696 	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1697 	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1698 			 expand_omp_regimplify_p, NULL, NULL)
1699 	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1700 			    expand_omp_regimplify_p, NULL, NULL))
1701 	    {
1702 	      *gsi = gsi_for_stmt (cond_stmt);
1703 	      gimple_regimplify_operands (cond_stmt, gsi);
1704 	    }
1705 	  e = split_block (entry_bb, cond_stmt);
1706 	  basic_block &zero_iter_bb
1707 	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1708 	  int &first_zero_iter
1709 	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1710 	  if (zero_iter_bb == NULL)
1711 	    {
1712 	      gassign *assign_stmt;
1713 	      first_zero_iter = i;
1714 	      zero_iter_bb = create_empty_bb (entry_bb);
1715 	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1716 	      *gsi = gsi_after_labels (zero_iter_bb);
1717 	      if (i < fd->collapse)
1718 		assign_stmt = gimple_build_assign (fd->loop.n2,
1719 						   build_zero_cst (type));
1720 	      else
1721 		{
1722 		  counts[i] = create_tmp_reg (type, ".count");
1723 		  assign_stmt
1724 		    = gimple_build_assign (counts[i], build_zero_cst (type));
1725 		}
1726 	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1727 	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1728 				       entry_bb);
1729 	    }
1730 	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1731 	  ne->probability = profile_probability::very_unlikely ();
1732 	  e->flags = EDGE_TRUE_VALUE;
1733 	  e->probability = ne->probability.invert ();
1734 	  if (l2_dom_bb == NULL)
1735 	    l2_dom_bb = entry_bb;
1736 	  entry_bb = e->dest;
1737 	  *gsi = gsi_last_nondebug_bb (entry_bb);
1738 	}
1739 
1740       if (POINTER_TYPE_P (itype))
1741 	itype = signed_type_for (itype);
1742       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1743 				 ? -1 : 1));
1744       t = fold_build2 (PLUS_EXPR, itype,
1745 		       fold_convert (itype, fd->loops[i].step), t);
1746       t = fold_build2 (PLUS_EXPR, itype, t,
1747 		       fold_convert (itype, fd->loops[i].n2));
1748       t = fold_build2 (MINUS_EXPR, itype, t,
1749 		       fold_convert (itype, fd->loops[i].n1));
1750       /* ?? We could probably use CEIL_DIV_EXPR instead of
1751 	 TRUNC_DIV_EXPR and adjust by hand.  Unless we can't
1752 	 generate the same code in the end because generically we
1753 	 don't know that the values involved must be negative for
1754 	 GT??  */
1755       if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1756 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1757 			 fold_build1 (NEGATE_EXPR, itype, t),
1758 			 fold_build1 (NEGATE_EXPR, itype,
1759 				      fold_convert (itype,
1760 						    fd->loops[i].step)));
1761       else
1762 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1763 			 fold_convert (itype, fd->loops[i].step));
1764       t = fold_convert (type, t);
1765       if (TREE_CODE (t) == INTEGER_CST)
1766 	counts[i] = t;
1767       else
1768 	{
1769 	  if (i < fd->collapse || i != first_zero_iter2)
1770 	    counts[i] = create_tmp_reg (type, ".count");
1771 	  expand_omp_build_assign (gsi, counts[i], t);
1772 	}
1773       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1774 	{
1775 	  if (i == 0)
1776 	    t = counts[0];
1777 	  else
1778 	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1779 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1780 	}
1781     }
1782 }
1783 
1784 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1785 	T = V;
1786 	V3 = N31 + (T % count3) * STEP3;
1787 	T = T / count3;
1788 	V2 = N21 + (T % count2) * STEP2;
1789 	T = T / count2;
1790 	V1 = N11 + T * STEP1;
1791    if this loop doesn't have an inner loop construct combined with it.
1792    If it does have an inner loop construct combined with it and the
1793    iteration count isn't known constant, store values from counts array
1794    into its _looptemp_ temporaries instead.  */
1795 
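/* Illustrative example of the decomposition above: suppose count3 = 4,
   count2 = 5 and the logical iteration number is T = 17.  Then

	V3 = N31 + (17 % 4) * STEP3;	   17 % 4 = 1
	T  = 17 / 4;			   T = 4
	V2 = N21 + (4 % 5) * STEP2;	   4 % 5 = 4
	T  = 4 / 5;			   T = 0
	V1 = N11 + 0 * STEP1;

   i.e. T = 17 selects the (0, 4, 1)-th member of the collapsed
   iteration space, since 0 * (5 * 4) + 4 * 4 + 1 = 17.  */
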
1796 static void
1797 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1798 			  tree *counts, gimple *inner_stmt, tree startvar)
1799 {
1800   int i;
1801   if (gimple_omp_for_combined_p (fd->for_stmt))
1802     {
1803       /* If fd->loop.n2 is constant, then no propagation of the counts
1804 	 is needed, they are constant.  */
1805       if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1806 	return;
1807 
1808       tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1809 		     ? gimple_omp_taskreg_clauses (inner_stmt)
1810 		     : gimple_omp_for_clauses (inner_stmt);
1811       /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1812 	 isn't supposed to be handled, as the inner loop doesn't
1813 	 use it.  */
1814       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1815       gcc_assert (innerc);
1816       for (i = 0; i < fd->collapse; i++)
1817 	{
1818 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1819 				    OMP_CLAUSE__LOOPTEMP_);
1820 	  gcc_assert (innerc);
1821 	  if (i)
1822 	    {
1823 	      tree tem = OMP_CLAUSE_DECL (innerc);
1824 	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1825 	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1826 					    false, GSI_CONTINUE_LINKING);
1827 	      gassign *stmt = gimple_build_assign (tem, t);
1828 	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1829 	    }
1830 	}
1831       return;
1832     }
1833 
1834   tree type = TREE_TYPE (fd->loop.v);
1835   tree tem = create_tmp_reg (type, ".tem");
1836   gassign *stmt = gimple_build_assign (tem, startvar);
1837   gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1838 
1839   for (i = fd->collapse - 1; i >= 0; i--)
1840     {
1841       tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1842       itype = vtype;
1843       if (POINTER_TYPE_P (vtype))
1844 	itype = signed_type_for (vtype);
1845       if (i != 0)
1846 	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1847       else
1848 	t = tem;
1849       t = fold_convert (itype, t);
1850       t = fold_build2 (MULT_EXPR, itype, t,
1851 		       fold_convert (itype, fd->loops[i].step));
1852       if (POINTER_TYPE_P (vtype))
1853 	t = fold_build_pointer_plus (fd->loops[i].n1, t);
1854       else
1855 	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1856       t = force_gimple_operand_gsi (gsi, t,
1857 				    DECL_P (fd->loops[i].v)
1858 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1859 				    NULL_TREE, false,
1860 				    GSI_CONTINUE_LINKING);
1861       stmt = gimple_build_assign (fd->loops[i].v, t);
1862       gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1863       if (i != 0)
1864 	{
1865 	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1866 	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1867 					false, GSI_CONTINUE_LINKING);
1868 	  stmt = gimple_build_assign (tem, t);
1869 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1870 	}
1871     }
1872 }
1873 
1874 /* Helper function for expand_omp_for_*.  Generate code like:
1875     L10:
1876 	V3 += STEP3;
1877 	if (V3 cond3 N32) goto BODY_BB; else goto L11;
1878     L11:
1879 	V3 = N31;
1880 	V2 += STEP2;
1881 	if (V2 cond2 N22) goto BODY_BB; else goto L12;
1882     L12:
1883 	V2 = N21;
1884 	V1 += STEP1;
1885 	goto BODY_BB;  */
1886 
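/* Illustrative mapping only: for

	#pragma omp for collapse(2)
	for (i = 0; i < N; i++)
	  for (j = 0; j < M; j++)
	    ...

   the blocks built below correspond to V2 = j, N21 = 0, N22 = M,
   STEP2 = 1 and V1 = i, STEP1 = 1, so the update chain steps j and,
   once j runs off M, resets j to 0 and steps i, odometer style.  No
   test is emitted for the outermost variable here; the enclosing
   loop's exit condition is generated by the expand_omp_for_* caller.  */
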
1887 static basic_block
1888 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1889 			     basic_block body_bb)
1890 {
1891   basic_block last_bb, bb, collapse_bb = NULL;
1892   int i;
1893   gimple_stmt_iterator gsi;
1894   edge e;
1895   tree t;
1896   gimple *stmt;
1897 
1898   last_bb = cont_bb;
1899   for (i = fd->collapse - 1; i >= 0; i--)
1900     {
1901       tree vtype = TREE_TYPE (fd->loops[i].v);
1902 
1903       bb = create_empty_bb (last_bb);
1904       add_bb_to_loop (bb, last_bb->loop_father);
1905       gsi = gsi_start_bb (bb);
1906 
1907       if (i < fd->collapse - 1)
1908 	{
1909 	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1910 	  e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1911 
1912 	  t = fd->loops[i + 1].n1;
1913 	  t = force_gimple_operand_gsi (&gsi, t,
1914 					DECL_P (fd->loops[i + 1].v)
1915 					&& TREE_ADDRESSABLE (fd->loops[i
1916 								       + 1].v),
1917 					NULL_TREE, false,
1918 					GSI_CONTINUE_LINKING);
1919 	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1920 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1921 	}
1922       else
1923 	collapse_bb = bb;
1924 
1925       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1926 
1927       if (POINTER_TYPE_P (vtype))
1928 	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1929       else
1930 	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1931       t = force_gimple_operand_gsi (&gsi, t,
1932 				    DECL_P (fd->loops[i].v)
1933 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1934 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
1935       stmt = gimple_build_assign (fd->loops[i].v, t);
1936       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1937 
1938       if (i > 0)
1939 	{
1940 	  t = fd->loops[i].n2;
1941 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1942 					false, GSI_CONTINUE_LINKING);
1943 	  tree v = fd->loops[i].v;
1944 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
1945 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1946 					  false, GSI_CONTINUE_LINKING);
1947 	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1948 	  stmt = gimple_build_cond_empty (t);
1949 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1950 	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1951 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1952 	}
1953       else
1954 	make_edge (bb, body_bb, EDGE_FALLTHRU);
1955       last_bb = bb;
1956     }
1957 
1958   return collapse_bb;
1959 }
1960 
1961 /* Expand #pragma omp ordered depend(source).  */
1962 
1963 static void
1964 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1965 			   tree *counts, location_t loc)
1966 {
1967   enum built_in_function source_ix
1968     = fd->iter_type == long_integer_type_node
1969       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1970   gimple *g
1971     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1972 			 build_fold_addr_expr (counts[fd->ordered]));
1973   gimple_set_location (g, loc);
1974   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1975 }
1976 
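/* Illustrative sketch of what the emitted call amounts to (assuming the
   long variant of the libgomp API): inside

	#pragma omp for ordered(2)
	for (i = ...) for (j = ...)
	  {
	    #pragma omp ordered depend(source)
	    ...
	  }

   the depend(source) point becomes roughly

	GOMP_doacross_post (&.orditera[0]);

   where .orditera is the compiler-generated array (see
   expand_omp_ordered_source_sink below) holding the current iteration's
   coordinates in the doacross iteration space.  */
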
1977 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
1978 
1979 static void
1980 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1981 			 tree *counts, tree c, location_t loc)
1982 {
1983   auto_vec<tree, 10> args;
1984   enum built_in_function sink_ix
1985     = fd->iter_type == long_integer_type_node
1986       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1987   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1988   int i;
1989   gimple_stmt_iterator gsi2 = *gsi;
1990   bool warned_step = false;
1991 
1992   for (i = 0; i < fd->ordered; i++)
1993     {
1994       tree step = NULL_TREE;
1995       off = TREE_PURPOSE (deps);
1996       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1997 	{
1998 	  step = TREE_OPERAND (off, 1);
1999 	  off = TREE_OPERAND (off, 0);
2000 	}
2001       if (!integer_zerop (off))
2002 	{
2003 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2004 		      || fd->loops[i].cond_code == GT_EXPR);
2005 	  bool forward = fd->loops[i].cond_code == LT_EXPR;
2006 	  if (step)
2007 	    {
2008 	      /* Non-simple Fortran DO loops.  If step is variable,
2009 		 we don't know even the direction at compile time, so we
2010 		 can't warn.  */
2011 	      if (TREE_CODE (step) != INTEGER_CST)
2012 		break;
2013 	      forward = tree_int_cst_sgn (step) != -1;
2014 	    }
2015 	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2016 	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2017 				"lexically later iteration");
2018 	  break;
2019 	}
2020       deps = TREE_CHAIN (deps);
2021     }
2022   /* If all offsets corresponding to the collapsed loops are zero,
2023      this depend clause can be ignored.  FIXME: but there is still a
2024      flush needed.  We need to emit one __sync_synchronize () for it
2025      though (perhaps conditionally)?  Solve this together with the
2026      conservative dependence folding optimization.
2027   if (i >= fd->collapse)
2028     return;  */
2029 
2030   deps = OMP_CLAUSE_DECL (c);
2031   gsi_prev (&gsi2);
2032   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2033   edge e2 = split_block_after_labels (e1->dest);
2034 
2035   gsi2 = gsi_after_labels (e1->dest);
2036   *gsi = gsi_last_bb (e1->src);
2037   for (i = 0; i < fd->ordered; i++)
2038     {
2039       tree itype = TREE_TYPE (fd->loops[i].v);
2040       tree step = NULL_TREE;
2041       tree orig_off = NULL_TREE;
2042       if (POINTER_TYPE_P (itype))
2043 	itype = sizetype;
2044       if (i)
2045 	deps = TREE_CHAIN (deps);
2046       off = TREE_PURPOSE (deps);
2047       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2048 	{
2049 	  step = TREE_OPERAND (off, 1);
2050 	  off = TREE_OPERAND (off, 0);
2051 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2052 		      && integer_onep (fd->loops[i].step)
2053 		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2054 	}
2055       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2056       if (step)
2057 	{
2058 	  off = fold_convert_loc (loc, itype, off);
2059 	  orig_off = off;
2060 	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2061 	}
2062 
2063       if (integer_zerop (off))
2064 	t = boolean_true_node;
2065       else
2066 	{
2067 	  tree a;
2068 	  tree co = fold_convert_loc (loc, itype, off);
2069 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2070 	    {
2071 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2072 		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2073 	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2074 				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2075 				   co);
2076 	    }
2077 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2078 	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2079 				 fd->loops[i].v, co);
2080 	  else
2081 	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2082 				 fd->loops[i].v, co);
2083 	  if (step)
2084 	    {
2085 	      tree t1, t2;
2086 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2087 		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2088 				      fd->loops[i].n1);
2089 	      else
2090 		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2091 				      fd->loops[i].n2);
2092 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2093 		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2094 				      fd->loops[i].n2);
2095 	      else
2096 		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2097 				      fd->loops[i].n1);
2098 	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2099 				   step, build_int_cst (TREE_TYPE (step), 0));
2100 	      if (TREE_CODE (step) != INTEGER_CST)
2101 		{
2102 		  t1 = unshare_expr (t1);
2103 		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2104 						 false, GSI_CONTINUE_LINKING);
2105 		  t2 = unshare_expr (t2);
2106 		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2107 						 false, GSI_CONTINUE_LINKING);
2108 		}
2109 	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2110 				   t, t2, t1);
2111 	    }
2112 	  else if (fd->loops[i].cond_code == LT_EXPR)
2113 	    {
2114 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2115 		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2116 				     fd->loops[i].n1);
2117 	      else
2118 		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2119 				     fd->loops[i].n2);
2120 	    }
2121 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2122 	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2123 				 fd->loops[i].n2);
2124 	  else
2125 	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2126 				 fd->loops[i].n1);
2127 	}
2128       if (cond)
2129 	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2130       else
2131 	cond = t;
2132 
2133       off = fold_convert_loc (loc, itype, off);
2134 
2135       if (step
2136 	  || (fd->loops[i].cond_code == LT_EXPR
2137 	      ? !integer_onep (fd->loops[i].step)
2138 	      : !integer_minus_onep (fd->loops[i].step)))
2139 	{
2140 	  if (step == NULL_TREE
2141 	      && TYPE_UNSIGNED (itype)
2142 	      && fd->loops[i].cond_code == GT_EXPR)
2143 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2144 				 fold_build1_loc (loc, NEGATE_EXPR, itype,
2145 						  s));
2146 	  else
2147 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2148 				 orig_off ? orig_off : off, s);
2149 	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2150 			       build_int_cst (itype, 0));
2151 	  if (integer_zerop (t) && !warned_step)
2152 	    {
2153 	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2154 				  "in the iteration space");
2155 	      warned_step = true;
2156 	    }
2157 	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2158 				  cond, t);
2159 	}
2160 
2161       if (i <= fd->collapse - 1 && fd->collapse > 1)
2162 	t = fd->loop.v;
2163       else if (counts[i])
2164 	t = counts[i];
2165       else
2166 	{
2167 	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2168 			       fd->loops[i].v, fd->loops[i].n1);
2169 	  t = fold_convert_loc (loc, fd->iter_type, t);
2170 	}
2171       if (step)
2172 	/* We have divided off by step already earlier.  */;
2173       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2174 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2175 			       fold_build1_loc (loc, NEGATE_EXPR, itype,
2176 						s));
2177       else
2178 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2179       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2180 	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2181       off = fold_convert_loc (loc, fd->iter_type, off);
2182       if (i <= fd->collapse - 1 && fd->collapse > 1)
2183 	{
2184 	  if (i)
2185 	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2186 				   off);
2187 	  if (i < fd->collapse - 1)
2188 	    {
2189 	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2190 				      counts[i]);
2191 	      continue;
2192 	    }
2193 	}
2194       off = unshare_expr (off);
2195       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2196       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2197 				    true, GSI_SAME_STMT);
2198       args.safe_push (t);
2199     }
2200   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2201   gimple_set_location (g, loc);
2202   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2203 
2204   cond = unshare_expr (cond);
2205   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2206 				   GSI_CONTINUE_LINKING);
2207   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2208   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2209   e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2210   e1->probability = e3->probability.invert ();
2211   e1->flags = EDGE_TRUE_VALUE;
2212   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2213 
2214   *gsi = gsi_after_labels (e2->dest);
2215 }
2216 
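/* Illustrative sketch, continuing the depend(source) example above: a
   matching

	#pragma omp ordered depend(sink: i - 1, j)

   in an ordered(2) loop nest with unit steps and lower bounds n11/n21
   becomes roughly

	if (i - 1 >= n11)
	  GOMP_doacross_wait (i - 1 - n11, j - n21);

   i.e. the clause offsets are folded into the loop variables, rebased
   against each loop's lower bound (and divided by the step when it is
   not 1), and the wait is guarded by the accumulated condition so that
   sink iterations outside the iteration space are skipped.  The names
   n11/n21 are placeholders for this sketch.  */
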
2217 /* Expand all #pragma omp ordered depend(source) and
2218    #pragma omp ordered depend(sink:...) constructs in the current
2219    #pragma omp for ordered(n) region.  */
2220 
2221 static void
2222 expand_omp_ordered_source_sink (struct omp_region *region,
2223 				struct omp_for_data *fd, tree *counts,
2224 				basic_block cont_bb)
2225 {
2226   struct omp_region *inner;
2227   int i;
2228   for (i = fd->collapse - 1; i < fd->ordered; i++)
2229     if (i == fd->collapse - 1 && fd->collapse > 1)
2230       counts[i] = NULL_TREE;
2231     else if (i >= fd->collapse && !cont_bb)
2232       counts[i] = build_zero_cst (fd->iter_type);
2233     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2234 	     && integer_onep (fd->loops[i].step))
2235       counts[i] = NULL_TREE;
2236     else
2237       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2238   tree atype
2239     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2240   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2241   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2242 
2243   for (inner = region->inner; inner; inner = inner->next)
2244     if (inner->type == GIMPLE_OMP_ORDERED)
2245       {
2246 	gomp_ordered *ord_stmt = inner->ord_stmt;
2247 	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2248 	location_t loc = gimple_location (ord_stmt);
2249 	tree c;
2250 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2251 	     c; c = OMP_CLAUSE_CHAIN (c))
2252 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2253 	    break;
2254 	if (c)
2255 	  expand_omp_ordered_source (&gsi, fd, counts, loc);
2256 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2257 	     c; c = OMP_CLAUSE_CHAIN (c))
2258 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2259 	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2260 	gsi_remove (&gsi, true);
2261       }
2262 }
2263 
2264 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2265    collapsed.  */
2266 
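/* Illustrative sketch: for, e.g.,

	#pragma omp for ordered(2)
	for (i = ...)
	  for (j = ...)
	    BODY;

   only the outermost (collapsed) loop is workshared through the
   runtime; this function re-creates the inner j loop sequentially
   around BODY, resetting j to its lower bound on entry, stepping it in
   CONT_BB, and keeping the corresponding element of the .orditera
   array (and the .orditer counter, where one was created) up to date
   for depend(source)/depend(sink).  */
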
2267 static basic_block
2268 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2269 			      basic_block cont_bb, basic_block body_bb,
2270 			      bool ordered_lastprivate)
2271 {
2272   if (fd->ordered == fd->collapse)
2273     return cont_bb;
2274 
2275   if (!cont_bb)
2276     {
2277       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2278       for (int i = fd->collapse; i < fd->ordered; i++)
2279 	{
2280 	  tree type = TREE_TYPE (fd->loops[i].v);
2281 	  tree n1 = fold_convert (type, fd->loops[i].n1);
2282 	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2283 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2284 			      size_int (i - fd->collapse + 1),
2285 			      NULL_TREE, NULL_TREE);
2286 	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2287 	}
2288       return NULL;
2289     }
2290 
2291   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2292     {
2293       tree t, type = TREE_TYPE (fd->loops[i].v);
2294       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2295       expand_omp_build_assign (&gsi, fd->loops[i].v,
2296 			       fold_convert (type, fd->loops[i].n1));
2297       if (counts[i])
2298 	expand_omp_build_assign (&gsi, counts[i],
2299 				 build_zero_cst (fd->iter_type));
2300       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2301 			  size_int (i - fd->collapse + 1),
2302 			  NULL_TREE, NULL_TREE);
2303       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2304       if (!gsi_end_p (gsi))
2305 	gsi_prev (&gsi);
2306       else
2307 	gsi = gsi_last_bb (body_bb);
2308       edge e1 = split_block (body_bb, gsi_stmt (gsi));
2309       basic_block new_body = e1->dest;
2310       if (body_bb == cont_bb)
2311 	cont_bb = new_body;
2312       edge e2 = NULL;
2313       basic_block new_header;
2314       if (EDGE_COUNT (cont_bb->preds) > 0)
2315 	{
2316 	  gsi = gsi_last_bb (cont_bb);
2317 	  if (POINTER_TYPE_P (type))
2318 	    t = fold_build_pointer_plus (fd->loops[i].v,
2319 					 fold_convert (sizetype,
2320 						       fd->loops[i].step));
2321 	  else
2322 	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2323 			     fold_convert (type, fd->loops[i].step));
2324 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2325 	  if (counts[i])
2326 	    {
2327 	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2328 			       build_int_cst (fd->iter_type, 1));
2329 	      expand_omp_build_assign (&gsi, counts[i], t);
2330 	      t = counts[i];
2331 	    }
2332 	  else
2333 	    {
2334 	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2335 			       fd->loops[i].v, fd->loops[i].n1);
2336 	      t = fold_convert (fd->iter_type, t);
2337 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2338 					    true, GSI_SAME_STMT);
2339 	    }
2340 	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2341 			 size_int (i - fd->collapse + 1),
2342 			 NULL_TREE, NULL_TREE);
2343 	  expand_omp_build_assign (&gsi, aref, t);
2344 	  gsi_prev (&gsi);
2345 	  e2 = split_block (cont_bb, gsi_stmt (gsi));
2346 	  new_header = e2->dest;
2347 	}
2348       else
2349 	new_header = cont_bb;
2350       gsi = gsi_after_labels (new_header);
2351       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2352 					 true, GSI_SAME_STMT);
2353       tree n2
2354 	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2355 				    true, NULL_TREE, true, GSI_SAME_STMT);
2356       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2357       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2358       edge e3 = split_block (new_header, gsi_stmt (gsi));
2359       cont_bb = e3->dest;
2360       remove_edge (e1);
2361       make_edge (body_bb, new_header, EDGE_FALLTHRU);
2362       e3->flags = EDGE_FALSE_VALUE;
2363       e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2364       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2365       e1->probability = e3->probability.invert ();
2366 
2367       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2368       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2369 
2370       if (e2)
2371 	{
2372 	  struct loop *loop = alloc_loop ();
2373 	  loop->header = new_header;
2374 	  loop->latch = e2->src;
2375 	  add_loop (loop, body_bb->loop_father);
2376 	}
2377     }
2378 
2379   /* If there are any lastprivate clauses and it is possible some loops
2380      might have zero iterations, ensure all the decls are initialized,
2381      otherwise we could crash evaluating C++ class iterators with lastprivate
2382      clauses.  */
2383   bool need_inits = false;
2384   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2385     if (need_inits)
2386       {
2387 	tree type = TREE_TYPE (fd->loops[i].v);
2388 	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2389 	expand_omp_build_assign (&gsi, fd->loops[i].v,
2390 				 fold_convert (type, fd->loops[i].n1));
2391       }
2392     else
2393       {
2394 	tree type = TREE_TYPE (fd->loops[i].v);
2395 	tree this_cond = fold_build2 (fd->loops[i].cond_code,
2396 				      boolean_type_node,
2397 				      fold_convert (type, fd->loops[i].n1),
2398 				      fold_convert (type, fd->loops[i].n2));
2399 	if (!integer_onep (this_cond))
2400 	  need_inits = true;
2401       }
2402 
2403   return cont_bb;
2404 }
2405 
2406 /* A subroutine of expand_omp_for.  Generate code for a parallel
2407    loop with any schedule.  Given parameters:
2408 
2409 	for (V = N1; V cond N2; V += STEP) BODY;
2410 
2411    where COND is "<" or ">", we generate pseudocode
2412 
2413 	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2414 	if (more) goto L0; else goto L3;
2415     L0:
2416 	V = istart0;
2417 	iend = iend0;
2418     L1:
2419 	BODY;
2420 	V += STEP;
2421 	if (V cond iend) goto L1; else goto L2;
2422     L2:
2423 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2424     L3:
2425 
2426     If this is a combined omp parallel loop, instead of the call to
2427     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2428     If this is gimple_omp_for_combined_p loop, then instead of assigning
2429     V and iend in L0 we assign the first two _looptemp_ clause decls of the
2430     inner GIMPLE_OMP_FOR and V += STEP; and
2431     if (V cond iend) goto L1; else goto L2; are removed.
2432 
2433     For collapsed loops, given parameters:
2434       collapse(3)
2435       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2436 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2437 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2438 	    BODY;
2439 
2440     we generate pseudocode
2441 
2442 	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2443 	if (cond3 is <)
2444 	  adj = STEP3 - 1;
2445 	else
2446 	  adj = STEP3 + 1;
2447 	count3 = (adj + N32 - N31) / STEP3;
2448 	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2449 	if (cond2 is <)
2450 	  adj = STEP2 - 1;
2451 	else
2452 	  adj = STEP2 + 1;
2453 	count2 = (adj + N22 - N21) / STEP2;
2454 	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2455 	if (cond1 is <)
2456 	  adj = STEP1 - 1;
2457 	else
2458 	  adj = STEP1 + 1;
2459 	count1 = (adj + N12 - N11) / STEP1;
2460 	count = count1 * count2 * count3;
2461 	goto Z1;
2462     Z0:
2463 	count = 0;
2464     Z1:
2465 	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2466 	if (more) goto L0; else goto L3;
2467     L0:
2468 	V = istart0;
2469 	T = V;
2470 	V3 = N31 + (T % count3) * STEP3;
2471 	T = T / count3;
2472 	V2 = N21 + (T % count2) * STEP2;
2473 	T = T / count2;
2474 	V1 = N11 + T * STEP1;
2475 	iend = iend0;
2476     L1:
2477 	BODY;
2478 	V += 1;
2479 	if (V < iend) goto L10; else goto L2;
2480     L10:
2481 	V3 += STEP3;
2482 	if (V3 cond3 N32) goto L1; else goto L11;
2483     L11:
2484 	V3 = N31;
2485 	V2 += STEP2;
2486 	if (V2 cond2 N22) goto L1; else goto L12;
2487     L12:
2488 	V2 = N21;
2489 	V1 += STEP1;
2490 	goto L1;
2491     L2:
2492 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2493     L3:
2494 
2495       */
2496 
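/* Illustrative sketch only: at run time the expansion above corresponds
   to each thread driving the loop through the libgomp iteration API.
   For example, for schedule(dynamic) with long iterators and a < loop,
   the generated code behaves roughly like

	long istart0, iend0;
	if (GOMP_loop_dynamic_start (N1, N2, STEP, CHUNK, &istart0, &iend0))
	  do
	    {
	      for (long v = istart0; v < iend0; v += STEP)
		BODY;
	    }
	  while (GOMP_loop_dynamic_next (&istart0, &iend0));
	GOMP_loop_end ();

   with START_FN/NEXT_FN selecting the matching GOMP_loop_*_start and
   GOMP_loop_*_next entry points, and GOMP_loop_end (or its nowait or
   cancel variant) emitted from the GIMPLE_OMP_RETURN at the region
   exit.  */
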
2497 static void
2498 expand_omp_for_generic (struct omp_region *region,
2499 			struct omp_for_data *fd,
2500 			enum built_in_function start_fn,
2501 			enum built_in_function next_fn,
2502 			gimple *inner_stmt)
2503 {
2504   tree type, istart0, iend0, iend;
2505   tree t, vmain, vback, bias = NULL_TREE;
2506   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2507   basic_block l2_bb = NULL, l3_bb = NULL;
2508   gimple_stmt_iterator gsi;
2509   gassign *assign_stmt;
2510   bool in_combined_parallel = is_combined_parallel (region);
2511   bool broken_loop = region->cont == NULL;
2512   edge e, ne;
2513   tree *counts = NULL;
2514   int i;
2515   bool ordered_lastprivate = false;
2516 
2517   gcc_assert (!broken_loop || !in_combined_parallel);
2518   gcc_assert (fd->iter_type == long_integer_type_node
2519 	      || !in_combined_parallel);
2520 
2521   entry_bb = region->entry;
2522   cont_bb = region->cont;
2523   collapse_bb = NULL;
2524   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2525   gcc_assert (broken_loop
2526 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2527   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2528   l1_bb = single_succ (l0_bb);
2529   if (!broken_loop)
2530     {
2531       l2_bb = create_empty_bb (cont_bb);
2532       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2533 		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2534 		      == l1_bb));
2535       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2536     }
2537   else
2538     l2_bb = NULL;
2539   l3_bb = BRANCH_EDGE (entry_bb)->dest;
2540   exit_bb = region->exit;
2541 
2542   gsi = gsi_last_nondebug_bb (entry_bb);
2543 
2544   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2545   if (fd->ordered
2546       && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2547 			  OMP_CLAUSE_LASTPRIVATE))
2548     ordered_lastprivate = true;
2549   if (fd->collapse > 1 || fd->ordered)
2550     {
2551       int first_zero_iter1 = -1, first_zero_iter2 = -1;
2552       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2553 
2554       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2555       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2556 				  zero_iter1_bb, first_zero_iter1,
2557 				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2558 
2559       if (zero_iter1_bb)
2560 	{
2561 	  /* Some counts[i] vars might be uninitialized if
2562 	     some loop has zero iterations.  But the body shouldn't
2563 	     be executed in that case, so just avoid uninit warnings.  */
2564 	  for (i = first_zero_iter1;
2565 	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2566 	    if (SSA_VAR_P (counts[i]))
2567 	      TREE_NO_WARNING (counts[i]) = 1;
2568 	  gsi_prev (&gsi);
2569 	  e = split_block (entry_bb, gsi_stmt (gsi));
2570 	  entry_bb = e->dest;
2571 	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2572 	  gsi = gsi_last_nondebug_bb (entry_bb);
2573 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2574 				   get_immediate_dominator (CDI_DOMINATORS,
2575 							    zero_iter1_bb));
2576 	}
2577       if (zero_iter2_bb)
2578 	{
2579 	  /* Some counts[i] vars might be uninitialized if
2580 	     some loop has zero iterations.  But the body shouldn't
2581 	     be executed in that case, so just avoid uninit warnings.  */
2582 	  for (i = first_zero_iter2; i < fd->ordered; i++)
2583 	    if (SSA_VAR_P (counts[i]))
2584 	      TREE_NO_WARNING (counts[i]) = 1;
2585 	  if (zero_iter1_bb)
2586 	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2587 	  else
2588 	    {
2589 	      gsi_prev (&gsi);
2590 	      e = split_block (entry_bb, gsi_stmt (gsi));
2591 	      entry_bb = e->dest;
2592 	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2593 	      gsi = gsi_last_nondebug_bb (entry_bb);
2594 	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2595 				       get_immediate_dominator
2596 					 (CDI_DOMINATORS, zero_iter2_bb));
2597 	    }
2598 	}
2599       if (fd->collapse == 1)
2600 	{
2601 	  counts[0] = fd->loop.n2;
2602 	  fd->loop = fd->loops[0];
2603 	}
2604     }
2605 
2606   type = TREE_TYPE (fd->loop.v);
2607   istart0 = create_tmp_var (fd->iter_type, ".istart0");
2608   iend0 = create_tmp_var (fd->iter_type, ".iend0");
2609   TREE_ADDRESSABLE (istart0) = 1;
2610   TREE_ADDRESSABLE (iend0) = 1;
2611 
2612   /* See if we need to bias by LLONG_MIN.  */
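  /* The unsigned long long runtime API compares the bounds as unsigned
     values.  If the iteration variable is signed and its bounds may
     straddle zero, add TYPE_MIN_VALUE to both bounds before the library
     call so that signed order maps onto unsigned order; the same bias
     is subtracted again below when istart0/iend0 are converted back to
     the user's iteration space.  */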
2613   if (fd->iter_type == long_long_unsigned_type_node
2614       && TREE_CODE (type) == INTEGER_TYPE
2615       && !TYPE_UNSIGNED (type)
2616       && fd->ordered == 0)
2617     {
2618       tree n1, n2;
2619 
2620       if (fd->loop.cond_code == LT_EXPR)
2621 	{
2622 	  n1 = fd->loop.n1;
2623 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2624 	}
2625       else
2626 	{
2627 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2628 	  n2 = fd->loop.n1;
2629 	}
2630       if (TREE_CODE (n1) != INTEGER_CST
2631 	  || TREE_CODE (n2) != INTEGER_CST
2632 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2633 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2634     }
2635 
2636   gimple_stmt_iterator gsif = gsi;
2637   gsi_prev (&gsif);
2638 
2639   tree arr = NULL_TREE;
2640   if (in_combined_parallel)
2641     {
2642       gcc_assert (fd->ordered == 0);
2643       /* In a combined parallel loop, emit a call to
2644 	 GOMP_loop_foo_next.  */
2645       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2646 			   build_fold_addr_expr (istart0),
2647 			   build_fold_addr_expr (iend0));
2648     }
2649   else
2650     {
2651       tree t0, t1, t2, t3, t4;
2652       /* If this is not a combined parallel loop, emit a call to
2653 	 GOMP_loop_foo_start in ENTRY_BB.  */
2654       t4 = build_fold_addr_expr (iend0);
2655       t3 = build_fold_addr_expr (istart0);
2656       if (fd->ordered)
2657 	{
2658 	  t0 = build_int_cst (unsigned_type_node,
2659 			      fd->ordered - fd->collapse + 1);
2660 	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2661 							fd->ordered
2662 							- fd->collapse + 1),
2663 				".omp_counts");
2664 	  DECL_NAMELESS (arr) = 1;
2665 	  TREE_ADDRESSABLE (arr) = 1;
2666 	  TREE_STATIC (arr) = 1;
2667 	  vec<constructor_elt, va_gc> *v;
2668 	  vec_alloc (v, fd->ordered - fd->collapse + 1);
2669 	  int idx;
2670 
2671 	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2672 	    {
2673 	      tree c;
2674 	      if (idx == 0 && fd->collapse > 1)
2675 		c = fd->loop.n2;
2676 	      else
2677 		c = counts[idx + fd->collapse - 1];
2678 	      tree purpose = size_int (idx);
2679 	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2680 	      if (TREE_CODE (c) != INTEGER_CST)
2681 		TREE_STATIC (arr) = 0;
2682 	    }
2683 
2684 	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2685 	  if (!TREE_STATIC (arr))
2686 	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2687 						    void_type_node, arr),
2688 				      true, NULL_TREE, true, GSI_SAME_STMT);
2689 	  t1 = build_fold_addr_expr (arr);
2690 	  t2 = NULL_TREE;
2691 	}
2692       else
2693 	{
2694 	  t2 = fold_convert (fd->iter_type, fd->loop.step);
2695 	  t1 = fd->loop.n2;
2696 	  t0 = fd->loop.n1;
2697 	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
2698 	    {
2699 	      tree innerc
2700 		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2701 				   OMP_CLAUSE__LOOPTEMP_);
2702 	      gcc_assert (innerc);
2703 	      t0 = OMP_CLAUSE_DECL (innerc);
2704 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2705 					OMP_CLAUSE__LOOPTEMP_);
2706 	      gcc_assert (innerc);
2707 	      t1 = OMP_CLAUSE_DECL (innerc);
2708 	    }
2709 	  if (POINTER_TYPE_P (TREE_TYPE (t0))
2710 	      && TYPE_PRECISION (TREE_TYPE (t0))
2711 		 != TYPE_PRECISION (fd->iter_type))
2712 	    {
2713 	      /* Avoid casting pointers to integer of a different size.  */
2714 	      tree itype = signed_type_for (type);
2715 	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2716 	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2717 	    }
2718 	  else
2719 	    {
2720 	      t1 = fold_convert (fd->iter_type, t1);
2721 	      t0 = fold_convert (fd->iter_type, t0);
2722 	    }
2723 	  if (bias)
2724 	    {
2725 	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2726 	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2727 	    }
2728 	}
2729       if (fd->iter_type == long_integer_type_node || fd->ordered)
2730 	{
2731 	  if (fd->chunk_size)
2732 	    {
2733 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2734 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2735 	      if (fd->ordered)
2736 		t = build_call_expr (builtin_decl_explicit (start_fn),
2737 				     5, t0, t1, t, t3, t4);
2738 	      else
2739 		t = build_call_expr (builtin_decl_explicit (start_fn),
2740 				     6, t0, t1, t2, t, t3, t4);
2741 	    }
2742 	  else if (fd->ordered)
2743 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2744 				 4, t0, t1, t3, t4);
2745 	  else
2746 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2747 				 5, t0, t1, t2, t3, t4);
2748 	}
2749       else
2750 	{
2751 	  tree t5;
2752 	  tree c_bool_type;
2753 	  tree bfn_decl;
2754 
2755 	  /* The GOMP_loop_ull_*start functions have an additional boolean
2756 	     argument: true for < loops and false for > loops.
2757 	     In Fortran, the C bool type can be different from
2758 	     boolean_type_node.  */
2759 	  bfn_decl = builtin_decl_explicit (start_fn);
2760 	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2761 	  t5 = build_int_cst (c_bool_type,
2762 			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
2763 	  if (fd->chunk_size)
2764 	    {
2765 	      tree bfn_decl = builtin_decl_explicit (start_fn);
2766 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2767 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2768 	      t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2769 	    }
2770 	  else
2771 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2772 				 6, t5, t0, t1, t2, t3, t4);
2773 	}
2774     }
2775   if (TREE_TYPE (t) != boolean_type_node)
2776     t = fold_build2 (NE_EXPR, boolean_type_node,
2777 		     t, build_int_cst (TREE_TYPE (t), 0));
2778   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2779 				true, GSI_SAME_STMT);
2780   if (arr && !TREE_STATIC (arr))
2781     {
2782       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2783       TREE_THIS_VOLATILE (clobber) = 1;
2784       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2785 			 GSI_SAME_STMT);
2786     }
2787   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2788 
2789   /* Remove the GIMPLE_OMP_FOR statement.  */
2790   gsi_remove (&gsi, true);
2791 
2792   if (gsi_end_p (gsif))
2793     gsif = gsi_after_labels (gsi_bb (gsif));
2794   gsi_next (&gsif);
2795 
2796   /* Iteration setup for sequential loop goes in L0_BB.  */
2797   tree startvar = fd->loop.v;
2798   tree endvar = NULL_TREE;
2799 
2800   if (gimple_omp_for_combined_p (fd->for_stmt))
2801     {
2802       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2803 		  && gimple_omp_for_kind (inner_stmt)
2804 		     == GF_OMP_FOR_KIND_SIMD);
2805       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2806 				     OMP_CLAUSE__LOOPTEMP_);
2807       gcc_assert (innerc);
2808       startvar = OMP_CLAUSE_DECL (innerc);
2809       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2810 				OMP_CLAUSE__LOOPTEMP_);
2811       gcc_assert (innerc);
2812       endvar = OMP_CLAUSE_DECL (innerc);
2813     }
2814 
2815   gsi = gsi_start_bb (l0_bb);
2816   t = istart0;
2817   if (fd->ordered && fd->collapse == 1)
2818     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2819 		     fold_convert (fd->iter_type, fd->loop.step));
2820   else if (bias)
2821     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2822   if (fd->ordered && fd->collapse == 1)
2823     {
2824       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2825 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2826 			 fd->loop.n1, fold_convert (sizetype, t));
2827       else
2828 	{
2829 	  t = fold_convert (TREE_TYPE (startvar), t);
2830 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2831 			   fd->loop.n1, t);
2832 	}
2833     }
2834   else
2835     {
2836       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2837 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2838       t = fold_convert (TREE_TYPE (startvar), t);
2839     }
2840   t = force_gimple_operand_gsi (&gsi, t,
2841 				DECL_P (startvar)
2842 				&& TREE_ADDRESSABLE (startvar),
2843 				NULL_TREE, false, GSI_CONTINUE_LINKING);
2844   assign_stmt = gimple_build_assign (startvar, t);
2845   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2846 
2847   t = iend0;
2848   if (fd->ordered && fd->collapse == 1)
2849     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2850 		     fold_convert (fd->iter_type, fd->loop.step));
2851   else if (bias)
2852     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2853   if (fd->ordered && fd->collapse == 1)
2854     {
2855       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2856 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2857 			 fd->loop.n1, fold_convert (sizetype, t));
2858       else
2859 	{
2860 	  t = fold_convert (TREE_TYPE (startvar), t);
2861 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2862 			   fd->loop.n1, t);
2863 	}
2864     }
2865   else
2866     {
2867       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2868 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2869       t = fold_convert (TREE_TYPE (startvar), t);
2870     }
2871   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2872 				   false, GSI_CONTINUE_LINKING);
2873   if (endvar)
2874     {
2875       assign_stmt = gimple_build_assign (endvar, iend);
2876       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2877       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2878 	assign_stmt = gimple_build_assign (fd->loop.v, iend);
2879       else
2880 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2881       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2882     }
2883   /* Handle linear clause adjustments.  */
2884   tree itercnt = NULL_TREE;
2885   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2886     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2887 	 c; c = OMP_CLAUSE_CHAIN (c))
2888       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2889 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2890 	{
2891 	  tree d = OMP_CLAUSE_DECL (c);
2892 	  bool is_ref = omp_is_reference (d);
2893 	  tree t = d, a, dest;
2894 	  if (is_ref)
2895 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2896 	  tree type = TREE_TYPE (t);
2897 	  if (POINTER_TYPE_P (type))
2898 	    type = sizetype;
2899 	  dest = unshare_expr (t);
2900 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
2901 	  expand_omp_build_assign (&gsif, v, t);
2902 	  if (itercnt == NULL_TREE)
2903 	    {
2904 	      itercnt = startvar;
2905 	      tree n1 = fd->loop.n1;
2906 	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2907 		{
2908 		  itercnt
2909 		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2910 				    itercnt);
2911 		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
2912 		}
2913 	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2914 				     itercnt, n1);
2915 	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2916 				     itercnt, fd->loop.step);
2917 	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2918 						  NULL_TREE, false,
2919 						  GSI_CONTINUE_LINKING);
2920 	    }
2921 	  a = fold_build2 (MULT_EXPR, type,
2922 			   fold_convert (type, itercnt),
2923 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2924 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2925 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2926 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2927 					false, GSI_CONTINUE_LINKING);
2928 	  assign_stmt = gimple_build_assign (dest, t);
2929 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2930 	}
2931   if (fd->collapse > 1)
2932     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2933 
2934   if (fd->ordered)
2935     {
2936       /* Until now, the counts array contained the number of iterations
2937 	 (or a variable holding it) for the ith loop.  From now on, we
2938 	 need those counts only for the collapsed loops, and only for the
2939 	 2nd through the last collapsed one.  Move them one element
2940 	 earlier; we'll use counts[fd->collapse - 1] for the first
2941 	 source/sink iteration counter and so on, and counts[fd->ordered]
2942 	 as the array holding the current counter values for
2943 	 depend(source).  */
2944       if (fd->collapse > 1)
2945 	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2946       if (broken_loop)
2947 	{
2948 	  int i;
2949 	  for (i = fd->collapse; i < fd->ordered; i++)
2950 	    {
2951 	      tree type = TREE_TYPE (fd->loops[i].v);
2952 	      tree this_cond
2953 		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2954 			       fold_convert (type, fd->loops[i].n1),
2955 			       fold_convert (type, fd->loops[i].n2));
2956 	      if (!integer_onep (this_cond))
2957 		break;
2958 	    }
2959 	  if (i < fd->ordered)
2960 	    {
2961 	      cont_bb
2962 		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2963 	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
2964 	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2965 	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2966 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2967 	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2968 	      make_edge (cont_bb, l1_bb, 0);
2969 	      l2_bb = create_empty_bb (cont_bb);
2970 	      broken_loop = false;
2971 	    }
2972 	}
2973       expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2974       cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2975 					      ordered_lastprivate);
2976       if (counts[fd->collapse - 1])
2977 	{
2978 	  gcc_assert (fd->collapse == 1);
2979 	  gsi = gsi_last_bb (l0_bb);
2980 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2981 				   istart0, true);
2982 	  gsi = gsi_last_bb (cont_bb);
2983 	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2984 			   build_int_cst (fd->iter_type, 1));
2985 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2986 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2987 			      size_zero_node, NULL_TREE, NULL_TREE);
2988 	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2989 	  t = counts[fd->collapse - 1];
2990 	}
2991       else if (fd->collapse > 1)
2992 	t = fd->loop.v;
2993       else
2994 	{
2995 	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2996 			   fd->loops[0].v, fd->loops[0].n1);
2997 	  t = fold_convert (fd->iter_type, t);
2998 	}
2999       gsi = gsi_last_bb (l0_bb);
3000       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3001 			  size_zero_node, NULL_TREE, NULL_TREE);
3002       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3003 				    false, GSI_CONTINUE_LINKING);
3004       expand_omp_build_assign (&gsi, aref, t, true);
3005     }
3006 
3007   if (!broken_loop)
3008     {
3009       /* Code to control the increment and predicate for the sequential
3010 	 loop goes in the CONT_BB.  */
3011       gsi = gsi_last_nondebug_bb (cont_bb);
3012       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3013       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3014       vmain = gimple_omp_continue_control_use (cont_stmt);
3015       vback = gimple_omp_continue_control_def (cont_stmt);
3016 
3017       if (!gimple_omp_for_combined_p (fd->for_stmt))
3018 	{
3019 	  if (POINTER_TYPE_P (type))
3020 	    t = fold_build_pointer_plus (vmain, fd->loop.step);
3021 	  else
3022 	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3023 	  t = force_gimple_operand_gsi (&gsi, t,
3024 					DECL_P (vback)
3025 					&& TREE_ADDRESSABLE (vback),
3026 					NULL_TREE, true, GSI_SAME_STMT);
3027 	  assign_stmt = gimple_build_assign (vback, t);
3028 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3029 
3030 	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3031 	    {
3032 	      if (fd->collapse > 1)
3033 		t = fd->loop.v;
3034 	      else
3035 		{
3036 		  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3037 				   fd->loops[0].v, fd->loops[0].n1);
3038 		  t = fold_convert (fd->iter_type, t);
3039 		}
3040 	      tree aref = build4 (ARRAY_REF, fd->iter_type,
3041 				  counts[fd->ordered], size_zero_node,
3042 				  NULL_TREE, NULL_TREE);
3043 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3044 					    true, GSI_SAME_STMT);
3045 	      expand_omp_build_assign (&gsi, aref, t);
3046 	    }
3047 
3048 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3049 		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3050 		      iend);
3051 	  gcond *cond_stmt = gimple_build_cond_empty (t);
3052 	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3053 	}
3054 
3055       /* Remove GIMPLE_OMP_CONTINUE.  */
3056       gsi_remove (&gsi, true);
3057 
3058       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3059 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3060 
3061       /* Emit code to get the next parallel iteration in L2_BB.  */
3062       gsi = gsi_start_bb (l2_bb);
3063 
3064       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3065 			   build_fold_addr_expr (istart0),
3066 			   build_fold_addr_expr (iend0));
3067       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3068 				    false, GSI_CONTINUE_LINKING);
3069       if (TREE_TYPE (t) != boolean_type_node)
3070 	t = fold_build2 (NE_EXPR, boolean_type_node,
3071 			 t, build_int_cst (TREE_TYPE (t), 0));
3072       gcond *cond_stmt = gimple_build_cond_empty (t);
3073       gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3074     }
3075 
3076   /* Add the loop cleanup function.  */
3077   gsi = gsi_last_nondebug_bb (exit_bb);
3078   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3079     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3080   else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3081     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3082   else
3083     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3084   gcall *call_stmt = gimple_build_call (t, 0);
3085   if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3086     gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3087   gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3088   if (fd->ordered)
3089     {
3090       tree arr = counts[fd->ordered];
3091       tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3092       TREE_THIS_VOLATILE (clobber) = 1;
3093       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3094 			GSI_SAME_STMT);
3095     }
3096   gsi_remove (&gsi, true);
3097 
3098   /* Connect the new blocks.  */
3099   find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3100   find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3101 
3102   if (!broken_loop)
3103     {
3104       gimple_seq phis;
3105 
3106       e = find_edge (cont_bb, l3_bb);
3107       ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3108 
3109       phis = phi_nodes (l3_bb);
3110       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3111 	{
3112 	  gimple *phi = gsi_stmt (gsi);
3113 	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3114 		   PHI_ARG_DEF_FROM_EDGE (phi, e));
3115 	}
3116       remove_edge (e);
3117 
3118       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3119       e = find_edge (cont_bb, l1_bb);
3120       if (e == NULL)
3121 	{
3122 	  e = BRANCH_EDGE (cont_bb);
3123 	  gcc_assert (single_succ (e->dest) == l1_bb);
3124 	}
3125       if (gimple_omp_for_combined_p (fd->for_stmt))
3126 	{
3127 	  remove_edge (e);
3128 	  e = NULL;
3129 	}
3130       else if (fd->collapse > 1)
3131 	{
3132 	  remove_edge (e);
3133 	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3134 	}
3135       else
3136 	e->flags = EDGE_TRUE_VALUE;
3137       if (e)
3138 	{
3139 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3140 	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3141 	}
3142       else
3143 	{
3144 	  e = find_edge (cont_bb, l2_bb);
3145 	  e->flags = EDGE_FALLTHRU;
3146 	}
3147       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3148 
3149       if (gimple_in_ssa_p (cfun))
3150 	{
3151 	  /* Add phis to the outer loop that connect to the phis in the inner,
3152 	     original loop, and move the loop entry value of the inner phi to
3153 	     the loop entry value of the outer phi.  */
3154 	  gphi_iterator psi;
3155 	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3156 	    {
3157 	      source_location locus;
3158 	      gphi *nphi;
3159 	      gphi *exit_phi = psi.phi ();
3160 
3161 	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
3162 		continue;
3163 
3164 	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3165 	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3166 
3167 	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3168 	      edge latch_to_l1 = find_edge (latch, l1_bb);
3169 	      gphi *inner_phi
3170 		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3171 
3172 	      tree t = gimple_phi_result (exit_phi);
3173 	      tree new_res = copy_ssa_name (t, NULL);
3174 	      nphi = create_phi_node (new_res, l0_bb);
3175 
3176 	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3177 	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3178 	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3179 	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3180 	      add_phi_arg (nphi, t, entry_to_l0, locus);
3181 
3182 	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3183 	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3184 
3185 	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3186 	    }
3187 	}
3188 
3189       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3190 			       recompute_dominator (CDI_DOMINATORS, l2_bb));
3191       set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3192 			       recompute_dominator (CDI_DOMINATORS, l3_bb));
3193       set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3194 			       recompute_dominator (CDI_DOMINATORS, l0_bb));
3195       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3196 			       recompute_dominator (CDI_DOMINATORS, l1_bb));
3197 
3198       /* We enter expand_omp_for_generic with a loop.  This original loop may
3199 	 have its own loop struct, or it may be part of an outer loop struct
3200 	 (which may be the fake loop).  */
3201       struct loop *outer_loop = entry_bb->loop_father;
3202       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3203 
3204       add_bb_to_loop (l2_bb, outer_loop);
3205 
3206       /* We've added a new loop around the original loop.  Allocate the
3207 	 corresponding loop struct.  */
3208       struct loop *new_loop = alloc_loop ();
3209       new_loop->header = l0_bb;
3210       new_loop->latch = l2_bb;
3211       add_loop (new_loop, outer_loop);
3212 
3213       /* Allocate a loop structure for the original loop unless we already
3214 	 had one.  */
3215       if (!orig_loop_has_loop_struct
3216 	  && !gimple_omp_for_combined_p (fd->for_stmt))
3217 	{
3218 	  struct loop *orig_loop = alloc_loop ();
3219 	  orig_loop->header = l1_bb;
3220 	  /* The loop may have multiple latches.  */
3221 	  add_loop (orig_loop, new_loop);
3222 	}
3223     }
3224 }
3225 
3226 /* A subroutine of expand_omp_for.  Generate code for a parallel
3227    loop with static schedule and no specified chunk size.  Given
3228    parameters:
3229 
3230 	for (V = N1; V cond N2; V += STEP) BODY;
3231 
3232    where COND is "<" or ">", we generate pseudocode
3233 
3234 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3235 	if (cond is <)
3236 	  adj = STEP - 1;
3237 	else
3238 	  adj = STEP + 1;
3239 	if ((__typeof (V)) -1 > 0 && cond is >)
3240 	  n = -(adj + N2 - N1) / -STEP;
3241 	else
3242 	  n = (adj + N2 - N1) / STEP;
3243 	q = n / nthreads;
3244 	tt = n % nthreads;
3245 	if (threadid < tt) goto L3; else goto L4;
3246     L3:
3247 	tt = 0;
3248 	q = q + 1;
3249     L4:
3250 	s0 = q * threadid + tt;
3251 	e0 = s0 + q;
3252 	V = s0 * STEP + N1;
3253 	if (s0 >= e0) goto L2; else goto L0;
3254     L0:
3255 	e = e0 * STEP + N1;
3256     L1:
3257 	BODY;
3258 	V += STEP;
3259 	if (V cond e) goto L1;
3260     L2:
3261 */
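/* As a purely illustrative example of the arithmetic above, assume
   n = 10 iterations and nthreads = 4.  Then q = 10 / 4 = 2 and
   tt = 10 % 4 = 2, so threads 0 and 1 take the L3 path (q becomes 3,
   tt becomes 0) while threads 2 and 3 keep q = 2, tt = 2.  The
   resulting [s0, e0) ranges are [0, 3), [3, 6), [6, 8) and [8, 10):
   all 10 iterations are covered, with the remainder spread over the
   first tt threads.  */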
3262 
3263 static void
3264 expand_omp_for_static_nochunk (struct omp_region *region,
3265 			       struct omp_for_data *fd,
3266 			       gimple *inner_stmt)
3267 {
3268   tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3269   tree type, itype, vmain, vback;
3270   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3271   basic_block body_bb, cont_bb, collapse_bb = NULL;
3272   basic_block fin_bb;
3273   gimple_stmt_iterator gsi;
3274   edge ep;
3275   bool broken_loop = region->cont == NULL;
3276   tree *counts = NULL;
3277   tree n1, n2, step;
3278 
3279   itype = type = TREE_TYPE (fd->loop.v);
3280   if (POINTER_TYPE_P (type))
3281     itype = signed_type_for (type);
3282 
3283   entry_bb = region->entry;
3284   cont_bb = region->cont;
3285   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3286   fin_bb = BRANCH_EDGE (entry_bb)->dest;
3287   gcc_assert (broken_loop
3288 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3289   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3290   body_bb = single_succ (seq_start_bb);
3291   if (!broken_loop)
3292     {
3293       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3294 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3295       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3296     }
3297   exit_bb = region->exit;
3298 
3299   /* Iteration space partitioning goes in ENTRY_BB.  */
3300   gsi = gsi_last_nondebug_bb (entry_bb);
3301   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3302 
3303   if (fd->collapse > 1)
3304     {
3305       int first_zero_iter = -1, dummy = -1;
3306       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3307 
3308       counts = XALLOCAVEC (tree, fd->collapse);
3309       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3310 				  fin_bb, first_zero_iter,
3311 				  dummy_bb, dummy, l2_dom_bb);
3312       t = NULL_TREE;
3313     }
3314   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3315     t = integer_one_node;
3316   else
3317     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3318 		     fold_convert (type, fd->loop.n1),
3319 		     fold_convert (type, fd->loop.n2));
3320   if (fd->collapse == 1
3321       && TYPE_UNSIGNED (type)
3322       && (t == NULL_TREE || !integer_onep (t)))
3323     {
3324       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3325       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3326 				     true, GSI_SAME_STMT);
3327       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3328       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3329 				     true, GSI_SAME_STMT);
3330       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3331 						 NULL_TREE, NULL_TREE);
3332       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3333       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3334 		     expand_omp_regimplify_p, NULL, NULL)
3335 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3336 			expand_omp_regimplify_p, NULL, NULL))
3337 	{
3338 	  gsi = gsi_for_stmt (cond_stmt);
3339 	  gimple_regimplify_operands (cond_stmt, &gsi);
3340 	}
3341       ep = split_block (entry_bb, cond_stmt);
3342       ep->flags = EDGE_TRUE_VALUE;
3343       entry_bb = ep->dest;
3344       ep->probability = profile_probability::very_likely ();
3345       ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3346       ep->probability = profile_probability::very_unlikely ();
3347       if (gimple_in_ssa_p (cfun))
3348 	{
3349 	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3350 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3351 	       !gsi_end_p (gpi); gsi_next (&gpi))
3352 	    {
3353 	      gphi *phi = gpi.phi ();
3354 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3355 			   ep, UNKNOWN_LOCATION);
3356 	    }
3357 	}
3358       gsi = gsi_last_bb (entry_bb);
3359     }
3360 
3361   switch (gimple_omp_for_kind (fd->for_stmt))
3362     {
3363     case GF_OMP_FOR_KIND_FOR:
3364       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3365       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3366       break;
3367     case GF_OMP_FOR_KIND_DISTRIBUTE:
3368       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3369       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3370       break;
3371     default:
3372       gcc_unreachable ();
3373     }
3374   nthreads = build_call_expr (nthreads, 0);
3375   nthreads = fold_convert (itype, nthreads);
3376   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3377 				       true, GSI_SAME_STMT);
3378   threadid = build_call_expr (threadid, 0);
3379   threadid = fold_convert (itype, threadid);
3380   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3381 				       true, GSI_SAME_STMT);
3382 
3383   n1 = fd->loop.n1;
3384   n2 = fd->loop.n2;
3385   step = fd->loop.step;
3386   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3387     {
3388       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3389 				     OMP_CLAUSE__LOOPTEMP_);
3390       gcc_assert (innerc);
3391       n1 = OMP_CLAUSE_DECL (innerc);
3392       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3393 				OMP_CLAUSE__LOOPTEMP_);
3394       gcc_assert (innerc);
3395       n2 = OMP_CLAUSE_DECL (innerc);
3396     }
3397   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3398 				 true, NULL_TREE, true, GSI_SAME_STMT);
3399   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3400 				 true, NULL_TREE, true, GSI_SAME_STMT);
3401   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3402 				   true, NULL_TREE, true, GSI_SAME_STMT);
3403 
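  /* Compute the iteration count n = (adj + N2 - N1) / STEP as in the
     pseudocode above.  For an unsigned type with a decreasing loop,
     both operands are negated first so that the truncating division is
     performed on their non-negative counterparts.  */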
3404   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3405   t = fold_build2 (PLUS_EXPR, itype, step, t);
3406   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3407   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3408   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3409     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3410 		     fold_build1 (NEGATE_EXPR, itype, t),
3411 		     fold_build1 (NEGATE_EXPR, itype, step));
3412   else
3413     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3414   t = fold_convert (itype, t);
3415   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3416 
3417   q = create_tmp_reg (itype, "q");
3418   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3419   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3420   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3421 
3422   tt = create_tmp_reg (itype, "tt");
3423   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3424   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3425   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3426 
3427   t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3428   gcond *cond_stmt = gimple_build_cond_empty (t);
3429   gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3430 
3431   second_bb = split_block (entry_bb, cond_stmt)->dest;
3432   gsi = gsi_last_nondebug_bb (second_bb);
3433   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3434 
3435   gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3436 		     GSI_SAME_STMT);
3437   gassign *assign_stmt
3438     = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3439   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3440 
3441   third_bb = split_block (second_bb, assign_stmt)->dest;
3442   gsi = gsi_last_nondebug_bb (third_bb);
3443   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3444 
3445   t = build2 (MULT_EXPR, itype, q, threadid);
3446   t = build2 (PLUS_EXPR, itype, t, tt);
3447   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3448 
3449   t = fold_build2 (PLUS_EXPR, itype, s0, q);
3450   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3451 
3452   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3453   gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3454 
3455   /* Remove the GIMPLE_OMP_FOR statement.  */
3456   gsi_remove (&gsi, true);
3457 
3458   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3459   gsi = gsi_start_bb (seq_start_bb);
3460 
3461   tree startvar = fd->loop.v;
3462   tree endvar = NULL_TREE;
3463 
3464   if (gimple_omp_for_combined_p (fd->for_stmt))
3465     {
3466       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3467 		     ? gimple_omp_parallel_clauses (inner_stmt)
3468 		     : gimple_omp_for_clauses (inner_stmt);
3469       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3470       gcc_assert (innerc);
3471       startvar = OMP_CLAUSE_DECL (innerc);
3472       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3473 				OMP_CLAUSE__LOOPTEMP_);
3474       gcc_assert (innerc);
3475       endvar = OMP_CLAUSE_DECL (innerc);
3476       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3477 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3478 	{
3479 	  int i;
3480 	  for (i = 1; i < fd->collapse; i++)
3481 	    {
3482 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3483 					OMP_CLAUSE__LOOPTEMP_);
3484 	      gcc_assert (innerc);
3485 	    }
3486 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3487 				    OMP_CLAUSE__LOOPTEMP_);
3488 	  if (innerc)
3489 	    {
3490 	      /* If needed (distribute parallel for with lastprivate),
3491 		 propagate down the total number of iterations.  */
3492 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3493 				     fd->loop.n2);
3494 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3495 					    GSI_CONTINUE_LINKING);
3496 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3497 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3498 	    }
3499 	}
3500     }
3501   t = fold_convert (itype, s0);
3502   t = fold_build2 (MULT_EXPR, itype, t, step);
3503   if (POINTER_TYPE_P (type))
3504     t = fold_build_pointer_plus (n1, t);
3505   else
3506     t = fold_build2 (PLUS_EXPR, type, t, n1);
3507   t = fold_convert (TREE_TYPE (startvar), t);
3508   t = force_gimple_operand_gsi (&gsi, t,
3509 				DECL_P (startvar)
3510 				&& TREE_ADDRESSABLE (startvar),
3511 				NULL_TREE, false, GSI_CONTINUE_LINKING);
3512   assign_stmt = gimple_build_assign (startvar, t);
3513   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3514 
3515   t = fold_convert (itype, e0);
3516   t = fold_build2 (MULT_EXPR, itype, t, step);
3517   if (POINTER_TYPE_P (type))
3518     t = fold_build_pointer_plus (n1, t);
3519   else
3520     t = fold_build2 (PLUS_EXPR, type, t, n1);
3521   t = fold_convert (TREE_TYPE (startvar), t);
3522   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3523 				false, GSI_CONTINUE_LINKING);
3524   if (endvar)
3525     {
3526       assign_stmt = gimple_build_assign (endvar, e);
3527       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3528       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3529 	assign_stmt = gimple_build_assign (fd->loop.v, e);
3530       else
3531 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3532       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3533     }
3534   /* Handle linear clause adjustments.  */
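  /* Each copied-in linear variable starts with its value from before
     the loop; bias it by the number of logical iterations preceding
     this thread's first one, times the linear step.  */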
3535   tree itercnt = NULL_TREE;
3536   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3537     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3538 	 c; c = OMP_CLAUSE_CHAIN (c))
3539       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3540 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3541 	{
3542 	  tree d = OMP_CLAUSE_DECL (c);
3543 	  bool is_ref = omp_is_reference (d);
3544 	  tree t = d, a, dest;
3545 	  if (is_ref)
3546 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3547 	  if (itercnt == NULL_TREE)
3548 	    {
3549 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
3550 		{
3551 		  itercnt = fold_build2 (MINUS_EXPR, itype,
3552 					 fold_convert (itype, n1),
3553 					 fold_convert (itype, fd->loop.n1));
3554 		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3555 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3556 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3557 						      NULL_TREE, false,
3558 						      GSI_CONTINUE_LINKING);
3559 		}
3560 	      else
3561 		itercnt = s0;
3562 	    }
3563 	  tree type = TREE_TYPE (t);
3564 	  if (POINTER_TYPE_P (type))
3565 	    type = sizetype;
3566 	  a = fold_build2 (MULT_EXPR, type,
3567 			   fold_convert (type, itercnt),
3568 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3569 	  dest = unshare_expr (t);
3570 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3571 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3572 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3573 					false, GSI_CONTINUE_LINKING);
3574 	  assign_stmt = gimple_build_assign (dest, t);
3575 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3576 	}
3577   if (fd->collapse > 1)
3578     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3579 
3580   if (!broken_loop)
3581     {
3582       /* The code controlling the sequential loop replaces the
3583 	 GIMPLE_OMP_CONTINUE.  */
3584       gsi = gsi_last_nondebug_bb (cont_bb);
3585       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3586       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3587       vmain = gimple_omp_continue_control_use (cont_stmt);
3588       vback = gimple_omp_continue_control_def (cont_stmt);
3589 
3590       if (!gimple_omp_for_combined_p (fd->for_stmt))
3591 	{
3592 	  if (POINTER_TYPE_P (type))
3593 	    t = fold_build_pointer_plus (vmain, step);
3594 	  else
3595 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
3596 	  t = force_gimple_operand_gsi (&gsi, t,
3597 					DECL_P (vback)
3598 					&& TREE_ADDRESSABLE (vback),
3599 					NULL_TREE, true, GSI_SAME_STMT);
3600 	  assign_stmt = gimple_build_assign (vback, t);
3601 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3602 
3603 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3604 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
3605 		      ? t : vback, e);
3606 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3607 	}
3608 
3609       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
3610       gsi_remove (&gsi, true);
3611 
3612       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3613 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3614     }
3615 
3616   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
3617   gsi = gsi_last_nondebug_bb (exit_bb);
3618   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3619     {
3620       t = gimple_omp_return_lhs (gsi_stmt (gsi));
3621       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3622     }
3623   gsi_remove (&gsi, true);
3624 
3625   /* Connect all the blocks.  */
3626   ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3627   ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3628   ep = find_edge (entry_bb, second_bb);
3629   ep->flags = EDGE_TRUE_VALUE;
3630   ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3631   find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3632   find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3633 
3634   if (!broken_loop)
3635     {
3636       ep = find_edge (cont_bb, body_bb);
3637       if (ep == NULL)
3638 	{
3639 	  ep = BRANCH_EDGE (cont_bb);
3640 	  gcc_assert (single_succ (ep->dest) == body_bb);
3641 	}
3642       if (gimple_omp_for_combined_p (fd->for_stmt))
3643 	{
3644 	  remove_edge (ep);
3645 	  ep = NULL;
3646 	}
3647       else if (fd->collapse > 1)
3648 	{
3649 	  remove_edge (ep);
3650 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3651 	}
3652       else
3653 	ep->flags = EDGE_TRUE_VALUE;
3654       find_edge (cont_bb, fin_bb)->flags
3655 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3656     }
3657 
3658   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3659   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3660   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3661 
3662   set_immediate_dominator (CDI_DOMINATORS, body_bb,
3663 			   recompute_dominator (CDI_DOMINATORS, body_bb));
3664   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3665 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
3666 
3667   struct loop *loop = body_bb->loop_father;
3668   if (loop != entry_bb->loop_father)
3669     {
3670       gcc_assert (broken_loop || loop->header == body_bb);
3671       gcc_assert (broken_loop
3672 		  || loop->latch == region->cont
3673 		  || single_pred (loop->latch) == region->cont);
3674       return;
3675     }
3676 
3677   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3678     {
3679       loop = alloc_loop ();
3680       loop->header = body_bb;
3681       if (collapse_bb == NULL)
3682 	loop->latch = cont_bb;
3683       add_loop (loop, body_bb->loop_father);
3684     }
3685 }
3686 
3687 /* Return phi in E->DEST with ARG on edge E.  */
3688 
3689 static gphi *
3690 find_phi_with_arg_on_edge (tree arg, edge e)
3691 {
3692   basic_block bb = e->dest;
3693 
3694   for (gphi_iterator gpi = gsi_start_phis (bb);
3695        !gsi_end_p (gpi);
3696        gsi_next (&gpi))
3697     {
3698       gphi *phi = gpi.phi ();
3699       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3700 	return phi;
3701     }
3702 
3703   return NULL;
3704 }
3705 
3706 /* A subroutine of expand_omp_for.  Generate code for a parallel
3707    loop with static schedule and a specified chunk size.  Given
3708    parameters:
3709 
3710 	for (V = N1; V cond N2; V += STEP) BODY;
3711 
3712    where COND is "<" or ">", we generate pseudocode
3713 
3714 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3715 	if (cond is <)
3716 	  adj = STEP - 1;
3717 	else
3718 	  adj = STEP + 1;
3719 	if ((__typeof (V)) -1 > 0 && cond is >)
3720 	  n = -(adj + N2 - N1) / -STEP;
3721 	else
3722 	  n = (adj + N2 - N1) / STEP;
3723 	trip = 0;
3724 	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
3725 					      here so that V is defined
3726 					      if the loop is not entered
3727     L0:
3728 	s0 = (trip * nthreads + threadid) * CHUNK;
3729 	e0 = min (s0 + CHUNK, n);
3730 	if (s0 < n) goto L1; else goto L4;
3731     L1:
3732 	V = s0 * STEP + N1;
3733 	e = e0 * STEP + N1;
3734     L2:
3735 	BODY;
3736 	V += STEP;
3737 	if (V cond e) goto L2; else goto L3;
3738     L3:
3739 	trip += 1;
3740 	goto L0;
3741     L4:
3742 */
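/* For illustration only: with n = 10, nthreads = 2 and CHUNK = 3, the
   chunks are handed out as s0 = (trip * nthreads + threadid) * CHUNK,
   so thread 0 runs [0, 3) and then [6, 9) while thread 1 runs [3, 6)
   and then [9, 10) (e0 being clamped by min (s0 + CHUNK, n)); on the
   third trip both threads see s0 >= n and leave through L4.  */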
3743 
3744 static void
3745 expand_omp_for_static_chunk (struct omp_region *region,
3746 			     struct omp_for_data *fd, gimple *inner_stmt)
3747 {
3748   tree n, s0, e0, e, t;
3749   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3750   tree type, itype, vmain, vback, vextra;
3751   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3752   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3753   gimple_stmt_iterator gsi;
3754   edge se;
3755   bool broken_loop = region->cont == NULL;
3756   tree *counts = NULL;
3757   tree n1, n2, step;
3758 
3759   itype = type = TREE_TYPE (fd->loop.v);
3760   if (POINTER_TYPE_P (type))
3761     itype = signed_type_for (type);
3762 
3763   entry_bb = region->entry;
3764   se = split_block (entry_bb, last_stmt (entry_bb));
3765   entry_bb = se->src;
3766   iter_part_bb = se->dest;
3767   cont_bb = region->cont;
3768   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3769   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3770   gcc_assert (broken_loop
3771 	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3772   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3773   body_bb = single_succ (seq_start_bb);
3774   if (!broken_loop)
3775     {
3776       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3777 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3778       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3779       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3780     }
3781   exit_bb = region->exit;
3782 
3783   /* Trip and adjustment setup goes in ENTRY_BB.  */
3784   gsi = gsi_last_nondebug_bb (entry_bb);
3785   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3786 
3787   if (fd->collapse > 1)
3788     {
3789       int first_zero_iter = -1, dummy = -1;
3790       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3791 
3792       counts = XALLOCAVEC (tree, fd->collapse);
3793       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3794 				  fin_bb, first_zero_iter,
3795 				  dummy_bb, dummy, l2_dom_bb);
3796       t = NULL_TREE;
3797     }
3798   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3799     t = integer_one_node;
3800   else
3801     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3802 		     fold_convert (type, fd->loop.n1),
3803 		     fold_convert (type, fd->loop.n2));
3804   if (fd->collapse == 1
3805       && TYPE_UNSIGNED (type)
3806       && (t == NULL_TREE || !integer_onep (t)))
3807     {
3808       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3809       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3810 				     true, GSI_SAME_STMT);
3811       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3812       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3813 				     true, GSI_SAME_STMT);
3814       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3815 						 NULL_TREE, NULL_TREE);
3816       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3817       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3818 		     expand_omp_regimplify_p, NULL, NULL)
3819 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3820 			expand_omp_regimplify_p, NULL, NULL))
3821 	{
3822 	  gsi = gsi_for_stmt (cond_stmt);
3823 	  gimple_regimplify_operands (cond_stmt, &gsi);
3824 	}
3825       se = split_block (entry_bb, cond_stmt);
3826       se->flags = EDGE_TRUE_VALUE;
3827       entry_bb = se->dest;
3828       se->probability = profile_probability::very_likely ();
3829       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3830       se->probability = profile_probability::very_unlikely ();
3831       if (gimple_in_ssa_p (cfun))
3832 	{
3833 	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3834 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3835 	       !gsi_end_p (gpi); gsi_next (&gpi))
3836 	    {
3837 	      gphi *phi = gpi.phi ();
3838 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3839 			   se, UNKNOWN_LOCATION);
3840 	    }
3841 	}
3842       gsi = gsi_last_bb (entry_bb);
3843     }
3844 
3845   switch (gimple_omp_for_kind (fd->for_stmt))
3846     {
3847     case GF_OMP_FOR_KIND_FOR:
3848       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3849       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3850       break;
3851     case GF_OMP_FOR_KIND_DISTRIBUTE:
3852       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3853       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3854       break;
3855     default:
3856       gcc_unreachable ();
3857     }
3858   nthreads = build_call_expr (nthreads, 0);
3859   nthreads = fold_convert (itype, nthreads);
3860   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3861 				       true, GSI_SAME_STMT);
3862   threadid = build_call_expr (threadid, 0);
3863   threadid = fold_convert (itype, threadid);
3864   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3865 				       true, GSI_SAME_STMT);
3866 
3867   n1 = fd->loop.n1;
3868   n2 = fd->loop.n2;
3869   step = fd->loop.step;
3870   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3871     {
3872       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3873 				     OMP_CLAUSE__LOOPTEMP_);
3874       gcc_assert (innerc);
3875       n1 = OMP_CLAUSE_DECL (innerc);
3876       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3877 				OMP_CLAUSE__LOOPTEMP_);
3878       gcc_assert (innerc);
3879       n2 = OMP_CLAUSE_DECL (innerc);
3880     }
3881   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3882 				 true, NULL_TREE, true, GSI_SAME_STMT);
3883   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3884 				 true, NULL_TREE, true, GSI_SAME_STMT);
3885   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3886 				   true, NULL_TREE, true, GSI_SAME_STMT);
3887   tree chunk_size = fold_convert (itype, fd->chunk_size);
3888   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3889   chunk_size
3890     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3891 				GSI_SAME_STMT);
3892 
3893   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3894   t = fold_build2 (PLUS_EXPR, itype, step, t);
3895   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3896   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3897   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3898     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3899 		     fold_build1 (NEGATE_EXPR, itype, t),
3900 		     fold_build1 (NEGATE_EXPR, itype, step));
3901   else
3902     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3903   t = fold_convert (itype, t);
3904   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3905 				true, GSI_SAME_STMT);
3906 
3907   trip_var = create_tmp_reg (itype, ".trip");
3908   if (gimple_in_ssa_p (cfun))
3909     {
3910       trip_init = make_ssa_name (trip_var);
3911       trip_main = make_ssa_name (trip_var);
3912       trip_back = make_ssa_name (trip_var);
3913     }
3914   else
3915     {
3916       trip_init = trip_var;
3917       trip_main = trip_var;
3918       trip_back = trip_var;
3919     }
3920 
3921   gassign *assign_stmt
3922     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3923   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3924 
3925   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3926   t = fold_build2 (MULT_EXPR, itype, t, step);
3927   if (POINTER_TYPE_P (type))
3928     t = fold_build_pointer_plus (n1, t);
3929   else
3930     t = fold_build2 (PLUS_EXPR, type, t, n1);
3931   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3932 				     true, GSI_SAME_STMT);
3933 
3934   /* Remove the GIMPLE_OMP_FOR.  */
3935   gsi_remove (&gsi, true);
3936 
3937   gimple_stmt_iterator gsif = gsi;
3938 
3939   /* Iteration space partitioning goes in ITER_PART_BB.  */
3940   gsi = gsi_last_bb (iter_part_bb);
3941 
3942   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3943   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3944   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3945   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3946 				 false, GSI_CONTINUE_LINKING);
3947 
3948   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3949   t = fold_build2 (MIN_EXPR, itype, t, n);
3950   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3951 				 false, GSI_CONTINUE_LINKING);
3952 
3953   t = build2 (LT_EXPR, boolean_type_node, s0, n);
3954   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3955 
3956   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3957   gsi = gsi_start_bb (seq_start_bb);
3958 
3959   tree startvar = fd->loop.v;
3960   tree endvar = NULL_TREE;
3961 
3962   if (gimple_omp_for_combined_p (fd->for_stmt))
3963     {
3964       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3965 		     ? gimple_omp_parallel_clauses (inner_stmt)
3966 		     : gimple_omp_for_clauses (inner_stmt);
3967       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3968       gcc_assert (innerc);
3969       startvar = OMP_CLAUSE_DECL (innerc);
3970       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3971 				OMP_CLAUSE__LOOPTEMP_);
3972       gcc_assert (innerc);
3973       endvar = OMP_CLAUSE_DECL (innerc);
3974       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3975 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3976 	{
3977 	  int i;
3978 	  for (i = 1; i < fd->collapse; i++)
3979 	    {
3980 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3981 					OMP_CLAUSE__LOOPTEMP_);
3982 	      gcc_assert (innerc);
3983 	    }
3984 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3985 				    OMP_CLAUSE__LOOPTEMP_);
3986 	  if (innerc)
3987 	    {
3988 	      /* If needed (distribute parallel for with lastprivate),
3989 		 propagate down the total number of iterations.  */
3990 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3991 				     fd->loop.n2);
3992 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3993 					    GSI_CONTINUE_LINKING);
3994 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3995 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3996 	    }
3997 	}
3998     }
3999 
4000   t = fold_convert (itype, s0);
4001   t = fold_build2 (MULT_EXPR, itype, t, step);
4002   if (POINTER_TYPE_P (type))
4003     t = fold_build_pointer_plus (n1, t);
4004   else
4005     t = fold_build2 (PLUS_EXPR, type, t, n1);
4006   t = fold_convert (TREE_TYPE (startvar), t);
4007   t = force_gimple_operand_gsi (&gsi, t,
4008 				DECL_P (startvar)
4009 				&& TREE_ADDRESSABLE (startvar),
4010 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4011   assign_stmt = gimple_build_assign (startvar, t);
4012   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4013 
4014   t = fold_convert (itype, e0);
4015   t = fold_build2 (MULT_EXPR, itype, t, step);
4016   if (POINTER_TYPE_P (type))
4017     t = fold_build_pointer_plus (n1, t);
4018   else
4019     t = fold_build2 (PLUS_EXPR, type, t, n1);
4020   t = fold_convert (TREE_TYPE (startvar), t);
4021   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4022 				false, GSI_CONTINUE_LINKING);
4023   if (endvar)
4024     {
4025       assign_stmt = gimple_build_assign (endvar, e);
4026       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4027       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4028 	assign_stmt = gimple_build_assign (fd->loop.v, e);
4029       else
4030 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4031       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4032     }
4033   /* Handle linear clause adjustments.  */
4034   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4035   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4036     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4037 	 c; c = OMP_CLAUSE_CHAIN (c))
4038       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4039 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4040 	{
4041 	  tree d = OMP_CLAUSE_DECL (c);
4042 	  bool is_ref = omp_is_reference (d);
4043 	  tree t = d, a, dest;
4044 	  if (is_ref)
4045 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4046 	  tree type = TREE_TYPE (t);
4047 	  if (POINTER_TYPE_P (type))
4048 	    type = sizetype;
4049 	  dest = unshare_expr (t);
4050 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4051 	  expand_omp_build_assign (&gsif, v, t);
4052 	  if (itercnt == NULL_TREE)
4053 	    {
4054 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4055 		{
4056 		  itercntbias
4057 		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4058 				   fold_convert (itype, fd->loop.n1));
4059 		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4060 					     itercntbias, step);
4061 		  itercntbias
4062 		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
4063 						NULL_TREE, true,
4064 						GSI_SAME_STMT);
4065 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4066 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4067 						      NULL_TREE, false,
4068 						      GSI_CONTINUE_LINKING);
4069 		}
4070 	      else
4071 		itercnt = s0;
4072 	    }
4073 	  a = fold_build2 (MULT_EXPR, type,
4074 			   fold_convert (type, itercnt),
4075 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4076 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4077 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4078 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4079 					false, GSI_CONTINUE_LINKING);
4080 	  assign_stmt = gimple_build_assign (dest, t);
4081 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4082 	}
4083   if (fd->collapse > 1)
4084     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4085 
4086   if (!broken_loop)
4087     {
4088       /* The code controlling the sequential loop goes in CONT_BB,
4089 	 replacing the GIMPLE_OMP_CONTINUE.  */
4090       gsi = gsi_last_nondebug_bb (cont_bb);
4091       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4092       vmain = gimple_omp_continue_control_use (cont_stmt);
4093       vback = gimple_omp_continue_control_def (cont_stmt);
4094 
4095       if (!gimple_omp_for_combined_p (fd->for_stmt))
4096 	{
4097 	  if (POINTER_TYPE_P (type))
4098 	    t = fold_build_pointer_plus (vmain, step);
4099 	  else
4100 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4101 	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4102 	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4103 					  true, GSI_SAME_STMT);
4104 	  assign_stmt = gimple_build_assign (vback, t);
4105 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4106 
4107 	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4108 	    t = build2 (EQ_EXPR, boolean_type_node,
4109 			build_int_cst (itype, 0),
4110 			build_int_cst (itype, 1));
4111 	  else
4112 	    t = build2 (fd->loop.cond_code, boolean_type_node,
4113 			DECL_P (vback) && TREE_ADDRESSABLE (vback)
4114 			? t : vback, e);
4115 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4116 	}
4117 
4118       /* Remove GIMPLE_OMP_CONTINUE.  */
4119       gsi_remove (&gsi, true);
4120 
4121       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4122 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4123 
4124       /* Trip update code goes into TRIP_UPDATE_BB.  */
4125       gsi = gsi_start_bb (trip_update_bb);
4126 
4127       t = build_int_cst (itype, 1);
4128       t = build2 (PLUS_EXPR, itype, trip_main, t);
4129       assign_stmt = gimple_build_assign (trip_back, t);
4130       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4131     }
4132 
4133   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4134   gsi = gsi_last_nondebug_bb (exit_bb);
4135   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4136     {
4137       t = gimple_omp_return_lhs (gsi_stmt (gsi));
4138       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4139     }
4140   gsi_remove (&gsi, true);
4141 
4142   /* Connect the new blocks.  */
4143   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4144   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4145 
4146   if (!broken_loop)
4147     {
4148       se = find_edge (cont_bb, body_bb);
4149       if (se == NULL)
4150 	{
4151 	  se = BRANCH_EDGE (cont_bb);
4152 	  gcc_assert (single_succ (se->dest) == body_bb);
4153 	}
4154       if (gimple_omp_for_combined_p (fd->for_stmt))
4155 	{
4156 	  remove_edge (se);
4157 	  se = NULL;
4158 	}
4159       else if (fd->collapse > 1)
4160 	{
4161 	  remove_edge (se);
4162 	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4163 	}
4164       else
4165 	se->flags = EDGE_TRUE_VALUE;
4166       find_edge (cont_bb, trip_update_bb)->flags
4167 	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4168 
4169       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4170 				iter_part_bb);
4171     }
4172 
4173   if (gimple_in_ssa_p (cfun))
4174     {
4175       gphi_iterator psi;
4176       gphi *phi;
4177       edge re, ene;
4178       edge_var_map *vm;
4179       size_t i;
4180 
4181       gcc_assert (fd->collapse == 1 && !broken_loop);
4182 
4183       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4184 	 remove arguments of the phi nodes in fin_bb.  We need to create
4185 	 appropriate phi nodes in iter_part_bb instead.  */
4186       se = find_edge (iter_part_bb, fin_bb);
4187       re = single_succ_edge (trip_update_bb);
4188       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4189       ene = single_succ_edge (entry_bb);
4190 
4191       psi = gsi_start_phis (fin_bb);
4192       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4193 	   gsi_next (&psi), ++i)
4194 	{
4195 	  gphi *nphi;
4196 	  source_location locus;
4197 
4198 	  phi = psi.phi ();
4199 	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4200 			       redirect_edge_var_map_def (vm), 0))
4201 	    continue;
4202 
4203 	  t = gimple_phi_result (phi);
4204 	  gcc_assert (t == redirect_edge_var_map_result (vm));
4205 
4206 	  if (!single_pred_p (fin_bb))
4207 	    t = copy_ssa_name (t, phi);
4208 
4209 	  nphi = create_phi_node (t, iter_part_bb);
4210 
4211 	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4212 	  locus = gimple_phi_arg_location_from_edge (phi, se);
4213 
4214 	  /* A special case -- fd->loop.v is not yet computed in
4215 	     iter_part_bb, we need to use vextra instead.  */
4216 	  if (t == fd->loop.v)
4217 	    t = vextra;
4218 	  add_phi_arg (nphi, t, ene, locus);
4219 	  locus = redirect_edge_var_map_location (vm);
4220 	  tree back_arg = redirect_edge_var_map_def (vm);
4221 	  add_phi_arg (nphi, back_arg, re, locus);
4222 	  edge ce = find_edge (cont_bb, body_bb);
4223 	  if (ce == NULL)
4224 	    {
4225 	      ce = BRANCH_EDGE (cont_bb);
4226 	      gcc_assert (single_succ (ce->dest) == body_bb);
4227 	      ce = single_succ_edge (ce->dest);
4228 	    }
4229 	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4230 	  gcc_assert (inner_loop_phi != NULL);
4231 	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4232 		       find_edge (seq_start_bb, body_bb), locus);
4233 
4234 	  if (!single_pred_p (fin_bb))
4235 	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4236 	}
4237       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4238       redirect_edge_var_map_clear (re);
4239       if (single_pred_p (fin_bb))
4240 	while (1)
4241 	  {
4242 	    psi = gsi_start_phis (fin_bb);
4243 	    if (gsi_end_p (psi))
4244 	      break;
4245 	    remove_phi_node (&psi, false);
4246 	  }
4247 
4248       /* Make phi node for trip.  */
4249       phi = create_phi_node (trip_main, iter_part_bb);
4250       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4251 		   UNKNOWN_LOCATION);
4252       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4253 		   UNKNOWN_LOCATION);
4254     }
4255 
4256   if (!broken_loop)
4257     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4258   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4259 			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4260   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4261 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
4262   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4263 			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4264   set_immediate_dominator (CDI_DOMINATORS, body_bb,
4265 			   recompute_dominator (CDI_DOMINATORS, body_bb));
4266 
4267   if (!broken_loop)
4268     {
4269       struct loop *loop = body_bb->loop_father;
4270       struct loop *trip_loop = alloc_loop ();
4271       trip_loop->header = iter_part_bb;
4272       trip_loop->latch = trip_update_bb;
4273       add_loop (trip_loop, iter_part_bb->loop_father);
4274 
4275       if (loop != entry_bb->loop_father)
4276 	{
4277 	  gcc_assert (loop->header == body_bb);
4278 	  gcc_assert (loop->latch == region->cont
4279 		      || single_pred (loop->latch) == region->cont);
4280 	  trip_loop->inner = loop;
4281 	  return;
4282 	}
4283 
4284       if (!gimple_omp_for_combined_p (fd->for_stmt))
4285 	{
4286 	  loop = alloc_loop ();
4287 	  loop->header = body_bb;
4288 	  if (collapse_bb == NULL)
4289 	    loop->latch = cont_bb;
4290 	  add_loop (loop, trip_loop);
4291 	}
4292     }
4293 }
4294 
4295 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
4296    loop.  Given parameters:
4297 
4298 	for (V = N1; V cond N2; V += STEP) BODY;
4299 
4300    where COND is "<" or ">", we generate pseudocode
4301 
4302 	V = N1;
4303 	goto L1;
4304     L0:
4305 	BODY;
4306 	V += STEP;
4307     L1:
4308 	if (V cond N2) goto L0; else goto L2;
4309     L2:
4310 
4311     For collapsed loops, given parameters:
4312       collapse(3)
4313       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4314 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4315 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4316 	    BODY;
4317 
4318     we generate pseudocode
4319 
4320 	if (cond3 is <)
4321 	  adj = STEP3 - 1;
4322 	else
4323 	  adj = STEP3 + 1;
4324 	count3 = (adj + N32 - N31) / STEP3;
4325 	if (cond2 is <)
4326 	  adj = STEP2 - 1;
4327 	else
4328 	  adj = STEP2 + 1;
4329 	count2 = (adj + N22 - N21) / STEP2;
4330 	if (cond1 is <)
4331 	  adj = STEP1 - 1;
4332 	else
4333 	  adj = STEP1 + 1;
4334 	count1 = (adj + N12 - N11) / STEP1;
4335 	count = count1 * count2 * count3;
4336 	V = 0;
4337 	V1 = N11;
4338 	V2 = N21;
4339 	V3 = N31;
4340 	goto L1;
4341     L0:
4342 	BODY;
4343 	V += 1;
4344 	V3 += STEP3;
4345 	V2 += (V3 cond3 N32) ? 0 : STEP2;
4346 	V3 = (V3 cond3 N32) ? V3 : N31;
4347 	V1 += (V2 cond2 N22) ? 0 : STEP1;
4348 	V2 = (V2 cond2 N22) ? V2 : N21;
4349     L1:
4350 	if (V < count) goto L0; else goto L2;
4351     L2:
4352 
4353       */
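/* A purely illustrative trace of the collapsed form: with count1 = 2,
   count2 = 3 and count3 = 4, the flat counter V numbers the
   count = 2 * 3 * 4 = 24 iterations.  After each body, V3 is bumped by
   STEP3; once V3 stops satisfying cond3 it is reset to N31 and the
   carry goes into V2 (and likewise from V2 into V1), so the original
   induction variables advance like an odometer while the loop itself
   is controlled only by the single test V < count.  */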
4354 
4355 static void
4356 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4357 {
4358   tree type, t;
4359   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4360   gimple_stmt_iterator gsi;
4361   gimple *stmt;
4362   gcond *cond_stmt;
4363   bool broken_loop = region->cont == NULL;
4364   edge e, ne;
4365   tree *counts = NULL;
4366   int i;
4367   int safelen_int = INT_MAX;
4368   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4369 				  OMP_CLAUSE_SAFELEN);
4370   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4371 				  OMP_CLAUSE__SIMDUID_);
4372   tree n1, n2;
4373 
4374   if (safelen)
4375     {
4376       poly_uint64 val;
4377       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4378       if (!poly_int_tree_p (safelen, &val))
4379 	safelen_int = 0;
4380       else
4381 	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4382       if (safelen_int == 1)
4383 	safelen_int = 0;
4384     }
4385   type = TREE_TYPE (fd->loop.v);
4386   entry_bb = region->entry;
4387   cont_bb = region->cont;
4388   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4389   gcc_assert (broken_loop
4390 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4391   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4392   if (!broken_loop)
4393     {
4394       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4395       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4396       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4397       l2_bb = BRANCH_EDGE (entry_bb)->dest;
4398     }
4399   else
4400     {
4401       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4402       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4403       l2_bb = single_succ (l1_bb);
4404     }
4405   exit_bb = region->exit;
4406   l2_dom_bb = NULL;
4407 
4408   gsi = gsi_last_nondebug_bb (entry_bb);
4409 
4410   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4411   /* Not needed in SSA form right now.  */
4412   gcc_assert (!gimple_in_ssa_p (cfun));
4413   if (fd->collapse > 1)
4414     {
4415       int first_zero_iter = -1, dummy = -1;
4416       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4417 
4418       counts = XALLOCAVEC (tree, fd->collapse);
4419       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4420 				  zero_iter_bb, first_zero_iter,
4421 				  dummy_bb, dummy, l2_dom_bb);
4422     }
4423   if (l2_dom_bb == NULL)
4424     l2_dom_bb = l1_bb;
4425 
4426   n1 = fd->loop.n1;
4427   n2 = fd->loop.n2;
4428   if (gimple_omp_for_combined_into_p (fd->for_stmt))
4429     {
4430       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4431 				     OMP_CLAUSE__LOOPTEMP_);
4432       gcc_assert (innerc);
4433       n1 = OMP_CLAUSE_DECL (innerc);
4434       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4435 				OMP_CLAUSE__LOOPTEMP_);
4436       gcc_assert (innerc);
4437       n2 = OMP_CLAUSE_DECL (innerc);
4438     }
4439   tree step = fd->loop.step;
4440 
4441   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4442 				  OMP_CLAUSE__SIMT_);
4443   if (is_simt)
4444     {
4445       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4446       is_simt = safelen_int > 1;
4447     }
4448   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
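  /* Under SIMT, each lane starts at N1 + STEP * lane and then advances
     by STEP * VF, so the lanes interleave over the iteration space;
     SIMT_MAXLANE, when set, caps how many lanes actually participate.  */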
4449   if (is_simt)
4450     {
4451       simt_lane = create_tmp_var (unsigned_type_node);
4452       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4453       gimple_call_set_lhs (g, simt_lane);
4454       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4455       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4456 				 fold_convert (TREE_TYPE (step), simt_lane));
4457       n1 = fold_convert (type, n1);
4458       if (POINTER_TYPE_P (type))
4459 	n1 = fold_build_pointer_plus (n1, offset);
4460       else
4461 	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4462 
4463       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
4464       if (fd->collapse > 1)
4465 	simt_maxlane = build_one_cst (unsigned_type_node);
4466       else if (safelen_int < omp_max_simt_vf ())
4467 	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4468       tree vf
4469 	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4470 					unsigned_type_node, 0);
4471       if (simt_maxlane)
4472 	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4473       vf = fold_convert (TREE_TYPE (step), vf);
4474       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4475     }
4476 
4477   expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4478   if (fd->collapse > 1)
4479     {
4480       if (gimple_omp_for_combined_into_p (fd->for_stmt))
4481 	{
4482 	  gsi_prev (&gsi);
4483 	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4484 	  gsi_next (&gsi);
4485 	}
4486       else
4487 	for (i = 0; i < fd->collapse; i++)
4488 	  {
4489 	    tree itype = TREE_TYPE (fd->loops[i].v);
4490 	    if (POINTER_TYPE_P (itype))
4491 	      itype = signed_type_for (itype);
4492 	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4493 	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4494 	  }
4495     }
4496 
4497   /* Remove the GIMPLE_OMP_FOR statement.  */
4498   gsi_remove (&gsi, true);
4499 
4500   if (!broken_loop)
4501     {
4502       /* Code to control the increment goes in the CONT_BB.  */
4503       gsi = gsi_last_nondebug_bb (cont_bb);
4504       stmt = gsi_stmt (gsi);
4505       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4506 
4507       if (POINTER_TYPE_P (type))
4508 	t = fold_build_pointer_plus (fd->loop.v, step);
4509       else
4510 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4511       expand_omp_build_assign (&gsi, fd->loop.v, t);
4512 
4513       if (fd->collapse > 1)
4514 	{
4515 	  i = fd->collapse - 1;
4516 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4517 	    {
4518 	      t = fold_convert (sizetype, fd->loops[i].step);
4519 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
4520 	    }
4521 	  else
4522 	    {
4523 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
4524 				fd->loops[i].step);
4525 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4526 			       fd->loops[i].v, t);
4527 	    }
4528 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4529 
4530 	  for (i = fd->collapse - 1; i > 0; i--)
4531 	    {
4532 	      tree itype = TREE_TYPE (fd->loops[i].v);
4533 	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4534 	      if (POINTER_TYPE_P (itype2))
4535 		itype2 = signed_type_for (itype2);
4536 	      t = fold_convert (itype2, fd->loops[i - 1].step);
4537 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4538 					    GSI_SAME_STMT);
4539 	      t = build3 (COND_EXPR, itype2,
4540 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4541 				  fd->loops[i].v,
4542 				  fold_convert (itype, fd->loops[i].n2)),
4543 			  build_int_cst (itype2, 0), t);
4544 	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4545 		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4546 	      else
4547 		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4548 	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4549 
4550 	      t = fold_convert (itype, fd->loops[i].n1);
4551 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4552 					    GSI_SAME_STMT);
4553 	      t = build3 (COND_EXPR, itype,
4554 			  build2 (fd->loops[i].cond_code, boolean_type_node,
4555 				  fd->loops[i].v,
4556 				  fold_convert (itype, fd->loops[i].n2)),
4557 			  fd->loops[i].v, t);
4558 	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4559 	    }
4560 	}
4561 
4562       /* Remove GIMPLE_OMP_CONTINUE.  */
4563       gsi_remove (&gsi, true);
4564     }
4565 
4566   /* Emit the condition in L1_BB.  */
4567   gsi = gsi_start_bb (l1_bb);
4568 
4569   t = fold_convert (type, n2);
4570   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4571 				false, GSI_CONTINUE_LINKING);
4572   tree v = fd->loop.v;
4573   if (DECL_P (v) && TREE_ADDRESSABLE (v))
4574     v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4575 				  false, GSI_CONTINUE_LINKING);
4576   t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4577   cond_stmt = gimple_build_cond_empty (t);
4578   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4579   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4580 		 NULL, NULL)
4581       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4582 		    NULL, NULL))
4583     {
4584       gsi = gsi_for_stmt (cond_stmt);
4585       gimple_regimplify_operands (cond_stmt, &gsi);
4586     }
4587 
4588   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
4589   if (is_simt)
4590     {
4591       gsi = gsi_start_bb (l2_bb);
4592       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4593       if (POINTER_TYPE_P (type))
4594 	t = fold_build_pointer_plus (fd->loop.v, step);
4595       else
4596 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4597       expand_omp_build_assign (&gsi, fd->loop.v, t);
4598     }
4599 
4600   /* Remove GIMPLE_OMP_RETURN.  */
4601   gsi = gsi_last_nondebug_bb (exit_bb);
4602   gsi_remove (&gsi, true);
4603 
4604   /* Connect the new blocks.  */
4605   remove_edge (FALLTHRU_EDGE (entry_bb));
4606 
4607   if (!broken_loop)
4608     {
4609       remove_edge (BRANCH_EDGE (entry_bb));
4610       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4611 
4612       e = BRANCH_EDGE (l1_bb);
4613       ne = FALLTHRU_EDGE (l1_bb);
4614       e->flags = EDGE_TRUE_VALUE;
4615     }
4616   else
4617     {
4618       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4619 
4620       ne = single_succ_edge (l1_bb);
4621       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4622 
4623     }
4624   ne->flags = EDGE_FALSE_VALUE;
4625   e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4626   ne->probability = e->probability.invert ();
4627 
4628   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4629   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4630 
4631   if (simt_maxlane)
4632     {
4633       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4634 				     NULL_TREE, NULL_TREE);
4635       gsi = gsi_last_bb (entry_bb);
4636       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4637       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4638       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4639       FALLTHRU_EDGE (entry_bb)->probability
4640 	 = profile_probability::guessed_always ().apply_scale (7, 8);
4641       BRANCH_EDGE (entry_bb)->probability
4642 	 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4643       l2_dom_bb = entry_bb;
4644     }
4645   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4646 
4647   if (!broken_loop)
4648     {
4649       struct loop *loop = alloc_loop ();
4650       loop->header = l1_bb;
4651       loop->latch = cont_bb;
4652       add_loop (loop, l1_bb->loop_father);
4653       loop->safelen = safelen_int;
4654       if (simduid)
4655 	{
4656 	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4657 	  cfun->has_simduid_loops = true;
4658 	}
4659       /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4660 	 the loop.  */
4661       if ((flag_tree_loop_vectorize
4662 	   || !global_options_set.x_flag_tree_loop_vectorize)
4663 	  && flag_tree_loop_optimize
4664 	  && loop->safelen > 1)
4665 	{
4666 	  loop->force_vectorize = true;
4667 	  cfun->has_force_vectorize_loops = true;
4668 	}
4669     }
4670   else if (simduid)
4671     cfun->has_simduid_loops = true;
4672 }
4673 
4674 /* Taskloop construct is represented after gimplification with
4675    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4676    in between them.  This routine expands the outer GIMPLE_OMP_FOR,
4677    which should just compute all the needed loop temporaries
4678    for GIMPLE_OMP_TASK.  */
4679 
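/* For illustration, a user-level loop such as

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body (i);

   is represented at this point roughly as

     GIMPLE_OMP_FOR (outer)         <-- expanded here; only computes the
       GIMPLE_OMP_TASK                  _looptemp_ start/end values
         GIMPLE_OMP_FOR (inner)     <-- expanded by the routine further below

   so the outer loop never iterates; it merely prepares the start/end
   temporaries (and, with collapse, the total iteration count) that the
   taskloop task consumes.  */
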
4680 static void
4681 expand_omp_taskloop_for_outer (struct omp_region *region,
4682 			       struct omp_for_data *fd,
4683 			       gimple *inner_stmt)
4684 {
4685   tree type, bias = NULL_TREE;
4686   basic_block entry_bb, cont_bb, exit_bb;
4687   gimple_stmt_iterator gsi;
4688   gassign *assign_stmt;
4689   tree *counts = NULL;
4690   int i;
4691 
4692   gcc_assert (inner_stmt);
4693   gcc_assert (region->cont);
4694   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4695 	      && gimple_omp_task_taskloop_p (inner_stmt));
4696   type = TREE_TYPE (fd->loop.v);
4697 
4698   /* See if we need to bias by LLONG_MIN.  */
4699   if (fd->iter_type == long_long_unsigned_type_node
4700       && TREE_CODE (type) == INTEGER_TYPE
4701       && !TYPE_UNSIGNED (type))
4702     {
4703       tree n1, n2;
4704 
4705       if (fd->loop.cond_code == LT_EXPR)
4706 	{
4707 	  n1 = fd->loop.n1;
4708 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4709 	}
4710       else
4711 	{
4712 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4713 	  n2 = fd->loop.n1;
4714 	}
4715       if (TREE_CODE (n1) != INTEGER_CST
4716 	  || TREE_CODE (n2) != INTEGER_CST
4717 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4718 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4719     }
4720 
4721   entry_bb = region->entry;
4722   cont_bb = region->cont;
4723   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4724   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4725   exit_bb = region->exit;
4726 
4727   gsi = gsi_last_nondebug_bb (entry_bb);
4728   gimple *for_stmt = gsi_stmt (gsi);
4729   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4730   if (fd->collapse > 1)
4731     {
4732       int first_zero_iter = -1, dummy = -1;
4733       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4734 
4735       counts = XALLOCAVEC (tree, fd->collapse);
4736       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4737 				  zero_iter_bb, first_zero_iter,
4738 				  dummy_bb, dummy, l2_dom_bb);
4739 
4740       if (zero_iter_bb)
4741 	{
4742 	  /* Some counts[i] vars might be uninitialized if
4743 	     some loop has zero iterations.  But the body shouldn't
4744 	     be executed in that case, so just avoid uninit warnings.  */
4745 	  for (i = first_zero_iter; i < fd->collapse; i++)
4746 	    if (SSA_VAR_P (counts[i]))
4747 	      TREE_NO_WARNING (counts[i]) = 1;
4748 	  gsi_prev (&gsi);
4749 	  edge e = split_block (entry_bb, gsi_stmt (gsi));
4750 	  entry_bb = e->dest;
4751 	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4752 	  gsi = gsi_last_bb (entry_bb);
4753 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4754 				   get_immediate_dominator (CDI_DOMINATORS,
4755 							    zero_iter_bb));
4756 	}
4757     }
4758 
4759   tree t0, t1;
4760   t1 = fd->loop.n2;
4761   t0 = fd->loop.n1;
4762   if (POINTER_TYPE_P (TREE_TYPE (t0))
4763       && TYPE_PRECISION (TREE_TYPE (t0))
4764 	 != TYPE_PRECISION (fd->iter_type))
4765     {
4766       /* Avoid casting pointers to an integer of a different size.  */
4767       tree itype = signed_type_for (type);
4768       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4769       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4770     }
4771   else
4772     {
4773       t1 = fold_convert (fd->iter_type, t1);
4774       t0 = fold_convert (fd->iter_type, t0);
4775     }
4776   if (bias)
4777     {
4778       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4779       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4780     }
4781 
4782   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4783 				 OMP_CLAUSE__LOOPTEMP_);
4784   gcc_assert (innerc);
4785   tree startvar = OMP_CLAUSE_DECL (innerc);
4786   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4787   gcc_assert (innerc);
4788   tree endvar = OMP_CLAUSE_DECL (innerc);
4789   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4790     {
4791       gcc_assert (innerc);
4792       for (i = 1; i < fd->collapse; i++)
4793 	{
4794 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4795 				    OMP_CLAUSE__LOOPTEMP_);
4796 	  gcc_assert (innerc);
4797 	}
4798       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4799 				OMP_CLAUSE__LOOPTEMP_);
4800       if (innerc)
4801 	{
4802 	  /* If needed (inner taskloop has lastprivate clause), propagate
4803 	     down the total number of iterations.  */
4804 	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4805 					     NULL_TREE, false,
4806 					     GSI_CONTINUE_LINKING);
4807 	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4808 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4809 	}
4810     }
4811 
4812   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4813 				 GSI_CONTINUE_LINKING);
4814   assign_stmt = gimple_build_assign (startvar, t0);
4815   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4816 
4817   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4818 				 GSI_CONTINUE_LINKING);
4819   assign_stmt = gimple_build_assign (endvar, t1);
4820   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4821   if (fd->collapse > 1)
4822     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4823 
4824   /* Remove the GIMPLE_OMP_FOR statement.  */
4825   gsi = gsi_for_stmt (for_stmt);
4826   gsi_remove (&gsi, true);
4827 
4828   gsi = gsi_last_nondebug_bb (cont_bb);
4829   gsi_remove (&gsi, true);
4830 
4831   gsi = gsi_last_nondebug_bb (exit_bb);
4832   gsi_remove (&gsi, true);
4833 
4834   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4835   remove_edge (BRANCH_EDGE (entry_bb));
4836   FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4837   remove_edge (BRANCH_EDGE (cont_bb));
4838   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4839   set_immediate_dominator (CDI_DOMINATORS, region->entry,
4840 			   recompute_dominator (CDI_DOMINATORS, region->entry));
4841 }
4842 
4843 /* Taskloop construct is represented after gimplification with
4844    two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4845    in between them.  This routine expands the inner GIMPLE_OMP_FOR.
4846    GOMP_taskloop{,_ull} function arranges for each task to be given just
4847    a single range of iterations.  */
4848 
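/* Illustrative sketch: each task spawned by GOMP_taskloop{,_ull} receives
   its private start and end iterations through the first two _looptemp_
   clauses, so the loop generated here reduces to

     for (V = start; V cond end; V += STEP)
       BODY;

   with no further work-sharing or scheduling logic of its own.  */
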
4849 static void
4850 expand_omp_taskloop_for_inner (struct omp_region *region,
4851 			       struct omp_for_data *fd,
4852 			       gimple *inner_stmt)
4853 {
4854   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4855   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4856   basic_block fin_bb;
4857   gimple_stmt_iterator gsi;
4858   edge ep;
4859   bool broken_loop = region->cont == NULL;
4860   tree *counts = NULL;
4861   tree n1, n2, step;
4862 
4863   itype = type = TREE_TYPE (fd->loop.v);
4864   if (POINTER_TYPE_P (type))
4865     itype = signed_type_for (type);
4866 
4867   /* See if we need to bias by LLONG_MIN.  */
4868   if (fd->iter_type == long_long_unsigned_type_node
4869       && TREE_CODE (type) == INTEGER_TYPE
4870       && !TYPE_UNSIGNED (type))
4871     {
4872       tree n1, n2;
4873 
4874       if (fd->loop.cond_code == LT_EXPR)
4875 	{
4876 	  n1 = fd->loop.n1;
4877 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4878 	}
4879       else
4880 	{
4881 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4882 	  n2 = fd->loop.n1;
4883 	}
4884       if (TREE_CODE (n1) != INTEGER_CST
4885 	  || TREE_CODE (n2) != INTEGER_CST
4886 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4887 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4888     }
4889 
4890   entry_bb = region->entry;
4891   cont_bb = region->cont;
4892   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4893   fin_bb = BRANCH_EDGE (entry_bb)->dest;
4894   gcc_assert (broken_loop
4895 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4896   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4897   if (!broken_loop)
4898     {
4899       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4900       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4901     }
4902   exit_bb = region->exit;
4903 
4904   /* Iteration space partitioning goes in ENTRY_BB.  */
4905   gsi = gsi_last_nondebug_bb (entry_bb);
4906   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4907 
4908   if (fd->collapse > 1)
4909     {
4910       int first_zero_iter = -1, dummy = -1;
4911       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4912 
4913       counts = XALLOCAVEC (tree, fd->collapse);
4914       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4915 				  fin_bb, first_zero_iter,
4916 				  dummy_bb, dummy, l2_dom_bb);
4917       t = NULL_TREE;
4918     }
4919   else
4920     t = integer_one_node;
4921 
4922   step = fd->loop.step;
4923   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4924 				 OMP_CLAUSE__LOOPTEMP_);
4925   gcc_assert (innerc);
4926   n1 = OMP_CLAUSE_DECL (innerc);
4927   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4928   gcc_assert (innerc);
4929   n2 = OMP_CLAUSE_DECL (innerc);
4930   if (bias)
4931     {
4932       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4933       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4934     }
4935   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4936 				 true, NULL_TREE, true, GSI_SAME_STMT);
4937   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4938 				 true, NULL_TREE, true, GSI_SAME_STMT);
4939   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4940 				   true, NULL_TREE, true, GSI_SAME_STMT);
4941 
4942   tree startvar = fd->loop.v;
4943   tree endvar = NULL_TREE;
4944 
4945   if (gimple_omp_for_combined_p (fd->for_stmt))
4946     {
4947       tree clauses = gimple_omp_for_clauses (inner_stmt);
4948       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4949       gcc_assert (innerc);
4950       startvar = OMP_CLAUSE_DECL (innerc);
4951       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4952 				OMP_CLAUSE__LOOPTEMP_);
4953       gcc_assert (innerc);
4954       endvar = OMP_CLAUSE_DECL (innerc);
4955     }
4956   t = fold_convert (TREE_TYPE (startvar), n1);
4957   t = force_gimple_operand_gsi (&gsi, t,
4958 				DECL_P (startvar)
4959 				&& TREE_ADDRESSABLE (startvar),
4960 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4961   gimple *assign_stmt = gimple_build_assign (startvar, t);
4962   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4963 
4964   t = fold_convert (TREE_TYPE (startvar), n2);
4965   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4966 				false, GSI_CONTINUE_LINKING);
4967   if (endvar)
4968     {
4969       assign_stmt = gimple_build_assign (endvar, e);
4970       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4971       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4972 	assign_stmt = gimple_build_assign (fd->loop.v, e);
4973       else
4974 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4975       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4976     }
4977   if (fd->collapse > 1)
4978     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4979 
4980   if (!broken_loop)
4981     {
4982       /* The code controlling the sequential loop replaces the
4983 	 GIMPLE_OMP_CONTINUE.  */
4984       gsi = gsi_last_nondebug_bb (cont_bb);
4985       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4986       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4987       vmain = gimple_omp_continue_control_use (cont_stmt);
4988       vback = gimple_omp_continue_control_def (cont_stmt);
4989 
4990       if (!gimple_omp_for_combined_p (fd->for_stmt))
4991 	{
4992 	  if (POINTER_TYPE_P (type))
4993 	    t = fold_build_pointer_plus (vmain, step);
4994 	  else
4995 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4996 	  t = force_gimple_operand_gsi (&gsi, t,
4997 					DECL_P (vback)
4998 					&& TREE_ADDRESSABLE (vback),
4999 					NULL_TREE, true, GSI_SAME_STMT);
5000 	  assign_stmt = gimple_build_assign (vback, t);
5001 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5002 
5003 	  t = build2 (fd->loop.cond_code, boolean_type_node,
5004 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5005 		      ? t : vback, e);
5006 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5007 	}
5008 
5009       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5010       gsi_remove (&gsi, true);
5011 
5012       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5013 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5014     }
5015 
5016   /* Remove the GIMPLE_OMP_FOR statement.  */
5017   gsi = gsi_for_stmt (fd->for_stmt);
5018   gsi_remove (&gsi, true);
5019 
5020   /* Remove the GIMPLE_OMP_RETURN statement.  */
5021   gsi = gsi_last_nondebug_bb (exit_bb);
5022   gsi_remove (&gsi, true);
5023 
5024   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5025   if (!broken_loop)
5026     remove_edge (BRANCH_EDGE (entry_bb));
5027   else
5028     {
5029       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5030       region->outer->cont = NULL;
5031     }
5032 
5033   /* Connect all the blocks.  */
5034   if (!broken_loop)
5035     {
5036       ep = find_edge (cont_bb, body_bb);
5037       if (gimple_omp_for_combined_p (fd->for_stmt))
5038 	{
5039 	  remove_edge (ep);
5040 	  ep = NULL;
5041 	}
5042       else if (fd->collapse > 1)
5043 	{
5044 	  remove_edge (ep);
5045 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5046 	}
5047       else
5048 	ep->flags = EDGE_TRUE_VALUE;
5049       find_edge (cont_bb, fin_bb)->flags
5050 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5051     }
5052 
5053   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5054 			   recompute_dominator (CDI_DOMINATORS, body_bb));
5055   if (!broken_loop)
5056     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5057 			     recompute_dominator (CDI_DOMINATORS, fin_bb));
5058 
5059   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5060     {
5061       struct loop *loop = alloc_loop ();
5062       loop->header = body_bb;
5063       if (collapse_bb == NULL)
5064 	loop->latch = cont_bb;
5065       add_loop (loop, body_bb->loop_father);
5066     }
5067 }
5068 
5069 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
5070    partitioned loop.  The lowering here is abstracted, in that the
5071    loop parameters are passed through internal functions, which are
5072    further lowered by oacc_device_lower, once we get to the target
5073    compiler.  The loop is of the form:
5074 
5075    for (V = B; V LTGT E; V += S) {BODY}
5076 
5077    where LTGT is < or >.  We may have a specified chunking size, CHUNKING
5078    (constant 0 for no chunking) and we will have a GWV partitioning
5079    mask, specifying dimensions over which the loop is to be
5080    partitioned (see note below).  We generate code that looks like
5081    (this ignores tiling):
5082 
5083    <entry_bb> [incoming FALL->body, BRANCH->exit]
5084      typedef signedintify (typeof (V)) T;  // underlying signed integral type
5085      T range = E - B;
5086      T chunk_no = 0;
5087      T DIR = LTGT == '<' ? +1 : -1;
5088      T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5089      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5090 
5091    <head_bb> [created by splitting end of entry_bb]
5092      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5093      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5094      if (!(offset LTGT bound)) goto bottom_bb;
5095 
5096    <body_bb> [incoming]
5097      V = B + offset;
5098      {BODY}
5099 
5100    <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5101      offset += step;
5102      if (offset LTGT bound) goto body_bb; [*]
5103 
5104    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5105      chunk_no++;
5106      if (chunk_no < chunk_max) goto head_bb;
5107 
5108    <exit_bb> [incoming]
5109      V = B + ((range -/+ 1) / S +/- 1) * S [*]
5110 
5111    [*] Needed if V live at end of loop.  */
5112 
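/* For example, a directive such as

     #pragma acc loop gang vector
     for (V = B; V < E; V += S)
       BODY;

   is not fully lowered here: the CHUNKS/STEP/OFFSET/BOUND computations
   are emitted as IFN_GOACC_LOOP internal calls that merely carry DIR,
   RANGE, S, CHUNK_SIZE and GWV, and oacc_device_lower later replaces
   them with the arithmetic appropriate for the gang/worker/vector
   partitioning of the selected offload target.  */
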
5113 static void
5114 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5115 {
5116   tree v = fd->loop.v;
5117   enum tree_code cond_code = fd->loop.cond_code;
5118   enum tree_code plus_code = PLUS_EXPR;
5119 
5120   tree chunk_size = integer_minus_one_node;
5121   tree gwv = integer_zero_node;
5122   tree iter_type = TREE_TYPE (v);
5123   tree diff_type = iter_type;
5124   tree plus_type = iter_type;
5125   struct oacc_collapse *counts = NULL;
5126 
5127   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5128 		       == GF_OMP_FOR_KIND_OACC_LOOP);
5129   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5130   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5131 
5132   if (POINTER_TYPE_P (iter_type))
5133     {
5134       plus_code = POINTER_PLUS_EXPR;
5135       plus_type = sizetype;
5136     }
5137   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5138     diff_type = signed_type_for (diff_type);
5139   if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5140     diff_type = integer_type_node;
5141 
5142   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5143   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5144   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
5145   basic_block bottom_bb = NULL;
5146 
5147   /* entry_bb has two successors; the branch edge is to the exit
5148      block, the fallthrough edge to the body.  */
5149   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5150 	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5151 
5152   /* If cont_bb is non-NULL, it has 2 successors.  The branch successor is
5153      body_bb, or a block whose only successor is body_bb.  Its
5154      fallthrough successor is the final block (same as the branch
5155      successor of the entry_bb).  */
5156   if (cont_bb)
5157     {
5158       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5159       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5160 
5161       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5162       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5163     }
5164   else
5165     gcc_assert (!gimple_in_ssa_p (cfun));
5166 
5167   /* The exit block only has entry_bb and cont_bb as predecessors.  */
5168   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5169 
5170   tree chunk_no;
5171   tree chunk_max = NULL_TREE;
5172   tree bound, offset;
5173   tree step = create_tmp_var (diff_type, ".step");
5174   bool up = cond_code == LT_EXPR;
5175   tree dir = build_int_cst (diff_type, up ? +1 : -1);
5176   bool chunking = !gimple_in_ssa_p (cfun);
5177   bool negating;
5178 
5179   /* Tiling vars.  */
5180   tree tile_size = NULL_TREE;
5181   tree element_s = NULL_TREE;
5182   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5183   basic_block elem_body_bb = NULL;
5184   basic_block elem_cont_bb = NULL;
5185 
5186   /* SSA instances.  */
5187   tree offset_incr = NULL_TREE;
5188   tree offset_init = NULL_TREE;
5189 
5190   gimple_stmt_iterator gsi;
5191   gassign *ass;
5192   gcall *call;
5193   gimple *stmt;
5194   tree expr;
5195   location_t loc;
5196   edge split, be, fte;
5197 
5198   /* Split the end of entry_bb to create head_bb.  */
5199   split = split_block (entry_bb, last_stmt (entry_bb));
5200   basic_block head_bb = split->dest;
5201   entry_bb = split->src;
5202 
5203   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
5204   gsi = gsi_last_nondebug_bb (entry_bb);
5205   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5206   loc = gimple_location (for_stmt);
5207 
5208   if (gimple_in_ssa_p (cfun))
5209     {
5210       offset_init = gimple_omp_for_index (for_stmt, 0);
5211       gcc_assert (integer_zerop (fd->loop.n1));
5212       /* The SSA parallelizer does gang parallelism.  */
5213       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5214     }
5215 
5216   if (fd->collapse > 1 || fd->tiling)
5217     {
5218       gcc_assert (!gimple_in_ssa_p (cfun) && up);
5219       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5220       tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5221 					      TREE_TYPE (fd->loop.n2), loc);
5222 
5223       if (SSA_VAR_P (fd->loop.n2))
5224 	{
5225 	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5226 					    true, GSI_SAME_STMT);
5227 	  ass = gimple_build_assign (fd->loop.n2, total);
5228 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5229 	}
5230     }
5231 
5232   tree b = fd->loop.n1;
5233   tree e = fd->loop.n2;
5234   tree s = fd->loop.step;
5235 
5236   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5237   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5238 
5239   /* Convert the step, avoiding possible unsigned->signed overflow.  */
5240   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5241   if (negating)
5242     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5243   s = fold_convert (diff_type, s);
5244   if (negating)
5245     s = fold_build1 (NEGATE_EXPR, diff_type, s);
5246   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5247 
5248   if (!chunking)
5249     chunk_size = integer_zero_node;
5250   expr = fold_convert (diff_type, chunk_size);
5251   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5252 					 NULL_TREE, true, GSI_SAME_STMT);
5253 
5254   if (fd->tiling)
5255     {
5256       /* Determine the tile size and element step,
5257 	 modify the outer loop step size.  */
5258       tile_size = create_tmp_var (diff_type, ".tile_size");
5259       expr = build_int_cst (diff_type, 1);
5260       for (int ix = 0; ix < fd->collapse; ix++)
5261 	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5262       expr = force_gimple_operand_gsi (&gsi, expr, true,
5263 				       NULL_TREE, true, GSI_SAME_STMT);
5264       ass = gimple_build_assign (tile_size, expr);
5265       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5266 
5267       element_s = create_tmp_var (diff_type, ".element_s");
5268       ass = gimple_build_assign (element_s, s);
5269       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5270 
5271       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5272       s = force_gimple_operand_gsi (&gsi, expr, true,
5273 				    NULL_TREE, true, GSI_SAME_STMT);
5274     }
5275 
5276   /* Determine the range, avoiding possible unsigned->signed overflow.  */
5277   negating = !up && TYPE_UNSIGNED (iter_type);
5278   expr = fold_build2 (MINUS_EXPR, plus_type,
5279 		      fold_convert (plus_type, negating ? b : e),
5280 		      fold_convert (plus_type, negating ? e : b));
5281   expr = fold_convert (diff_type, expr);
5282   if (negating)
5283     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5284   tree range = force_gimple_operand_gsi (&gsi, expr, true,
5285 					 NULL_TREE, true, GSI_SAME_STMT);
5286 
5287   chunk_no = build_int_cst (diff_type, 0);
5288   if (chunking)
5289     {
5290       gcc_assert (!gimple_in_ssa_p (cfun));
5291 
5292       expr = chunk_no;
5293       chunk_max = create_tmp_var (diff_type, ".chunk_max");
5294       chunk_no = create_tmp_var (diff_type, ".chunk_no");
5295 
5296       ass = gimple_build_assign (chunk_no, expr);
5297       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5298 
5299       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5300 					 build_int_cst (integer_type_node,
5301 							IFN_GOACC_LOOP_CHUNKS),
5302 					 dir, range, s, chunk_size, gwv);
5303       gimple_call_set_lhs (call, chunk_max);
5304       gimple_set_location (call, loc);
5305       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5306     }
5307   else
5308     chunk_size = chunk_no;
5309 
5310   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5311 				     build_int_cst (integer_type_node,
5312 						    IFN_GOACC_LOOP_STEP),
5313 				     dir, range, s, chunk_size, gwv);
5314   gimple_call_set_lhs (call, step);
5315   gimple_set_location (call, loc);
5316   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5317 
5318   /* Remove the GIMPLE_OMP_FOR.  */
5319   gsi_remove (&gsi, true);
5320 
5321   /* Fixup edges from head_bb.  */
5322   be = BRANCH_EDGE (head_bb);
5323   fte = FALLTHRU_EDGE (head_bb);
5324   be->flags |= EDGE_FALSE_VALUE;
5325   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5326 
5327   basic_block body_bb = fte->dest;
5328 
5329   if (gimple_in_ssa_p (cfun))
5330     {
5331       gsi = gsi_last_nondebug_bb (cont_bb);
5332       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5333 
5334       offset = gimple_omp_continue_control_use (cont_stmt);
5335       offset_incr = gimple_omp_continue_control_def (cont_stmt);
5336     }
5337   else
5338     {
5339       offset = create_tmp_var (diff_type, ".offset");
5340       offset_init = offset_incr = offset;
5341     }
5342   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5343 
5344   /* Loop offset & bound go into head_bb.  */
5345   gsi = gsi_start_bb (head_bb);
5346 
5347   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5348 				     build_int_cst (integer_type_node,
5349 						    IFN_GOACC_LOOP_OFFSET),
5350 				     dir, range, s,
5351 				     chunk_size, gwv, chunk_no);
5352   gimple_call_set_lhs (call, offset_init);
5353   gimple_set_location (call, loc);
5354   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5355 
5356   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5357 				     build_int_cst (integer_type_node,
5358 						    IFN_GOACC_LOOP_BOUND),
5359 				     dir, range, s,
5360 				     chunk_size, gwv, offset_init);
5361   gimple_call_set_lhs (call, bound);
5362   gimple_set_location (call, loc);
5363   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5364 
5365   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5366   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5367 		    GSI_CONTINUE_LINKING);
5368 
5369   /* V assignment goes into body_bb.  */
5370   if (!gimple_in_ssa_p (cfun))
5371     {
5372       gsi = gsi_start_bb (body_bb);
5373 
5374       expr = build2 (plus_code, iter_type, b,
5375 		     fold_convert (plus_type, offset));
5376       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5377 				       true, GSI_SAME_STMT);
5378       ass = gimple_build_assign (v, expr);
5379       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5380 
5381       if (fd->collapse > 1 || fd->tiling)
5382 	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5383 
5384       if (fd->tiling)
5385 	{
5386 	  /* Determine the range of the element loop -- usually simply
5387 	     the tile_size, but could be smaller if the final
5388 	     iteration of the outer loop is a partial tile.  */
5389 	  tree e_range = create_tmp_var (diff_type, ".e_range");
5390 
5391 	  expr = build2 (MIN_EXPR, diff_type,
5392 			 build2 (MINUS_EXPR, diff_type, bound, offset),
5393 			 build2 (MULT_EXPR, diff_type, tile_size,
5394 				 element_s));
5395 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5396 					   true, GSI_SAME_STMT);
5397 	  ass = gimple_build_assign (e_range, expr);
5398 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5399 
5400 	  /* Determine bound, offset & step of inner loop. */
5401 	  e_bound = create_tmp_var (diff_type, ".e_bound");
5402 	  e_offset = create_tmp_var (diff_type, ".e_offset");
5403 	  e_step = create_tmp_var (diff_type, ".e_step");
5404 
5405 	  /* Mark these as element loops.  */
5406 	  tree t, e_gwv = integer_minus_one_node;
5407 	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
5408 
5409 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5410 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5411 					     element_s, chunk, e_gwv, chunk);
5412 	  gimple_call_set_lhs (call, e_offset);
5413 	  gimple_set_location (call, loc);
5414 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5415 
5416 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5417 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5418 					     element_s, chunk, e_gwv, e_offset);
5419 	  gimple_call_set_lhs (call, e_bound);
5420 	  gimple_set_location (call, loc);
5421 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5422 
5423 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5424 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5425 					     element_s, chunk, e_gwv);
5426 	  gimple_call_set_lhs (call, e_step);
5427 	  gimple_set_location (call, loc);
5428 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5429 
5430 	  /* Add test and split block.  */
5431 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5432 	  stmt = gimple_build_cond_empty (expr);
5433 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5434 	  split = split_block (body_bb, stmt);
5435 	  elem_body_bb = split->dest;
5436 	  if (cont_bb == body_bb)
5437 	    cont_bb = elem_body_bb;
5438 	  body_bb = split->src;
5439 
5440 	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5441 
5442 	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
5443 	  if (cont_bb == NULL)
5444 	    {
5445 	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5446 	      e->probability = profile_probability::even ();
5447 	      split->probability = profile_probability::even ();
5448 	    }
5449 
5450 	  /* Initialize the user's loop vars.  */
5451 	  gsi = gsi_start_bb (elem_body_bb);
5452 	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5453 	}
5454     }
5455 
5456   /* Loop increment goes into cont_bb.  If this is not a loop, we
5457      will have spawned threads as if it were, and each one will
5458      execute one iteration.  The specification is not explicit about
5459      whether such constructs are ill-formed or not, and they can
5460      occur, especially when noreturn routines are involved.  */
5461   if (cont_bb)
5462     {
5463       gsi = gsi_last_nondebug_bb (cont_bb);
5464       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5465       loc = gimple_location (cont_stmt);
5466 
5467       if (fd->tiling)
5468 	{
5469 	  /* Insert element loop increment and test.  */
5470 	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5471 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5472 					   true, GSI_SAME_STMT);
5473 	  ass = gimple_build_assign (e_offset, expr);
5474 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5475 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5476 
5477 	  stmt = gimple_build_cond_empty (expr);
5478 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5479 	  split = split_block (cont_bb, stmt);
5480 	  elem_cont_bb = split->src;
5481 	  cont_bb = split->dest;
5482 
5483 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5484 	  split->probability = profile_probability::unlikely ().guessed ();
5485 	  edge latch_edge
5486 	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5487 	  latch_edge->probability = profile_probability::likely ().guessed ();
5488 
5489 	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5490 	  skip_edge->probability = profile_probability::unlikely ().guessed ();
5491 	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5492 	  loop_entry_edge->probability
5493 	    = profile_probability::likely ().guessed ();
5494 
5495 	  gsi = gsi_for_stmt (cont_stmt);
5496 	}
5497 
5498       /* Increment offset.  */
5499       if (gimple_in_ssa_p (cfun))
5500 	expr = build2 (plus_code, iter_type, offset,
5501 		       fold_convert (plus_type, step));
5502       else
5503 	expr = build2 (PLUS_EXPR, diff_type, offset, step);
5504       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5505 				       true, GSI_SAME_STMT);
5506       ass = gimple_build_assign (offset_incr, expr);
5507       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5508       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5509       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5510 
5511       /* Remove the GIMPLE_OMP_CONTINUE.  */
5512       gsi_remove (&gsi, true);
5513 
5514       /* Fixup edges from cont_bb.  */
5515       be = BRANCH_EDGE (cont_bb);
5516       fte = FALLTHRU_EDGE (cont_bb);
5517       be->flags |= EDGE_TRUE_VALUE;
5518       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5519 
5520       if (chunking)
5521 	{
5522 	  /* Split the beginning of exit_bb to make bottom_bb.  We
5523 	     need to insert a nop at the start, because splitting is
5524 	     after a stmt, not before.  */
5525 	  gsi = gsi_start_bb (exit_bb);
5526 	  stmt = gimple_build_nop ();
5527 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5528 	  split = split_block (exit_bb, stmt);
5529 	  bottom_bb = split->src;
5530 	  exit_bb = split->dest;
5531 	  gsi = gsi_last_bb (bottom_bb);
5532 
5533 	  /* Chunk increment and test goes into bottom_bb.  */
5534 	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5535 			 build_int_cst (diff_type, 1));
5536 	  ass = gimple_build_assign (chunk_no, expr);
5537 	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5538 
5539 	  /* Chunk test at end of bottom_bb.  */
5540 	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5541 	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5542 			    GSI_CONTINUE_LINKING);
5543 
5544 	  /* Fixup edges from bottom_bb.  */
5545 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5546 	  split->probability = profile_probability::unlikely ().guessed ();
5547 	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5548 	  latch_edge->probability = profile_probability::likely ().guessed ();
5549 	}
5550     }
5551 
5552   gsi = gsi_last_nondebug_bb (exit_bb);
5553   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5554   loc = gimple_location (gsi_stmt (gsi));
5555 
5556   if (!gimple_in_ssa_p (cfun))
5557     {
5558       /* Insert the final value of V, in case it is live.  This is the
5559 	 value for the only thread that survives past the join.  */
5560       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5561       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5562       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5563       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5564       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5565       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5566 				       true, GSI_SAME_STMT);
5567       ass = gimple_build_assign (v, expr);
5568       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5569     }
5570 
5571   /* Remove the OMP_RETURN.  */
5572   gsi_remove (&gsi, true);
5573 
5574   if (cont_bb)
5575     {
5576       /* We now have one, two or three nested loops.  Update the loop
5577 	 structures.  */
5578       struct loop *parent = entry_bb->loop_father;
5579       struct loop *body = body_bb->loop_father;
5580 
5581       if (chunking)
5582 	{
5583 	  struct loop *chunk_loop = alloc_loop ();
5584 	  chunk_loop->header = head_bb;
5585 	  chunk_loop->latch = bottom_bb;
5586 	  add_loop (chunk_loop, parent);
5587 	  parent = chunk_loop;
5588 	}
5589       else if (parent != body)
5590 	{
5591 	  gcc_assert (body->header == body_bb);
5592 	  gcc_assert (body->latch == cont_bb
5593 		      || single_pred (body->latch) == cont_bb);
5594 	  parent = NULL;
5595 	}
5596 
5597       if (parent)
5598 	{
5599 	  struct loop *body_loop = alloc_loop ();
5600 	  body_loop->header = body_bb;
5601 	  body_loop->latch = cont_bb;
5602 	  add_loop (body_loop, parent);
5603 
5604 	  if (fd->tiling)
5605 	    {
5606 	      /* Insert tiling's element loop.  */
5607 	      struct loop *inner_loop = alloc_loop ();
5608 	      inner_loop->header = elem_body_bb;
5609 	      inner_loop->latch = elem_cont_bb;
5610 	      add_loop (inner_loop, body_loop);
5611 	    }
5612 	}
5613     }
5614 }
5615 
5616 /* Expand the OMP loop defined by REGION.  */
5617 
5618 static void
5619 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5620 {
5621   struct omp_for_data fd;
5622   struct omp_for_data_loop *loops;
5623 
5624   loops
5625     = (struct omp_for_data_loop *)
5626       alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5627 	      * sizeof (struct omp_for_data_loop));
5628   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5629 			&fd, loops);
5630   region->sched_kind = fd.sched_kind;
5631   region->sched_modifiers = fd.sched_modifiers;
5632 
5633   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5634   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5635   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5636   if (region->cont)
5637     {
5638       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5639       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5640       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5641     }
5642   else
5643     /* If there isn't a continue then this is a degenerate case where
5644        the introduction of abnormal edges during lowering will prevent
5645        original loops from being detected.  Fix that up.  */
5646     loops_state_set (LOOPS_NEED_FIXUP);
5647 
5648   if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5649     expand_omp_simd (region, &fd);
5650   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5651     {
5652       gcc_assert (!inner_stmt);
5653       expand_oacc_for (region, &fd);
5654     }
5655   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5656     {
5657       if (gimple_omp_for_combined_into_p (fd.for_stmt))
5658 	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5659       else
5660 	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5661     }
5662   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5663 	   && !fd.have_ordered)
5664     {
5665       if (fd.chunk_size == NULL)
5666 	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5667       else
5668 	expand_omp_for_static_chunk (region, &fd, inner_stmt);
5669     }
5670   else
5671     {
5672       int fn_index, start_ix, next_ix;
5673 
5674       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5675 		  == GF_OMP_FOR_KIND_FOR);
5676       if (fd.chunk_size == NULL
5677 	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5678 	fd.chunk_size = integer_zero_node;
5679       gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5680       switch (fd.sched_kind)
5681 	{
5682 	case OMP_CLAUSE_SCHEDULE_RUNTIME:
5683 	  fn_index = 3;
5684 	  break;
5685 	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5686 	case OMP_CLAUSE_SCHEDULE_GUIDED:
5687 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5688 	      && !fd.ordered
5689 	      && !fd.have_ordered)
5690 	    {
5691 	      fn_index = 3 + fd.sched_kind;
5692 	      break;
5693 	    }
5694 	  /* FALLTHRU */
5695 	default:
5696 	  fn_index = fd.sched_kind;
5697 	  break;
5698 	}
5699       if (!fd.ordered)
5700 	fn_index += fd.have_ordered * 6;
5701       if (fd.ordered)
5702 	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5703       else
5704 	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5705       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5706       if (fd.iter_type == long_long_unsigned_type_node)
5707 	{
5708 	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5709 			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5710 	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5711 		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5712 	}
5713       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5714 			      (enum built_in_function) next_ix, inner_stmt);
5715     }
5716 
5717   if (gimple_in_ssa_p (cfun))
5718     update_ssa (TODO_update_ssa_only_virtuals);
5719 }
5720 
5721 /* Expand code for an OpenMP sections directive.  In pseudo code, we generate
5722 
5723 	v = GOMP_sections_start (n);
5724     L0:
5725 	switch (v)
5726 	  {
5727 	  case 0:
5728 	    goto L2;
5729 	  case 1:
5730 	    section 1;
5731 	    goto L1;
5732 	  case 2:
5733 	    ...
5734 	  case n:
5735 	    ...
5736 	  default:
5737 	    abort ();
5738 	  }
5739     L1:
5740 	v = GOMP_sections_next ();
5741 	goto L0;
5742     L2:
5743 	reduction;
5744 
5745     If this is a combined parallel sections, replace the call to
5746     GOMP_sections_start with a call to GOMP_sections_next.  */
5747 
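/* For instance, a construct such as

     #pragma omp sections
     {
       #pragma omp section
       work1 ();
       #pragma omp section
       work2 ();
     }

   yields the dispatch loop sketched above with two real cases:
   GOMP_sections_start (2) (or GOMP_sections_next () in the combined
   parallel case) returns the number of the next section to execute,
   and a return value of 0 means no work is left, jumping to L2.  */
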
5748 static void
5749 expand_omp_sections (struct omp_region *region)
5750 {
5751   tree t, u, vin = NULL, vmain, vnext, l2;
5752   unsigned len;
5753   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5754   gimple_stmt_iterator si, switch_si;
5755   gomp_sections *sections_stmt;
5756   gimple *stmt;
5757   gomp_continue *cont;
5758   edge_iterator ei;
5759   edge e;
5760   struct omp_region *inner;
5761   unsigned i, casei;
5762   bool exit_reachable = region->cont != NULL;
5763 
5764   gcc_assert (region->exit != NULL);
5765   entry_bb = region->entry;
5766   l0_bb = single_succ (entry_bb);
5767   l1_bb = region->cont;
5768   l2_bb = region->exit;
5769   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5770     l2 = gimple_block_label (l2_bb);
5771   else
5772     {
5773       /* This can happen if there are reductions.  */
5774       len = EDGE_COUNT (l0_bb->succs);
5775       gcc_assert (len > 0);
5776       e = EDGE_SUCC (l0_bb, len - 1);
5777       si = gsi_last_nondebug_bb (e->dest);
5778       l2 = NULL_TREE;
5779       if (gsi_end_p (si)
5780 	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5781 	l2 = gimple_block_label (e->dest);
5782       else
5783 	FOR_EACH_EDGE (e, ei, l0_bb->succs)
5784 	  {
5785 	    si = gsi_last_nondebug_bb (e->dest);
5786 	    if (gsi_end_p (si)
5787 		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5788 	      {
5789 		l2 = gimple_block_label (e->dest);
5790 		break;
5791 	      }
5792 	  }
5793     }
5794   if (exit_reachable)
5795     default_bb = create_empty_bb (l1_bb->prev_bb);
5796   else
5797     default_bb = create_empty_bb (l0_bb);
5798 
5799   /* We will build a switch() with enough cases for all the
5800      GIMPLE_OMP_SECTION regions, a '0' case to signal that no more work
5801      remains, and a default case to abort if something goes wrong.  */
5802   len = EDGE_COUNT (l0_bb->succs);
5803 
5804   /* Use vec::quick_push on label_vec throughout, since we know the size
5805      in advance.  */
5806   auto_vec<tree> label_vec (len);
5807 
5808   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5809      GIMPLE_OMP_SECTIONS statement.  */
5810   si = gsi_last_nondebug_bb (entry_bb);
5811   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5812   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5813   vin = gimple_omp_sections_control (sections_stmt);
5814   if (!is_combined_parallel (region))
5815     {
5816       /* If we are not inside a combined parallel+sections region,
5817 	 call GOMP_sections_start.  */
5818       t = build_int_cst (unsigned_type_node, len - 1);
5819       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5820       stmt = gimple_build_call (u, 1, t);
5821     }
5822   else
5823     {
5824       /* Otherwise, call GOMP_sections_next.  */
5825       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5826       stmt = gimple_build_call (u, 0);
5827     }
5828   gimple_call_set_lhs (stmt, vin);
5829   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5830   gsi_remove (&si, true);
5831 
5832   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5833      L0_BB.  */
5834   switch_si = gsi_last_nondebug_bb (l0_bb);
5835   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5836   if (exit_reachable)
5837     {
5838       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5839       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5840       vmain = gimple_omp_continue_control_use (cont);
5841       vnext = gimple_omp_continue_control_def (cont);
5842     }
5843   else
5844     {
5845       vmain = vin;
5846       vnext = NULL_TREE;
5847     }
5848 
5849   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5850   label_vec.quick_push (t);
5851   i = 1;
5852 
5853   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
5854   for (inner = region->inner, casei = 1;
5855        inner;
5856        inner = inner->next, i++, casei++)
5857     {
5858       basic_block s_entry_bb, s_exit_bb;
5859 
5860       /* Skip optional reduction region.  */
5861       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5862 	{
5863 	  --i;
5864 	  --casei;
5865 	  continue;
5866 	}
5867 
5868       s_entry_bb = inner->entry;
5869       s_exit_bb = inner->exit;
5870 
5871       t = gimple_block_label (s_entry_bb);
5872       u = build_int_cst (unsigned_type_node, casei);
5873       u = build_case_label (u, NULL, t);
5874       label_vec.quick_push (u);
5875 
5876       si = gsi_last_nondebug_bb (s_entry_bb);
5877       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5878       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5879       gsi_remove (&si, true);
5880       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5881 
5882       if (s_exit_bb == NULL)
5883 	continue;
5884 
5885       si = gsi_last_nondebug_bb (s_exit_bb);
5886       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5887       gsi_remove (&si, true);
5888 
5889       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5890     }
5891 
5892   /* Error handling code goes in DEFAULT_BB.  */
5893   t = gimple_block_label (default_bb);
5894   u = build_case_label (NULL, NULL, t);
5895   make_edge (l0_bb, default_bb, 0);
5896   add_bb_to_loop (default_bb, current_loops->tree_root);
5897 
5898   stmt = gimple_build_switch (vmain, u, label_vec);
5899   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5900   gsi_remove (&switch_si, true);
5901 
5902   si = gsi_start_bb (default_bb);
5903   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5904   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5905 
5906   if (exit_reachable)
5907     {
5908       tree bfn_decl;
5909 
5910       /* Code to get the next section goes in L1_BB.  */
5911       si = gsi_last_nondebug_bb (l1_bb);
5912       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5913 
5914       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5915       stmt = gimple_build_call (bfn_decl, 0);
5916       gimple_call_set_lhs (stmt, vnext);
5917       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5918       gsi_remove (&si, true);
5919 
5920       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5921     }
5922 
5923   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
5924   si = gsi_last_nondebug_bb (l2_bb);
5925   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5926     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5927   else if (gimple_omp_return_lhs (gsi_stmt (si)))
5928     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5929   else
5930     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5931   stmt = gimple_build_call (t, 0);
5932   if (gimple_omp_return_lhs (gsi_stmt (si)))
5933     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5934   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5935   gsi_remove (&si, true);
5936 
5937   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5938 }
5939 
5940 /* Expand code for an OpenMP single directive.  We've already expanded
5941    much of the code; here we simply place the GOMP_barrier call.  */
5942 
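/* E.g. for

     #pragma omp single
       work ();

   everything except the implicit barrier was handled during lowering;
   here we only drop the region markers and, unless a nowait clause was
   given, emit the barrier at the exit via omp_build_barrier.  */
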
5943 static void
5944 expand_omp_single (struct omp_region *region)
5945 {
5946   basic_block entry_bb, exit_bb;
5947   gimple_stmt_iterator si;
5948 
5949   entry_bb = region->entry;
5950   exit_bb = region->exit;
5951 
5952   si = gsi_last_nondebug_bb (entry_bb);
5953   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5954   gsi_remove (&si, true);
5955   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5956 
5957   si = gsi_last_nondebug_bb (exit_bb);
5958   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5959     {
5960       tree t = gimple_omp_return_lhs (gsi_stmt (si));
5961       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5962     }
5963   gsi_remove (&si, true);
5964   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5965 }
5966 
5967 /* Generic expansion for OpenMP synchronization directives: master,
5968    ordered and critical.  All we need to do here is remove the entry
5969    and exit markers for REGION.  */
5970 
5971 static void
5972 expand_omp_synch (struct omp_region *region)
5973 {
5974   basic_block entry_bb, exit_bb;
5975   gimple_stmt_iterator si;
5976 
5977   entry_bb = region->entry;
5978   exit_bb = region->exit;
5979 
5980   si = gsi_last_nondebug_bb (entry_bb);
5981   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5982 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5983 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5984 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5985 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5986 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5987   gsi_remove (&si, true);
5988   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5989 
5990   if (exit_bb)
5991     {
5992       si = gsi_last_nondebug_bb (exit_bb);
5993       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5994       gsi_remove (&si, true);
5995       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5996     }
5997 }
5998 
5999 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6000    operation as a normal volatile load.  */
6001 
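/* Rough sketch of the transformation: the region

     GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr)
     GIMPLE_OMP_ATOMIC_STORE

   becomes a single call

     loaded_val = __atomic_load_N (addr, memmodel);

   where N is 1 << INDEX and memmodel is MEMMODEL_SEQ_CST for seq_cst
   regions and MEMMODEL_RELAXED otherwise; if the builtin is unavailable
   we return false and the caller falls back to another expansion.  */
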
6002 static bool
6003 expand_omp_atomic_load (basic_block load_bb, tree addr,
6004 			tree loaded_val, int index)
6005 {
6006   enum built_in_function tmpbase;
6007   gimple_stmt_iterator gsi;
6008   basic_block store_bb;
6009   location_t loc;
6010   gimple *stmt;
6011   tree decl, call, type, itype;
6012 
6013   gsi = gsi_last_nondebug_bb (load_bb);
6014   stmt = gsi_stmt (gsi);
6015   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6016   loc = gimple_location (stmt);
6017 
6018   /* ??? If the target does not implement atomic_load_optab[mode], and mode
6019      is smaller than word size, then expand_atomic_load assumes that the load
6020      is atomic.  We could avoid the builtin entirely in this case.  */
6021 
6022   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6023   decl = builtin_decl_explicit (tmpbase);
6024   if (decl == NULL_TREE)
6025     return false;
6026 
6027   type = TREE_TYPE (loaded_val);
6028   itype = TREE_TYPE (TREE_TYPE (decl));
6029 
6030   call = build_call_expr_loc (loc, decl, 2, addr,
6031 			      build_int_cst (NULL,
6032 					     gimple_omp_atomic_seq_cst_p (stmt)
6033 					     ? MEMMODEL_SEQ_CST
6034 					     : MEMMODEL_RELAXED));
6035   if (!useless_type_conversion_p (type, itype))
6036     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6037   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6038 
6039   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6040   gsi_remove (&gsi, true);
6041 
6042   store_bb = single_succ (load_bb);
6043   gsi = gsi_last_nondebug_bb (store_bb);
6044   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6045   gsi_remove (&gsi, true);
6046 
6047   if (gimple_in_ssa_p (cfun))
6048     update_ssa (TODO_update_ssa_no_phi);
6049 
6050   return true;
6051 }
6052 
6053 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6054    operation as a normal volatile store.  */
6055 
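/* Rough sketch: a plain atomic write becomes

     __atomic_store_N (addr, stored_val, memmodel);

   whereas a capture that still needs the previous value is emitted as

     loaded_val = __atomic_exchange_N (addr, stored_val, memmodel);

   the latter only if the target can do an atomic exchange in that mode
   (can_atomic_exchange_p); otherwise we return false.  */
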
6056 static bool
6057 expand_omp_atomic_store (basic_block load_bb, tree addr,
6058 			 tree loaded_val, tree stored_val, int index)
6059 {
6060   enum built_in_function tmpbase;
6061   gimple_stmt_iterator gsi;
6062   basic_block store_bb = single_succ (load_bb);
6063   location_t loc;
6064   gimple *stmt;
6065   tree decl, call, type, itype;
6066   machine_mode imode;
6067   bool exchange;
6068 
6069   gsi = gsi_last_nondebug_bb (load_bb);
6070   stmt = gsi_stmt (gsi);
6071   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6072 
6073   /* If the load value is needed, then this isn't a store but an exchange.  */
6074   exchange = gimple_omp_atomic_need_value_p (stmt);
6075 
6076   gsi = gsi_last_nondebug_bb (store_bb);
6077   stmt = gsi_stmt (gsi);
6078   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6079   loc = gimple_location (stmt);
6080 
6081   /* ??? If the target does not implement atomic_store_optab[mode], and mode
6082      is smaller than word size, then expand_atomic_store assumes that the store
6083      is atomic.  We could avoid the builtin entirely in this case.  */
6084 
6085   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6086   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6087   decl = builtin_decl_explicit (tmpbase);
6088   if (decl == NULL_TREE)
6089     return false;
6090 
6091   type = TREE_TYPE (stored_val);
6092 
6093   /* Dig out the type of the function's second argument.  */
6094   itype = TREE_TYPE (decl);
6095   itype = TYPE_ARG_TYPES (itype);
6096   itype = TREE_CHAIN (itype);
6097   itype = TREE_VALUE (itype);
6098   imode = TYPE_MODE (itype);
6099 
6100   if (exchange && !can_atomic_exchange_p (imode, true))
6101     return false;
6102 
6103   if (!useless_type_conversion_p (itype, type))
6104     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6105   call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6106 			      build_int_cst (NULL,
6107 					     gimple_omp_atomic_seq_cst_p (stmt)
6108 					     ? MEMMODEL_SEQ_CST
6109 					     : MEMMODEL_RELAXED));
6110   if (exchange)
6111     {
6112       if (!useless_type_conversion_p (type, itype))
6113 	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6114       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6115     }
6116 
6117   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6118   gsi_remove (&gsi, true);
6119 
6120   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
6121   gsi = gsi_last_nondebug_bb (load_bb);
6122   gsi_remove (&gsi, true);
6123 
6124   if (gimple_in_ssa_p (cfun))
6125     update_ssa (TODO_update_ssa_no_phi);
6126 
6127   return true;
6128 }
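
/* A rough user-level sketch of the expansion above: a plain atomic write

       #pragma omp atomic write
	 *p = val;

   becomes, in effect,

       __atomic_store_n (p, val, __ATOMIC_RELAXED);

   while the capture form (EXCHANGE above) becomes

       old = __atomic_exchange_n (p, val, __ATOMIC_RELAXED);

   again with __ATOMIC_SEQ_CST substituted when the seq_cst clause is
   present.  */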
6129 
6130 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6131    operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
6132    size of the data type, and thus usable to find the index of the builtin
6133    decl.  Returns false if the expression is not of the proper form.  */
6134 
6135 static bool
6136 expand_omp_atomic_fetch_op (basic_block load_bb,
6137 			    tree addr, tree loaded_val,
6138 			    tree stored_val, int index)
6139 {
6140   enum built_in_function oldbase, newbase, tmpbase;
6141   tree decl, itype, call;
6142   tree lhs, rhs;
6143   basic_block store_bb = single_succ (load_bb);
6144   gimple_stmt_iterator gsi;
6145   gimple *stmt;
6146   location_t loc;
6147   enum tree_code code;
6148   bool need_old, need_new;
6149   machine_mode imode;
6150   bool seq_cst;
6151 
6152   /* We expect to find the following sequences:
6153 
6154    load_bb:
6155        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6156 
6157    store_bb:
6158        val = tmp OP something; (or: something OP tmp)
6159        GIMPLE_OMP_STORE (val)
6160 
6161   ???FIXME: Allow a more flexible sequence.
6162   Perhaps use data flow to pick the statements.
6163 
6164   */
6165 
6166   gsi = gsi_after_labels (store_bb);
6167   stmt = gsi_stmt (gsi);
6168   if (is_gimple_debug (stmt))
6169     {
6170       gsi_next_nondebug (&gsi);
6171       if (gsi_end_p (gsi))
6172 	return false;
6173       stmt = gsi_stmt (gsi);
6174     }
6175   loc = gimple_location (stmt);
6176   if (!is_gimple_assign (stmt))
6177     return false;
6178   gsi_next_nondebug (&gsi);
6179   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6180     return false;
6181   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6182   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6183   seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6184   gcc_checking_assert (!need_old || !need_new);
6185 
6186   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6187     return false;
6188 
6189   /* Check for one of the supported fetch-op operations.  */
6190   code = gimple_assign_rhs_code (stmt);
6191   switch (code)
6192     {
6193     case PLUS_EXPR:
6194     case POINTER_PLUS_EXPR:
6195       oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6196       newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6197       break;
6198     case MINUS_EXPR:
6199       oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6200       newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6201       break;
6202     case BIT_AND_EXPR:
6203       oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6204       newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6205       break;
6206     case BIT_IOR_EXPR:
6207       oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6208       newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6209       break;
6210     case BIT_XOR_EXPR:
6211       oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6212       newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6213       break;
6214     default:
6215       return false;
6216     }
6217 
6218   /* Make sure the expression is of the proper form.  */
6219   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6220     rhs = gimple_assign_rhs2 (stmt);
6221   else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6222 	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6223     rhs = gimple_assign_rhs1 (stmt);
6224   else
6225     return false;
6226 
6227   tmpbase = ((enum built_in_function)
6228 	     ((need_new ? newbase : oldbase) + index + 1));
6229   decl = builtin_decl_explicit (tmpbase);
6230   if (decl == NULL_TREE)
6231     return false;
6232   itype = TREE_TYPE (TREE_TYPE (decl));
6233   imode = TYPE_MODE (itype);
6234 
6235   /* We could test all of the various optabs involved, but the fact of the
6236      matter is that (with the exception of i486 vs i586 and xadd) all targets
6237      that support any atomic operation optab also implement compare-and-swap.
6238      Let optabs.c take care of expanding any compare-and-swap loop.  */
6239   if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6240     return false;
6241 
6242   gsi = gsi_last_nondebug_bb (load_bb);
6243   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6244 
6245   /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6246      It only requires that the operation happen atomically.  Thus we can
6247      use the RELAXED memory model.  */
6248   call = build_call_expr_loc (loc, decl, 3, addr,
6249 			      fold_convert_loc (loc, itype, rhs),
6250 			      build_int_cst (NULL,
6251 					     seq_cst ? MEMMODEL_SEQ_CST
6252 						     : MEMMODEL_RELAXED));
6253 
6254   if (need_old || need_new)
6255     {
6256       lhs = need_old ? loaded_val : stored_val;
6257       call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6258       call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6259     }
6260   else
6261     call = fold_convert_loc (loc, void_type_node, call);
6262   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6263   gsi_remove (&gsi, true);
6264 
6265   gsi = gsi_last_nondebug_bb (store_bb);
6266   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6267   gsi_remove (&gsi, true);
6268   gsi = gsi_last_nondebug_bb (store_bb);
6269   stmt = gsi_stmt (gsi);
6270   gsi_remove (&gsi, true);
6271 
6272   if (gimple_in_ssa_p (cfun))
6273     {
6274       release_defs (stmt);
6275       update_ssa (TODO_update_ssa_no_phi);
6276     }
6277 
6278   return true;
6279 }
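
/* A rough user-level sketch of the PLUS_EXPR case above, assuming a
   4-byte int:

       #pragma omp atomic update
	 *p = *p + inc;

   becomes, in effect,

       __atomic_fetch_add (p, inc, __ATOMIC_RELAXED);

   and the ADD_FETCH (NEWBASE) variant is chosen instead when the captured
   value is the new one rather than the old one.  */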
6280 
6281 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6282 
6283       oldval = *addr;
6284       repeat:
6285 	newval = rhs;	 // with oldval replacing *addr in rhs
6286 	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6287 	if (oldval != newval)
6288 	  goto repeat;
6289 
6290    INDEX is log2 of the size of the data type, and thus usable to find the
6291    index of the builtin decl.  */
6292 
6293 static bool
6294 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6295 			    tree addr, tree loaded_val, tree stored_val,
6296 			    int index)
6297 {
6298   tree loadedi, storedi, initial, new_storedi, old_vali;
6299   tree type, itype, cmpxchg, iaddr, atype;
6300   gimple_stmt_iterator si;
6301   basic_block loop_header = single_succ (load_bb);
6302   gimple *phi, *stmt;
6303   edge e;
6304   enum built_in_function fncode;
6305 
6306   /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6307      order to use the RELAXED memory model effectively.  */
6308   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6309 				    + index + 1);
6310   cmpxchg = builtin_decl_explicit (fncode);
6311   if (cmpxchg == NULL_TREE)
6312     return false;
6313   type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6314   atype = type;
6315   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6316 
6317   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6318       || !can_atomic_load_p (TYPE_MODE (itype)))
6319     return false;
6320 
6321   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
6322   si = gsi_last_nondebug_bb (load_bb);
6323   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6324 
6325   /* For floating-point values, we'll need to view-convert them to integers
6326      so that we can perform the atomic compare and swap.  Simplify the
6327      following code by always setting up the "i"ntegral variables.  */
6328   if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6329     {
6330       tree iaddr_val;
6331 
6332       iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6333 							   true));
6334       atype = itype;
6335       iaddr_val
6336 	= force_gimple_operand_gsi (&si,
6337 				    fold_convert (TREE_TYPE (iaddr), addr),
6338 				    false, NULL_TREE, true, GSI_SAME_STMT);
6339       stmt = gimple_build_assign (iaddr, iaddr_val);
6340       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6341       loadedi = create_tmp_var (itype);
6342       if (gimple_in_ssa_p (cfun))
6343 	loadedi = make_ssa_name (loadedi);
6344     }
6345   else
6346     {
6347       iaddr = addr;
6348       loadedi = loaded_val;
6349     }
6350 
6351   fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6352   tree loaddecl = builtin_decl_explicit (fncode);
6353   if (loaddecl)
6354     initial
6355       = fold_convert (atype,
6356 		      build_call_expr (loaddecl, 2, iaddr,
6357 				       build_int_cst (NULL_TREE,
6358 						      MEMMODEL_RELAXED)));
6359   else
6360     {
6361       tree off
6362 	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6363 						      true), 0);
6364       initial = build2 (MEM_REF, atype, iaddr, off);
6365     }
6366 
6367   initial
6368     = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6369 				GSI_SAME_STMT);
6370 
6371   /* Move the value to the LOADEDI temporary.  */
6372   if (gimple_in_ssa_p (cfun))
6373     {
6374       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6375       phi = create_phi_node (loadedi, loop_header);
6376       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6377 	       initial);
6378     }
6379   else
6380     gsi_insert_before (&si,
6381 		       gimple_build_assign (loadedi, initial),
6382 		       GSI_SAME_STMT);
6383   if (loadedi != loaded_val)
6384     {
6385       gimple_stmt_iterator gsi2;
6386       tree x;
6387 
6388       x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6389       gsi2 = gsi_start_bb (loop_header);
6390       if (gimple_in_ssa_p (cfun))
6391 	{
6392 	  gassign *stmt;
6393 	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6394 					true, GSI_SAME_STMT);
6395 	  stmt = gimple_build_assign (loaded_val, x);
6396 	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6397 	}
6398       else
6399 	{
6400 	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6401 	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6402 				    true, GSI_SAME_STMT);
6403 	}
6404     }
6405   gsi_remove (&si, true);
6406 
6407   si = gsi_last_nondebug_bb (store_bb);
6408   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6409 
6410   if (iaddr == addr)
6411     storedi = stored_val;
6412   else
6413     storedi
6414       = force_gimple_operand_gsi (&si,
6415 				  build1 (VIEW_CONVERT_EXPR, itype,
6416 					  stored_val), true, NULL_TREE, true,
6417 				  GSI_SAME_STMT);
6418 
6419   /* Build the compare&swap statement.  */
6420   new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6421   new_storedi = force_gimple_operand_gsi (&si,
6422 					  fold_convert (TREE_TYPE (loadedi),
6423 							new_storedi),
6424 					  true, NULL_TREE,
6425 					  true, GSI_SAME_STMT);
6426 
6427   if (gimple_in_ssa_p (cfun))
6428     old_vali = loadedi;
6429   else
6430     {
6431       old_vali = create_tmp_var (TREE_TYPE (loadedi));
6432       stmt = gimple_build_assign (old_vali, loadedi);
6433       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6434 
6435       stmt = gimple_build_assign (loadedi, new_storedi);
6436       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6437     }
6438 
6439   /* Note that we always perform the comparison as an integer, even for
6440      floating point.  This allows the atomic operation to properly
6441      succeed even with NaNs and -0.0.  */
6442   tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6443   stmt = gimple_build_cond_empty (ne);
6444   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6445 
6446   /* Update cfg.  */
6447   e = single_succ_edge (store_bb);
6448   e->flags &= ~EDGE_FALLTHRU;
6449   e->flags |= EDGE_FALSE_VALUE;
6450   /* Expect no looping.  */
6451   e->probability = profile_probability::guessed_always ();
6452 
6453   e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6454   e->probability = profile_probability::guessed_never ();
6455 
6456   /* Copy the new value to loadedi (we already did that before the condition
6457      if we are not in SSA).  */
6458   if (gimple_in_ssa_p (cfun))
6459     {
6460       phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6461       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6462     }
6463 
6464   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
6465   gsi_remove (&si, true);
6466 
6467   struct loop *loop = alloc_loop ();
6468   loop->header = loop_header;
6469   loop->latch = store_bb;
6470   add_loop (loop, loop_header->loop_father);
6471 
6472   if (gimple_in_ssa_p (cfun))
6473     update_ssa (TODO_update_ssa_no_phi);
6474 
6475   return true;
6476 }
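
/* A rough user-level sketch of the loop built above, assuming a 4-byte
   float updated by an addition and viewed as an int for the
   compare-and-swap (bits_to_float and float_to_bits stand in for the
   VIEW_CONVERT_EXPRs):

       int old = *(int *) p;
       for (;;)
	 {
	   int desired = float_to_bits (bits_to_float (old) + inc);
	   int seen = __sync_val_compare_and_swap ((int *) p, old, desired);
	   if (seen == old)
	     break;
	   old = seen;
	 }  */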
6477 
6478 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6479 
6480 				  GOMP_atomic_start ();
6481 				  *addr = rhs;
6482 				  GOMP_atomic_end ();
6483 
6484    The result is not globally atomic, but works so long as all parallel
6485    references are within #pragma omp atomic directives.  According to
6486    responses received from omp@openmp.org, this appears to be within spec,
6487    which makes sense, since that's how several other compilers handle
6488    this situation as well.
6489    LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6490    expanding.  STORED_VAL is the operand of the matching
6491    GIMPLE_OMP_ATOMIC_STORE.
6492 
6493    We replace
6494    GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6495    loaded_val = *addr;
6496 
6497    and replace
6498    GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
6499    *addr = stored_val;
6500 */
6501 
6502 static bool
6503 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6504 			 tree addr, tree loaded_val, tree stored_val)
6505 {
6506   gimple_stmt_iterator si;
6507   gassign *stmt;
6508   tree t;
6509 
6510   si = gsi_last_nondebug_bb (load_bb);
6511   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6512 
6513   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6514   t = build_call_expr (t, 0);
6515   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6516 
6517   tree mem = build_simple_mem_ref (addr);
6518   TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6519   TREE_OPERAND (mem, 1)
6520     = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6521 						 true),
6522 		    TREE_OPERAND (mem, 1));
6523   stmt = gimple_build_assign (loaded_val, mem);
6524   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6525   gsi_remove (&si, true);
6526 
6527   si = gsi_last_nondebug_bb (store_bb);
6528   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6529 
6530   stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6531   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6532 
6533   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6534   t = build_call_expr (t, 0);
6535   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6536   gsi_remove (&si, true);
6537 
6538   if (gimple_in_ssa_p (cfun))
6539     update_ssa (TODO_update_ssa_no_phi);
6540   return true;
6541 }
6542 
6543 /* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand it
6544    using expand_omp_atomic_fetch_op.  If that fails, we try to
6545    call expand_omp_atomic_pipeline, and if that fails too, the
6546    ultimate fallback is wrapping the operation in a mutex
6547    (expand_omp_atomic_mutex).  REGION is the atomic region built
6548    by build_omp_regions_1().  */
6549 
6550 static void
6551 expand_omp_atomic (struct omp_region *region)
6552 {
6553   basic_block load_bb = region->entry, store_bb = region->exit;
6554   gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6555   gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6556   tree loaded_val = gimple_omp_atomic_load_lhs (load);
6557   tree addr = gimple_omp_atomic_load_rhs (load);
6558   tree stored_val = gimple_omp_atomic_store_val (store);
6559   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6560   HOST_WIDE_INT index;
6561 
6562   /* Make sure the type is one of the supported sizes.  */
6563   index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6564   index = exact_log2 (index);
6565   if (index >= 0 && index <= 4)
6566     {
6567       unsigned int align = TYPE_ALIGN_UNIT (type);
6568 
6569       /* __sync builtins require strict data alignment.  */
6570       if (exact_log2 (align) >= index)
6571 	{
6572 	  /* Atomic load.  */
6573 	  scalar_mode smode;
6574 	  if (loaded_val == stored_val
6575 	      && (is_int_mode (TYPE_MODE (type), &smode)
6576 		  || is_float_mode (TYPE_MODE (type), &smode))
6577 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6578 	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6579 	    return;
6580 
6581 	  /* Atomic store.  */
6582 	  if ((is_int_mode (TYPE_MODE (type), &smode)
6583 	       || is_float_mode (TYPE_MODE (type), &smode))
6584 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6585 	      && store_bb == single_succ (load_bb)
6586 	      && first_stmt (store_bb) == store
6587 	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
6588 					  stored_val, index))
6589 	    return;
6590 
6591 	  /* When possible, use specialized atomic update functions.  */
6592 	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6593 	      && store_bb == single_succ (load_bb)
6594 	      && expand_omp_atomic_fetch_op (load_bb, addr,
6595 					     loaded_val, stored_val, index))
6596 	    return;
6597 
6598 	  /* If we don't have specialized __sync builtins, try and implement
6599 	     as a compare and swap loop.  */
6600 	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6601 					  loaded_val, stored_val, index))
6602 	    return;
6603 	}
6604     }
6605 
6606   /* The ultimate fallback is wrapping the operation in a mutex.  */
6607   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6608 }
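
/* A worked example of the size checks above: for an 8-byte double,

       index = exact_log2 (8) = 3,

   so INDEX + 1 selects the ..._8 variants of the size-specific builtins,
   and the lock-free paths are attempted only when
   exact_log2 (TYPE_ALIGN_UNIT (type)) >= 3; otherwise everything falls
   through to the mutex fallback.  */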
6609 
6610 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6611    at REGION_EXIT.  */
6612 
6613 static void
6614 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6615 				   basic_block region_exit)
6616 {
6617   struct loop *outer = region_entry->loop_father;
6618   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6619 
6620   /* Don't parallelize the kernels region if it contains more than one outer
6621      loop.  */
6622   unsigned int nr_outer_loops = 0;
6623   struct loop *single_outer = NULL;
6624   for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6625     {
6626       gcc_assert (loop_outer (loop) == outer);
6627 
6628       if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6629 	continue;
6630 
6631       if (region_exit != NULL
6632 	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6633 	continue;
6634 
6635       nr_outer_loops++;
6636       single_outer = loop;
6637     }
6638   if (nr_outer_loops != 1)
6639     return;
6640 
6641   for (struct loop *loop = single_outer->inner;
6642        loop != NULL;
6643        loop = loop->inner)
6644     if (loop->next)
6645       return;
6646 
6647   /* Mark the loops in the region.  */
6648   for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6649     loop->in_oacc_kernels_region = true;
6650 }
6651 
6652 /* Types used to pass grid and workgroup sizes to kernel invocation.  */
6653 
6654 struct GTY(()) grid_launch_attributes_trees
6655 {
6656   tree kernel_dim_array_type;
6657   tree kernel_lattrs_dimnum_decl;
6658   tree kernel_lattrs_grid_decl;
6659   tree kernel_lattrs_group_decl;
6660   tree kernel_launch_attributes_type;
6661 };
6662 
6663 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6664 
6665 /* Create types used to pass kernel launch attributes to target.  */
6666 
6667 static void
6668 grid_create_kernel_launch_attr_types (void)
6669 {
6670   if (grid_attr_trees)
6671     return;
6672   grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6673 
6674   tree dim_arr_index_type
6675     = build_index_type (build_int_cst (integer_type_node, 2));
6676   grid_attr_trees->kernel_dim_array_type
6677     = build_array_type (uint32_type_node, dim_arr_index_type);
6678 
6679   grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6680   grid_attr_trees->kernel_lattrs_dimnum_decl
6681     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6682 		  uint32_type_node);
6683   DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6684 
6685   grid_attr_trees->kernel_lattrs_grid_decl
6686     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6687 		  grid_attr_trees->kernel_dim_array_type);
6688   DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6689     = grid_attr_trees->kernel_lattrs_dimnum_decl;
6690   grid_attr_trees->kernel_lattrs_group_decl
6691     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6692 		  grid_attr_trees->kernel_dim_array_type);
6693   DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6694     = grid_attr_trees->kernel_lattrs_grid_decl;
6695   finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6696 			 "__gomp_kernel_launch_attributes",
6697 			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6698 }
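
/* The record built above corresponds, roughly, to the C structure

       struct __gomp_kernel_launch_attributes
       {
	 uint32_t ndim;
	 uint32_t grid_size[3];
	 uint32_t group_size[3];
       };

   since the 0..2 index type gives three elements per dimension array.  */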
6699 
6700 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6701    array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
6702    of type uint32_type_node.  */
6703 
6704 static void
6705 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6706 			     tree fld_decl, int index, tree value)
6707 {
6708   tree ref = build4 (ARRAY_REF, uint32_type_node,
6709 		     build3 (COMPONENT_REF,
6710 			     grid_attr_trees->kernel_dim_array_type,
6711 			     range_var, fld_decl, NULL_TREE),
6712 		     build_int_cst (integer_type_node, index),
6713 		     NULL_TREE, NULL_TREE);
6714   gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6715 }
6716 
6717 /* Return a tree representation of a pointer to a structure with grid and
6718    work-group size information.  Statements filling that information will be
6719    inserted before GSI; TGT_STMT is the target statement which has the
6720    necessary information in it.  */
6721 
6722 static tree
6723 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6724 				       gomp_target *tgt_stmt)
6725 {
6726   grid_create_kernel_launch_attr_types ();
6727   tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6728 				"__kernel_launch_attrs");
6729 
6730   unsigned max_dim = 0;
6731   for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6732        clause;
6733        clause = OMP_CLAUSE_CHAIN (clause))
6734     {
6735       if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6736 	continue;
6737 
6738       unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6739       max_dim = MAX (dim, max_dim);
6740 
6741       grid_insert_store_range_dim (gsi, lattrs,
6742 				   grid_attr_trees->kernel_lattrs_grid_decl,
6743 				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6744       grid_insert_store_range_dim (gsi, lattrs,
6745 				   grid_attr_trees->kernel_lattrs_group_decl,
6746 				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6747     }
6748 
6749   tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6750 			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6751   gcc_checking_assert (max_dim <= 2);
6752   tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6753   gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6754 		     GSI_SAME_STMT);
6755   TREE_ADDRESSABLE (lattrs) = 1;
6756   return build_fold_addr_expr (lattrs);
6757 }
6758 
6759 /* Build target argument identifier from the DEVICE identifier, value
6760    identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
6761 
6762 static tree
6763 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6764 {
6765   tree t = build_int_cst (integer_type_node, device);
6766   if (subsequent_param)
6767     t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6768 		     build_int_cst (integer_type_node,
6769 				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6770   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6771 		   build_int_cst (integer_type_node, id));
6772   return t;
6773 }
6774 
6775 /* Like above but return it in a type that can be directly stored as an element
6776    of the argument array.  */
6777 
6778 static tree
6779 get_target_argument_identifier (int device, bool subsequent_param, int id)
6780 {
6781   tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6782   return fold_convert (ptr_type_node, t);
6783 }
6784 
6785 /* Return a target argument consisting of DEVICE identifier, value identifier
6786    ID, and the actual VALUE.  */
6787 
6788 static tree
6789 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6790 			   tree value)
6791 {
6792   tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6793 			fold_convert (integer_type_node, value),
6794 			build_int_cst (unsigned_type_node,
6795 				       GOMP_TARGET_ARG_VALUE_SHIFT));
6796   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6797 		   get_target_argument_identifier_1 (device, false, id));
6798   t = fold_convert (ptr_type_node, t);
6799   return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6800 }
6801 
6802 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6803    push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
6804    otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6805    arguments.  */
6806 
6807 static void
6808 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6809 					 int id, tree value, vec <tree> *args)
6810 {
6811   if (tree_fits_shwi_p (value)
6812       && tree_to_shwi (value) > -(1 << 15)
6813       && tree_to_shwi (value) < (1 << 15))
6814     args->quick_push (get_target_argument_value (gsi, device, id, value));
6815   else
6816     {
6817       args->quick_push (get_target_argument_identifier (device, true, id));
6818       value = fold_convert (ptr_type_node, value);
6819       value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6820 					GSI_SAME_STMT);
6821       args->quick_push (value);
6822     }
6823 }
6824 
6825 /* Create an array of arguments that is then passed to GOMP_target.  */
6826 
6827 static tree
6828 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6829 {
6830   auto_vec <tree, 6> args;
6831   tree clauses = gimple_omp_target_clauses (tgt_stmt);
6832   tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6833   if (c)
6834     t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6835   else
6836     t = integer_minus_one_node;
6837   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6838 					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6839 
6840   c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6841   if (c)
6842     t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6843   else
6844     t = integer_minus_one_node;
6845   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6846 					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
6847 					   &args);
6848 
6849   /* Add HSA-specific grid sizes, if available.  */
6850   if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6851 		       OMP_CLAUSE__GRIDDIM_))
6852     {
6853       int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6854       t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6855       args.quick_push (t);
6856       args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6857     }
6858 
6859   /* Produce more, perhaps device specific, arguments here.  */
6860 
6861   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6862 							  args.length () + 1),
6863 				  ".omp_target_args");
6864   for (unsigned i = 0; i < args.length (); i++)
6865     {
6866       tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6867 			 build_int_cst (integer_type_node, i),
6868 			 NULL_TREE, NULL_TREE);
6869       gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6870 			 GSI_SAME_STMT);
6871     }
6872   tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6873 		     build_int_cst (integer_type_node, args.length ()),
6874 		     NULL_TREE, NULL_TREE);
6875   gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6876 		     GSI_SAME_STMT);
6877   TREE_ADDRESSABLE (argarray) = 1;
6878   return build_fold_addr_expr (argarray);
6879 }
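
/* For a target region with default clause values and no HSA grid clauses,
   the array built above ends up laid out roughly as

       .omp_target_args[0] = <num_teams argument>;
       .omp_target_args[1] = <thread_limit argument>;
       .omp_target_args[2] = NULL;

   (the NULL acting as terminator), and its address is what later gets
   passed to the GOMP_target launch call.  */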
6880 
6881 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
6882 
6883 static void
6884 expand_omp_target (struct omp_region *region)
6885 {
6886   basic_block entry_bb, exit_bb, new_bb;
6887   struct function *child_cfun;
6888   tree child_fn, block, t;
6889   gimple_stmt_iterator gsi;
6890   gomp_target *entry_stmt;
6891   gimple *stmt;
6892   edge e;
6893   bool offloaded, data_region;
6894 
6895   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6896   new_bb = region->entry;
6897 
6898   offloaded = is_gimple_omp_offloaded (entry_stmt);
6899   switch (gimple_omp_target_kind (entry_stmt))
6900     {
6901     case GF_OMP_TARGET_KIND_REGION:
6902     case GF_OMP_TARGET_KIND_UPDATE:
6903     case GF_OMP_TARGET_KIND_ENTER_DATA:
6904     case GF_OMP_TARGET_KIND_EXIT_DATA:
6905     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6906     case GF_OMP_TARGET_KIND_OACC_KERNELS:
6907     case GF_OMP_TARGET_KIND_OACC_UPDATE:
6908     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6909     case GF_OMP_TARGET_KIND_OACC_DECLARE:
6910       data_region = false;
6911       break;
6912     case GF_OMP_TARGET_KIND_DATA:
6913     case GF_OMP_TARGET_KIND_OACC_DATA:
6914     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6915       data_region = true;
6916       break;
6917     default:
6918       gcc_unreachable ();
6919     }
6920 
6921   child_fn = NULL_TREE;
6922   child_cfun = NULL;
6923   if (offloaded)
6924     {
6925       child_fn = gimple_omp_target_child_fn (entry_stmt);
6926       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6927     }
6928 
6929   /* Supported by expand_omp_taskreg, but not here.  */
6930   if (child_cfun != NULL)
6931     gcc_checking_assert (!child_cfun->cfg);
6932   gcc_checking_assert (!gimple_in_ssa_p (cfun));
6933 
6934   entry_bb = region->entry;
6935   exit_bb = region->exit;
6936 
6937   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6938     {
6939       mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6940 
6941       /* Further down, both OpenACC kernels and OpenACC parallel constructs
6942 	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6943 	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
6944       DECL_ATTRIBUTES (child_fn)
6945 	= tree_cons (get_identifier ("oacc kernels"),
6946 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
6947     }
6948 
6949   if (offloaded)
6950     {
6951       unsigned srcidx, dstidx, num;
6952 
6953       /* If the offloading region needs data sent from the parent
6954 	 function, then the very first statement (except possible
6955 	 tree profile counter updates) of the offloading body
6956 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
6957 	 &.OMP_DATA_O is passed as an argument to the child function,
6958 	 we need to replace it with the argument as seen by the child
6959 	 function.
6960 
6961 	 In most cases, this will end up being the identity assignment
6962 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
6963 	 a function call that has been inlined, the original PARM_DECL
6964 	 .OMP_DATA_I may have been converted into a different local
6965 	 variable.  In which case, we need to keep the assignment.  */
6966       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6967       if (data_arg)
6968 	{
6969 	  basic_block entry_succ_bb = single_succ (entry_bb);
6970 	  gimple_stmt_iterator gsi;
6971 	  tree arg;
6972 	  gimple *tgtcopy_stmt = NULL;
6973 	  tree sender = TREE_VEC_ELT (data_arg, 0);
6974 
6975 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6976 	    {
6977 	      gcc_assert (!gsi_end_p (gsi));
6978 	      stmt = gsi_stmt (gsi);
6979 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
6980 		continue;
6981 
6982 	      if (gimple_num_ops (stmt) == 2)
6983 		{
6984 		  tree arg = gimple_assign_rhs1 (stmt);
6985 
6986 		  /* We're ignoring the subcode because we're
6987 		     effectively doing a STRIP_NOPS.  */
6988 
6989 		  if (TREE_CODE (arg) == ADDR_EXPR
6990 		      && TREE_OPERAND (arg, 0) == sender)
6991 		    {
6992 		      tgtcopy_stmt = stmt;
6993 		      break;
6994 		    }
6995 		}
6996 	    }
6997 
6998 	  gcc_assert (tgtcopy_stmt != NULL);
6999 	  arg = DECL_ARGUMENTS (child_fn);
7000 
7001 	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7002 	  gsi_remove (&gsi, true);
7003 	}
7004 
7005       /* Declare local variables needed in CHILD_CFUN.  */
7006       block = DECL_INITIAL (child_fn);
7007       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7008       /* The gimplifier could record temporaries in the offloading block
7009 	 rather than in containing function's local_decls chain,
7010 	 which would mean cgraph missed finalizing them.  Do it now.  */
7011       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7012 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7013 	  varpool_node::finalize_decl (t);
7014       DECL_SAVED_TREE (child_fn) = NULL;
7015       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
7016       gimple_set_body (child_fn, NULL);
7017       TREE_USED (block) = 1;
7018 
7019       /* Reset DECL_CONTEXT on function arguments.  */
7020       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7021 	DECL_CONTEXT (t) = child_fn;
7022 
7023       /* Split ENTRY_BB at GIMPLE_*,
7024 	 so that it can be moved to the child function.  */
7025       gsi = gsi_last_nondebug_bb (entry_bb);
7026       stmt = gsi_stmt (gsi);
7027       gcc_assert (stmt
7028 		  && gimple_code (stmt) == gimple_code (entry_stmt));
7029       e = split_block (entry_bb, stmt);
7030       gsi_remove (&gsi, true);
7031       entry_bb = e->dest;
7032       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7033 
7034       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
7035       if (exit_bb)
7036 	{
7037 	  gsi = gsi_last_nondebug_bb (exit_bb);
7038 	  gcc_assert (!gsi_end_p (gsi)
7039 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7040 	  stmt = gimple_build_return (NULL);
7041 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7042 	  gsi_remove (&gsi, true);
7043 	}
7044 
7045       /* Make sure to generate early debug for the function before
7046          outlining anything.  */
7047       if (! gimple_in_ssa_p (cfun))
7048 	(*debug_hooks->early_global_decl) (cfun->decl);
7049 
7050       /* Move the offloading region into CHILD_CFUN.  */
7051 
7052       block = gimple_block (entry_stmt);
7053 
7054       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7055       if (exit_bb)
7056 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7057       /* When the OMP expansion process cannot guarantee an up-to-date
7058 	 loop tree, arrange for the child function to fix up loops.  */
7059       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7060 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7061 
7062       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
7063       num = vec_safe_length (child_cfun->local_decls);
7064       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7065 	{
7066 	  t = (*child_cfun->local_decls)[srcidx];
7067 	  if (DECL_CONTEXT (t) == cfun->decl)
7068 	    continue;
7069 	  if (srcidx != dstidx)
7070 	    (*child_cfun->local_decls)[dstidx] = t;
7071 	  dstidx++;
7072 	}
7073       if (dstidx != num)
7074 	vec_safe_truncate (child_cfun->local_decls, dstidx);
7075 
7076       /* Inform the callgraph about the new function.  */
7077       child_cfun->curr_properties = cfun->curr_properties;
7078       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7079       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7080       cgraph_node *node = cgraph_node::get_create (child_fn);
7081       node->parallelized_function = 1;
7082       cgraph_node::add_new_function (child_fn, true);
7083 
7084       /* Add the new function to the offload table.  */
7085       if (ENABLE_OFFLOADING)
7086 	{
7087 	  if (in_lto_p)
7088 	    DECL_PRESERVE_P (child_fn) = 1;
7089 	  vec_safe_push (offload_funcs, child_fn);
7090 	}
7091 
7092       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7093 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7094 
7095       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
7096 	 fixed in a following pass.  */
7097       push_cfun (child_cfun);
7098       if (need_asm)
7099 	assign_assembler_name_if_needed (child_fn);
7100       cgraph_edge::rebuild_edges ();
7101 
7102       /* Some EH regions might become dead, see PR34608.  If
7103 	 pass_cleanup_cfg isn't the first pass to happen with the
7104 	 new child, these dead EH edges might cause problems.
7105 	 Clean them up now.  */
7106       if (flag_exceptions)
7107 	{
7108 	  basic_block bb;
7109 	  bool changed = false;
7110 
7111 	  FOR_EACH_BB_FN (bb, cfun)
7112 	    changed |= gimple_purge_dead_eh_edges (bb);
7113 	  if (changed)
7114 	    cleanup_tree_cfg ();
7115 	}
7116       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7117 	verify_loop_structure ();
7118       pop_cfun ();
7119 
7120       if (dump_file && !gimple_in_ssa_p (cfun))
7121 	{
7122 	  omp_any_child_fn_dumped = true;
7123 	  dump_function_header (dump_file, child_fn, dump_flags);
7124 	  dump_function_to_file (child_fn, dump_file, dump_flags);
7125 	}
7126     }
7127 
7128   /* Emit a library call to launch the offloading region, or do data
7129      transfers.  */
7130   tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7131   enum built_in_function start_ix;
7132   location_t clause_loc;
7133   unsigned int flags_i = 0;
7134 
7135   switch (gimple_omp_target_kind (entry_stmt))
7136     {
7137     case GF_OMP_TARGET_KIND_REGION:
7138       start_ix = BUILT_IN_GOMP_TARGET;
7139       break;
7140     case GF_OMP_TARGET_KIND_DATA:
7141       start_ix = BUILT_IN_GOMP_TARGET_DATA;
7142       break;
7143     case GF_OMP_TARGET_KIND_UPDATE:
7144       start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7145       break;
7146     case GF_OMP_TARGET_KIND_ENTER_DATA:
7147       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7148       break;
7149     case GF_OMP_TARGET_KIND_EXIT_DATA:
7150       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7151       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7152       break;
7153     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7154     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7155       start_ix = BUILT_IN_GOACC_PARALLEL;
7156       break;
7157     case GF_OMP_TARGET_KIND_OACC_DATA:
7158     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7159       start_ix = BUILT_IN_GOACC_DATA_START;
7160       break;
7161     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7162       start_ix = BUILT_IN_GOACC_UPDATE;
7163       break;
7164     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7165       start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7166       break;
7167     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7168       start_ix = BUILT_IN_GOACC_DECLARE;
7169       break;
7170     default:
7171       gcc_unreachable ();
7172     }
7173 
7174   clauses = gimple_omp_target_clauses (entry_stmt);
7175 
7176   /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7177      library choose) and there is no conditional.  */
7178   cond = NULL_TREE;
7179   device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7180 
7181   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7182   if (c)
7183     cond = OMP_CLAUSE_IF_EXPR (c);
7184 
7185   c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7186   if (c)
7187     {
7188       /* Even if we pass it to all library function calls, it is currently only
7189 	 defined/used for the OpenMP target ones.  */
7190       gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7191 			   || start_ix == BUILT_IN_GOMP_TARGET_DATA
7192 			   || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7193 			   || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7194 
7195       device = OMP_CLAUSE_DEVICE_ID (c);
7196       clause_loc = OMP_CLAUSE_LOCATION (c);
7197     }
7198   else
7199     clause_loc = gimple_location (entry_stmt);
7200 
7201   c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7202   if (c)
7203     flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7204 
7205   /* Ensure 'device' is of the correct type.  */
7206   device = fold_convert_loc (clause_loc, integer_type_node, device);
7207 
7208   /* If we found the clause 'if (cond)', build
7209      (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
7210   if (cond)
7211     {
7212       cond = gimple_boolify (cond);
7213 
7214       basic_block cond_bb, then_bb, else_bb;
7215       edge e;
7216       tree tmp_var;
7217 
7218       tmp_var = create_tmp_var (TREE_TYPE (device));
7219       if (offloaded)
7220 	e = split_block_after_labels (new_bb);
7221       else
7222 	{
7223 	  gsi = gsi_last_nondebug_bb (new_bb);
7224 	  gsi_prev (&gsi);
7225 	  e = split_block (new_bb, gsi_stmt (gsi));
7226 	}
7227       cond_bb = e->src;
7228       new_bb = e->dest;
7229       remove_edge (e);
7230 
7231       then_bb = create_empty_bb (cond_bb);
7232       else_bb = create_empty_bb (then_bb);
7233       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7234       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7235 
7236       stmt = gimple_build_cond_empty (cond);
7237       gsi = gsi_last_bb (cond_bb);
7238       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7239 
7240       gsi = gsi_start_bb (then_bb);
7241       stmt = gimple_build_assign (tmp_var, device);
7242       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7243 
7244       gsi = gsi_start_bb (else_bb);
7245       stmt = gimple_build_assign (tmp_var,
7246 				  build_int_cst (integer_type_node,
7247 						 GOMP_DEVICE_HOST_FALLBACK));
7248       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7249 
7250       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7251       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7252       add_bb_to_loop (then_bb, cond_bb->loop_father);
7253       add_bb_to_loop (else_bb, cond_bb->loop_father);
7254       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7255       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7256 
7257       device = tmp_var;
7258       gsi = gsi_last_nondebug_bb (new_bb);
7259     }
7260   else
7261     {
7262       gsi = gsi_last_nondebug_bb (new_bb);
7263       device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7264 					 true, GSI_SAME_STMT);
7265     }
7266 
7267   t = gimple_omp_target_data_arg (entry_stmt);
7268   if (t == NULL)
7269     {
7270       t1 = size_zero_node;
7271       t2 = build_zero_cst (ptr_type_node);
7272       t3 = t2;
7273       t4 = t2;
7274     }
7275   else
7276     {
7277       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7278       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7279       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7280       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7281       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7282     }
7283 
7284   gimple *g;
7285   bool tagging = false;
7286   /* The maximum number used by any start_ix, without varargs.  */
7287   auto_vec<tree, 11> args;
7288   args.quick_push (device);
7289   if (offloaded)
7290     args.quick_push (build_fold_addr_expr (child_fn));
7291   args.quick_push (t1);
7292   args.quick_push (t2);
7293   args.quick_push (t3);
7294   args.quick_push (t4);
7295   switch (start_ix)
7296     {
7297     case BUILT_IN_GOACC_DATA_START:
7298     case BUILT_IN_GOACC_DECLARE:
7299     case BUILT_IN_GOMP_TARGET_DATA:
7300       break;
7301     case BUILT_IN_GOMP_TARGET:
7302     case BUILT_IN_GOMP_TARGET_UPDATE:
7303     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7304       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7305       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7306       if (c)
7307 	depend = OMP_CLAUSE_DECL (c);
7308       else
7309 	depend = build_int_cst (ptr_type_node, 0);
7310       args.quick_push (depend);
7311       if (start_ix == BUILT_IN_GOMP_TARGET)
7312 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
7313       break;
7314     case BUILT_IN_GOACC_PARALLEL:
7315       oacc_set_fn_attrib (child_fn, clauses, &args);
7316       tagging = true;
7317       /* FALLTHRU */
7318     case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7319     case BUILT_IN_GOACC_UPDATE:
7320       {
7321 	tree t_async = NULL_TREE;
7322 
7323 	/* If present, use the value specified by the respective
7324 	   clause, making sure that is of the correct type.  */
7325 	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7326 	if (c)
7327 	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7328 				      integer_type_node,
7329 				      OMP_CLAUSE_ASYNC_EXPR (c));
7330 	else if (!tagging)
7331 	  /* Default values for t_async.  */
7332 	  t_async = fold_convert_loc (gimple_location (entry_stmt),
7333 				      integer_type_node,
7334 				      build_int_cst (integer_type_node,
7335 						     GOMP_ASYNC_SYNC));
7336 	if (tagging && t_async)
7337 	  {
7338 	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7339 
7340 	    if (TREE_CODE (t_async) == INTEGER_CST)
7341 	      {
7342 		/* See if we can pack the async arg in to the tag's
7343 		   operand.  */
7344 		i_async = TREE_INT_CST_LOW (t_async);
7345 		if (i_async < GOMP_LAUNCH_OP_MAX)
7346 		  t_async = NULL_TREE;
7347 		else
7348 		  i_async = GOMP_LAUNCH_OP_MAX;
7349 	      }
7350 	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7351 					      i_async));
7352 	  }
7353 	if (t_async)
7354 	  args.safe_push (t_async);
7355 
7356 	/* Save the argument index, and ... */
7357 	unsigned t_wait_idx = args.length ();
7358 	unsigned num_waits = 0;
7359 	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7360 	if (!tagging || c)
7361 	  /* ... push a placeholder.  */
7362 	  args.safe_push (integer_zero_node);
7363 
7364 	for (; c; c = OMP_CLAUSE_CHAIN (c))
7365 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7366 	    {
7367 	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7368 						integer_type_node,
7369 						OMP_CLAUSE_WAIT_EXPR (c)));
7370 	      num_waits++;
7371 	    }
7372 
7373 	if (!tagging || num_waits)
7374 	  {
7375 	    tree len;
7376 
7377 	    /* Now that we know the number, update the placeholder.  */
7378 	    if (tagging)
7379 	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7380 	    else
7381 	      len = build_int_cst (integer_type_node, num_waits);
7382 	    len = fold_convert_loc (gimple_location (entry_stmt),
7383 				    unsigned_type_node, len);
7384 	    args[t_wait_idx] = len;
7385 	  }
7386       }
7387       break;
7388     default:
7389       gcc_unreachable ();
7390     }
7391   if (tagging)
7392     /*  Push terminal marker - zero.  */
7393     args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7394 
7395   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7396   gimple_set_location (g, gimple_location (entry_stmt));
7397   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7398   if (!offloaded)
7399     {
7400       g = gsi_stmt (gsi);
7401       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7402       gsi_remove (&gsi, true);
7403     }
7404   if (data_region && region->exit)
7405     {
7406       gsi = gsi_last_nondebug_bb (region->exit);
7407       g = gsi_stmt (gsi);
7408       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7409       gsi_remove (&gsi, true);
7410     }
7411 }
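
/* For an offloaded region the call vector assembled above therefore has
   the rough shape

       device, &child_fn, T1, T2, T3, T4, flags, depend, target-args

   where T1 is the number of mapped entries and T2..T4 are the addresses of
   the three mapping arrays (roughly: host addresses, sizes, kinds);
   non-offloading variants such as BUILT_IN_GOMP_TARGET_DATA stop after
   T4.  */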
7412 
7413 /* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
7414    iteration variable derived from the thread number.  INTRA_GROUP means this
7415    is an expansion of a loop iterating over work-items within a separate
7416    iteration over groups.  */
7417 
7418 static void
7419 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7420 {
7421   gimple_stmt_iterator gsi;
7422   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7423   gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7424 		       == GF_OMP_FOR_KIND_GRID_LOOP);
7425   size_t collapse = gimple_omp_for_collapse (for_stmt);
7426   struct omp_for_data_loop *loops
7427     = XALLOCAVEC (struct omp_for_data_loop,
7428 		  gimple_omp_for_collapse (for_stmt));
7429   struct omp_for_data fd;
7430 
7431   remove_edge (BRANCH_EDGE (kfor->entry));
7432   basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7433 
7434   gcc_assert (kfor->cont);
7435   omp_extract_for_data (for_stmt, &fd, loops);
7436 
7437   gsi = gsi_start_bb (body_bb);
7438 
7439   for (size_t dim = 0; dim < collapse; dim++)
7440     {
7441       tree type, itype;
7442       itype = type = TREE_TYPE (fd.loops[dim].v);
7443       if (POINTER_TYPE_P (type))
7444 	itype = signed_type_for (type);
7445 
7446       tree n1 = fd.loops[dim].n1;
7447       tree step = fd.loops[dim].step;
7448       n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7449 				     true, NULL_TREE, true, GSI_SAME_STMT);
7450       step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7451 				       true, NULL_TREE, true, GSI_SAME_STMT);
7452       tree threadid;
7453       if (gimple_omp_for_grid_group_iter (for_stmt))
7454 	{
7455 	  gcc_checking_assert (!intra_group);
7456 	  threadid = build_call_expr (builtin_decl_explicit
7457 				      (BUILT_IN_HSA_WORKGROUPID), 1,
7458 				      build_int_cstu (unsigned_type_node, dim));
7459 	}
7460       else if (intra_group)
7461 	threadid = build_call_expr (builtin_decl_explicit
7462 				    (BUILT_IN_HSA_WORKITEMID), 1,
7463 				    build_int_cstu (unsigned_type_node, dim));
7464       else
7465 	threadid = build_call_expr (builtin_decl_explicit
7466 				    (BUILT_IN_HSA_WORKITEMABSID), 1,
7467 				    build_int_cstu (unsigned_type_node, dim));
7468       threadid = fold_convert (itype, threadid);
7469       threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7470 					   true, GSI_SAME_STMT);
7471 
7472       tree startvar = fd.loops[dim].v;
7473       tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7474       if (POINTER_TYPE_P (type))
7475 	t = fold_build_pointer_plus (n1, t);
7476       else
7477 	t = fold_build2 (PLUS_EXPR, type, t, n1);
7478       t = fold_convert (type, t);
7479       t = force_gimple_operand_gsi (&gsi, t,
7480 				    DECL_P (startvar)
7481 				    && TREE_ADDRESSABLE (startvar),
7482 				    NULL_TREE, true, GSI_SAME_STMT);
7483       gassign *assign_stmt = gimple_build_assign (startvar, t);
7484       gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7485     }
7486   /* Remove the omp for statement.  */
7487   gsi = gsi_last_nondebug_bb (kfor->entry);
7488   gsi_remove (&gsi, true);
7489 
7490   /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7491   gsi = gsi_last_nondebug_bb (kfor->cont);
7492   gcc_assert (!gsi_end_p (gsi)
7493 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7494   gsi_remove (&gsi, true);
7495 
7496   /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
7497   gsi = gsi_last_nondebug_bb (kfor->exit);
7498   gcc_assert (!gsi_end_p (gsi)
7499 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7500   if (intra_group)
7501     gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7502   gsi_remove (&gsi, true);
7503 
7504   /* Fixup the much simpler CFG.  */
7505   remove_edge (find_edge (kfor->cont, body_bb));
7506 
7507   if (kfor->cont != body_bb)
7508     set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7509   set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7510 }
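
/* After the rewrite above there is no loop left: each work-item executes
   the body once, with its induction variable initialized roughly as

       v = n1 + (type) <id> * step;

   where <id> comes from BUILT_IN_HSA_WORKITEMABSID for a flat kernel,
   BUILT_IN_HSA_WORKITEMID within a group, or BUILT_IN_HSA_WORKGROUPID when
   iterating over groups.  */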
7511 
7512 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7513    argument_decls.  */
7514 
7515 struct grid_arg_decl_map
7516 {
7517   tree old_arg;
7518   tree new_arg;
7519 };
7520 
7521 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7522    pertaining to the kernel function.  */
7523 
7524 static tree
7525 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7526 {
7527   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7528   struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7529   tree t = *tp;
7530 
7531   if (t == adm->old_arg)
7532     *tp = adm->new_arg;
7533   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7534   return NULL_TREE;
7535 }
7536 
7537 /* If TARGET region contains a kernel body for loop, remove its region from the
7538    TARGET and expand it in HSA gridified kernel fashion.  */
7539 
7540 static void
7541 grid_expand_target_grid_body (struct omp_region *target)
7542 {
7543   if (!hsa_gen_requested_p ())
7544     return;
7545 
7546   gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7547   struct omp_region **pp;
7548 
7549   for (pp = &target->inner; *pp; pp = &(*pp)->next)
7550     if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7551       break;
7552 
7553   struct omp_region *gpukernel = *pp;
7554 
7555   tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7556   if (!gpukernel)
7557     {
7558       /* HSA cannot handle OACC stuff.  */
7559       if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7560 	return;
7561       gcc_checking_assert (orig_child_fndecl);
7562       gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7563 				    OMP_CLAUSE__GRIDDIM_));
7564       cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7565 
7566       hsa_register_kernel (n);
7567       return;
7568     }
7569 
7570   gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7571 			       OMP_CLAUSE__GRIDDIM_));
7572   tree inside_block
7573     = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7574   *pp = gpukernel->next;
7575   for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7576     if ((*pp)->type == GIMPLE_OMP_FOR)
7577       break;
7578 
7579   struct omp_region *kfor = *pp;
7580   gcc_assert (kfor);
7581   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7582   gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7583   *pp = kfor->next;
7584   if (kfor->inner)
7585     {
7586       if (gimple_omp_for_grid_group_iter (for_stmt))
7587 	{
7588 	  struct omp_region **next_pp;
7589 	  for (pp = &kfor->inner; *pp; pp = next_pp)
7590 	    {
7591 	      next_pp = &(*pp)->next;
7592 	      if ((*pp)->type != GIMPLE_OMP_FOR)
7593 		continue;
7594 	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7595 	      gcc_assert (gimple_omp_for_kind (inner)
7596 			  == GF_OMP_FOR_KIND_GRID_LOOP);
7597 	      grid_expand_omp_for_loop (*pp, true);
7598 	      *pp = (*pp)->next;
7599 	      next_pp = pp;
7600 	    }
7601 	}
7602       expand_omp (kfor->inner);
7603     }
7604   if (gpukernel->inner)
7605     expand_omp (gpukernel->inner);
7606 
7607   tree kern_fndecl = copy_node (orig_child_fndecl);
7608   DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7609   SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7610   tree tgtblock = gimple_block (tgt_stmt);
7611   tree fniniblock = make_node (BLOCK);
7612   BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7613   BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7614   BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7615   BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7616   DECL_INITIAL (kern_fndecl) = fniniblock;
7617   push_struct_function (kern_fndecl);
7618   cfun->function_end_locus = gimple_location (tgt_stmt);
7619   init_tree_ssa (cfun);
7620   pop_cfun ();
7621 
7622   /* Make sure to generate early debug for the function before
7623      outlining anything.  */
7624   if (! gimple_in_ssa_p (cfun))
7625     (*debug_hooks->early_global_decl) (cfun->decl);
7626 
7627   tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7628   gcc_assert (!DECL_CHAIN (old_parm_decl));
7629   tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7630   DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7631   DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7632   gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7633   DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7634   DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7635   struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7636   kern_cfun->curr_properties = cfun->curr_properties;
7637 
7638   grid_expand_omp_for_loop (kfor, false);
7639 
7640   /* Remove the omp for statement.  */
7641   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7642   gsi_remove (&gsi, true);
7643   /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7644      return.  */
7645   gsi = gsi_last_nondebug_bb (gpukernel->exit);
7646   gcc_assert (!gsi_end_p (gsi)
7647 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7648   gimple *ret_stmt = gimple_build_return (NULL);
7649   gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7650   gsi_remove (&gsi, true);
7651 
7652   /* Statements in the first BB of the target construct have been produced by
7653      target lowering and must be copied into the GPUKERNEL, with two
7654      exceptions: the first OMP statement and the OMP_DATA assignment
7655      statement.  */
7656   gsi = gsi_start_bb (single_succ (gpukernel->entry));
7657   tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7658   tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7659   for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7660        !gsi_end_p (tsi); gsi_next (&tsi))
7661     {
7662       gimple *stmt = gsi_stmt (tsi);
7663       if (is_gimple_omp (stmt))
7664 	break;
7665       if (sender
7666 	  && is_gimple_assign (stmt)
7667 	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7668 	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7669 	continue;
7670       gimple *copy = gimple_copy (stmt);
7671       gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7672       gimple_set_block (copy, fniniblock);
7673     }
7674 
7675   move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7676 			  gpukernel->exit, inside_block);
7677 
7678   cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7679   kcn->mark_force_output ();
7680   cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7681 
7682   hsa_register_kernel (kcn, orig_child);
7683 
7684   cgraph_node::add_new_function (kern_fndecl, true);
7685   push_cfun (kern_cfun);
7686   cgraph_edge::rebuild_edges ();
7687 
7688   /* Re-map any mention of the PARM_DECL of the original function to the
7689      PARM_DECL of the new one.
7690 
7691      TODO: It would be great if lowering produced references into the GPU
7692      kernel decl straight away and we did not have to do this.  */
7693   struct grid_arg_decl_map adm;
7694   adm.old_arg = old_parm_decl;
7695   adm.new_arg = new_parm_decl;
7696   basic_block bb;
7697   FOR_EACH_BB_FN (bb, kern_cfun)
7698     {
7699       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7700 	{
7701 	  gimple *stmt = gsi_stmt (gsi);
7702 	  struct walk_stmt_info wi;
7703 	  memset (&wi, 0, sizeof (wi));
7704 	  wi.info = &adm;
7705 	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7706 	}
7707     }
7708   pop_cfun ();
7709 
7710   return;
7711 }
7712 
7713 /* Expand the OMP region tree rooted at REGION.  Expansion
7714    proceeds in depth-first order.  Innermost regions are expanded
7715    first.  This way, parallel regions that require a new function to
7716    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7717    internal dependencies in their body.  */
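/* For instance, for nested regions such as

       #pragma omp parallel
	 #pragma omp for
	 ...

   the inner GIMPLE_OMP_FOR region is expanded before the enclosing
   GIMPLE_OMP_PARALLEL one, so by the time the parallel body is outlined into
   its child function it no longer contains any OMP directives.  */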
7718 
7719 static void
7720 expand_omp (struct omp_region *region)
7721 {
7722   omp_any_child_fn_dumped = false;
7723   while (region)
7724     {
7725       location_t saved_location;
7726       gimple *inner_stmt = NULL;
7727 
7728       /* First, determine whether this is a combined parallel+workshare
7729 	 region.  */
7730       if (region->type == GIMPLE_OMP_PARALLEL)
7731 	determine_parallel_type (region);
7732       else if (region->type == GIMPLE_OMP_TARGET)
7733 	grid_expand_target_grid_body (region);
7734 
7735       if (region->type == GIMPLE_OMP_FOR
7736 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
7737 	inner_stmt = last_stmt (region->inner->entry);
7738 
7739       if (region->inner)
7740 	expand_omp (region->inner);
7741 
7742       saved_location = input_location;
7743       if (gimple_has_location (last_stmt (region->entry)))
7744 	input_location = gimple_location (last_stmt (region->entry));
7745 
7746       switch (region->type)
7747 	{
7748 	case GIMPLE_OMP_PARALLEL:
7749 	case GIMPLE_OMP_TASK:
7750 	  expand_omp_taskreg (region);
7751 	  break;
7752 
7753 	case GIMPLE_OMP_FOR:
7754 	  expand_omp_for (region, inner_stmt);
7755 	  break;
7756 
7757 	case GIMPLE_OMP_SECTIONS:
7758 	  expand_omp_sections (region);
7759 	  break;
7760 
7761 	case GIMPLE_OMP_SECTION:
7762 	  /* Individual omp sections are handled together with their
7763 	     parent GIMPLE_OMP_SECTIONS region.  */
7764 	  break;
7765 
7766 	case GIMPLE_OMP_SINGLE:
7767 	  expand_omp_single (region);
7768 	  break;
7769 
7770 	case GIMPLE_OMP_ORDERED:
7771 	  {
7772 	    gomp_ordered *ord_stmt
7773 	      = as_a <gomp_ordered *> (last_stmt (region->entry));
7774 	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7775 				 OMP_CLAUSE_DEPEND))
7776 	      {
7777 		/* We'll expand these when expanding the corresponding
7778 		   worksharing region with an ordered(n) clause.  */
7779 		gcc_assert (region->outer
7780 			    && region->outer->type == GIMPLE_OMP_FOR);
7781 		region->ord_stmt = ord_stmt;
7782 		break;
7783 	      }
7784 	  }
7785 	  /* FALLTHRU */
7786 	case GIMPLE_OMP_MASTER:
7787 	case GIMPLE_OMP_TASKGROUP:
7788 	case GIMPLE_OMP_CRITICAL:
7789 	case GIMPLE_OMP_TEAMS:
7790 	  expand_omp_synch (region);
7791 	  break;
7792 
7793 	case GIMPLE_OMP_ATOMIC_LOAD:
7794 	  expand_omp_atomic (region);
7795 	  break;
7796 
7797 	case GIMPLE_OMP_TARGET:
7798 	  expand_omp_target (region);
7799 	  break;
7800 
7801 	default:
7802 	  gcc_unreachable ();
7803 	}
7804 
7805       input_location = saved_location;
7806       region = region->next;
7807     }
7808   if (omp_any_child_fn_dumped)
7809     {
7810       if (dump_file)
7811 	dump_function_header (dump_file, current_function_decl, dump_flags);
7812       omp_any_child_fn_dumped = false;
7813     }
7814 }
7815 
7816 /* Helper for build_omp_regions.  Scan the dominator tree starting at
7817    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
7818    true, the function ends once a single tree is built (otherwise, a whole
7819    forest of OMP constructs may be built).  */
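/* As a sketch of the resulting shape: for

       #pragma omp parallel
       {
	 #pragma omp for ...
	 #pragma omp single ...
       }

   one GIMPLE_OMP_PARALLEL region is built, and the GIMPLE_OMP_FOR and
   GIMPLE_OMP_SINGLE regions become its children, reachable through its
   `inner' field and chained to one another through `next'.  */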
7820 
7821 static void
7822 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7823 		     bool single_tree)
7824 {
7825   gimple_stmt_iterator gsi;
7826   gimple *stmt;
7827   basic_block son;
7828 
7829   gsi = gsi_last_nondebug_bb (bb);
7830   if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7831     {
7832       struct omp_region *region;
7833       enum gimple_code code;
7834 
7835       stmt = gsi_stmt (gsi);
7836       code = gimple_code (stmt);
7837       if (code == GIMPLE_OMP_RETURN)
7838 	{
7839 	  /* STMT is the return point out of region PARENT.  Mark it
7840 	     as the exit point and make PARENT the immediately
7841 	     enclosing region.  */
7842 	  gcc_assert (parent);
7843 	  region = parent;
7844 	  region->exit = bb;
7845 	  parent = parent->outer;
7846 	}
7847       else if (code == GIMPLE_OMP_ATOMIC_STORE)
7848 	{
7849 	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7850 	     GIMPLE_OMP_RETURN, but matches with
7851 	     GIMPLE_OMP_ATOMIC_LOAD.  */
7852 	  gcc_assert (parent);
7853 	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7854 	  region = parent;
7855 	  region->exit = bb;
7856 	  parent = parent->outer;
7857 	}
7858       else if (code == GIMPLE_OMP_CONTINUE)
7859 	{
7860 	  gcc_assert (parent);
7861 	  parent->cont = bb;
7862 	}
7863       else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7864 	{
7865 	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7866 	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
7867 	}
7868       else
7869 	{
7870 	  region = new_omp_region (bb, code, parent);
7871 	  /* Otherwise...  */
7872 	  if (code == GIMPLE_OMP_TARGET)
7873 	    {
7874 	      switch (gimple_omp_target_kind (stmt))
7875 		{
7876 		case GF_OMP_TARGET_KIND_REGION:
7877 		case GF_OMP_TARGET_KIND_DATA:
7878 		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7879 		case GF_OMP_TARGET_KIND_OACC_KERNELS:
7880 		case GF_OMP_TARGET_KIND_OACC_DATA:
7881 		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7882 		  break;
7883 		case GF_OMP_TARGET_KIND_UPDATE:
7884 		case GF_OMP_TARGET_KIND_ENTER_DATA:
7885 		case GF_OMP_TARGET_KIND_EXIT_DATA:
7886 		case GF_OMP_TARGET_KIND_OACC_UPDATE:
7887 		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7888 		case GF_OMP_TARGET_KIND_OACC_DECLARE:
7889 		  /* ..., other than for those stand-alone directives...  */
7890 		  region = NULL;
7891 		  break;
7892 		default:
7893 		  gcc_unreachable ();
7894 		}
7895 	    }
7896 	  else if (code == GIMPLE_OMP_ORDERED
7897 		   && omp_find_clause (gimple_omp_ordered_clauses
7898 					 (as_a <gomp_ordered *> (stmt)),
7899 				       OMP_CLAUSE_DEPEND))
7900 	    /* #pragma omp ordered depend is also just a stand-alone
7901 	       directive.  */
7902 	    region = NULL;
7903 	  /* ..., this directive becomes the parent for a new region.  */
7904 	  if (region)
7905 	    parent = region;
7906 	}
7907     }
7908 
7909   if (single_tree && !parent)
7910     return;
7911 
7912   for (son = first_dom_son (CDI_DOMINATORS, bb);
7913        son;
7914        son = next_dom_son (CDI_DOMINATORS, son))
7915     build_omp_regions_1 (son, parent, single_tree);
7916 }
7917 
7918 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7919    root_omp_region.  */
7920 
7921 static void
7922 build_omp_regions_root (basic_block root)
7923 {
7924   gcc_assert (root_omp_region == NULL);
7925   build_omp_regions_1 (root, NULL, true);
7926   gcc_assert (root_omp_region != NULL);
7927 }
7928 
7929 /* Expand the OMP construct (and its subconstructs) rooted at HEAD.  */
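/* This allows callers outside the regular ompexp pass (e.g. the
   auto-parallelization code in tree-parloops.c, which creates OMP constructs
   late) to expand just the construct rooted at HEAD.  */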
7930 
7931 void
7932 omp_expand_local (basic_block head)
7933 {
7934   build_omp_regions_root (head);
7935   if (dump_file && (dump_flags & TDF_DETAILS))
7936     {
7937       fprintf (dump_file, "\nOMP region tree\n\n");
7938       dump_omp_region (dump_file, root_omp_region, 0);
7939       fprintf (dump_file, "\n");
7940     }
7941 
7942   remove_exit_barriers (root_omp_region);
7943   expand_omp (root_omp_region);
7944 
7945   omp_free_regions ();
7946 }
7947 
7948 /* Scan the CFG and build a tree of OMP regions, storing the result in
7949    root_omp_region.  */
7950 
7951 static void
7952 build_omp_regions (void)
7953 {
7954   gcc_assert (root_omp_region == NULL);
7955   calculate_dominance_info (CDI_DOMINATORS);
7956   build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7957 }
7958 
7959 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
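/* As a rough example of the overall effect (details vary with clauses and
   the target), a construct such as

       #pragma omp parallel
	 body;

   has its body outlined into a child function (named along the lines of
   foo._omp_fn.0) while the directive itself becomes a call to the libgomp
   entry point GOMP_parallel, passing that child function and a pointer to
   the shared-data record built during lowering.  */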
7960 
7961 static unsigned int
7962 execute_expand_omp (void)
7963 {
7964   build_omp_regions ();
7965 
7966   if (!root_omp_region)
7967     return 0;
7968 
7969   if (dump_file)
7970     {
7971       fprintf (dump_file, "\nOMP region tree\n\n");
7972       dump_omp_region (dump_file, root_omp_region, 0);
7973       fprintf (dump_file, "\n");
7974     }
7975 
7976   remove_exit_barriers (root_omp_region);
7977 
7978   expand_omp (root_omp_region);
7979 
7980   if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7981     verify_loop_structure ();
7982   cleanup_tree_cfg ();
7983 
7984   omp_free_regions ();
7985 
7986   return 0;
7987 }
7988 
7989 /* OMP expansion -- the default pass, run before creation of SSA form.  */
7990 
7991 namespace {
7992 
7993 const pass_data pass_data_expand_omp =
7994 {
7995   GIMPLE_PASS, /* type */
7996   "ompexp", /* name */
7997   OPTGROUP_OMP, /* optinfo_flags */
7998   TV_NONE, /* tv_id */
7999   PROP_gimple_any, /* properties_required */
8000   PROP_gimple_eomp, /* properties_provided */
8001   0, /* properties_destroyed */
8002   0, /* todo_flags_start */
8003   0, /* todo_flags_finish */
8004 };
8005 
8006 class pass_expand_omp : public gimple_opt_pass
8007 {
8008 public:
8009   pass_expand_omp (gcc::context *ctxt)
8010     : gimple_opt_pass (pass_data_expand_omp, ctxt)
8011   {}
8012 
8013   /* opt_pass methods: */
8014   virtual unsigned int execute (function *)
8015     {
8016       bool gate = ((flag_openacc != 0 || flag_openmp != 0
8017 		    || flag_openmp_simd != 0)
8018 		   && !seen_error ());
8019 
8020       /* This pass always runs, to provide PROP_gimple_eomp.
8021 	 But often, there is nothing to do.  */
8022       if (!gate)
8023 	return 0;
8024 
8025       return execute_expand_omp ();
8026     }
8027 
8028 }; // class pass_expand_omp
8029 
8030 } // anon namespace
8031 
8032 gimple_opt_pass *
8033 make_pass_expand_omp (gcc::context *ctxt)
8034 {
8035   return new pass_expand_omp (ctxt);
8036 }
8037 
8038 namespace {
8039 
8040 const pass_data pass_data_expand_omp_ssa =
8041 {
8042   GIMPLE_PASS, /* type */
8043   "ompexpssa", /* name */
8044   OPTGROUP_OMP, /* optinfo_flags */
8045   TV_NONE, /* tv_id */
8046   PROP_cfg | PROP_ssa, /* properties_required */
8047   PROP_gimple_eomp, /* properties_provided */
8048   0, /* properties_destroyed */
8049   0, /* todo_flags_start */
8050   TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8051 };
8052 
8053 class pass_expand_omp_ssa : public gimple_opt_pass
8054 {
8055 public:
8056   pass_expand_omp_ssa (gcc::context *ctxt)
8057     : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8058   {}
8059 
8060   /* opt_pass methods: */
8061   virtual bool gate (function *fun)
8062     {
8063       return !(fun->curr_properties & PROP_gimple_eomp);
8064     }
8065   virtual unsigned int execute (function *) { return execute_expand_omp (); }
8066   opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8067 
8068 }; // class pass_expand_omp_ssa
8069 
8070 } // anon namespace
8071 
8072 gimple_opt_pass *
8073 make_pass_expand_omp_ssa (gcc::context *ctxt)
8074 {
8075   return new pass_expand_omp_ssa (ctxt);
8076 }
8077 
8078 /* Called from tree-cfg.c::make_edges to create CFG edges for all relevant
8079    GIMPLE_OMP_* codes.  Return true if BB should get a fallthru edge.  */
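/* For instance (a sketch; the GIMPLE_OMP_CONTINUE case below has the
   authoritative logic), a GIMPLE_OMP_FOR region gets an abnormal loopback
   edge from its GIMPLE_OMP_CONTINUE block to the loop body and an abnormal
   edge from the GIMPLE_OMP_FOR block past the body for the case where the
   body is not executed at all; marking these edges abnormal keeps later
   passes from splitting them.  */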
8080 
8081 bool
8082 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8083 		       int *region_idx)
8084 {
8085   gimple *last = last_stmt (bb);
8086   enum gimple_code code = gimple_code (last);
8087   struct omp_region *cur_region = *region;
8088   bool fallthru = false;
8089 
8090   switch (code)
8091     {
8092     case GIMPLE_OMP_PARALLEL:
8093     case GIMPLE_OMP_TASK:
8094     case GIMPLE_OMP_FOR:
8095     case GIMPLE_OMP_SINGLE:
8096     case GIMPLE_OMP_TEAMS:
8097     case GIMPLE_OMP_MASTER:
8098     case GIMPLE_OMP_TASKGROUP:
8099     case GIMPLE_OMP_CRITICAL:
8100     case GIMPLE_OMP_SECTION:
8101     case GIMPLE_OMP_GRID_BODY:
8102       cur_region = new_omp_region (bb, code, cur_region);
8103       fallthru = true;
8104       break;
8105 
8106     case GIMPLE_OMP_ORDERED:
8107       cur_region = new_omp_region (bb, code, cur_region);
8108       fallthru = true;
8109       if (omp_find_clause (gimple_omp_ordered_clauses
8110 			     (as_a <gomp_ordered *> (last)),
8111 			   OMP_CLAUSE_DEPEND))
8112 	cur_region = cur_region->outer;
8113       break;
8114 
8115     case GIMPLE_OMP_TARGET:
8116       cur_region = new_omp_region (bb, code, cur_region);
8117       fallthru = true;
8118       switch (gimple_omp_target_kind (last))
8119 	{
8120 	case GF_OMP_TARGET_KIND_REGION:
8121 	case GF_OMP_TARGET_KIND_DATA:
8122 	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8123 	case GF_OMP_TARGET_KIND_OACC_KERNELS:
8124 	case GF_OMP_TARGET_KIND_OACC_DATA:
8125 	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8126 	  break;
8127 	case GF_OMP_TARGET_KIND_UPDATE:
8128 	case GF_OMP_TARGET_KIND_ENTER_DATA:
8129 	case GF_OMP_TARGET_KIND_EXIT_DATA:
8130 	case GF_OMP_TARGET_KIND_OACC_UPDATE:
8131 	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8132 	case GF_OMP_TARGET_KIND_OACC_DECLARE:
8133 	  cur_region = cur_region->outer;
8134 	  break;
8135 	default:
8136 	  gcc_unreachable ();
8137 	}
8138       break;
8139 
8140     case GIMPLE_OMP_SECTIONS:
8141       cur_region = new_omp_region (bb, code, cur_region);
8142       fallthru = true;
8143       break;
8144 
8145     case GIMPLE_OMP_SECTIONS_SWITCH:
8146       fallthru = false;
8147       break;
8148 
8149     case GIMPLE_OMP_ATOMIC_LOAD:
8150     case GIMPLE_OMP_ATOMIC_STORE:
8151        fallthru = true;
8152        break;
8153 
8154     case GIMPLE_OMP_RETURN:
8155       /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8156 	 somewhere other than the next block.  This will be
8157 	 created later.  */
8158       cur_region->exit = bb;
8159       if (cur_region->type == GIMPLE_OMP_TASK)
8160 	/* Add an edge corresponding to not scheduling the task
8161 	   immediately.  */
8162 	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8163       fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8164       cur_region = cur_region->outer;
8165       break;
8166 
8167     case GIMPLE_OMP_CONTINUE:
8168       cur_region->cont = bb;
8169       switch (cur_region->type)
8170 	{
8171 	case GIMPLE_OMP_FOR:
8172 	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8173 	     successor edges as abnormal to prevent splitting
8174 	     them.  */
8175 	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8176 	  /* Make the loopback edge.  */
8177 	  make_edge (bb, single_succ (cur_region->entry),
8178 		     EDGE_ABNORMAL);
8179 
8180 	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
8181 	     corresponds to the case that the body of the loop
8182 	     is not executed at all.  */
8183 	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8184 	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8185 	  fallthru = false;
8186 	  break;
8187 
8188 	case GIMPLE_OMP_SECTIONS:
8189 	  /* Wire up the edges into and out of the nested sections.  */
8190 	  {
8191 	    basic_block switch_bb = single_succ (cur_region->entry);
8192 
8193 	    struct omp_region *i;
8194 	    for (i = cur_region->inner; i ; i = i->next)
8195 	      {
8196 		gcc_assert (i->type == GIMPLE_OMP_SECTION);
8197 		make_edge (switch_bb, i->entry, 0);
8198 		make_edge (i->exit, bb, EDGE_FALLTHRU);
8199 	      }
8200 
8201 	    /* Make the loopback edge to the block with
8202 	       GIMPLE_OMP_SECTIONS_SWITCH.  */
8203 	    make_edge (bb, switch_bb, 0);
8204 
8205 	    /* Make the edge from the switch to exit.  */
8206 	    make_edge (switch_bb, bb->next_bb, 0);
8207 	    fallthru = false;
8208 	  }
8209 	  break;
8210 
8211 	case GIMPLE_OMP_TASK:
8212 	  fallthru = true;
8213 	  break;
8214 
8215 	default:
8216 	  gcc_unreachable ();
8217 	}
8218       break;
8219 
8220     default:
8221       gcc_unreachable ();
8222     }
8223 
8224   if (*region != cur_region)
8225     {
8226       *region = cur_region;
8227       if (cur_region)
8228 	*region_idx = cur_region->entry->index;
8229       else
8230 	*region_idx = 0;
8231     }
8232 
8233   return fallthru;
8234 }
8235 
8236 #include "gt-omp-expand.h"
8237