/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.
4 
5 Copyright (C) 2005-2020 Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
60 #include "stringpool.h"
61 #include "attribs.h"
62 
63 /* OMP region information.  Every parallel and workshare
64    directive is enclosed between two markers, the OMP_* directive
65    and a corresponding GIMPLE_OMP_RETURN statement.  */
66 
67 struct omp_region
68 {
69   /* The enclosing region.  */
70   struct omp_region *outer;
71 
72   /* First child region.  */
73   struct omp_region *inner;
74 
75   /* Next peer region.  */
76   struct omp_region *next;
77 
78   /* Block containing the omp directive as its last stmt.  */
79   basic_block entry;
80 
81   /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
82   basic_block exit;
83 
84   /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
85   basic_block cont;
86 
87   /* If this is a combined parallel+workshare region, this is a list
88      of additional arguments needed by the combined parallel+workshare
89      library call.  */
90   vec<tree, va_gc> *ws_args;
91 
92   /* The code for the omp directive of this region.  */
93   enum gimple_code type;
94 
95   /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
96   enum omp_clause_schedule_kind sched_kind;
97 
98   /* Schedule modifiers.  */
99   unsigned char sched_modifiers;
100 
101   /* True if this is a combined parallel+workshare region.  */
102   bool is_combined_parallel;
103 
104   /* Copy of fd.lastprivate_conditional != 0.  */
105   bool has_lastprivate_conditional;
106 
107   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
108      a depend clause.  */
109   gomp_ordered *ord_stmt;
110 };
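
/* As an illustrative sketch (not part of the original comment), for

     #pragma omp parallel
     #pragma omp for
     for (i = 0; i < n; i++)
       ...

   the region tree contains a GIMPLE_OMP_PARALLEL region whose INNER
   field points to a GIMPLE_OMP_FOR region, and the loop region's OUTER
   field points back at the parallel region; determine_parallel_type
   below may then mark both as a combined parallel+workshare pair.  */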
111 
112 static struct omp_region *root_omp_region;
113 static bool omp_any_child_fn_dumped;
114 
115 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
116 				     bool = false);
117 static gphi *find_phi_with_arg_on_edge (tree, edge);
118 static void expand_omp (struct omp_region *region);
119 
120 /* Return true if REGION is a combined parallel+workshare region.  */
121 
122 static inline bool
is_combined_parallel (struct omp_region *region)
124 {
125   return region->is_combined_parallel;
126 }
127 
128 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
129    is the immediate dominator of PAR_ENTRY_BB, return true if there
130    are no data dependencies that would prevent expanding the parallel
131    directive at PAR_ENTRY_BB as a combined parallel+workshare region.
132 
133    When expanding a combined parallel+workshare region, the call to
134    the child function may need additional arguments in the case of
135    GIMPLE_OMP_FOR regions.  In some cases, these arguments are
136    computed out of variables passed in from the parent to the child
137    via 'struct .omp_data_s'.  For instance:
138 
139 	#pragma omp parallel for schedule (guided, i * 4)
140 	for (j ...)
141 
142    Is lowered into:
143 
144 	# BLOCK 2 (PAR_ENTRY_BB)
145 	.omp_data_o.i = i;
146 	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
147 
148 	# BLOCK 3 (WS_ENTRY_BB)
149 	.omp_data_i = &.omp_data_o;
150 	D.1667 = .omp_data_i->i;
151 	D.1598 = D.1667 * 4;
152 	#pragma omp for schedule (guided, D.1598)
153 
154    When we outline the parallel region, the call to the child function
155    'bar.omp_fn.0' will need the value D.1598 in its argument list, but
156    that value is computed *after* the call site.  So, in principle we
157    cannot do the transformation.
158 
159    To see whether the code in WS_ENTRY_BB blocks the combined
160    parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
162    statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
163    call.
164 
165    FIXME.  If we had the SSA form built at this point, we could merely
166    hoist the code in block 3 into block 2 and be done with it.  But at
167    this point we don't have dataflow information and though we could
168    hack something up here, it is really not worth the aggravation.  */
169 
170 static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
172 {
173   struct omp_for_data fd;
174   gimple *ws_stmt = last_stmt (ws_entry_bb);
175 
176   if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
177     return true;
178 
179   gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
180   if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
181     return false;
182 
183   omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
184 
185   if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
186     return false;
187   if (fd.iter_type != long_integer_type_node)
188     return false;
189 
190   /* FIXME.  We give up too easily here.  If any of these arguments
191      are not constants, they will likely involve variables that have
192      been mapped into fields of .omp_data_s for sharing with the child
193      function.  With appropriate data flow, it would be possible to
194      see through this.  */
195   if (!is_gimple_min_invariant (fd.loop.n1)
196       || !is_gimple_min_invariant (fd.loop.n2)
197       || !is_gimple_min_invariant (fd.loop.step)
198       || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
199     return false;
200 
201   return true;
202 }
203 
/* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on the presence
   of the simd modifier (SIMD_SCHEDULE).  */
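
/* An illustrative example (not part of the upstream comment): assuming
   omp_max_vf () returns 8, a chunk size of 13 is rounded up to
   (13 + 7) & -8 == 16, i.e. the next multiple of the vectorization
   factor, while a chunk size of 0 is returned unchanged.  */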
206 
207 static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
209 {
210   if (!simd_schedule || integer_zerop (chunk_size))
211     return chunk_size;
212 
213   poly_uint64 vf = omp_max_vf ();
214   if (known_eq (vf, 1U))
215     return chunk_size;
216 
217   tree type = TREE_TYPE (chunk_size);
218   chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
219 			    build_int_cst (type, vf - 1));
220   return fold_build2 (BIT_AND_EXPR, type, chunk_size,
221 		      build_int_cst (type, -vf));
222 }
223 
224 /* Collect additional arguments needed to emit a combined
225    parallel+workshare call.  WS_STMT is the workshare directive being
226    expanded.  */
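
/* Roughly, for a combined parallel loop the extra arguments collected
   here are what GOMP_parallel_loop_* expects in addition to the child
   function, data pointer, thread count and flags: the start bound N1,
   the end bound N2, the step and, if present, the chunk size.  For
   parallel sections the single extra argument is the section count
   passed to GOMP_parallel_sections.  */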
227 
228 static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
230 {
231   tree t;
232   location_t loc = gimple_location (ws_stmt);
233   vec<tree, va_gc> *ws_args;
234 
235   if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
236     {
237       struct omp_for_data fd;
238       tree n1, n2;
239 
240       omp_extract_for_data (for_stmt, &fd, NULL);
241       n1 = fd.loop.n1;
242       n2 = fd.loop.n2;
243 
244       if (gimple_omp_for_combined_into_p (for_stmt))
245 	{
246 	  tree innerc
247 	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
248 			       OMP_CLAUSE__LOOPTEMP_);
249 	  gcc_assert (innerc);
250 	  n1 = OMP_CLAUSE_DECL (innerc);
251 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
252 				    OMP_CLAUSE__LOOPTEMP_);
253 	  gcc_assert (innerc);
254 	  n2 = OMP_CLAUSE_DECL (innerc);
255 	}
256 
257       vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
258 
259       t = fold_convert_loc (loc, long_integer_type_node, n1);
260       ws_args->quick_push (t);
261 
262       t = fold_convert_loc (loc, long_integer_type_node, n2);
263       ws_args->quick_push (t);
264 
265       t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
266       ws_args->quick_push (t);
267 
268       if (fd.chunk_size)
269 	{
270 	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
271 	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
272 	  ws_args->quick_push (t);
273 	}
274 
275       return ws_args;
276     }
277   else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
278     {
279       /* Number of sections is equal to the number of edges from the
280 	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
281 	 the exit of the sections region.  */
282       basic_block bb = single_succ (gimple_bb (ws_stmt));
283       t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
284       vec_alloc (ws_args, 1);
285       ws_args->quick_push (t);
286       return ws_args;
287     }
288 
289   gcc_unreachable ();
290 }
291 
292 /* Discover whether REGION is a combined parallel+workshare region.  */
293 
294 static void
determine_parallel_type (struct omp_region *region)
296 {
297   basic_block par_entry_bb, par_exit_bb;
298   basic_block ws_entry_bb, ws_exit_bb;
299 
300   if (region == NULL || region->inner == NULL
301       || region->exit == NULL || region->inner->exit == NULL
302       || region->inner->cont == NULL)
303     return;
304 
305   /* We only support parallel+for and parallel+sections.  */
306   if (region->type != GIMPLE_OMP_PARALLEL
307       || (region->inner->type != GIMPLE_OMP_FOR
308 	  && region->inner->type != GIMPLE_OMP_SECTIONS))
309     return;
310 
311   /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
312      WS_EXIT_BB -> PAR_EXIT_BB.  */
313   par_entry_bb = region->entry;
314   par_exit_bb = region->exit;
315   ws_entry_bb = region->inner->entry;
316   ws_exit_bb = region->inner->exit;
317 
  /* Give up for task reductions on the parallel; while they would be
     implementable, adding another big set of APIs or slowing down the
     normal paths is not acceptable.  */
321   tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
322   if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
323     return;
324 
325   if (single_succ (par_entry_bb) == ws_entry_bb
326       && single_succ (ws_exit_bb) == par_exit_bb
327       && workshare_safe_to_combine_p (ws_entry_bb)
328       && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
329 	  || (last_and_only_stmt (ws_entry_bb)
330 	      && last_and_only_stmt (par_exit_bb))))
331     {
332       gimple *par_stmt = last_stmt (par_entry_bb);
333       gimple *ws_stmt = last_stmt (ws_entry_bb);
334 
335       if (region->inner->type == GIMPLE_OMP_FOR)
336 	{
337 	  /* If this is a combined parallel loop, we need to determine
338 	     whether or not to use the combined library calls.  There
339 	     are two cases where we do not apply the transformation:
340 	     static loops and any kind of ordered loop.  In the first
341 	     case, we already open code the loop so there is no need
342 	     to do anything else.  In the latter case, the combined
343 	     parallel loop call would still need extra synchronization
344 	     to implement ordered semantics, so there would not be any
345 	     gain in using the combined call.  */
346 	  tree clauses = gimple_omp_for_clauses (ws_stmt);
347 	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
348 	  if (c == NULL
349 	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
350 		  == OMP_CLAUSE_SCHEDULE_STATIC)
351 	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
352 	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
353 	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
354 		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
355 	    return;
356 	}
357       else if (region->inner->type == GIMPLE_OMP_SECTIONS
358 	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
359 				    OMP_CLAUSE__REDUCTEMP_)
360 		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
361 				       OMP_CLAUSE__CONDTEMP_)))
362 	return;
363 
364       region->is_combined_parallel = true;
365       region->inner->is_combined_parallel = true;
366       region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
367     }
368 }
369 
370 /* Debugging dumps for parallel regions.  */
371 void dump_omp_region (FILE *, struct omp_region *, int);
372 void debug_omp_region (struct omp_region *);
373 void debug_all_omp_regions (void);
374 
375 /* Dump the parallel region tree rooted at REGION.  */
376 
377 void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
379 {
380   fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
381 	   gimple_code_name[region->type]);
382 
383   if (region->inner)
384     dump_omp_region (file, region->inner, indent + 4);
385 
386   if (region->cont)
387     {
388       fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
389 	       region->cont->index);
390     }
391 
392   if (region->exit)
393     fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
394 	     region->exit->index);
395   else
396     fprintf (file, "%*s[no exit marker]\n", indent, "");
397 
398   if (region->next)
399     dump_omp_region (file, region->next, indent);
400 }
401 
402 DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
404 {
405   dump_omp_region (stderr, region, 0);
406 }
407 
408 DEBUG_FUNCTION void
debug_all_omp_regions (void)
410 {
411   dump_omp_region (stderr, root_omp_region, 0);
412 }
413 
/* Create a new parallel region of type TYPE starting at basic block BB
   inside region PARENT.  */
415 
416 static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
419 {
420   struct omp_region *region = XCNEW (struct omp_region);
421 
422   region->outer = parent;
423   region->entry = bb;
424   region->type = type;
425 
426   if (parent)
427     {
428       /* This is a nested region.  Add it to the list of inner
429 	 regions in PARENT.  */
430       region->next = parent->inner;
431       parent->inner = region;
432     }
433   else
434     {
435       /* This is a toplevel region.  Add it to the list of toplevel
436 	 regions in ROOT_OMP_REGION.  */
437       region->next = root_omp_region;
438       root_omp_region = region;
439     }
440 
441   return region;
442 }
443 
444 /* Release the memory associated with the region tree rooted at REGION.  */
445 
446 static void
free_omp_region_1 (struct omp_region *region)
448 {
449   struct omp_region *i, *n;
450 
451   for (i = region->inner; i ; i = n)
452     {
453       n = i->next;
454       free_omp_region_1 (i);
455     }
456 
457   free (region);
458 }
459 
460 /* Release the memory for the entire omp region tree.  */
461 
462 void
omp_free_regions (void)
464 {
465   struct omp_region *r, *n;
466   for (r = root_omp_region; r ; r = n)
467     {
468       n = r->next;
469       free_omp_region_1 (r);
470     }
471   root_omp_region = NULL;
472 }
473 
474 /* A convenience function to build an empty GIMPLE_COND with just the
475    condition.  */
476 
477 static gcond *
gimple_build_cond_empty (tree cond)
479 {
480   enum tree_code pred_code;
481   tree lhs, rhs;
482 
483   gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
484   return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
485 }
486 
487 /* Return true if a parallel REGION is within a declare target function or
488    within a target region and is not a part of a gridified target.  */
489 
490 static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
492 {
493   bool indirect = false;
494   for (region = region->outer; region; region = region->outer)
495     {
496       if (region->type == GIMPLE_OMP_PARALLEL)
497 	indirect = true;
498       else if (region->type == GIMPLE_OMP_TARGET)
499 	{
500 	  gomp_target *tgt_stmt
501 	    = as_a <gomp_target *> (last_stmt (region->entry));
502 
503 	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
504 			       OMP_CLAUSE__GRIDDIM_))
505 	    return indirect;
506 	  else
507 	    return true;
508 	}
509     }
510 
511   if (lookup_attribute ("omp declare target",
512 			DECL_ATTRIBUTES (current_function_decl)))
513     return true;
514 
515   return false;
516 }
517 
518 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
519    Add CHILD_FNDECL to decl chain of the supercontext of the block
520    ENTRY_BLOCK - this is the block which originally contained the
521    code from which CHILD_FNDECL was created.
522 
523    Together, these actions ensure that the debug info for the outlined
524    function will be emitted with the correct lexical scope.  */
525 
526 static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
529 {
530   tree parent_fndecl = NULL_TREE;
531   gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have an explicit task region nested in a parallel region,
     when expanding the task region current_function_decl will be the
     original source function, but we actually want to use as context
     the child function of the parallel.  */
537   for (region = region->outer;
538        region && parent_fndecl == NULL_TREE; region = region->outer)
539     switch (region->type)
540       {
541       case GIMPLE_OMP_PARALLEL:
542       case GIMPLE_OMP_TASK:
543       case GIMPLE_OMP_TEAMS:
544 	entry_stmt = last_stmt (region->entry);
545 	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
546 	break;
547       case GIMPLE_OMP_TARGET:
548 	entry_stmt = last_stmt (region->entry);
549 	parent_fndecl
550 	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
551 	break;
552       default:
553 	break;
554       }
555 
556   if (parent_fndecl == NULL_TREE)
557     parent_fndecl = current_function_decl;
558   DECL_CONTEXT (child_fndecl) = parent_fndecl;
559 
560   if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
561     {
562       tree b = BLOCK_SUPERCONTEXT (entry_block);
563       if (TREE_CODE (b) == BLOCK)
564         {
565 	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
566 	  BLOCK_VARS (b) = child_fndecl;
567 	}
568     }
569 }
570 
/* Build the function calls to GOMP_parallel etc. to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */
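
/* In the simplest case (no combined workshare, no task reductions) the
   call built below has roughly the shape

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   where a num_threads of 0 means the number of threads is chosen at run
   time.  This is only a sketch of the common case; the code below picks
   other GOMP_parallel_* entry points when a workshare is combined or
   reductions are present.  */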
577 
578 static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
582 {
583   tree t, t1, t2, val, cond, c, clauses, flags;
584   gimple_stmt_iterator gsi;
585   gimple *stmt;
586   enum built_in_function start_ix;
587   int start_ix2;
588   location_t clause_loc;
589   vec<tree, va_gc> *args;
590 
591   clauses = gimple_omp_parallel_clauses (entry_stmt);
592 
593   /* Determine what flavor of GOMP_parallel we will be
594      emitting.  */
595   start_ix = BUILT_IN_GOMP_PARALLEL;
596   tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
597   if (rtmp)
598     start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
599   else if (is_combined_parallel (region))
600     {
601       switch (region->inner->type)
602 	{
603 	case GIMPLE_OMP_FOR:
604 	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
605 	  switch (region->inner->sched_kind)
606 	    {
607 	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
608 	      /* For lastprivate(conditional:), our implementation
609 		 requires monotonic behavior.  */
610 	      if (region->inner->has_lastprivate_conditional != 0)
611 		start_ix2 = 3;
612 	      else if ((region->inner->sched_modifiers
613 		       & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
614 		start_ix2 = 6;
615 	      else if ((region->inner->sched_modifiers
616 			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
617 		start_ix2 = 7;
618 	      else
619 		start_ix2 = 3;
620 	      break;
621 	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
622 	    case OMP_CLAUSE_SCHEDULE_GUIDED:
623 	      if ((region->inner->sched_modifiers
624 		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
625 		  && !region->inner->has_lastprivate_conditional)
626 		{
627 		  start_ix2 = 3 + region->inner->sched_kind;
628 		  break;
629 		}
630 	      /* FALLTHRU */
631 	    default:
632 	      start_ix2 = region->inner->sched_kind;
633 	      break;
634 	    }
635 	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
636 	  start_ix = (enum built_in_function) start_ix2;
637 	  break;
638 	case GIMPLE_OMP_SECTIONS:
639 	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
640 	  break;
641 	default:
642 	  gcc_unreachable ();
643 	}
644     }
645 
646   /* By default, the value of NUM_THREADS is zero (selected at run time)
647      and there is no conditional.  */
648   cond = NULL_TREE;
649   val = build_int_cst (unsigned_type_node, 0);
650   flags = build_int_cst (unsigned_type_node, 0);
651 
652   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
653   if (c)
654     cond = OMP_CLAUSE_IF_EXPR (c);
655 
656   c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
657   if (c)
658     {
659       val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
660       clause_loc = OMP_CLAUSE_LOCATION (c);
661     }
662   else
663     clause_loc = gimple_location (entry_stmt);
664 
665   c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
666   if (c)
667     flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
668 
669   /* Ensure 'val' is of the correct type.  */
670   val = fold_convert_loc (clause_loc, unsigned_type_node, val);
671 
672   /* If we found the clause 'if (cond)', build either
673      (cond != 0) or (cond ? val : 1u).  */
674   if (cond)
675     {
676       cond = gimple_boolify (cond);
677 
678       if (integer_zerop (val))
679 	val = fold_build2_loc (clause_loc,
680 			   EQ_EXPR, unsigned_type_node, cond,
681 			   build_int_cst (TREE_TYPE (cond), 0));
682       else
683 	{
684 	  basic_block cond_bb, then_bb, else_bb;
685 	  edge e, e_then, e_else;
686 	  tree tmp_then, tmp_else, tmp_join, tmp_var;
687 
688 	  tmp_var = create_tmp_var (TREE_TYPE (val));
689 	  if (gimple_in_ssa_p (cfun))
690 	    {
691 	      tmp_then = make_ssa_name (tmp_var);
692 	      tmp_else = make_ssa_name (tmp_var);
693 	      tmp_join = make_ssa_name (tmp_var);
694 	    }
695 	  else
696 	    {
697 	      tmp_then = tmp_var;
698 	      tmp_else = tmp_var;
699 	      tmp_join = tmp_var;
700 	    }
701 
702 	  e = split_block_after_labels (bb);
703 	  cond_bb = e->src;
704 	  bb = e->dest;
705 	  remove_edge (e);
706 
707 	  then_bb = create_empty_bb (cond_bb);
708 	  else_bb = create_empty_bb (then_bb);
709 	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
710 	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
711 
712 	  stmt = gimple_build_cond_empty (cond);
713 	  gsi = gsi_start_bb (cond_bb);
714 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
715 
716 	  gsi = gsi_start_bb (then_bb);
717 	  expand_omp_build_assign (&gsi, tmp_then, val, true);
718 
719 	  gsi = gsi_start_bb (else_bb);
720 	  expand_omp_build_assign (&gsi, tmp_else,
721 				   build_int_cst (unsigned_type_node, 1),
722 				   true);
723 
724 	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
725 	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
726 	  add_bb_to_loop (then_bb, cond_bb->loop_father);
727 	  add_bb_to_loop (else_bb, cond_bb->loop_father);
728 	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
729 	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
730 
731 	  if (gimple_in_ssa_p (cfun))
732 	    {
733 	      gphi *phi = create_phi_node (tmp_join, bb);
734 	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
735 	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
736 	    }
737 
738 	  val = tmp_join;
739 	}
740 
741       gsi = gsi_start_bb (bb);
742       val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
743 				      false, GSI_CONTINUE_LINKING);
744     }
745 
746   gsi = gsi_last_nondebug_bb (bb);
747   t = gimple_omp_parallel_data_arg (entry_stmt);
748   if (t == NULL)
749     t1 = null_pointer_node;
750   else
751     t1 = build_fold_addr_expr (t);
752   tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
753   t2 = build_fold_addr_expr (child_fndecl);
754 
755   vec_alloc (args, 4 + vec_safe_length (ws_args));
756   args->quick_push (t2);
757   args->quick_push (t1);
758   args->quick_push (val);
759   if (ws_args)
760     args->splice (*ws_args);
761   args->quick_push (flags);
762 
763   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
764 			       builtin_decl_explicit (start_ix), args);
765 
766   if (rtmp)
767     {
768       tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
769       t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
770 		  fold_convert (type,
771 				fold_convert (pointer_sized_int_node, t)));
772     }
773   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
774 			    false, GSI_CONTINUE_LINKING);
775 
776   if (hsa_gen_requested_p ()
777       && parallel_needs_hsa_kernel_p (region))
778     {
779       cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
780       hsa_register_kernel (child_cnode);
781     }
782 }
783 
784 /* Build the function call to GOMP_task to actually
785    generate the task operation.  BB is the block where to insert the code.  */
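
/* For a plain task (not a taskloop) the call built below is roughly

     GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
		if_cond, flags, depend, priority);

   while taskloops call GOMP_taskloop or GOMP_taskloop_ull instead,
   passing the number of tasks or grainsize, the loop bounds and the step
   in place of the if and depend arguments.  This is only a sketch; the
   exact FLAGS bits are computed below.  */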
786 
787 static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
790 {
791   tree t1, t2, t3;
792   gimple_stmt_iterator gsi;
793   location_t loc = gimple_location (entry_stmt);
794 
795   tree clauses = gimple_omp_task_clauses (entry_stmt);
796 
797   tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
798   tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
799   tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
800   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
801   tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
802   tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
803 
804   unsigned int iflags
805     = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
806       | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
807       | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
808 
809   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
810   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
811   tree num_tasks = NULL_TREE;
812   bool ull = false;
813   if (taskloop_p)
814     {
815       gimple *g = last_stmt (region->outer->entry);
816       gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
817 		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
818       struct omp_for_data fd;
819       omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
820       startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
821       endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
822 				OMP_CLAUSE__LOOPTEMP_);
823       startvar = OMP_CLAUSE_DECL (startvar);
824       endvar = OMP_CLAUSE_DECL (endvar);
825       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
826       if (fd.loop.cond_code == LT_EXPR)
827 	iflags |= GOMP_TASK_FLAG_UP;
828       tree tclauses = gimple_omp_for_clauses (g);
829       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
830       if (num_tasks)
831 	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
832       else
833 	{
834 	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
835 	  if (num_tasks)
836 	    {
837 	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
838 	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
839 	    }
840 	  else
841 	    num_tasks = integer_zero_node;
842 	}
843       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
844       if (ifc == NULL_TREE)
845 	iflags |= GOMP_TASK_FLAG_IF;
846       if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
847 	iflags |= GOMP_TASK_FLAG_NOGROUP;
848       ull = fd.iter_type == long_long_unsigned_type_node;
849       if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
850 	iflags |= GOMP_TASK_FLAG_REDUCTION;
851     }
852   else if (priority)
853     iflags |= GOMP_TASK_FLAG_PRIORITY;
854 
855   tree flags = build_int_cst (unsigned_type_node, iflags);
856 
857   tree cond = boolean_true_node;
858   if (ifc)
859     {
860       if (taskloop_p)
861 	{
862 	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
863 	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
864 			       build_int_cst (unsigned_type_node,
865 					      GOMP_TASK_FLAG_IF),
866 			       build_int_cst (unsigned_type_node, 0));
867 	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
868 				   flags, t);
869 	}
870       else
871 	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
872     }
873 
874   if (finalc)
875     {
876       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
877       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
878 			   build_int_cst (unsigned_type_node,
879 					  GOMP_TASK_FLAG_FINAL),
880 			   build_int_cst (unsigned_type_node, 0));
881       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
882     }
883   if (depend)
884     depend = OMP_CLAUSE_DECL (depend);
885   else
886     depend = build_int_cst (ptr_type_node, 0);
887   if (priority)
888     priority = fold_convert (integer_type_node,
889 			     OMP_CLAUSE_PRIORITY_EXPR (priority));
890   else
891     priority = integer_zero_node;
892 
893   gsi = gsi_last_nondebug_bb (bb);
894   tree t = gimple_omp_task_data_arg (entry_stmt);
895   if (t == NULL)
896     t2 = null_pointer_node;
897   else
898     t2 = build_fold_addr_expr_loc (loc, t);
899   t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
900   t = gimple_omp_task_copy_fn (entry_stmt);
901   if (t == NULL)
902     t3 = null_pointer_node;
903   else
904     t3 = build_fold_addr_expr_loc (loc, t);
905 
906   if (taskloop_p)
907     t = build_call_expr (ull
908 			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
909 			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
910 			 11, t1, t2, t3,
911 			 gimple_omp_task_arg_size (entry_stmt),
912 			 gimple_omp_task_arg_align (entry_stmt), flags,
913 			 num_tasks, priority, startvar, endvar, step);
914   else
915     t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
916 			 9, t1, t2, t3,
917 			 gimple_omp_task_arg_size (entry_stmt),
918 			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
919 			 depend, priority);
920 
921   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
922 			    false, GSI_CONTINUE_LINKING);
923 }
924 
925 /* Build the function call to GOMP_taskwait_depend to actually
926    generate the taskwait operation.  BB is the block where to insert the
927    code.  */
928 
929 static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
931 {
932   tree clauses = gimple_omp_task_clauses (entry_stmt);
933   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
934   if (depend == NULL_TREE)
935     return;
936 
937   depend = OMP_CLAUSE_DECL (depend);
938 
939   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
940   tree t
941     = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
942 		       1, depend);
943 
944   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
945 			    false, GSI_CONTINUE_LINKING);
946 }
947 
948 /* Build the function call to GOMP_teams_reg to actually
949    generate the host teams operation.  REGION is the teams region
950    being expanded.  BB is the block where to insert the code.  */
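
/* The call emitted below has the shape

     GOMP_teams_reg (child_fn, &.omp_data_o, num_teams, thread_limit, 0);

   where num_teams and thread_limit default to 0 (implementation chosen)
   when the corresponding clauses are absent, and the trailing zero is an
   argument currently reserved for future extensibility.  */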
951 
952 static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
954 {
955   tree clauses = gimple_omp_teams_clauses (entry_stmt);
956   tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
957   if (num_teams == NULL_TREE)
958     num_teams = build_int_cst (unsigned_type_node, 0);
959   else
960     {
961       num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
962       num_teams = fold_convert (unsigned_type_node, num_teams);
963     }
964   tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
965   if (thread_limit == NULL_TREE)
966     thread_limit = build_int_cst (unsigned_type_node, 0);
967   else
968     {
969       thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
970       thread_limit = fold_convert (unsigned_type_node, thread_limit);
971     }
972 
973   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
974   tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
975   if (t == NULL)
976     t1 = null_pointer_node;
977   else
978     t1 = build_fold_addr_expr (t);
979   tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
980   tree t2 = build_fold_addr_expr (child_fndecl);
981 
982   vec<tree, va_gc> *args;
983   vec_alloc (args, 5);
984   args->quick_push (t2);
985   args->quick_push (t1);
986   args->quick_push (num_teams);
987   args->quick_push (thread_limit);
988   /* For future extensibility.  */
989   args->quick_push (build_zero_cst (unsigned_type_node));
990 
991   t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
992 			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
993 			       args);
994 
995   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
996 			    false, GSI_CONTINUE_LINKING);
997 }
998 
999 /* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */
1000 
1001 static tree
vec2chain (vec<tree, va_gc> *v)
1003 {
1004   tree chain = NULL_TREE, t;
1005   unsigned ix;
1006 
1007   FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
1008     {
1009       DECL_CHAIN (t) = chain;
1010       chain = t;
1011     }
1012 
1013   return chain;
1014 }
1015 
1016 /* Remove barriers in REGION->EXIT's block.  Note that this is only
1017    valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
1018    is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1019    left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1020    removed.  */
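
/* As an informal sketch, in

     #pragma omp parallel
     {
       #pragma omp for
       for (...) { ... }	// implicit barrier here
     }				// implicit barrier here as well

   the workshare's trailing barrier is immediately followed by the end of
   the parallel region, so it is redundant and its GIMPLE_OMP_RETURN can
   be marked nowait (subject to the addressable-variable check below).  */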
1021 
1022 static void
remove_exit_barrier (struct omp_region *region)
1024 {
1025   gimple_stmt_iterator gsi;
1026   basic_block exit_bb;
1027   edge_iterator ei;
1028   edge e;
1029   gimple *stmt;
1030   int any_addressable_vars = -1;
1031 
1032   exit_bb = region->exit;
1033 
  /* If the parallel region doesn't return, we don't have a REGION->EXIT
     block at all.  */
1036   if (! exit_bb)
1037     return;
1038 
1039   /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
1040      workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
1041      statements that can appear in between are extremely limited -- no
1042      memory operations at all.  Here, we allow nothing at all, so the
1043      only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
1044   gsi = gsi_last_nondebug_bb (exit_bb);
1045   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1046   gsi_prev_nondebug (&gsi);
1047   if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1048     return;
1049 
1050   FOR_EACH_EDGE (e, ei, exit_bb->preds)
1051     {
1052       gsi = gsi_last_nondebug_bb (e->src);
1053       if (gsi_end_p (gsi))
1054 	continue;
1055       stmt = gsi_stmt (gsi);
1056       if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1057 	  && !gimple_omp_return_nowait_p (stmt))
1058 	{
1059 	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
1060 	     in many cases.  If there could be tasks queued, the barrier
1061 	     might be needed to let the tasks run before some local
1062 	     variable of the parallel that the task uses as shared
1063 	     runs out of scope.  The task can be spawned either
1064 	     from within current function (this would be easy to check)
1065 	     or from some function it calls and gets passed an address
1066 	     of such a variable.  */
1067 	  if (any_addressable_vars < 0)
1068 	    {
1069 	      gomp_parallel *parallel_stmt
1070 		= as_a <gomp_parallel *> (last_stmt (region->entry));
1071 	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1072 	      tree local_decls, block, decl;
1073 	      unsigned ix;
1074 
1075 	      any_addressable_vars = 0;
1076 	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1077 		if (TREE_ADDRESSABLE (decl))
1078 		  {
1079 		    any_addressable_vars = 1;
1080 		    break;
1081 		  }
1082 	      for (block = gimple_block (stmt);
1083 		   !any_addressable_vars
1084 		   && block
1085 		   && TREE_CODE (block) == BLOCK;
1086 		   block = BLOCK_SUPERCONTEXT (block))
1087 		{
1088 		  for (local_decls = BLOCK_VARS (block);
1089 		       local_decls;
1090 		       local_decls = DECL_CHAIN (local_decls))
1091 		    if (TREE_ADDRESSABLE (local_decls))
1092 		      {
1093 			any_addressable_vars = 1;
1094 			break;
1095 		      }
1096 		  if (block == gimple_block (parallel_stmt))
1097 		    break;
1098 		}
1099 	    }
1100 	  if (!any_addressable_vars)
1101 	    gimple_omp_return_set_nowait (stmt);
1102 	}
1103     }
1104 }
1105 
1106 static void
remove_exit_barriers (struct omp_region *region)
1108 {
1109   if (region->type == GIMPLE_OMP_PARALLEL)
1110     remove_exit_barrier (region);
1111 
1112   if (region->inner)
1113     {
1114       region = region->inner;
1115       remove_exit_barriers (region);
1116       while (region->next)
1117 	{
1118 	  region = region->next;
1119 	  remove_exit_barriers (region);
1120 	}
1121     }
1122 }
1123 
/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} (),
   which are declared const.  Similarly for the task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */
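
/* For example (a sketch of the transformation):

     n = omp_get_num_threads ();		// ordinary library call

   becomes, inside the outlined parallel body,

     n = __builtin_omp_get_num_threads ();	// const builtin

   so that repeated queries within one parallel region can be combined by
   later optimizers.  */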
1131 
1132 static void
optimize_omp_library_calls (gimple *entry_stmt)
1134 {
1135   basic_block bb;
1136   gimple_stmt_iterator gsi;
1137   tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1138   tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1139   tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1140   tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1141   bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1142 		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1143 					  OMP_CLAUSE_UNTIED) != NULL);
1144 
1145   FOR_EACH_BB_FN (bb, cfun)
1146     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1147       {
1148 	gimple *call = gsi_stmt (gsi);
1149 	tree decl;
1150 
1151 	if (is_gimple_call (call)
1152 	    && (decl = gimple_call_fndecl (call))
1153 	    && DECL_EXTERNAL (decl)
1154 	    && TREE_PUBLIC (decl)
1155 	    && DECL_INITIAL (decl) == NULL)
1156 	  {
1157 	    tree built_in;
1158 
1159 	    if (DECL_NAME (decl) == thr_num_id)
1160 	      {
1161 		/* In #pragma omp task untied omp_get_thread_num () can change
1162 		   during the execution of the task region.  */
1163 		if (untied_task)
1164 		  continue;
1165 		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1166 	      }
1167 	    else if (DECL_NAME (decl) == num_thr_id)
1168 	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1169 	    else
1170 	      continue;
1171 
1172 	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1173 		|| gimple_call_num_args (call) != 0)
1174 	      continue;
1175 
1176 	    if (flag_exceptions && !TREE_NOTHROW (decl))
1177 	      continue;
1178 
1179 	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1180 		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1181 					TREE_TYPE (TREE_TYPE (built_in))))
1182 	      continue;
1183 
1184 	    gimple_call_set_fndecl (call, built_in);
1185 	  }
1186       }
1187 }
1188 
1189 /* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1190    regimplified.  */
1191 
1192 static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1194 {
1195   tree t = *tp;
1196 
1197   /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1198   if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1199     return t;
1200 
1201   if (TREE_CODE (t) == ADDR_EXPR)
1202     recompute_tree_invariant_for_addr_expr (t);
1203 
1204   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1205   return NULL_TREE;
1206 }
1207 
/* Prepend or append a TO = FROM assignment before or after *GSI_P,
   depending on AFTER.  */
1209 
1210 static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
1213 {
1214   bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1215   from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1216 				   !after, after ? GSI_CONTINUE_LINKING
1217 						 : GSI_SAME_STMT);
1218   gimple *stmt = gimple_build_assign (to, from);
1219   if (after)
1220     gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1221   else
1222     gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1223   if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1224       || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1225     {
1226       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1227       gimple_regimplify_operands (stmt, &gsi);
1228     }
1229 }
1230 
1231 /* Expand the OpenMP parallel or task directive starting at REGION.  */
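
/* In outline (a reading aid, not part of the original comment): the body
   of the construct is separated from the current CFG and moved into the
   child function recorded on the directive via move_sese_region_to_fn,
   the new function is registered with the callgraph, and the directive
   itself is finally replaced by a call to GOMP_parallel, GOMP_teams_reg
   or GOMP_task built by the expand_*_call helpers above.  */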
1232 
1233 static void
expand_omp_taskreg (struct omp_region *region)
1235 {
1236   basic_block entry_bb, exit_bb, new_bb;
1237   struct function *child_cfun;
1238   tree child_fn, block, t;
1239   gimple_stmt_iterator gsi;
1240   gimple *entry_stmt, *stmt;
1241   edge e;
1242   vec<tree, va_gc> *ws_args;
1243 
1244   entry_stmt = last_stmt (region->entry);
1245   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1246       && gimple_omp_task_taskwait_p (entry_stmt))
1247     {
1248       new_bb = region->entry;
1249       gsi = gsi_last_nondebug_bb (region->entry);
1250       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1251       gsi_remove (&gsi, true);
1252       expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1253       return;
1254     }
1255 
1256   child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1257   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1258 
1259   entry_bb = region->entry;
1260   if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1261     exit_bb = region->cont;
1262   else
1263     exit_bb = region->exit;
1264 
1265   if (is_combined_parallel (region))
1266     ws_args = region->ws_args;
1267   else
1268     ws_args = NULL;
1269 
1270   if (child_cfun->cfg)
1271     {
1272       /* Due to inlining, it may happen that we have already outlined
1273 	 the region, in which case all we need to do is make the
1274 	 sub-graph unreachable and emit the parallel call.  */
1275       edge entry_succ_e, exit_succ_e;
1276 
1277       entry_succ_e = single_succ_edge (entry_bb);
1278 
1279       gsi = gsi_last_nondebug_bb (entry_bb);
1280       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1281 		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1282 		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1283       gsi_remove (&gsi, true);
1284 
1285       new_bb = entry_bb;
1286       if (exit_bb)
1287 	{
1288 	  exit_succ_e = single_succ_edge (exit_bb);
1289 	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1290 	}
1291       remove_edge_and_dominated_blocks (entry_succ_e);
1292     }
1293   else
1294     {
1295       unsigned srcidx, dstidx, num;
1296 
1297       /* If the parallel region needs data sent from the parent
1298 	 function, then the very first statement (except possible
1299 	 tree profile counter updates) of the parallel body
1300 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1301 	 &.OMP_DATA_O is passed as an argument to the child function,
1302 	 we need to replace it with the argument as seen by the child
1303 	 function.
1304 
	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable, in which case we need to keep the assignment.  */
1310       if (gimple_omp_taskreg_data_arg (entry_stmt))
1311 	{
1312 	  basic_block entry_succ_bb
1313 	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
1314 				       : FALLTHRU_EDGE (entry_bb)->dest;
1315 	  tree arg;
1316 	  gimple *parcopy_stmt = NULL;
1317 
1318 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1319 	    {
1320 	      gimple *stmt;
1321 
1322 	      gcc_assert (!gsi_end_p (gsi));
1323 	      stmt = gsi_stmt (gsi);
1324 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
1325 		continue;
1326 
1327 	      if (gimple_num_ops (stmt) == 2)
1328 		{
1329 		  tree arg = gimple_assign_rhs1 (stmt);
1330 
		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */
1333 
1334 		  if (TREE_CODE (arg) == ADDR_EXPR
1335 		      && (TREE_OPERAND (arg, 0)
1336 			  == gimple_omp_taskreg_data_arg (entry_stmt)))
1337 		    {
1338 		      parcopy_stmt = stmt;
1339 		      break;
1340 		    }
1341 		}
1342 	    }
1343 
1344 	  gcc_assert (parcopy_stmt != NULL);
1345 	  arg = DECL_ARGUMENTS (child_fn);
1346 
1347 	  if (!gimple_in_ssa_p (cfun))
1348 	    {
1349 	      if (gimple_assign_lhs (parcopy_stmt) == arg)
1350 		gsi_remove (&gsi, true);
1351 	      else
1352 		{
1353 		  /* ?? Is setting the subcode really necessary ??  */
1354 		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1355 		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
1356 		}
1357 	    }
1358 	  else
1359 	    {
1360 	      tree lhs = gimple_assign_lhs (parcopy_stmt);
1361 	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
1362 	      /* We'd like to set the rhs to the default def in the child_fn,
1363 		 but it's too early to create ssa names in the child_fn.
1364 		 Instead, we set the rhs to the parm.  In
1365 		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
1367 		 this stmt, replace the parm with the default def.  */
1368 	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
1369 	      update_stmt (parcopy_stmt);
1370 	    }
1371 	}
1372 
1373       /* Declare local variables needed in CHILD_CFUN.  */
1374       block = DECL_INITIAL (child_fn);
1375       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1376       /* The gimplifier could record temporaries in parallel/task block
1377 	 rather than in containing function's local_decls chain,
1378 	 which would mean cgraph missed finalizing them.  Do it now.  */
1379       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1380 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1381 	  varpool_node::finalize_decl (t);
1382       DECL_SAVED_TREE (child_fn) = NULL;
1383       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1384       gimple_set_body (child_fn, NULL);
1385       TREE_USED (block) = 1;
1386 
1387       /* Reset DECL_CONTEXT on function arguments.  */
1388       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1389 	DECL_CONTEXT (t) = child_fn;
1390 
1391       /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1392 	 so that it can be moved to the child function.  */
1393       gsi = gsi_last_nondebug_bb (entry_bb);
1394       stmt = gsi_stmt (gsi);
1395       gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1396 			   || gimple_code (stmt) == GIMPLE_OMP_TASK
1397 			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1398       e = split_block (entry_bb, stmt);
1399       gsi_remove (&gsi, true);
1400       entry_bb = e->dest;
1401       edge e2 = NULL;
1402       if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1403 	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1404       else
1405 	{
1406 	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1407 	  gcc_assert (e2->dest == region->exit);
1408 	  remove_edge (BRANCH_EDGE (entry_bb));
1409 	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1410 	  gsi = gsi_last_nondebug_bb (region->exit);
1411 	  gcc_assert (!gsi_end_p (gsi)
1412 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1413 	  gsi_remove (&gsi, true);
1414 	}
1415 
1416       /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1417       if (exit_bb)
1418 	{
1419 	  gsi = gsi_last_nondebug_bb (exit_bb);
1420 	  gcc_assert (!gsi_end_p (gsi)
1421 		      && (gimple_code (gsi_stmt (gsi))
1422 			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1423 	  stmt = gimple_build_return (NULL);
1424 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1425 	  gsi_remove (&gsi, true);
1426 	}
1427 
1428       /* Move the parallel region into CHILD_CFUN.  */
1429 
1430       if (gimple_in_ssa_p (cfun))
1431 	{
1432 	  init_tree_ssa (child_cfun);
1433 	  init_ssa_operands (child_cfun);
1434 	  child_cfun->gimple_df->in_ssa_p = true;
1435 	  block = NULL_TREE;
1436 	}
1437       else
1438 	block = gimple_block (entry_stmt);
1439 
1440       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1441       if (exit_bb)
1442 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1443       if (e2)
1444 	{
1445 	  basic_block dest_bb = e2->dest;
1446 	  if (!exit_bb)
1447 	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1448 	  remove_edge (e2);
1449 	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1450 	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up loops.  */
1453       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1454 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1455 
1456       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1457       num = vec_safe_length (child_cfun->local_decls);
1458       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1459 	{
1460 	  t = (*child_cfun->local_decls)[srcidx];
1461 	  if (DECL_CONTEXT (t) == cfun->decl)
1462 	    continue;
1463 	  if (srcidx != dstidx)
1464 	    (*child_cfun->local_decls)[dstidx] = t;
1465 	  dstidx++;
1466 	}
1467       if (dstidx != num)
1468 	vec_safe_truncate (child_cfun->local_decls, dstidx);
1469 
1470       /* Inform the callgraph about the new function.  */
1471       child_cfun->curr_properties = cfun->curr_properties;
1472       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1473       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1474       cgraph_node *node = cgraph_node::get_create (child_fn);
1475       node->parallelized_function = 1;
1476       cgraph_node::add_new_function (child_fn, true);
1477 
1478       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1479 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1480 
1481       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1482 	 fixed in a following pass.  */
1483       push_cfun (child_cfun);
1484       if (need_asm)
1485 	assign_assembler_name_if_needed (child_fn);
1486 
1487       if (optimize)
1488 	optimize_omp_library_calls (entry_stmt);
1489       update_max_bb_count ();
1490       cgraph_edge::rebuild_edges ();
1491 
1492       /* Some EH regions might become dead, see PR34608.  If
1493 	 pass_cleanup_cfg isn't the first pass to happen with the
1494 	 new child, these dead EH edges might cause problems.
1495 	 Clean them up now.  */
1496       if (flag_exceptions)
1497 	{
1498 	  basic_block bb;
1499 	  bool changed = false;
1500 
1501 	  FOR_EACH_BB_FN (bb, cfun)
1502 	    changed |= gimple_purge_dead_eh_edges (bb);
1503 	  if (changed)
1504 	    cleanup_tree_cfg ();
1505 	}
1506       if (gimple_in_ssa_p (cfun))
1507 	update_ssa (TODO_update_ssa);
1508       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1509 	verify_loop_structure ();
1510       pop_cfun ();
1511 
1512       if (dump_file && !gimple_in_ssa_p (cfun))
1513 	{
1514 	  omp_any_child_fn_dumped = true;
1515 	  dump_function_header (dump_file, child_fn, dump_flags);
1516 	  dump_function_to_file (child_fn, dump_file, dump_flags);
1517 	}
1518     }
1519 
1520   adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1521 
1522   if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1523     expand_parallel_call (region, new_bb,
1524 			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1525   else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1526     expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1527   else
1528     expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1529   if (gimple_in_ssa_p (cfun))
1530     update_ssa (TODO_update_ssa_only_virtuals);
1531 }
1532 
1533 /* Information about members of an OpenACC collapsed loop nest.  */
1534 
1535 struct oacc_collapse
1536 {
1537   tree base;  /* Base value.  */
1538   tree iters; /* Number of steps.  */
1539   tree step;  /* Step size.  */
1540   tree tile;  /* Tile increment (if tiled).  */
1541   tree outer; /* Tile iterator var. */
1542 };
1543 
1544 /* Helper for expand_oacc_for.  Determine collapsed loop information.
1545    Fill in COUNTS array.  Emit any initialization code before GSI.
1546    Return the calculated outer loop bound of BOUND_TYPE.  */
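
/* A worked example of the iteration count computed below, assuming an
   upward-counting loop: for "for (i = 0; i < 10; i += 3)" the range is
   10, dir is 1 and the step is 3, so iters == (10 - 1 + 3) / 3 == 4,
   matching the executions for i == 0, 3, 6 and 9.  */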
1547 
1548 static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
1553 {
1554   tree tiling = fd->tiling;
1555   tree total = build_int_cst (bound_type, 1);
1556   int ix;
1557 
1558   gcc_assert (integer_onep (fd->loop.step));
1559   gcc_assert (integer_zerop (fd->loop.n1));
1560 
1561   /* When tiling, the first operand of the tile clause applies to the
1562      innermost loop, and we work outwards from there.  Seems
1563      backwards, but whatever.  */
1564   for (ix = fd->collapse; ix--;)
1565     {
1566       const omp_for_data_loop *loop = &fd->loops[ix];
1567 
1568       tree iter_type = TREE_TYPE (loop->v);
1569       tree plus_type = iter_type;
1570 
1571       gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1572 
1573       if (POINTER_TYPE_P (iter_type))
1574 	plus_type = sizetype;
1575 
1576       if (tiling)
1577 	{
1578 	  tree num = build_int_cst (integer_type_node, fd->collapse);
1579 	  tree loop_no = build_int_cst (integer_type_node, ix);
1580 	  tree tile = TREE_VALUE (tiling);
1581 	  gcall *call
1582 	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1583 					  /* gwv-outer=*/integer_zero_node,
1584 					  /* gwv-inner=*/integer_zero_node);
1585 
1586 	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1587 	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1588 	  gimple_call_set_lhs (call, counts[ix].tile);
1589 	  gimple_set_location (call, loc);
1590 	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1591 
1592 	  tiling = TREE_CHAIN (tiling);
1593 	}
1594       else
1595 	{
1596 	  counts[ix].tile = NULL;
1597 	  counts[ix].outer = loop->v;
1598 	}
1599 
1600       tree b = loop->n1;
1601       tree e = loop->n2;
1602       tree s = loop->step;
1603       bool up = loop->cond_code == LT_EXPR;
1604       tree dir = build_int_cst (diff_type, up ? +1 : -1);
1605       bool negating;
1606       tree expr;
1607 
1608       b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1609 				    true, GSI_SAME_STMT);
1610       e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1611 				    true, GSI_SAME_STMT);
1612 
1613       /* Convert the step, avoiding possible unsigned->signed overflow.  */
1614       negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1615       if (negating)
1616 	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1617       s = fold_convert (diff_type, s);
1618       if (negating)
1619 	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1620       s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1621 				    true, GSI_SAME_STMT);
1622 
1623       /* Determine the range, avoiding possible unsigned->signed overflow.  */
1624       negating = !up && TYPE_UNSIGNED (iter_type);
1625       expr = fold_build2 (MINUS_EXPR, plus_type,
1626 			  fold_convert (plus_type, negating ? b : e),
1627 			  fold_convert (plus_type, negating ? e : b));
1628       expr = fold_convert (diff_type, expr);
1629       if (negating)
1630 	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1631       tree range = force_gimple_operand_gsi
1632 	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1633 
1634       /* Determine number of iterations.  */
1635       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1636       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1637       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1638 
1639       tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1640 					     true, GSI_SAME_STMT);
1641 
1642       counts[ix].base = b;
1643       counts[ix].iters = iters;
1644       counts[ix].step = s;
1645 
1646       total = fold_build2 (MULT_EXPR, bound_type, total,
1647 			   fold_convert (bound_type, iters));
1648     }
1649 
1650   return total;
1651 }
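
/* For example (illustration only): for a single OpenACC loop

     for (i = 0; i < 10; i += 3)

   the code above computes range = 10 - 0 = 10, dir = +1 and
   iters = (range - dir + step) / step = (10 - 1 + 3) / 3 = 4,
   matching the four iterations i = 0, 3, 6, 9.  The per-loop counts are
   multiplied into TOTAL, the bound of the collapsed loop.  */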
1652 
1653 /* Emit initializers for collapsed loop members.  INNER is true if
1654    this is for the element loop of a TILE.  IVAR is the outer
1655    loop iteration variable, from which collapsed loop iteration values
1656    are calculated.  COUNTS array has been initialized by
1657    expand_oacc_collapse_init.  */
1658 
1659 static void
1660 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1661 			   gimple_stmt_iterator *gsi,
1662 			   const oacc_collapse *counts, tree ivar,
1663 			   tree diff_type)
1664 {
1665   tree ivar_type = TREE_TYPE (ivar);
1666 
1667   /*  The most rapidly changing iteration variable is the innermost
1668       one.  */
1669   for (int ix = fd->collapse; ix--;)
1670     {
1671       const omp_for_data_loop *loop = &fd->loops[ix];
1672       const oacc_collapse *collapse = &counts[ix];
1673       tree v = inner ? loop->v : collapse->outer;
1674       tree iter_type = TREE_TYPE (v);
1675       tree plus_type = iter_type;
1676       enum tree_code plus_code = PLUS_EXPR;
1677       tree expr;
1678 
1679       if (POINTER_TYPE_P (iter_type))
1680 	{
1681 	  plus_code = POINTER_PLUS_EXPR;
1682 	  plus_type = sizetype;
1683 	}
1684 
1685       expr = ivar;
1686       if (ix)
1687 	{
1688 	  tree mod = fold_convert (ivar_type, collapse->iters);
1689 	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1690 	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1691 	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1692 					   true, GSI_SAME_STMT);
1693 	}
1694 
1695       expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1696 			  fold_convert (diff_type, collapse->step));
1697       expr = fold_build2 (plus_code, iter_type,
1698 			  inner ? collapse->outer : collapse->base,
1699 			  fold_convert (plus_type, expr));
1700       expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1701 				       true, GSI_SAME_STMT);
1702       gassign *ass = gimple_build_assign (v, expr);
1703       gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1704     }
1705 }
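
/* For example (illustration only): for collapse(2) over

     for (i = 0; i < 4; i++)
       for (j = 0; j < 5; j++)

   the inner loop's iteration count is 5, so for collapsed index IVAR = 12
   the code above emits the equivalent of

     j = 0 + (12 % 5) * 1;      j == 2
     i = 0 + (12 / 5) * 1;      i == 2

   i.e. the thirteenth element (2 * 5 + 2 == 12) of the 4 x 5 space.  */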
1706 
1707 /* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1708    of the combined collapse > 1 loop constructs, generate code like:
1709 	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1710 	if (cond3 is <)
1711 	  adj = STEP3 - 1;
1712 	else
1713 	  adj = STEP3 + 1;
1714 	count3 = (adj + N32 - N31) / STEP3;
1715 	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1716 	if (cond2 is <)
1717 	  adj = STEP2 - 1;
1718 	else
1719 	  adj = STEP2 + 1;
1720 	count2 = (adj + N22 - N21) / STEP2;
1721 	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1722 	if (cond1 is <)
1723 	  adj = STEP1 - 1;
1724 	else
1725 	  adj = STEP1 + 1;
1726 	count1 = (adj + N12 - N11) / STEP1;
1727 	count = count1 * count2 * count3;
1728    Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1729 	count = 0;
1730    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1731    of the combined loop constructs, just initialize COUNTS array
1732    from the _looptemp_ clauses.  */
1733 
1734 /* NOTE: It *could* be better to moosh all of the BBs together,
1735    creating one larger BB with all the computation and the unexpected
1736    jump at the end.  I.e.
1737 
1738    bool zero3, zero2, zero1, zero;
1739 
1740    zero3 = N32 c3 N31;
1741    count3 = (N32 - N31) /[cl] STEP3;
1742    zero2 = N22 c2 N21;
1743    count2 = (N22 - N21) /[cl] STEP2;
1744    zero1 = N12 c1 N11;
1745    count1 = (N12 - N11) /[cl] STEP1;
1746    zero = zero3 || zero2 || zero1;
1747    count = count1 * count2 * count3;
1748    if (__builtin_expect(zero, false)) goto zero_iter_bb;
1749 
1750    After all, we expect the zero=false, and thus we expect to have to
1751    evaluate all of the comparison expressions, so short-circuiting
1752    oughtn't be a win.  Since the condition isn't protecting a
1753    denominator, we're not concerned about divide-by-zero, so we can
1754    fully evaluate count even if a numerator turned out to be wrong.
1755 
1756    It seems like putting this all together would create much better
1757    scheduling opportunities, and less pressure on the chip's branch
1758    predictor.  */
1759 
1760 static void
1761 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1762 			    basic_block &entry_bb, tree *counts,
1763 			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1764 			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1765 			    basic_block &l2_dom_bb)
1766 {
1767   tree t, type = TREE_TYPE (fd->loop.v);
1768   edge e, ne;
1769   int i;
1770 
1771   /* Collapsed loops need work for expansion into SSA form.  */
1772   gcc_assert (!gimple_in_ssa_p (cfun));
1773 
1774   if (gimple_omp_for_combined_into_p (fd->for_stmt)
1775       && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1776     {
1777       gcc_assert (fd->ordered == 0);
1778       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1779 	 isn't supposed to be handled, as the inner loop doesn't
1780 	 use it.  */
1781       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1782 				     OMP_CLAUSE__LOOPTEMP_);
1783       gcc_assert (innerc);
1784       for (i = 0; i < fd->collapse; i++)
1785 	{
1786 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1787 				    OMP_CLAUSE__LOOPTEMP_);
1788 	  gcc_assert (innerc);
1789 	  if (i)
1790 	    counts[i] = OMP_CLAUSE_DECL (innerc);
1791 	  else
1792 	    counts[0] = NULL_TREE;
1793 	}
1794       return;
1795     }
1796 
1797   for (i = fd->collapse; i < fd->ordered; i++)
1798     {
1799       tree itype = TREE_TYPE (fd->loops[i].v);
1800       counts[i] = NULL_TREE;
1801       t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1802 		       fold_convert (itype, fd->loops[i].n1),
1803 		       fold_convert (itype, fd->loops[i].n2));
1804       if (t && integer_zerop (t))
1805 	{
1806 	  for (i = fd->collapse; i < fd->ordered; i++)
1807 	    counts[i] = build_int_cst (type, 0);
1808 	  break;
1809 	}
1810     }
1811   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1812     {
1813       tree itype = TREE_TYPE (fd->loops[i].v);
1814 
1815       if (i >= fd->collapse && counts[i])
1816 	continue;
1817       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1818 	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1819 				fold_convert (itype, fd->loops[i].n1),
1820 				fold_convert (itype, fd->loops[i].n2)))
1821 	      == NULL_TREE || !integer_onep (t)))
1822 	{
1823 	  gcond *cond_stmt;
1824 	  tree n1, n2;
1825 	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1826 	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1827 					 true, GSI_SAME_STMT);
1828 	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1829 	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1830 					 true, GSI_SAME_STMT);
1831 	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1832 					 NULL_TREE, NULL_TREE);
1833 	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1834 	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1835 			 expand_omp_regimplify_p, NULL, NULL)
1836 	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1837 			    expand_omp_regimplify_p, NULL, NULL))
1838 	    {
1839 	      *gsi = gsi_for_stmt (cond_stmt);
1840 	      gimple_regimplify_operands (cond_stmt, gsi);
1841 	    }
1842 	  e = split_block (entry_bb, cond_stmt);
1843 	  basic_block &zero_iter_bb
1844 	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1845 	  int &first_zero_iter
1846 	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1847 	  if (zero_iter_bb == NULL)
1848 	    {
1849 	      gassign *assign_stmt;
1850 	      first_zero_iter = i;
1851 	      zero_iter_bb = create_empty_bb (entry_bb);
1852 	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1853 	      *gsi = gsi_after_labels (zero_iter_bb);
1854 	      if (i < fd->collapse)
1855 		assign_stmt = gimple_build_assign (fd->loop.n2,
1856 						   build_zero_cst (type));
1857 	      else
1858 		{
1859 		  counts[i] = create_tmp_reg (type, ".count");
1860 		  assign_stmt
1861 		    = gimple_build_assign (counts[i], build_zero_cst (type));
1862 		}
1863 	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1864 	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1865 				       entry_bb);
1866 	    }
1867 	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1868 	  ne->probability = profile_probability::very_unlikely ();
1869 	  e->flags = EDGE_TRUE_VALUE;
1870 	  e->probability = ne->probability.invert ();
1871 	  if (l2_dom_bb == NULL)
1872 	    l2_dom_bb = entry_bb;
1873 	  entry_bb = e->dest;
1874 	  *gsi = gsi_last_nondebug_bb (entry_bb);
1875 	}
1876 
1877       if (POINTER_TYPE_P (itype))
1878 	itype = signed_type_for (itype);
1879       t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1880 				 ? -1 : 1));
1881       t = fold_build2 (PLUS_EXPR, itype,
1882 		       fold_convert (itype, fd->loops[i].step), t);
1883       t = fold_build2 (PLUS_EXPR, itype, t,
1884 		       fold_convert (itype, fd->loops[i].n2));
1885       t = fold_build2 (MINUS_EXPR, itype, t,
1886 		       fold_convert (itype, fd->loops[i].n1));
1887       /* ?? We could probably use CEIL_DIV_EXPR instead of
1888 	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
1889 	 generate the same code in the end because generically we
1890 	 don't know that the values involved must be negative for
1891 	 GT??  */
1892       if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1893 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1894 			 fold_build1 (NEGATE_EXPR, itype, t),
1895 			 fold_build1 (NEGATE_EXPR, itype,
1896 				      fold_convert (itype,
1897 						    fd->loops[i].step)));
1898       else
1899 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1900 			 fold_convert (itype, fd->loops[i].step));
1901       t = fold_convert (type, t);
1902       if (TREE_CODE (t) == INTEGER_CST)
1903 	counts[i] = t;
1904       else
1905 	{
1906 	  if (i < fd->collapse || i != first_zero_iter2)
1907 	    counts[i] = create_tmp_reg (type, ".count");
1908 	  expand_omp_build_assign (gsi, counts[i], t);
1909 	}
1910       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1911 	{
1912 	  if (i == 0)
1913 	    t = counts[0];
1914 	  else
1915 	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1916 	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1917 	}
1918     }
1919 }
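
/* For example (illustration only): for

     #pragma omp for collapse(3)
     for (i = 0; i < 4; i++)
       for (j = 0; j < 3; j++)
         for (k = 0; k < 5; k++)

   the code above computes count1 = 4, count2 = 3 and count3 = 5, each via
   (adj + N2 - N1) / STEP with adj = STEP - 1 = 0 here, so the collapsed
   iteration space has count = 4 * 3 * 5 = 60 iterations.  */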
1920 
1921 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1922 	T = V;
1923 	V3 = N31 + (T % count3) * STEP3;
1924 	T = T / count3;
1925 	V2 = N21 + (T % count2) * STEP2;
1926 	T = T / count2;
1927 	V1 = N11 + T * STEP1;
1928    if this loop doesn't have an inner loop construct combined with it.
1929    If it does have an inner loop construct combined with it and the
1930    iteration count isn't known constant, store values from counts array
1931    into its _looptemp_ temporaries instead.  */
1932 
1933 static void
1934 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1935 			  tree *counts, gimple *inner_stmt, tree startvar)
1936 {
1937   int i;
1938   if (gimple_omp_for_combined_p (fd->for_stmt))
1939     {
1940       /* If fd->loop.n2 is constant, then no propagation of the counts
1941 	 is needed, they are constant.  */
1942       if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1943 	return;
1944 
1945       tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1946 		     ? gimple_omp_taskreg_clauses (inner_stmt)
1947 		     : gimple_omp_for_clauses (inner_stmt);
1948       /* First two _looptemp_ clauses are for istart/iend, counts[0]
1949 	 isn't supposed to be handled, as the inner loop doesn't
1950 	 use it.  */
1951       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1952       gcc_assert (innerc);
1953       for (i = 0; i < fd->collapse; i++)
1954 	{
1955 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1956 				    OMP_CLAUSE__LOOPTEMP_);
1957 	  gcc_assert (innerc);
1958 	  if (i)
1959 	    {
1960 	      tree tem = OMP_CLAUSE_DECL (innerc);
1961 	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1962 	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1963 					    false, GSI_CONTINUE_LINKING);
1964 	      gassign *stmt = gimple_build_assign (tem, t);
1965 	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1966 	    }
1967 	}
1968       return;
1969     }
1970 
1971   tree type = TREE_TYPE (fd->loop.v);
1972   tree tem = create_tmp_reg (type, ".tem");
1973   gassign *stmt = gimple_build_assign (tem, startvar);
1974   gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1975 
1976   for (i = fd->collapse - 1; i >= 0; i--)
1977     {
1978       tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1979       itype = vtype;
1980       if (POINTER_TYPE_P (vtype))
1981 	itype = signed_type_for (vtype);
1982       if (i != 0)
1983 	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1984       else
1985 	t = tem;
1986       t = fold_convert (itype, t);
1987       t = fold_build2 (MULT_EXPR, itype, t,
1988 		       fold_convert (itype, fd->loops[i].step));
1989       if (POINTER_TYPE_P (vtype))
1990 	t = fold_build_pointer_plus (fd->loops[i].n1, t);
1991       else
1992 	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1993       t = force_gimple_operand_gsi (gsi, t,
1994 				    DECL_P (fd->loops[i].v)
1995 				    && TREE_ADDRESSABLE (fd->loops[i].v),
1996 				    NULL_TREE, false,
1997 				    GSI_CONTINUE_LINKING);
1998       stmt = gimple_build_assign (fd->loops[i].v, t);
1999       gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2000       if (i != 0)
2001 	{
2002 	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2003 	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2004 					false, GSI_CONTINUE_LINKING);
2005 	  stmt = gimple_build_assign (tem, t);
2006 	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2007 	}
2008     }
2009 }
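
/* Continuing the collapse(3) example above (illustration only), with
   count2 = 3 and count3 = 5: a thread handed the logical iteration
   V = 37 executes the equivalent of

     T = 37;
     k = 0 + (T % 5) * 1;      k == 2
     T = T / 5;                T == 7
     j = 0 + (T % 3) * 1;      j == 1
     T = T / 3;                T == 2
     i = 0 + T * 1;            i == 2

   recovering (i, j, k) = (2, 1, 2), and indeed 2*15 + 1*5 + 2 == 37.  */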
2010 
2011 /* Helper function for expand_omp_for_*.  Generate code like:
2012     L10:
2013 	V3 += STEP3;
2014 	if (V3 cond3 N32) goto BODY_BB; else goto L11;
2015     L11:
2016 	V3 = N31;
2017 	V2 += STEP2;
2018 	if (V2 cond2 N22) goto BODY_BB; else goto L12;
2019     L12:
2020 	V2 = N21;
2021 	V1 += STEP1;
2022 	goto BODY_BB;  */
2023 
2024 static basic_block
2025 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2026 			     basic_block body_bb)
2027 {
2028   basic_block last_bb, bb, collapse_bb = NULL;
2029   int i;
2030   gimple_stmt_iterator gsi;
2031   edge e;
2032   tree t;
2033   gimple *stmt;
2034 
2035   last_bb = cont_bb;
2036   for (i = fd->collapse - 1; i >= 0; i--)
2037     {
2038       tree vtype = TREE_TYPE (fd->loops[i].v);
2039 
2040       bb = create_empty_bb (last_bb);
2041       add_bb_to_loop (bb, last_bb->loop_father);
2042       gsi = gsi_start_bb (bb);
2043 
2044       if (i < fd->collapse - 1)
2045 	{
2046 	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2047 	  e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2048 
2049 	  t = fd->loops[i + 1].n1;
2050 	  t = force_gimple_operand_gsi (&gsi, t,
2051 					DECL_P (fd->loops[i + 1].v)
2052 					&& TREE_ADDRESSABLE (fd->loops[i
2053 								       + 1].v),
2054 					NULL_TREE, false,
2055 					GSI_CONTINUE_LINKING);
2056 	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2057 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2058 	}
2059       else
2060 	collapse_bb = bb;
2061 
2062       set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2063 
2064       if (POINTER_TYPE_P (vtype))
2065 	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2066       else
2067 	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2068       t = force_gimple_operand_gsi (&gsi, t,
2069 				    DECL_P (fd->loops[i].v)
2070 				    && TREE_ADDRESSABLE (fd->loops[i].v),
2071 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
2072       stmt = gimple_build_assign (fd->loops[i].v, t);
2073       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2074 
2075       if (i > 0)
2076 	{
2077 	  t = fd->loops[i].n2;
2078 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2079 					false, GSI_CONTINUE_LINKING);
2080 	  tree v = fd->loops[i].v;
2081 	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
2082 	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2083 					  false, GSI_CONTINUE_LINKING);
2084 	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2085 	  stmt = gimple_build_cond_empty (t);
2086 	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2087 	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2088 			 expand_omp_regimplify_p, NULL, NULL)
2089 	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2090 			    expand_omp_regimplify_p, NULL, NULL))
2091 	    gimple_regimplify_operands (stmt, &gsi);
2092 	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2093 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2094 	}
2095       else
2096 	make_edge (bb, body_bb, EDGE_FALLTHRU);
2097       last_bb = bb;
2098     }
2099 
2100   return collapse_bb;
2101 }
2102 
2103 /* Expand #pragma omp ordered depend(source).  */
2104 
2105 static void
2106 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2107 			   tree *counts, location_t loc)
2108 {
2109   enum built_in_function source_ix
2110     = fd->iter_type == long_integer_type_node
2111       ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2112   gimple *g
2113     = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2114 			 build_fold_addr_expr (counts[fd->ordered]));
2115   gimple_set_location (g, loc);
2116   gsi_insert_before (gsi, g, GSI_SAME_STMT);
2117 }
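
/* For example (illustration only): in

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         {
           ...
           #pragma omp ordered depend (source)
         }

   the depend(source) directive is replaced by the call built above,
   roughly

     GOMP_doacross_post (&.orditera);

   which passes the address of the .orditera array (set up by
   expand_omp_ordered_source_sink below) holding the current iteration
   vector, so that iterations waiting on it may proceed.  */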
2118 
2119 /* Expand a single depend from #pragma omp ordered depend(sink:...).  */
2120 
2121 static void
2122 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2123 			 tree *counts, tree c, location_t loc)
2124 {
2125   auto_vec<tree, 10> args;
2126   enum built_in_function sink_ix
2127     = fd->iter_type == long_integer_type_node
2128       ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2129   tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2130   int i;
2131   gimple_stmt_iterator gsi2 = *gsi;
2132   bool warned_step = false;
2133 
2134   for (i = 0; i < fd->ordered; i++)
2135     {
2136       tree step = NULL_TREE;
2137       off = TREE_PURPOSE (deps);
2138       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2139 	{
2140 	  step = TREE_OPERAND (off, 1);
2141 	  off = TREE_OPERAND (off, 0);
2142 	}
2143       if (!integer_zerop (off))
2144 	{
2145 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2146 		      || fd->loops[i].cond_code == GT_EXPR);
2147 	  bool forward = fd->loops[i].cond_code == LT_EXPR;
2148 	  if (step)
2149 	    {
2150 	      /* Non-simple Fortran DO loops.  If step is variable,
2151 		 we don't know even the direction at compile time, so
2152 		 we can't warn.  */
2153 	      if (TREE_CODE (step) != INTEGER_CST)
2154 		break;
2155 	      forward = tree_int_cst_sgn (step) != -1;
2156 	    }
2157 	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2158 	    warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2159 				"waiting for lexically later iteration");
2160 	  break;
2161 	}
2162       deps = TREE_CHAIN (deps);
2163     }
2164   /* If all offsets corresponding to the collapsed loops are zero,
2165      this depend clause can be ignored.  FIXME: but there is still a
2166      flush needed.  We need to emit one __sync_synchronize () for it
2167      though (perhaps conditionally)?  Solve this together with the
2168      conservative dependence folding optimization.
2169   if (i >= fd->collapse)
2170     return;  */
2171 
2172   deps = OMP_CLAUSE_DECL (c);
2173   gsi_prev (&gsi2);
2174   edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2175   edge e2 = split_block_after_labels (e1->dest);
2176 
2177   gsi2 = gsi_after_labels (e1->dest);
2178   *gsi = gsi_last_bb (e1->src);
2179   for (i = 0; i < fd->ordered; i++)
2180     {
2181       tree itype = TREE_TYPE (fd->loops[i].v);
2182       tree step = NULL_TREE;
2183       tree orig_off = NULL_TREE;
2184       if (POINTER_TYPE_P (itype))
2185 	itype = sizetype;
2186       if (i)
2187 	deps = TREE_CHAIN (deps);
2188       off = TREE_PURPOSE (deps);
2189       if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2190 	{
2191 	  step = TREE_OPERAND (off, 1);
2192 	  off = TREE_OPERAND (off, 0);
2193 	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2194 		      && integer_onep (fd->loops[i].step)
2195 		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2196 	}
2197       tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2198       if (step)
2199 	{
2200 	  off = fold_convert_loc (loc, itype, off);
2201 	  orig_off = off;
2202 	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2203 	}
2204 
2205       if (integer_zerop (off))
2206 	t = boolean_true_node;
2207       else
2208 	{
2209 	  tree a;
2210 	  tree co = fold_convert_loc (loc, itype, off);
2211 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2212 	    {
2213 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2214 		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2215 	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2216 				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2217 				   co);
2218 	    }
2219 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2220 	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2221 				 fd->loops[i].v, co);
2222 	  else
2223 	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2224 				 fd->loops[i].v, co);
2225 	  if (step)
2226 	    {
2227 	      tree t1, t2;
2228 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2229 		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2230 				      fd->loops[i].n1);
2231 	      else
2232 		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2233 				      fd->loops[i].n2);
2234 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2235 		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2236 				      fd->loops[i].n2);
2237 	      else
2238 		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2239 				      fd->loops[i].n1);
2240 	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2241 				   step, build_int_cst (TREE_TYPE (step), 0));
2242 	      if (TREE_CODE (step) != INTEGER_CST)
2243 		{
2244 		  t1 = unshare_expr (t1);
2245 		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2246 						 false, GSI_CONTINUE_LINKING);
2247 		  t2 = unshare_expr (t2);
2248 		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2249 						 false, GSI_CONTINUE_LINKING);
2250 		}
2251 	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2252 				   t, t2, t1);
2253 	    }
2254 	  else if (fd->loops[i].cond_code == LT_EXPR)
2255 	    {
2256 	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2257 		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2258 				     fd->loops[i].n1);
2259 	      else
2260 		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2261 				     fd->loops[i].n2);
2262 	    }
2263 	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2264 	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2265 				 fd->loops[i].n2);
2266 	  else
2267 	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2268 				 fd->loops[i].n1);
2269 	}
2270       if (cond)
2271 	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2272       else
2273 	cond = t;
2274 
2275       off = fold_convert_loc (loc, itype, off);
2276 
2277       if (step
2278 	  || (fd->loops[i].cond_code == LT_EXPR
2279 	      ? !integer_onep (fd->loops[i].step)
2280 	      : !integer_minus_onep (fd->loops[i].step)))
2281 	{
2282 	  if (step == NULL_TREE
2283 	      && TYPE_UNSIGNED (itype)
2284 	      && fd->loops[i].cond_code == GT_EXPR)
2285 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2286 				 fold_build1_loc (loc, NEGATE_EXPR, itype,
2287 						  s));
2288 	  else
2289 	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2290 				 orig_off ? orig_off : off, s);
2291 	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2292 			       build_int_cst (itype, 0));
2293 	  if (integer_zerop (t) && !warned_step)
2294 	    {
2295 	      warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2296 				  "refers to iteration never in the iteration "
2297 				  "space");
2298 	      warned_step = true;
2299 	    }
2300 	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2301 				  cond, t);
2302 	}
2303 
2304       if (i <= fd->collapse - 1 && fd->collapse > 1)
2305 	t = fd->loop.v;
2306       else if (counts[i])
2307 	t = counts[i];
2308       else
2309 	{
2310 	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2311 			       fd->loops[i].v, fd->loops[i].n1);
2312 	  t = fold_convert_loc (loc, fd->iter_type, t);
2313 	}
2314       if (step)
2315 	/* We have divided off by step already earlier.  */;
2316       else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2317 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2318 			       fold_build1_loc (loc, NEGATE_EXPR, itype,
2319 						s));
2320       else
2321 	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2322       if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2323 	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2324       off = fold_convert_loc (loc, fd->iter_type, off);
2325       if (i <= fd->collapse - 1 && fd->collapse > 1)
2326 	{
2327 	  if (i)
2328 	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2329 				   off);
2330 	  if (i < fd->collapse - 1)
2331 	    {
2332 	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2333 				      counts[i]);
2334 	      continue;
2335 	    }
2336 	}
2337       off = unshare_expr (off);
2338       t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2339       t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2340 				    true, GSI_SAME_STMT);
2341       args.safe_push (t);
2342     }
2343   gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2344   gimple_set_location (g, loc);
2345   gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2346 
2347   cond = unshare_expr (cond);
2348   cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2349 				   GSI_CONTINUE_LINKING);
2350   gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2351   edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2352   e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2353   e1->probability = e3->probability.invert ();
2354   e1->flags = EDGE_TRUE_VALUE;
2355   set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2356 
2357   *gsi = gsi_after_labels (e2->dest);
2358 }
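
/* For example (illustration only): with

     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         {
           #pragma omp ordered depend (sink: i - 1, j)
           ...
         }

   the depend(sink:) clause expands to, roughly,

     if (i - 1 >= 0)
       GOMP_doacross_wait (i - 1, j);

   i.e. the wait is guarded by the COND built above, which checks that the
   offset iteration lies inside the iteration space, and the arguments are
   the iteration numbers normalized to a zero base and unit step (which
   these loops already have).  */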
2359 
2360 /* Expand all #pragma omp ordered depend(source) and
2361    #pragma omp ordered depend(sink:...) constructs in the current
2362    #pragma omp for ordered(n) region.  */
2363 
2364 static void
2365 expand_omp_ordered_source_sink (struct omp_region *region,
2366 				struct omp_for_data *fd, tree *counts,
2367 				basic_block cont_bb)
2368 {
2369   struct omp_region *inner;
2370   int i;
2371   for (i = fd->collapse - 1; i < fd->ordered; i++)
2372     if (i == fd->collapse - 1 && fd->collapse > 1)
2373       counts[i] = NULL_TREE;
2374     else if (i >= fd->collapse && !cont_bb)
2375       counts[i] = build_zero_cst (fd->iter_type);
2376     else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2377 	     && integer_onep (fd->loops[i].step))
2378       counts[i] = NULL_TREE;
2379     else
2380       counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2381   tree atype
2382     = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2383   counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2384   TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2385 
2386   for (inner = region->inner; inner; inner = inner->next)
2387     if (inner->type == GIMPLE_OMP_ORDERED)
2388       {
2389 	gomp_ordered *ord_stmt = inner->ord_stmt;
2390 	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2391 	location_t loc = gimple_location (ord_stmt);
2392 	tree c;
2393 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2394 	     c; c = OMP_CLAUSE_CHAIN (c))
2395 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2396 	    break;
2397 	if (c)
2398 	  expand_omp_ordered_source (&gsi, fd, counts, loc);
2399 	for (c = gimple_omp_ordered_clauses (ord_stmt);
2400 	     c; c = OMP_CLAUSE_CHAIN (c))
2401 	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2402 	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2403 	gsi_remove (&gsi, true);
2404       }
2405 }
2406 
2407 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2408    collapsed.  */
2409 
2410 static basic_block
2411 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2412 			      basic_block cont_bb, basic_block body_bb,
2413 			      basic_block l0_bb, bool ordered_lastprivate)
2414 {
2415   if (fd->ordered == fd->collapse)
2416     return cont_bb;
2417 
2418   if (!cont_bb)
2419     {
2420       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2421       for (int i = fd->collapse; i < fd->ordered; i++)
2422 	{
2423 	  tree type = TREE_TYPE (fd->loops[i].v);
2424 	  tree n1 = fold_convert (type, fd->loops[i].n1);
2425 	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2426 	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2427 			      size_int (i - fd->collapse + 1),
2428 			      NULL_TREE, NULL_TREE);
2429 	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2430 	}
2431       return NULL;
2432     }
2433 
2434   for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2435     {
2436       tree t, type = TREE_TYPE (fd->loops[i].v);
2437       gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2438       expand_omp_build_assign (&gsi, fd->loops[i].v,
2439 			       fold_convert (type, fd->loops[i].n1));
2440       if (counts[i])
2441 	expand_omp_build_assign (&gsi, counts[i],
2442 				 build_zero_cst (fd->iter_type));
2443       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2444 			  size_int (i - fd->collapse + 1),
2445 			  NULL_TREE, NULL_TREE);
2446       expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2447       if (!gsi_end_p (gsi))
2448 	gsi_prev (&gsi);
2449       else
2450 	gsi = gsi_last_bb (body_bb);
2451       edge e1 = split_block (body_bb, gsi_stmt (gsi));
2452       basic_block new_body = e1->dest;
2453       if (body_bb == cont_bb)
2454 	cont_bb = new_body;
2455       edge e2 = NULL;
2456       basic_block new_header;
2457       if (EDGE_COUNT (cont_bb->preds) > 0)
2458 	{
2459 	  gsi = gsi_last_bb (cont_bb);
2460 	  if (POINTER_TYPE_P (type))
2461 	    t = fold_build_pointer_plus (fd->loops[i].v,
2462 					 fold_convert (sizetype,
2463 						       fd->loops[i].step));
2464 	  else
2465 	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2466 			     fold_convert (type, fd->loops[i].step));
2467 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2468 	  if (counts[i])
2469 	    {
2470 	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2471 			       build_int_cst (fd->iter_type, 1));
2472 	      expand_omp_build_assign (&gsi, counts[i], t);
2473 	      t = counts[i];
2474 	    }
2475 	  else
2476 	    {
2477 	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2478 			       fd->loops[i].v, fd->loops[i].n1);
2479 	      t = fold_convert (fd->iter_type, t);
2480 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2481 					    true, GSI_SAME_STMT);
2482 	    }
2483 	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2484 			 size_int (i - fd->collapse + 1),
2485 			 NULL_TREE, NULL_TREE);
2486 	  expand_omp_build_assign (&gsi, aref, t);
2487 	  gsi_prev (&gsi);
2488 	  e2 = split_block (cont_bb, gsi_stmt (gsi));
2489 	  new_header = e2->dest;
2490 	}
2491       else
2492 	new_header = cont_bb;
2493       gsi = gsi_after_labels (new_header);
2494       tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2495 					 true, GSI_SAME_STMT);
2496       tree n2
2497 	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2498 				    true, NULL_TREE, true, GSI_SAME_STMT);
2499       t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2500       gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2501       edge e3 = split_block (new_header, gsi_stmt (gsi));
2502       cont_bb = e3->dest;
2503       remove_edge (e1);
2504       make_edge (body_bb, new_header, EDGE_FALLTHRU);
2505       e3->flags = EDGE_FALSE_VALUE;
2506       e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2507       e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2508       e1->probability = e3->probability.invert ();
2509 
2510       set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2511       set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2512 
2513       if (e2)
2514 	{
2515 	  class loop *loop = alloc_loop ();
2516 	  loop->header = new_header;
2517 	  loop->latch = e2->src;
2518 	  add_loop (loop, l0_bb->loop_father);
2519 	}
2520     }
2521 
2522   /* If there are any lastprivate clauses and it is possible some loops
2523      might have zero iterations, ensure all the decls are initialized,
2524      otherwise we could crash evaluating C++ class iterators with lastprivate
2525      clauses.  */
2526   bool need_inits = false;
2527   for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2528     if (need_inits)
2529       {
2530 	tree type = TREE_TYPE (fd->loops[i].v);
2531 	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2532 	expand_omp_build_assign (&gsi, fd->loops[i].v,
2533 				 fold_convert (type, fd->loops[i].n1));
2534       }
2535     else
2536       {
2537 	tree type = TREE_TYPE (fd->loops[i].v);
2538 	tree this_cond = fold_build2 (fd->loops[i].cond_code,
2539 				      boolean_type_node,
2540 				      fold_convert (type, fd->loops[i].n1),
2541 				      fold_convert (type, fd->loops[i].n2));
2542 	if (!integer_onep (this_cond))
2543 	  need_inits = true;
2544       }
2545 
2546   return cont_bb;
2547 }
2548 
2549 /* A subroutine of expand_omp_for.  Generate code for a parallel
2550    loop with any schedule.  Given parameters:
2551 
2552 	for (V = N1; V cond N2; V += STEP) BODY;
2553 
2554    where COND is "<" or ">", we generate pseudocode
2555 
2556 	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2557 	if (more) goto L0; else goto L3;
2558     L0:
2559 	V = istart0;
2560 	iend = iend0;
2561     L1:
2562 	BODY;
2563 	V += STEP;
2564 	if (V cond iend) goto L1; else goto L2;
2565     L2:
2566 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2567     L3:
2568 
2569     If this is a combined omp parallel loop, instead of the call to
2570     GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2571     If this is gimple_omp_for_combined_p loop, then instead of assigning
2572     V and iend in L0 we assign the first two _looptemp_ clause decls of the
2573     inner GIMPLE_OMP_FOR and V += STEP; and
2574     if (V cond iend) goto L1; else goto L2; are removed.
2575 
2576     For collapsed loops, given parameters:
2577       collapse(3)
2578       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2579 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2580 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2581 	    BODY;
2582 
2583     we generate pseudocode
2584 
2585 	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2586 	if (cond3 is <)
2587 	  adj = STEP3 - 1;
2588 	else
2589 	  adj = STEP3 + 1;
2590 	count3 = (adj + N32 - N31) / STEP3;
2591 	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2592 	if (cond2 is <)
2593 	  adj = STEP2 - 1;
2594 	else
2595 	  adj = STEP2 + 1;
2596 	count2 = (adj + N22 - N21) / STEP2;
2597 	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2598 	if (cond1 is <)
2599 	  adj = STEP1 - 1;
2600 	else
2601 	  adj = STEP1 + 1;
2602 	count1 = (adj + N12 - N11) / STEP1;
2603 	count = count1 * count2 * count3;
2604 	goto Z1;
2605     Z0:
2606 	count = 0;
2607     Z1:
2608 	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2609 	if (more) goto L0; else goto L3;
2610     L0:
2611 	V = istart0;
2612 	T = V;
2613 	V3 = N31 + (T % count3) * STEP3;
2614 	T = T / count3;
2615 	V2 = N21 + (T % count2) * STEP2;
2616 	T = T / count2;
2617 	V1 = N11 + T * STEP1;
2618 	iend = iend0;
2619     L1:
2620 	BODY;
2621 	V += 1;
2622 	if (V < iend) goto L10; else goto L2;
2623     L10:
2624 	V3 += STEP3;
2625 	if (V3 cond3 N32) goto L1; else goto L11;
2626     L11:
2627 	V3 = N31;
2628 	V2 += STEP2;
2629 	if (V2 cond2 N22) goto L1; else goto L12;
2630     L12:
2631 	V2 = N21;
2632 	V1 += STEP1;
2633 	goto L1;
2634     L2:
2635 	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2636     L3:
2637 
2638       */
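
/* As a concrete illustration (not part of the compiler): for

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   START_FN and NEXT_FN are the builtins for GOMP_loop_dynamic_start and
   GOMP_loop_dynamic_next, and the pseudocode above amounts to

     long istart0, iend0;
     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
         for (i = istart0; i < iend0; i++)
           body (i);
       while (GOMP_loop_dynamic_next (&istart0, &iend0));

   with libgomp handing out chunks of four iterations at a time; the exit
   of the region separately calls the appropriate GOMP_loop_end* entry
   point.  */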
2639 
2640 static void
2641 expand_omp_for_generic (struct omp_region *region,
2642 			struct omp_for_data *fd,
2643 			enum built_in_function start_fn,
2644 			enum built_in_function next_fn,
2645 			tree sched_arg,
2646 			gimple *inner_stmt)
2647 {
2648   tree type, istart0, iend0, iend;
2649   tree t, vmain, vback, bias = NULL_TREE;
2650   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2651   basic_block l2_bb = NULL, l3_bb = NULL;
2652   gimple_stmt_iterator gsi;
2653   gassign *assign_stmt;
2654   bool in_combined_parallel = is_combined_parallel (region);
2655   bool broken_loop = region->cont == NULL;
2656   edge e, ne;
2657   tree *counts = NULL;
2658   int i;
2659   bool ordered_lastprivate = false;
2660 
2661   gcc_assert (!broken_loop || !in_combined_parallel);
2662   gcc_assert (fd->iter_type == long_integer_type_node
2663 	      || !in_combined_parallel);
2664 
2665   entry_bb = region->entry;
2666   cont_bb = region->cont;
2667   collapse_bb = NULL;
2668   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2669   gcc_assert (broken_loop
2670 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2671   l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2672   l1_bb = single_succ (l0_bb);
2673   if (!broken_loop)
2674     {
2675       l2_bb = create_empty_bb (cont_bb);
2676       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2677 		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2678 		      == l1_bb));
2679       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2680     }
2681   else
2682     l2_bb = NULL;
2683   l3_bb = BRANCH_EDGE (entry_bb)->dest;
2684   exit_bb = region->exit;
2685 
2686   gsi = gsi_last_nondebug_bb (entry_bb);
2687 
2688   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2689   if (fd->ordered
2690       && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2691 			  OMP_CLAUSE_LASTPRIVATE))
2692     ordered_lastprivate = true;
2693   tree reductions = NULL_TREE;
2694   tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2695   tree memv = NULL_TREE;
2696   if (fd->lastprivate_conditional)
2697     {
2698       tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2699 				OMP_CLAUSE__CONDTEMP_);
2700       if (fd->have_pointer_condtemp)
2701 	condtemp = OMP_CLAUSE_DECL (c);
2702       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2703       cond_var = OMP_CLAUSE_DECL (c);
2704     }
2705   if (sched_arg)
2706     {
2707       if (fd->have_reductemp)
2708 	{
2709 	  tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2710 				    OMP_CLAUSE__REDUCTEMP_);
2711 	  reductions = OMP_CLAUSE_DECL (c);
2712 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2713 	  gimple *g = SSA_NAME_DEF_STMT (reductions);
2714 	  reductions = gimple_assign_rhs1 (g);
2715 	  OMP_CLAUSE_DECL (c) = reductions;
2716 	  entry_bb = gimple_bb (g);
2717 	  edge e = split_block (entry_bb, g);
2718 	  if (region->entry == entry_bb)
2719 	    region->entry = e->dest;
2720 	  gsi = gsi_last_bb (entry_bb);
2721 	}
2722       else
2723 	reductions = null_pointer_node;
2724       if (fd->have_pointer_condtemp)
2725 	{
2726 	  tree type = TREE_TYPE (condtemp);
2727 	  memv = create_tmp_var (type);
2728 	  TREE_ADDRESSABLE (memv) = 1;
2729 	  unsigned HOST_WIDE_INT sz
2730 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2731 	  sz *= fd->lastprivate_conditional;
2732 	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2733 				   false);
2734 	  mem = build_fold_addr_expr (memv);
2735 	}
2736       else
2737 	mem = null_pointer_node;
2738     }
2739   if (fd->collapse > 1 || fd->ordered)
2740     {
2741       int first_zero_iter1 = -1, first_zero_iter2 = -1;
2742       basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2743 
2744       counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2745       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2746 				  zero_iter1_bb, first_zero_iter1,
2747 				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2748 
2749       if (zero_iter1_bb)
2750 	{
2751 	  /* Some counts[i] vars might be uninitialized if
2752 	     some loop has zero iterations.  But the body shouldn't
2753 	     be executed in that case, so just avoid uninit warnings.  */
2754 	  for (i = first_zero_iter1;
2755 	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2756 	    if (SSA_VAR_P (counts[i]))
2757 	      TREE_NO_WARNING (counts[i]) = 1;
2758 	  gsi_prev (&gsi);
2759 	  e = split_block (entry_bb, gsi_stmt (gsi));
2760 	  entry_bb = e->dest;
2761 	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2762 	  gsi = gsi_last_nondebug_bb (entry_bb);
2763 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2764 				   get_immediate_dominator (CDI_DOMINATORS,
2765 							    zero_iter1_bb));
2766 	}
2767       if (zero_iter2_bb)
2768 	{
2769 	  /* Some counts[i] vars might be uninitialized if
2770 	     some loop has zero iterations.  But the body shouldn't
2771 	     be executed in that case, so just avoid uninit warnings.  */
2772 	  for (i = first_zero_iter2; i < fd->ordered; i++)
2773 	    if (SSA_VAR_P (counts[i]))
2774 	      TREE_NO_WARNING (counts[i]) = 1;
2775 	  if (zero_iter1_bb)
2776 	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2777 	  else
2778 	    {
2779 	      gsi_prev (&gsi);
2780 	      e = split_block (entry_bb, gsi_stmt (gsi));
2781 	      entry_bb = e->dest;
2782 	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2783 	      gsi = gsi_last_nondebug_bb (entry_bb);
2784 	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2785 				       get_immediate_dominator
2786 					 (CDI_DOMINATORS, zero_iter2_bb));
2787 	    }
2788 	}
2789       if (fd->collapse == 1)
2790 	{
2791 	  counts[0] = fd->loop.n2;
2792 	  fd->loop = fd->loops[0];
2793 	}
2794     }
2795 
2796   type = TREE_TYPE (fd->loop.v);
2797   istart0 = create_tmp_var (fd->iter_type, ".istart0");
2798   iend0 = create_tmp_var (fd->iter_type, ".iend0");
2799   TREE_ADDRESSABLE (istart0) = 1;
2800   TREE_ADDRESSABLE (iend0) = 1;
2801 
2802   /* See if we need to bias by LLONG_MIN.  */
2803   if (fd->iter_type == long_long_unsigned_type_node
2804       && TREE_CODE (type) == INTEGER_TYPE
2805       && !TYPE_UNSIGNED (type)
2806       && fd->ordered == 0)
2807     {
2808       tree n1, n2;
2809 
2810       if (fd->loop.cond_code == LT_EXPR)
2811 	{
2812 	  n1 = fd->loop.n1;
2813 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2814 	}
2815       else
2816 	{
2817 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2818 	  n2 = fd->loop.n1;
2819 	}
2820       if (TREE_CODE (n1) != INTEGER_CST
2821 	  || TREE_CODE (n2) != INTEGER_CST
2822 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2823 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2824     }
2825 
2826   gimple_stmt_iterator gsif = gsi;
2827   gsi_prev (&gsif);
2828 
2829   tree arr = NULL_TREE;
2830   if (in_combined_parallel)
2831     {
2832       gcc_assert (fd->ordered == 0);
2833       /* In a combined parallel loop, emit a call to
2834 	 GOMP_loop_foo_next.  */
2835       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2836 			   build_fold_addr_expr (istart0),
2837 			   build_fold_addr_expr (iend0));
2838     }
2839   else
2840     {
2841       tree t0, t1, t2, t3, t4;
2842       /* If this is not a combined parallel loop, emit a call to
2843 	 GOMP_loop_foo_start in ENTRY_BB.  */
2844       t4 = build_fold_addr_expr (iend0);
2845       t3 = build_fold_addr_expr (istart0);
2846       if (fd->ordered)
2847 	{
2848 	  t0 = build_int_cst (unsigned_type_node,
2849 			      fd->ordered - fd->collapse + 1);
2850 	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2851 							fd->ordered
2852 							- fd->collapse + 1),
2853 				".omp_counts");
2854 	  DECL_NAMELESS (arr) = 1;
2855 	  TREE_ADDRESSABLE (arr) = 1;
2856 	  TREE_STATIC (arr) = 1;
2857 	  vec<constructor_elt, va_gc> *v;
2858 	  vec_alloc (v, fd->ordered - fd->collapse + 1);
2859 	  int idx;
2860 
2861 	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2862 	    {
2863 	      tree c;
2864 	      if (idx == 0 && fd->collapse > 1)
2865 		c = fd->loop.n2;
2866 	      else
2867 		c = counts[idx + fd->collapse - 1];
2868 	      tree purpose = size_int (idx);
2869 	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2870 	      if (TREE_CODE (c) != INTEGER_CST)
2871 		TREE_STATIC (arr) = 0;
2872 	    }
2873 
2874 	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2875 	  if (!TREE_STATIC (arr))
2876 	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2877 						    void_type_node, arr),
2878 				      true, NULL_TREE, true, GSI_SAME_STMT);
2879 	  t1 = build_fold_addr_expr (arr);
2880 	  t2 = NULL_TREE;
2881 	}
2882       else
2883 	{
2884 	  t2 = fold_convert (fd->iter_type, fd->loop.step);
2885 	  t1 = fd->loop.n2;
2886 	  t0 = fd->loop.n1;
2887 	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
2888 	    {
2889 	      tree innerc
2890 		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2891 				   OMP_CLAUSE__LOOPTEMP_);
2892 	      gcc_assert (innerc);
2893 	      t0 = OMP_CLAUSE_DECL (innerc);
2894 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2895 					OMP_CLAUSE__LOOPTEMP_);
2896 	      gcc_assert (innerc);
2897 	      t1 = OMP_CLAUSE_DECL (innerc);
2898 	    }
2899 	  if (POINTER_TYPE_P (TREE_TYPE (t0))
2900 	      && TYPE_PRECISION (TREE_TYPE (t0))
2901 		 != TYPE_PRECISION (fd->iter_type))
2902 	    {
2903 	      /* Avoid casting pointers to integer of a different size.  */
2904 	      tree itype = signed_type_for (type);
2905 	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2906 	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2907 	    }
2908 	  else
2909 	    {
2910 	      t1 = fold_convert (fd->iter_type, t1);
2911 	      t0 = fold_convert (fd->iter_type, t0);
2912 	    }
2913 	  if (bias)
2914 	    {
2915 	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2916 	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2917 	    }
2918 	}
2919       if (fd->iter_type == long_integer_type_node || fd->ordered)
2920 	{
2921 	  if (fd->chunk_size)
2922 	    {
2923 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2924 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2925 	      if (sched_arg)
2926 		{
2927 		  if (fd->ordered)
2928 		    t = build_call_expr (builtin_decl_explicit (start_fn),
2929 					 8, t0, t1, sched_arg, t, t3, t4,
2930 					 reductions, mem);
2931 		  else
2932 		    t = build_call_expr (builtin_decl_explicit (start_fn),
2933 					 9, t0, t1, t2, sched_arg, t, t3, t4,
2934 					 reductions, mem);
2935 		}
2936 	      else if (fd->ordered)
2937 		t = build_call_expr (builtin_decl_explicit (start_fn),
2938 				     5, t0, t1, t, t3, t4);
2939 	      else
2940 		t = build_call_expr (builtin_decl_explicit (start_fn),
2941 				     6, t0, t1, t2, t, t3, t4);
2942 	    }
2943 	  else if (fd->ordered)
2944 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2945 				 4, t0, t1, t3, t4);
2946 	  else
2947 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2948 				 5, t0, t1, t2, t3, t4);
2949 	}
2950       else
2951 	{
2952 	  tree t5;
2953 	  tree c_bool_type;
2954 	  tree bfn_decl;
2955 
2956 	  /* The GOMP_loop_ull_*start functions have additional boolean
2957 	     argument, true for < loops and false for > loops.
2958 	     In Fortran, the C bool type can be different from
2959 	     boolean_type_node.  */
2960 	  bfn_decl = builtin_decl_explicit (start_fn);
2961 	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2962 	  t5 = build_int_cst (c_bool_type,
2963 			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
2964 	  if (fd->chunk_size)
2965 	    {
2966 	      tree bfn_decl = builtin_decl_explicit (start_fn);
2967 	      t = fold_convert (fd->iter_type, fd->chunk_size);
2968 	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2969 	      if (sched_arg)
2970 		t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2971 				     t, t3, t4, reductions, mem);
2972 	      else
2973 		t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2974 	    }
2975 	  else
2976 	    t = build_call_expr (builtin_decl_explicit (start_fn),
2977 				 6, t5, t0, t1, t2, t3, t4);
2978 	}
2979     }
2980   if (TREE_TYPE (t) != boolean_type_node)
2981     t = fold_build2 (NE_EXPR, boolean_type_node,
2982 		     t, build_int_cst (TREE_TYPE (t), 0));
2983   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2984 				true, GSI_SAME_STMT);
2985   if (arr && !TREE_STATIC (arr))
2986     {
2987       tree clobber = build_clobber (TREE_TYPE (arr));
2988       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2989 			 GSI_SAME_STMT);
2990     }
2991   if (fd->have_pointer_condtemp)
2992     expand_omp_build_assign (&gsi, condtemp, memv, false);
2993   if (fd->have_reductemp)
2994     {
2995       gimple *g = gsi_stmt (gsi);
2996       gsi_remove (&gsi, true);
2997       release_ssa_name (gimple_assign_lhs (g));
2998 
2999       entry_bb = region->entry;
3000       gsi = gsi_last_nondebug_bb (entry_bb);
3001 
3002       gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3003     }
3004   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3005 
3006   /* Remove the GIMPLE_OMP_FOR statement.  */
3007   gsi_remove (&gsi, true);
3008 
3009   if (gsi_end_p (gsif))
3010     gsif = gsi_after_labels (gsi_bb (gsif));
3011   gsi_next (&gsif);
3012 
3013   /* Iteration setup for sequential loop goes in L0_BB.  */
3014   tree startvar = fd->loop.v;
3015   tree endvar = NULL_TREE;
3016 
3017   if (gimple_omp_for_combined_p (fd->for_stmt))
3018     {
3019       gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3020 		  && gimple_omp_for_kind (inner_stmt)
3021 		     == GF_OMP_FOR_KIND_SIMD);
3022       tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3023 				     OMP_CLAUSE__LOOPTEMP_);
3024       gcc_assert (innerc);
3025       startvar = OMP_CLAUSE_DECL (innerc);
3026       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3027 				OMP_CLAUSE__LOOPTEMP_);
3028       gcc_assert (innerc);
3029       endvar = OMP_CLAUSE_DECL (innerc);
3030     }
3031 
3032   gsi = gsi_start_bb (l0_bb);
3033   t = istart0;
3034   if (fd->ordered && fd->collapse == 1)
3035     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3036 		     fold_convert (fd->iter_type, fd->loop.step));
3037   else if (bias)
3038     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3039   if (fd->ordered && fd->collapse == 1)
3040     {
3041       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3042 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3043 			 fd->loop.n1, fold_convert (sizetype, t));
3044       else
3045 	{
3046 	  t = fold_convert (TREE_TYPE (startvar), t);
3047 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3048 			   fd->loop.n1, t);
3049 	}
3050     }
3051   else
3052     {
3053       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3054 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3055       t = fold_convert (TREE_TYPE (startvar), t);
3056     }
3057   t = force_gimple_operand_gsi (&gsi, t,
3058 				DECL_P (startvar)
3059 				&& TREE_ADDRESSABLE (startvar),
3060 				NULL_TREE, false, GSI_CONTINUE_LINKING);
3061   assign_stmt = gimple_build_assign (startvar, t);
3062   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3063   if (cond_var)
3064     {
3065       tree itype = TREE_TYPE (cond_var);
3066       /* For lastprivate(conditional:) itervar, we need some iteration
3067 	 counter that starts at unsigned non-zero and increases.
3068 	 Prefer as few IVs as possible, so if we can use startvar
3069 	 itself, use that, or startvar + constant (those would be
3070 	 incremented with step), and as a last resort use s0 + 1,
3071 	 incremented by 1.  */
3072       if ((fd->ordered && fd->collapse == 1)
3073 	  || bias
3074 	  || POINTER_TYPE_P (type)
3075 	  || TREE_CODE (fd->loop.n1) != INTEGER_CST
3076 	  || fd->loop.cond_code != LT_EXPR)
3077 	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3078 			 build_int_cst (itype, 1));
3079       else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3080 	t = fold_convert (itype, t);
3081       else
3082 	{
3083 	  tree c = fold_convert (itype, fd->loop.n1);
3084 	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3085 	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3086 	}
3087       t = force_gimple_operand_gsi (&gsi, t, false,
3088 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
3089       assign_stmt = gimple_build_assign (cond_var, t);
3090       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3091     }
3092 
3093   t = iend0;
3094   if (fd->ordered && fd->collapse == 1)
3095     t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3096 		     fold_convert (fd->iter_type, fd->loop.step));
3097   else if (bias)
3098     t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3099   if (fd->ordered && fd->collapse == 1)
3100     {
3101       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3102 	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3103 			 fd->loop.n1, fold_convert (sizetype, t));
3104       else
3105 	{
3106 	  t = fold_convert (TREE_TYPE (startvar), t);
3107 	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3108 			   fd->loop.n1, t);
3109 	}
3110     }
3111   else
3112     {
3113       if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3114 	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3115       t = fold_convert (TREE_TYPE (startvar), t);
3116     }
3117   iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3118 				   false, GSI_CONTINUE_LINKING);
3119   if (endvar)
3120     {
3121       assign_stmt = gimple_build_assign (endvar, iend);
3122       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3123       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3124 	assign_stmt = gimple_build_assign (fd->loop.v, iend);
3125       else
3126 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3127       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3128     }
3129   /* Handle linear clause adjustments.  */
3130   tree itercnt = NULL_TREE;
3131   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3132     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3133 	 c; c = OMP_CLAUSE_CHAIN (c))
3134       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3135 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3136 	{
3137 	  tree d = OMP_CLAUSE_DECL (c);
3138 	  bool is_ref = omp_is_reference (d);
3139 	  tree t = d, a, dest;
3140 	  if (is_ref)
3141 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3142 	  tree type = TREE_TYPE (t);
3143 	  if (POINTER_TYPE_P (type))
3144 	    type = sizetype;
3145 	  dest = unshare_expr (t);
3146 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
3147 	  expand_omp_build_assign (&gsif, v, t);
3148 	  if (itercnt == NULL_TREE)
3149 	    {
3150 	      itercnt = startvar;
3151 	      tree n1 = fd->loop.n1;
3152 	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3153 		{
3154 		  itercnt
3155 		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3156 				    itercnt);
3157 		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
3158 		}
3159 	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3160 				     itercnt, n1);
3161 	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3162 				     itercnt, fd->loop.step);
3163 	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3164 						  NULL_TREE, false,
3165 						  GSI_CONTINUE_LINKING);
3166 	    }
3167 	  a = fold_build2 (MULT_EXPR, type,
3168 			   fold_convert (type, itercnt),
3169 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3170 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3171 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3172 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3173 					false, GSI_CONTINUE_LINKING);
3174 	  assign_stmt = gimple_build_assign (dest, t);
3175 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3176 	}
3177   if (fd->collapse > 1)
3178     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3179 
3180   if (fd->ordered)
3181     {
3182       /* Until now, the counts array contained the number of iterations
3183 	 (or a variable containing it) for the ith loop.  From now on, we
3184 	 need those counts only for the collapsed loops, and only for the
3185 	 2nd up to the last collapsed one.  Move them one element earlier;
3186 	 we'll use counts[fd->collapse - 1] for the first source/sink
3187 	 iteration counter and so on, and counts[fd->ordered]
3188 	 as the array holding the current counter values for
3189 	 depend(source).  */
3190       if (fd->collapse > 1)
3191 	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3192       if (broken_loop)
3193 	{
3194 	  int i;
3195 	  for (i = fd->collapse; i < fd->ordered; i++)
3196 	    {
3197 	      tree type = TREE_TYPE (fd->loops[i].v);
3198 	      tree this_cond
3199 		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3200 			       fold_convert (type, fd->loops[i].n1),
3201 			       fold_convert (type, fd->loops[i].n2));
3202 	      if (!integer_onep (this_cond))
3203 		break;
3204 	    }
3205 	  if (i < fd->ordered)
3206 	    {
3207 	      if (entry_bb->loop_father != l0_bb->loop_father)
3208 		{
3209 		  remove_bb_from_loops (l0_bb);
3210 		  add_bb_to_loop (l0_bb, entry_bb->loop_father);
3211 		  gcc_assert (single_succ (l0_bb) == l1_bb);
3212 		}
3213 	      cont_bb
3214 		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3215 	      add_bb_to_loop (cont_bb, l0_bb->loop_father);
3216 	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3217 	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3218 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3219 	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3220 	      make_edge (cont_bb, l1_bb, 0);
3221 	      l2_bb = create_empty_bb (cont_bb);
3222 	      broken_loop = false;
3223 	    }
3224 	}
3225       expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3226       cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3227 					      l0_bb, ordered_lastprivate);
3228       if (counts[fd->collapse - 1])
3229 	{
3230 	  gcc_assert (fd->collapse == 1);
3231 	  gsi = gsi_last_bb (l0_bb);
3232 	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3233 				   istart0, true);
3234 	  if (cont_bb)
3235 	    {
3236 	      gsi = gsi_last_bb (cont_bb);
3237 	      t = fold_build2 (PLUS_EXPR, fd->iter_type,
3238 			       counts[fd->collapse - 1],
3239 			       build_int_cst (fd->iter_type, 1));
3240 	      expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3241 	      tree aref = build4 (ARRAY_REF, fd->iter_type,
3242 				  counts[fd->ordered], size_zero_node,
3243 				  NULL_TREE, NULL_TREE);
3244 	      expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3245 	    }
3246 	  t = counts[fd->collapse - 1];
3247 	}
3248       else if (fd->collapse > 1)
3249 	t = fd->loop.v;
3250       else
3251 	{
3252 	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3253 			   fd->loops[0].v, fd->loops[0].n1);
3254 	  t = fold_convert (fd->iter_type, t);
3255 	}
3256       gsi = gsi_last_bb (l0_bb);
3257       tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3258 			  size_zero_node, NULL_TREE, NULL_TREE);
3259       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3260 				    false, GSI_CONTINUE_LINKING);
3261       expand_omp_build_assign (&gsi, aref, t, true);
3262     }
3263 
3264   if (!broken_loop)
3265     {
3266       /* Code to control the increment and predicate for the sequential
3267 	 loop goes in the CONT_BB.  */
3268       gsi = gsi_last_nondebug_bb (cont_bb);
3269       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3270       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3271       vmain = gimple_omp_continue_control_use (cont_stmt);
3272       vback = gimple_omp_continue_control_def (cont_stmt);
3273 
3274       if (cond_var)
3275 	{
3276 	  tree itype = TREE_TYPE (cond_var);
3277 	  tree t2;
3278 	  if ((fd->ordered && fd->collapse == 1)
3279 	       || bias
3280 	       || POINTER_TYPE_P (type)
3281 	       || TREE_CODE (fd->loop.n1) != INTEGER_CST
3282 	       || fd->loop.cond_code != LT_EXPR)
3283 	    t2 = build_int_cst (itype, 1);
3284 	  else
3285 	    t2 = fold_convert (itype, fd->loop.step);
3286 	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3287 	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
3288 					 NULL_TREE, true, GSI_SAME_STMT);
3289 	  assign_stmt = gimple_build_assign (cond_var, t2);
3290 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3291 	}
3292 
3293       if (!gimple_omp_for_combined_p (fd->for_stmt))
3294 	{
3295 	  if (POINTER_TYPE_P (type))
3296 	    t = fold_build_pointer_plus (vmain, fd->loop.step);
3297 	  else
3298 	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3299 	  t = force_gimple_operand_gsi (&gsi, t,
3300 					DECL_P (vback)
3301 					&& TREE_ADDRESSABLE (vback),
3302 					NULL_TREE, true, GSI_SAME_STMT);
3303 	  assign_stmt = gimple_build_assign (vback, t);
3304 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3305 
3306 	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3307 	    {
3308 	      tree tem;
3309 	      if (fd->collapse > 1)
3310 		tem = fd->loop.v;
3311 	      else
3312 		{
3313 		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3314 				     fd->loops[0].v, fd->loops[0].n1);
3315 		  tem = fold_convert (fd->iter_type, tem);
3316 		}
3317 	      tree aref = build4 (ARRAY_REF, fd->iter_type,
3318 				  counts[fd->ordered], size_zero_node,
3319 				  NULL_TREE, NULL_TREE);
3320 	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3321 					      true, GSI_SAME_STMT);
3322 	      expand_omp_build_assign (&gsi, aref, tem);
3323 	    }
3324 
3325 	  t = build2 (fd->loop.cond_code, boolean_type_node,
3326 		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3327 		      iend);
3328 	  gcond *cond_stmt = gimple_build_cond_empty (t);
3329 	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3330 	}
3331 
3332       /* Remove GIMPLE_OMP_CONTINUE.  */
3333       gsi_remove (&gsi, true);
3334 
3335       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3336 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3337 
3338       /* Emit code to get the next parallel iteration in L2_BB.  */
3339       gsi = gsi_start_bb (l2_bb);
3340 
3341       t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3342 			   build_fold_addr_expr (istart0),
3343 			   build_fold_addr_expr (iend0));
3344       t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3345 				    false, GSI_CONTINUE_LINKING);
3346       if (TREE_TYPE (t) != boolean_type_node)
3347 	t = fold_build2 (NE_EXPR, boolean_type_node,
3348 			 t, build_int_cst (TREE_TYPE (t), 0));
3349       gcond *cond_stmt = gimple_build_cond_empty (t);
3350       gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3351     }
3352 
3353   /* Add the loop cleanup function.  */
3354   gsi = gsi_last_nondebug_bb (exit_bb);
3355   if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3356     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3357   else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3358     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3359   else
3360     t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3361   gcall *call_stmt = gimple_build_call (t, 0);
3362   if (fd->ordered)
3363     {
3364       tree arr = counts[fd->ordered];
3365       tree clobber = build_clobber (TREE_TYPE (arr));
3366       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3367 			GSI_SAME_STMT);
3368     }
3369   if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3370     {
3371       gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3372       if (fd->have_reductemp)
3373 	{
3374 	  gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3375 					   gimple_call_lhs (call_stmt));
3376 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3377 	}
3378     }
3379   gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3380   gsi_remove (&gsi, true);
3381 
3382   /* Connect the new blocks.  */
3383   find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3384   find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3385 
3386   if (!broken_loop)
3387     {
3388       gimple_seq phis;
3389 
3390       e = find_edge (cont_bb, l3_bb);
3391       ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3392 
3393       phis = phi_nodes (l3_bb);
3394       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3395 	{
3396 	  gimple *phi = gsi_stmt (gsi);
3397 	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3398 		   PHI_ARG_DEF_FROM_EDGE (phi, e));
3399 	}
3400       remove_edge (e);
3401 
3402       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3403       e = find_edge (cont_bb, l1_bb);
3404       if (e == NULL)
3405 	{
3406 	  e = BRANCH_EDGE (cont_bb);
3407 	  gcc_assert (single_succ (e->dest) == l1_bb);
3408 	}
3409       if (gimple_omp_for_combined_p (fd->for_stmt))
3410 	{
3411 	  remove_edge (e);
3412 	  e = NULL;
3413 	}
3414       else if (fd->collapse > 1)
3415 	{
3416 	  remove_edge (e);
3417 	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3418 	}
3419       else
3420 	e->flags = EDGE_TRUE_VALUE;
3421       if (e)
3422 	{
3423 	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3424 	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3425 	}
3426       else
3427 	{
3428 	  e = find_edge (cont_bb, l2_bb);
3429 	  e->flags = EDGE_FALLTHRU;
3430 	}
3431       make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3432 
3433       if (gimple_in_ssa_p (cfun))
3434 	{
3435 	  /* Add phis to the outer loop that connect to the phis in the inner,
3436 	     original loop, and move the loop entry value of the inner phi to
3437 	     the loop entry value of the outer phi.  */
3438 	  gphi_iterator psi;
3439 	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3440 	    {
3441 	      location_t locus;
3442 	      gphi *nphi;
3443 	      gphi *exit_phi = psi.phi ();
3444 
3445 	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
3446 		continue;
3447 
3448 	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3449 	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3450 
3451 	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3452 	      edge latch_to_l1 = find_edge (latch, l1_bb);
3453 	      gphi *inner_phi
3454 		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3455 
3456 	      tree t = gimple_phi_result (exit_phi);
3457 	      tree new_res = copy_ssa_name (t, NULL);
3458 	      nphi = create_phi_node (new_res, l0_bb);
3459 
3460 	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3461 	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3462 	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3463 	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3464 	      add_phi_arg (nphi, t, entry_to_l0, locus);
3465 
3466 	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3467 	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3468 
3469 	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3470 	    }
3471 	}
3472 
3473       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3474 			       recompute_dominator (CDI_DOMINATORS, l2_bb));
3475       set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3476 			       recompute_dominator (CDI_DOMINATORS, l3_bb));
3477       set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3478 			       recompute_dominator (CDI_DOMINATORS, l0_bb));
3479       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3480 			       recompute_dominator (CDI_DOMINATORS, l1_bb));
3481 
3482       /* We enter expand_omp_for_generic with a loop.  This original loop may
3483 	 have its own loop struct, or it may be part of an outer loop struct
3484 	 (which may be the fake loop).  */
3485       class loop *outer_loop = entry_bb->loop_father;
3486       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3487 
3488       add_bb_to_loop (l2_bb, outer_loop);
3489 
3490       /* We've added a new loop around the original loop.  Allocate the
3491 	 corresponding loop struct.  */
3492       class loop *new_loop = alloc_loop ();
3493       new_loop->header = l0_bb;
3494       new_loop->latch = l2_bb;
3495       add_loop (new_loop, outer_loop);
3496 
3497       /* Allocate a loop structure for the original loop unless we already
3498 	 had one.  */
3499       if (!orig_loop_has_loop_struct
3500 	  && !gimple_omp_for_combined_p (fd->for_stmt))
3501 	{
3502 	  class loop *orig_loop = alloc_loop ();
3503 	  orig_loop->header = l1_bb;
3504 	  /* The loop may have multiple latches.  */
3505 	  add_loop (orig_loop, new_loop);
3506 	}
3507     }
3508 }
3509 
3510 /* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
3511    compute the needed allocation size: for the team allocations if !ALLOC,
3512    for the thread allocation if ALLOC.  SZ is the initial size needed for
3513    other purposes, ALLOC_ALIGN the guaranteed alignment of the allocation
3514    in bytes, and CNT the number of elements of each array: for !ALLOC this
3515    is omp_get_num_threads (), for ALLOC the number of iterations handled
3516    by the current thread.  If PTR is non-NULL, it is the start of the
3517    allocation, and this routine assigns to OMP_CLAUSE_DECL (c) of the
3518    _scantemp_ clauses pointers to the corresponding arrays.  */
3519 
3520 static tree
3521 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
3522 			   unsigned HOST_WIDE_INT alloc_align, tree cnt,
3523 			   gimple_stmt_iterator *gsi, bool alloc)
3524 {
3525   tree eltsz = NULL_TREE;
3526   unsigned HOST_WIDE_INT preval = 0;
3527   if (ptr && sz)
3528     ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3529 		       ptr, size_int (sz));
3530   for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3531     if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3532 	&& !OMP_CLAUSE__SCANTEMP__CONTROL (c)
3533 	&& (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
3534       {
3535 	tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3536 	unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
3537 	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3538 	  {
3539 	    unsigned HOST_WIDE_INT szl
3540 	      = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
3541 	    szl = least_bit_hwi (szl);
3542 	    if (szl)
3543 	      al = MIN (al, szl);
3544 	  }
3545 	if (ptr == NULL_TREE)
3546 	  {
3547 	    if (eltsz == NULL_TREE)
3548 	      eltsz = TYPE_SIZE_UNIT (pointee_type);
3549 	    else
3550 	      eltsz = size_binop (PLUS_EXPR, eltsz,
3551 				  TYPE_SIZE_UNIT (pointee_type));
3552 	  }
3553 	if (preval == 0 && al <= alloc_align)
3554 	  {
3555 	    unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
3556 	    sz += diff;
3557 	    if (diff && ptr)
3558 	      ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3559 				 ptr, size_int (diff));
3560 	  }
3561 	else if (al > preval)
3562 	  {
3563 	    if (ptr)
3564 	      {
3565 		ptr = fold_convert (pointer_sized_int_node, ptr);
3566 		ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
3567 				   build_int_cst (pointer_sized_int_node,
3568 						  al - 1));
3569 		ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
3570 				   build_int_cst (pointer_sized_int_node,
3571 						  -(HOST_WIDE_INT) al));
3572 		ptr = fold_convert (ptr_type_node, ptr);
3573 	      }
3574 	    else
3575 	      sz += al - 1;
3576 	  }
3577 	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3578 	  preval = al;
3579 	else
3580 	  preval = 1;
3581 	if (ptr)
3582 	  {
3583 	    expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
3584 	    ptr = OMP_CLAUSE_DECL (c);
3585 	    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
3586 			       size_binop (MULT_EXPR, cnt,
3587 					   TYPE_SIZE_UNIT (pointee_type)));
3588 	  }
3589       }
3590 
3591   if (ptr == NULL_TREE)
3592     {
3593       eltsz = size_binop (MULT_EXPR, eltsz, cnt);
3594       if (sz)
3595 	eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
3596       return eltsz;
3597     }
3598   else
3599     return ptr;
3600 }
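
/* Illustration only (the clause layout below is assumed, not taken from the
   pass): with two non-control _scantemp_ clauses whose pointee types are 8
   and 4 bytes wide, CNT elements each, SZ == 16 and an allocation guaranteed
   to be at least 8-byte aligned, the sizing call (PTR == NULL) returns
   16 + CNT * (8 + 4), while the assigning call (PTR != NULL) hands the first
   clause PTR + 16 and the second PTR + 16 + CNT * 8; extra padding is only
   inserted when an array needs stricter alignment than the bytes laid out so
   far provide.  */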
3601 
3602 /* A subroutine of expand_omp_for.  Generate code for a parallel
3603    loop with static schedule and no specified chunk size.  Given
3604    parameters:
3605 
3606 	for (V = N1; V cond N2; V += STEP) BODY;
3607 
3608    where COND is "<" or ">", we generate pseudocode
3609 
3610 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3611 	if (cond is <)
3612 	  adj = STEP - 1;
3613 	else
3614 	  adj = STEP + 1;
3615 	if ((__typeof (V)) -1 > 0 && cond is >)
3616 	  n = -(adj + N2 - N1) / -STEP;
3617 	else
3618 	  n = (adj + N2 - N1) / STEP;
3619 	q = n / nthreads;
3620 	tt = n % nthreads;
3621 	if (threadid < tt) goto L3; else goto L4;
3622     L3:
3623 	tt = 0;
3624 	q = q + 1;
3625     L4:
3626 	s0 = q * threadid + tt;
3627 	e0 = s0 + q;
3628 	V = s0 * STEP + N1;
3629 	if (s0 >= e0) goto L2; else goto L0;
3630     L0:
3631 	e = e0 * STEP + N1;
3632     L1:
3633 	BODY;
3634 	V += STEP;
3635 	if (V cond e) goto L1;
3636     L2:
3637 */
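
/* Illustration only (the numbers below are assumed, not computed by this
   pass): with n = 10 and nthreads = 4 the partitioning above yields q = 2
   and tt = 2, so threads 0 and 1 execute three iterations each ([0,3) and
   [3,6)) while threads 2 and 3 execute two each ([6,8) and [8,10)).  A
   minimal stand-alone C sketch of the same bound computation (the helper
   name is made up for the example):

	static void
	static_nochunk_bounds (long n, long nthreads, long threadid,
			       long *s0, long *e0)
	{
	  long q = n / nthreads;
	  long tt = n % nthreads;
	  if (threadid < tt)
	    {
	      tt = 0;
	      q++;
	    }
	  *s0 = q * threadid + tt;
	  *e0 = *s0 + q;
	}  */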
3638 
3639 static void
3640 expand_omp_for_static_nochunk (struct omp_region *region,
3641 			       struct omp_for_data *fd,
3642 			       gimple *inner_stmt)
3643 {
3644   tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
3645   tree type, itype, vmain, vback;
3646   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3647   basic_block body_bb, cont_bb, collapse_bb = NULL;
3648   basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
3649   basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
3650   gimple_stmt_iterator gsi, gsip;
3651   edge ep;
3652   bool broken_loop = region->cont == NULL;
3653   tree *counts = NULL;
3654   tree n1, n2, step;
3655   tree reductions = NULL_TREE;
3656   tree cond_var = NULL_TREE, condtemp = NULL_TREE;
3657 
3658   itype = type = TREE_TYPE (fd->loop.v);
3659   if (POINTER_TYPE_P (type))
3660     itype = signed_type_for (type);
3661 
3662   entry_bb = region->entry;
3663   cont_bb = region->cont;
3664   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3665   fin_bb = BRANCH_EDGE (entry_bb)->dest;
3666   gcc_assert (broken_loop
3667 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3668   seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3669   body_bb = single_succ (seq_start_bb);
3670   if (!broken_loop)
3671     {
3672       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3673 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3674       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3675     }
3676   exit_bb = region->exit;
3677 
3678   /* Iteration space partitioning goes in ENTRY_BB.  */
3679   gsi = gsi_last_nondebug_bb (entry_bb);
3680   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3681   gsip = gsi;
3682   gsi_prev (&gsip);
3683 
3684   if (fd->collapse > 1)
3685     {
3686       int first_zero_iter = -1, dummy = -1;
3687       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3688 
3689       counts = XALLOCAVEC (tree, fd->collapse);
3690       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3691 				  fin_bb, first_zero_iter,
3692 				  dummy_bb, dummy, l2_dom_bb);
3693       t = NULL_TREE;
3694     }
3695   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3696     t = integer_one_node;
3697   else
3698     t = fold_binary (fd->loop.cond_code, boolean_type_node,
3699 		     fold_convert (type, fd->loop.n1),
3700 		     fold_convert (type, fd->loop.n2));
3701   if (fd->collapse == 1
3702       && TYPE_UNSIGNED (type)
3703       && (t == NULL_TREE || !integer_onep (t)))
3704     {
3705       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3706       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3707 				     true, GSI_SAME_STMT);
3708       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3709       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3710 				     true, GSI_SAME_STMT);
3711       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3712 					    NULL_TREE, NULL_TREE);
3713       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3714       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3715 		     expand_omp_regimplify_p, NULL, NULL)
3716 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3717 			expand_omp_regimplify_p, NULL, NULL))
3718 	{
3719 	  gsi = gsi_for_stmt (cond_stmt);
3720 	  gimple_regimplify_operands (cond_stmt, &gsi);
3721 	}
3722       ep = split_block (entry_bb, cond_stmt);
3723       ep->flags = EDGE_TRUE_VALUE;
3724       entry_bb = ep->dest;
3725       ep->probability = profile_probability::very_likely ();
3726       ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3727       ep->probability = profile_probability::very_unlikely ();
3728       if (gimple_in_ssa_p (cfun))
3729 	{
3730 	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3731 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3732 	       !gsi_end_p (gpi); gsi_next (&gpi))
3733 	    {
3734 	      gphi *phi = gpi.phi ();
3735 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3736 			   ep, UNKNOWN_LOCATION);
3737 	    }
3738 	}
3739       gsi = gsi_last_bb (entry_bb);
3740     }
3741 
3742   if (fd->lastprivate_conditional)
3743     {
3744       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3745       tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3746       if (fd->have_pointer_condtemp)
3747 	condtemp = OMP_CLAUSE_DECL (c);
3748       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3749       cond_var = OMP_CLAUSE_DECL (c);
3750     }
3751   if (fd->have_reductemp
3752       /* For scan, we don't want to reinitialize condtemp before the
3753 	 second loop.  */
3754       || (fd->have_pointer_condtemp && !fd->have_scantemp)
3755       || fd->have_nonctrl_scantemp)
3756     {
3757       tree t1 = build_int_cst (long_integer_type_node, 0);
3758       tree t2 = build_int_cst (long_integer_type_node, 1);
3759       tree t3 = build_int_cstu (long_integer_type_node,
3760 				(HOST_WIDE_INT_1U << 31) + 1);
3761       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3762       gimple_stmt_iterator gsi2 = gsi_none ();
3763       gimple *g = NULL;
3764       tree mem = null_pointer_node, memv = NULL_TREE;
3765       unsigned HOST_WIDE_INT condtemp_sz = 0;
3766       unsigned HOST_WIDE_INT alloc_align = 0;
3767       if (fd->have_reductemp)
3768 	{
3769 	  gcc_assert (!fd->have_nonctrl_scantemp);
3770 	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3771 	  reductions = OMP_CLAUSE_DECL (c);
3772 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3773 	  g = SSA_NAME_DEF_STMT (reductions);
3774 	  reductions = gimple_assign_rhs1 (g);
3775 	  OMP_CLAUSE_DECL (c) = reductions;
3776 	  gsi2 = gsi_for_stmt (g);
3777 	}
3778       else
3779 	{
3780 	  if (gsi_end_p (gsip))
3781 	    gsi2 = gsi_after_labels (region->entry);
3782 	  else
3783 	    gsi2 = gsip;
3784 	  reductions = null_pointer_node;
3785 	}
3786       if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
3787 	{
3788 	  tree type;
3789 	  if (fd->have_pointer_condtemp)
3790 	    type = TREE_TYPE (condtemp);
3791 	  else
3792 	    type = ptr_type_node;
3793 	  memv = create_tmp_var (type);
3794 	  TREE_ADDRESSABLE (memv) = 1;
3795 	  unsigned HOST_WIDE_INT sz = 0;
3796 	  tree size = NULL_TREE;
3797 	  if (fd->have_pointer_condtemp)
3798 	    {
3799 	      sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3800 	      sz *= fd->lastprivate_conditional;
3801 	      condtemp_sz = sz;
3802 	    }
3803 	  if (fd->have_nonctrl_scantemp)
3804 	    {
3805 	      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3806 	      gimple *g = gimple_build_call (nthreads, 0);
3807 	      nthreads = create_tmp_var (integer_type_node);
3808 	      gimple_call_set_lhs (g, nthreads);
3809 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3810 	      nthreads = fold_convert (sizetype, nthreads);
3811 	      alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
3812 	      size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
3813 						alloc_align, nthreads, NULL,
3814 						false);
3815 	      size = fold_convert (type, size);
3816 	    }
3817 	  else
3818 	    size = build_int_cst (type, sz);
3819 	  expand_omp_build_assign (&gsi2, memv, size, false);
3820 	  mem = build_fold_addr_expr (memv);
3821 	}
3822       tree t
3823 	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3824 			   9, t1, t2, t2, t3, t1, null_pointer_node,
3825 			   null_pointer_node, reductions, mem);
3826       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3827 				true, GSI_SAME_STMT);
3828       if (fd->have_pointer_condtemp)
3829 	expand_omp_build_assign (&gsi2, condtemp, memv, false);
3830       if (fd->have_nonctrl_scantemp)
3831 	{
3832 	  tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
3833 	  expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
3834 				     alloc_align, nthreads, &gsi2, false);
3835 	}
3836       if (fd->have_reductemp)
3837 	{
3838 	  gsi_remove (&gsi2, true);
3839 	  release_ssa_name (gimple_assign_lhs (g));
3840 	}
3841     }
3842   switch (gimple_omp_for_kind (fd->for_stmt))
3843     {
3844     case GF_OMP_FOR_KIND_FOR:
3845       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3846       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3847       break;
3848     case GF_OMP_FOR_KIND_DISTRIBUTE:
3849       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3850       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3851       break;
3852     default:
3853       gcc_unreachable ();
3854     }
3855   nthreads = build_call_expr (nthreads, 0);
3856   nthreads = fold_convert (itype, nthreads);
3857   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3858 				       true, GSI_SAME_STMT);
3859   threadid = build_call_expr (threadid, 0);
3860   threadid = fold_convert (itype, threadid);
3861   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3862 				       true, GSI_SAME_STMT);
3863 
3864   n1 = fd->loop.n1;
3865   n2 = fd->loop.n2;
3866   step = fd->loop.step;
3867   if (gimple_omp_for_combined_into_p (fd->for_stmt))
3868     {
3869       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3870 				     OMP_CLAUSE__LOOPTEMP_);
3871       gcc_assert (innerc);
3872       n1 = OMP_CLAUSE_DECL (innerc);
3873       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3874 				OMP_CLAUSE__LOOPTEMP_);
3875       gcc_assert (innerc);
3876       n2 = OMP_CLAUSE_DECL (innerc);
3877     }
3878   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3879 				 true, NULL_TREE, true, GSI_SAME_STMT);
3880   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3881 				 true, NULL_TREE, true, GSI_SAME_STMT);
3882   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3883 				   true, NULL_TREE, true, GSI_SAME_STMT);
3884 
3885   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3886   t = fold_build2 (PLUS_EXPR, itype, step, t);
3887   t = fold_build2 (PLUS_EXPR, itype, t, n2);
3888   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3889   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3890     t = fold_build2 (TRUNC_DIV_EXPR, itype,
3891 		     fold_build1 (NEGATE_EXPR, itype, t),
3892 		     fold_build1 (NEGATE_EXPR, itype, step));
3893   else
3894     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3895   t = fold_convert (itype, t);
3896   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3897 
3898   q = create_tmp_reg (itype, "q");
3899   t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3900   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3901   gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3902 
3903   tt = create_tmp_reg (itype, "tt");
3904   t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3905   t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3906   gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3907 
3908   t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3909   gcond *cond_stmt = gimple_build_cond_empty (t);
3910   gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3911 
3912   second_bb = split_block (entry_bb, cond_stmt)->dest;
3913   gsi = gsi_last_nondebug_bb (second_bb);
3914   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3915 
3916   gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3917 		     GSI_SAME_STMT);
3918   gassign *assign_stmt
3919     = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3920   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3921 
3922   third_bb = split_block (second_bb, assign_stmt)->dest;
3923   gsi = gsi_last_nondebug_bb (third_bb);
3924   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3925 
3926   if (fd->have_nonctrl_scantemp)
3927     {
3928       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3929       tree controlp = NULL_TREE, controlb = NULL_TREE;
3930       for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3931 	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3932 	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
3933 	  {
3934 	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
3935 	      controlb = OMP_CLAUSE_DECL (c);
3936 	    else
3937 	      controlp = OMP_CLAUSE_DECL (c);
3938 	    if (controlb && controlp)
3939 	      break;
3940 	  }
3941       gcc_assert (controlp && controlb);
3942       tree cnt = create_tmp_var (sizetype);
3943       gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
3944       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3945       unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
3946       tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
3947 					   alloc_align, cnt, NULL, true);
3948       tree size = create_tmp_var (sizetype);
3949       expand_omp_build_assign (&gsi, size, sz, false);
3950       tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
3951 			      size, size_int (16384));
3952       expand_omp_build_assign (&gsi, controlb, cmp);
3953       g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
3954 			     NULL_TREE, NULL_TREE);
3955       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3956       fourth_bb = split_block (third_bb, g)->dest;
3957       gsi = gsi_last_nondebug_bb (fourth_bb);
3958       /* FIXME: Once we have allocators, this should use the allocator.  */
3959       g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
3960       gimple_call_set_lhs (g, controlp);
3961       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3962       expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
3963 				 &gsi, true);
3964       gsi_prev (&gsi);
3965       g = gsi_stmt (gsi);
3966       fifth_bb = split_block (fourth_bb, g)->dest;
3967       gsi = gsi_last_nondebug_bb (fifth_bb);
3968 
3969       g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
3970       gimple_call_set_lhs (g, controlp);
3971       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3972       tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
3973       for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3974 	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3975 	    && OMP_CLAUSE__SCANTEMP__ALLOC (c))
3976 	  {
3977 	    tree tmp = create_tmp_var (sizetype);
3978 	    tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3979 	    g = gimple_build_assign (tmp, MULT_EXPR, cnt,
3980 				     TYPE_SIZE_UNIT (pointee_type));
3981 	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3982 	    g = gimple_build_call (alloca_decl, 2, tmp,
3983 				   size_int (TYPE_ALIGN (pointee_type)));
3984 	    gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
3985 	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3986 	  }
3987 
3988       sixth_bb = split_block (fifth_bb, g)->dest;
3989       gsi = gsi_last_nondebug_bb (sixth_bb);
3990     }
3991 
3992   t = build2 (MULT_EXPR, itype, q, threadid);
3993   t = build2 (PLUS_EXPR, itype, t, tt);
3994   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3995 
3996   t = fold_build2 (PLUS_EXPR, itype, s0, q);
3997   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3998 
3999   t = build2 (GE_EXPR, boolean_type_node, s0, e0);
4000   gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4001 
4002   /* Remove the GIMPLE_OMP_FOR statement.  */
4003   gsi_remove (&gsi, true);
4004 
4005   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
4006   gsi = gsi_start_bb (seq_start_bb);
4007 
4008   tree startvar = fd->loop.v;
4009   tree endvar = NULL_TREE;
4010 
4011   if (gimple_omp_for_combined_p (fd->for_stmt))
4012     {
4013       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4014 		     ? gimple_omp_parallel_clauses (inner_stmt)
4015 		     : gimple_omp_for_clauses (inner_stmt);
4016       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4017       gcc_assert (innerc);
4018       startvar = OMP_CLAUSE_DECL (innerc);
4019       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4020 				OMP_CLAUSE__LOOPTEMP_);
4021       gcc_assert (innerc);
4022       endvar = OMP_CLAUSE_DECL (innerc);
4023       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4024 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4025 	{
4026 	  int i;
4027 	  for (i = 1; i < fd->collapse; i++)
4028 	    {
4029 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4030 					OMP_CLAUSE__LOOPTEMP_);
4031 	      gcc_assert (innerc);
4032 	    }
4033 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4034 				    OMP_CLAUSE__LOOPTEMP_);
4035 	  if (innerc)
4036 	    {
4037 	      /* If needed (distribute parallel for with lastprivate),
4038 		 propagate down the total number of iterations.  */
4039 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4040 				     fd->loop.n2);
4041 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4042 					    GSI_CONTINUE_LINKING);
4043 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4044 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4045 	    }
4046 	}
4047     }
4048   t = fold_convert (itype, s0);
4049   t = fold_build2 (MULT_EXPR, itype, t, step);
4050   if (POINTER_TYPE_P (type))
4051     {
4052       t = fold_build_pointer_plus (n1, t);
4053       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4054 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4055 	t = fold_convert (signed_type_for (type), t);
4056     }
4057   else
4058     t = fold_build2 (PLUS_EXPR, type, t, n1);
4059   t = fold_convert (TREE_TYPE (startvar), t);
4060   t = force_gimple_operand_gsi (&gsi, t,
4061 				DECL_P (startvar)
4062 				&& TREE_ADDRESSABLE (startvar),
4063 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4064   assign_stmt = gimple_build_assign (startvar, t);
4065   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4066   if (cond_var)
4067     {
4068       tree itype = TREE_TYPE (cond_var);
4069       /* For lastprivate(conditional:) itervar, we need some iteration
4070 	 counter that starts at unsigned non-zero and increases.
4071 	 Prefer as few IVs as possible, so if we can use startvar
4072 	 itself, use that, or startvar + constant (those would be
4073 	 incremented with step), and as a last resort use s0 + 1,
4074 	 incremented by 1.  */
4075       if (POINTER_TYPE_P (type)
4076 	  || TREE_CODE (n1) != INTEGER_CST
4077 	  || fd->loop.cond_code != LT_EXPR)
4078 	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4079 			 build_int_cst (itype, 1));
4080       else if (tree_int_cst_sgn (n1) == 1)
4081 	t = fold_convert (itype, t);
4082       else
4083 	{
4084 	  tree c = fold_convert (itype, n1);
4085 	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4086 	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4087 	}
4088       t = force_gimple_operand_gsi (&gsi, t, false,
4089 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
4090       assign_stmt = gimple_build_assign (cond_var, t);
4091       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4092     }
4093 
4094   t = fold_convert (itype, e0);
4095   t = fold_build2 (MULT_EXPR, itype, t, step);
4096   if (POINTER_TYPE_P (type))
4097     {
4098       t = fold_build_pointer_plus (n1, t);
4099       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4100 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4101 	t = fold_convert (signed_type_for (type), t);
4102     }
4103   else
4104     t = fold_build2 (PLUS_EXPR, type, t, n1);
4105   t = fold_convert (TREE_TYPE (startvar), t);
4106   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4107 				false, GSI_CONTINUE_LINKING);
4108   if (endvar)
4109     {
4110       assign_stmt = gimple_build_assign (endvar, e);
4111       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4112       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4113 	assign_stmt = gimple_build_assign (fd->loop.v, e);
4114       else
4115 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4116       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4117     }
4118   /* Handle linear clause adjustments.  */
4119   tree itercnt = NULL_TREE;
4120   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4121     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4122 	 c; c = OMP_CLAUSE_CHAIN (c))
4123       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4124 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4125 	{
4126 	  tree d = OMP_CLAUSE_DECL (c);
4127 	  bool is_ref = omp_is_reference (d);
4128 	  tree t = d, a, dest;
4129 	  if (is_ref)
4130 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4131 	  if (itercnt == NULL_TREE)
4132 	    {
4133 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4134 		{
4135 		  itercnt = fold_build2 (MINUS_EXPR, itype,
4136 					 fold_convert (itype, n1),
4137 					 fold_convert (itype, fd->loop.n1));
4138 		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
4139 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
4140 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4141 						      NULL_TREE, false,
4142 						      GSI_CONTINUE_LINKING);
4143 		}
4144 	      else
4145 		itercnt = s0;
4146 	    }
4147 	  tree type = TREE_TYPE (t);
4148 	  if (POINTER_TYPE_P (type))
4149 	    type = sizetype;
4150 	  a = fold_build2 (MULT_EXPR, type,
4151 			   fold_convert (type, itercnt),
4152 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4153 	  dest = unshare_expr (t);
4154 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4155 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
4156 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4157 					false, GSI_CONTINUE_LINKING);
4158 	  assign_stmt = gimple_build_assign (dest, t);
4159 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4160 	}
4161   if (fd->collapse > 1)
4162     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4163 
4164   if (!broken_loop)
4165     {
4166       /* The code controlling the sequential loop replaces the
4167 	 GIMPLE_OMP_CONTINUE.  */
4168       gsi = gsi_last_nondebug_bb (cont_bb);
4169       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4170       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4171       vmain = gimple_omp_continue_control_use (cont_stmt);
4172       vback = gimple_omp_continue_control_def (cont_stmt);
4173 
4174       if (cond_var)
4175 	{
4176 	  tree itype = TREE_TYPE (cond_var);
4177 	  tree t2;
4178 	  if (POINTER_TYPE_P (type)
4179 	      || TREE_CODE (n1) != INTEGER_CST
4180 	      || fd->loop.cond_code != LT_EXPR)
4181 	    t2 = build_int_cst (itype, 1);
4182 	  else
4183 	    t2 = fold_convert (itype, step);
4184 	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4185 	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
4186 					 NULL_TREE, true, GSI_SAME_STMT);
4187 	  assign_stmt = gimple_build_assign (cond_var, t2);
4188 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4189 	}
4190 
4191       if (!gimple_omp_for_combined_p (fd->for_stmt))
4192 	{
4193 	  if (POINTER_TYPE_P (type))
4194 	    t = fold_build_pointer_plus (vmain, step);
4195 	  else
4196 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4197 	  t = force_gimple_operand_gsi (&gsi, t,
4198 					DECL_P (vback)
4199 					&& TREE_ADDRESSABLE (vback),
4200 					NULL_TREE, true, GSI_SAME_STMT);
4201 	  assign_stmt = gimple_build_assign (vback, t);
4202 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4203 
4204 	  t = build2 (fd->loop.cond_code, boolean_type_node,
4205 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
4206 		      ? t : vback, e);
4207 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4208 	}
4209 
4210       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
4211       gsi_remove (&gsi, true);
4212 
4213       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4214 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4215     }
4216 
4217   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4218   gsi = gsi_last_nondebug_bb (exit_bb);
4219   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4220     {
4221       t = gimple_omp_return_lhs (gsi_stmt (gsi));
4222       if (fd->have_reductemp
4223 	  || ((fd->have_pointer_condtemp || fd->have_scantemp)
4224 	      && !fd->have_nonctrl_scantemp))
4225 	{
4226 	  tree fn;
4227 	  if (t)
4228 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4229 	  else
4230 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4231 	  gcall *g = gimple_build_call (fn, 0);
4232 	  if (t)
4233 	    {
4234 	      gimple_call_set_lhs (g, t);
4235 	      if (fd->have_reductemp)
4236 		gsi_insert_after (&gsi, gimple_build_assign (reductions,
4237 							     NOP_EXPR, t),
4238 				  GSI_SAME_STMT);
4239 	    }
4240 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4241 	}
4242       else
4243 	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4244     }
4245   else if ((fd->have_pointer_condtemp || fd->have_scantemp)
4246 	   && !fd->have_nonctrl_scantemp)
4247     {
4248       tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4249       gcall *g = gimple_build_call (fn, 0);
4250       gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4251     }
4252   if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
4253     {
4254       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4255       tree controlp = NULL_TREE, controlb = NULL_TREE;
4256       for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4257 	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4258 	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4259 	  {
4260 	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4261 	      controlb = OMP_CLAUSE_DECL (c);
4262 	    else
4263 	      controlp = OMP_CLAUSE_DECL (c);
4264 	    if (controlb && controlp)
4265 	      break;
4266 	  }
4267       gcc_assert (controlp && controlb);
4268       gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4269 				     NULL_TREE, NULL_TREE);
4270       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4271       exit1_bb = split_block (exit_bb, g)->dest;
4272       gsi = gsi_after_labels (exit1_bb);
4273       g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
4274 			     controlp);
4275       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4276       exit2_bb = split_block (exit1_bb, g)->dest;
4277       gsi = gsi_after_labels (exit2_bb);
4278       g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
4279 			     controlp);
4280       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4281       exit3_bb = split_block (exit2_bb, g)->dest;
4282       gsi = gsi_after_labels (exit3_bb);
4283     }
4284   gsi_remove (&gsi, true);
4285 
4286   /* Connect all the blocks.  */
4287   ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
4288   ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4289   ep = find_edge (entry_bb, second_bb);
4290   ep->flags = EDGE_TRUE_VALUE;
4291   ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4292   if (fourth_bb)
4293     {
4294       ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
4295       ep->probability
4296 	= profile_probability::guessed_always ().apply_scale (1, 2);
4297       ep = find_edge (third_bb, fourth_bb);
4298       ep->flags = EDGE_TRUE_VALUE;
4299       ep->probability
4300 	= profile_probability::guessed_always ().apply_scale (1, 2);
4301       ep = find_edge (fourth_bb, fifth_bb);
4302       redirect_edge_and_branch (ep, sixth_bb);
4303     }
4304   else
4305     sixth_bb = third_bb;
4306   find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4307   find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4308   if (exit1_bb)
4309     {
4310       ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
4311       ep->probability
4312 	= profile_probability::guessed_always ().apply_scale (1, 2);
4313       ep = find_edge (exit_bb, exit1_bb);
4314       ep->flags = EDGE_TRUE_VALUE;
4315       ep->probability
4316 	= profile_probability::guessed_always ().apply_scale (1, 2);
4317       ep = find_edge (exit1_bb, exit2_bb);
4318       redirect_edge_and_branch (ep, exit3_bb);
4319     }
4320 
4321   if (!broken_loop)
4322     {
4323       ep = find_edge (cont_bb, body_bb);
4324       if (ep == NULL)
4325 	{
4326 	  ep = BRANCH_EDGE (cont_bb);
4327 	  gcc_assert (single_succ (ep->dest) == body_bb);
4328 	}
4329       if (gimple_omp_for_combined_p (fd->for_stmt))
4330 	{
4331 	  remove_edge (ep);
4332 	  ep = NULL;
4333 	}
4334       else if (fd->collapse > 1)
4335 	{
4336 	  remove_edge (ep);
4337 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4338 	}
4339       else
4340 	ep->flags = EDGE_TRUE_VALUE;
4341       find_edge (cont_bb, fin_bb)->flags
4342 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4343     }
4344 
4345   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4346   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4347   if (fourth_bb)
4348     {
4349       set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
4350       set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
4351     }
4352   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
4353 
4354   set_immediate_dominator (CDI_DOMINATORS, body_bb,
4355 			   recompute_dominator (CDI_DOMINATORS, body_bb));
4356   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4357 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
4358   if (exit1_bb)
4359     {
4360       set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
4361       set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
4362     }
4363 
4364   class loop *loop = body_bb->loop_father;
4365   if (loop != entry_bb->loop_father)
4366     {
4367       gcc_assert (broken_loop || loop->header == body_bb);
4368       gcc_assert (broken_loop
4369 		  || loop->latch == region->cont
4370 		  || single_pred (loop->latch) == region->cont);
4371       return;
4372     }
4373 
4374   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4375     {
4376       loop = alloc_loop ();
4377       loop->header = body_bb;
4378       if (collapse_bb == NULL)
4379 	loop->latch = cont_bb;
4380       add_loop (loop, body_bb->loop_father);
4381     }
4382 }
4383 
4384 /* Return phi in E->DEST with ARG on edge E.  */
4385 
4386 static gphi *
4387 find_phi_with_arg_on_edge (tree arg, edge e)
4388 {
4389   basic_block bb = e->dest;
4390 
4391   for (gphi_iterator gpi = gsi_start_phis (bb);
4392        !gsi_end_p (gpi);
4393        gsi_next (&gpi))
4394     {
4395       gphi *phi = gpi.phi ();
4396       if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4397 	return phi;
4398     }
4399 
4400   return NULL;
4401 }
4402 
4403 /* A subroutine of expand_omp_for.  Generate code for a parallel
4404    loop with static schedule and a specified chunk size.  Given
4405    parameters:
4406 
4407 	for (V = N1; V cond N2; V += STEP) BODY;
4408 
4409    where COND is "<" or ">", we generate pseudocode
4410 
4411 	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4412 	if (cond is <)
4413 	  adj = STEP - 1;
4414 	else
4415 	  adj = STEP + 1;
4416 	if ((__typeof (V)) -1 > 0 && cond is >)
4417 	  n = -(adj + N2 - N1) / -STEP;
4418 	else
4419 	  n = (adj + N2 - N1) / STEP;
4420 	trip = 0;
4421 	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
4422 					      here so that V is defined
4423 					      if the loop is not entered
4424     L0:
4425 	s0 = (trip * nthreads + threadid) * CHUNK;
4426 	e0 = min (s0 + CHUNK, n);
4427 	if (s0 < n) goto L1; else goto L4;
4428     L1:
4429 	V = s0 * STEP + N1;
4430 	e = e0 * STEP + N1;
4431     L2:
4432 	BODY;
4433 	V += STEP;
4434 	if (V cond e) goto L2; else goto L3;
4435     L3:
4436 	trip += 1;
4437 	goto L0;
4438     L4:
4439 */
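
/* Illustration only (values assumed, not computed by this pass): with
   n = 10, nthreads = 2 and CHUNK = 3, trip 0 gives thread 0 the range
   [0,3) and thread 1 the range [3,6); trip 1 gives them [6,9) and [9,10);
   on trip 2 both threads find s0 >= n and branch to L4.  A minimal
   stand-alone C sketch of one trip's bound computation (the helper name is
   made up for the example):

	static int
	static_chunk_bounds (long n, long nthreads, long threadid,
			     long chunk, long trip, long *s0, long *e0)
	{
	  *s0 = (trip * nthreads + threadid) * chunk;
	  *e0 = *s0 + chunk < n ? *s0 + chunk : n;
	  return *s0 < n;
	}  */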
4440 
4441 static void
4442 expand_omp_for_static_chunk (struct omp_region *region,
4443 			     struct omp_for_data *fd, gimple *inner_stmt)
4444 {
4445   tree n, s0, e0, e, t;
4446   tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4447   tree type, itype, vmain, vback, vextra;
4448   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4449   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4450   gimple_stmt_iterator gsi, gsip;
4451   edge se;
4452   bool broken_loop = region->cont == NULL;
4453   tree *counts = NULL;
4454   tree n1, n2, step;
4455   tree reductions = NULL_TREE;
4456   tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4457 
4458   itype = type = TREE_TYPE (fd->loop.v);
4459   if (POINTER_TYPE_P (type))
4460     itype = signed_type_for (type);
4461 
4462   entry_bb = region->entry;
4463   se = split_block (entry_bb, last_stmt (entry_bb));
4464   entry_bb = se->src;
4465   iter_part_bb = se->dest;
4466   cont_bb = region->cont;
4467   gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4468   fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4469   gcc_assert (broken_loop
4470 	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4471   seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4472   body_bb = single_succ (seq_start_bb);
4473   if (!broken_loop)
4474     {
4475       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4476 		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4477       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4478       trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4479     }
4480   exit_bb = region->exit;
4481 
4482   /* Trip and adjustment setup goes in ENTRY_BB.  */
4483   gsi = gsi_last_nondebug_bb (entry_bb);
4484   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4485   gsip = gsi;
4486   gsi_prev (&gsip);
4487 
4488   if (fd->collapse > 1)
4489     {
4490       int first_zero_iter = -1, dummy = -1;
4491       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4492 
4493       counts = XALLOCAVEC (tree, fd->collapse);
4494       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4495 				  fin_bb, first_zero_iter,
4496 				  dummy_bb, dummy, l2_dom_bb);
4497       t = NULL_TREE;
4498     }
4499   else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4500     t = integer_one_node;
4501   else
4502     t = fold_binary (fd->loop.cond_code, boolean_type_node,
4503 		     fold_convert (type, fd->loop.n1),
4504 		     fold_convert (type, fd->loop.n2));
4505   if (fd->collapse == 1
4506       && TYPE_UNSIGNED (type)
4507       && (t == NULL_TREE || !integer_onep (t)))
4508     {
4509       n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4510       n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4511 				     true, GSI_SAME_STMT);
4512       n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4513       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4514 				     true, GSI_SAME_STMT);
4515       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4516 						 NULL_TREE, NULL_TREE);
4517       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4518       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4519 		     expand_omp_regimplify_p, NULL, NULL)
4520 	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4521 			expand_omp_regimplify_p, NULL, NULL))
4522 	{
4523 	  gsi = gsi_for_stmt (cond_stmt);
4524 	  gimple_regimplify_operands (cond_stmt, &gsi);
4525 	}
4526       se = split_block (entry_bb, cond_stmt);
4527       se->flags = EDGE_TRUE_VALUE;
4528       entry_bb = se->dest;
4529       se->probability = profile_probability::very_likely ();
4530       se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4531       se->probability = profile_probability::very_unlikely ();
4532       if (gimple_in_ssa_p (cfun))
4533 	{
4534 	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4535 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4536 	       !gsi_end_p (gpi); gsi_next (&gpi))
4537 	    {
4538 	      gphi *phi = gpi.phi ();
4539 	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4540 			   se, UNKNOWN_LOCATION);
4541 	    }
4542 	}
4543       gsi = gsi_last_bb (entry_bb);
4544     }
4545 
4546   if (fd->lastprivate_conditional)
4547     {
4548       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4549       tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4550       if (fd->have_pointer_condtemp)
4551 	condtemp = OMP_CLAUSE_DECL (c);
4552       c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4553       cond_var = OMP_CLAUSE_DECL (c);
4554     }
4555   if (fd->have_reductemp || fd->have_pointer_condtemp)
4556     {
4557       tree t1 = build_int_cst (long_integer_type_node, 0);
4558       tree t2 = build_int_cst (long_integer_type_node, 1);
4559       tree t3 = build_int_cstu (long_integer_type_node,
4560 				(HOST_WIDE_INT_1U << 31) + 1);
4561       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4562       gimple_stmt_iterator gsi2 = gsi_none ();
4563       gimple *g = NULL;
4564       tree mem = null_pointer_node, memv = NULL_TREE;
4565       if (fd->have_reductemp)
4566 	{
4567 	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4568 	  reductions = OMP_CLAUSE_DECL (c);
4569 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4570 	  g = SSA_NAME_DEF_STMT (reductions);
4571 	  reductions = gimple_assign_rhs1 (g);
4572 	  OMP_CLAUSE_DECL (c) = reductions;
4573 	  gsi2 = gsi_for_stmt (g);
4574 	}
4575       else
4576 	{
4577 	  if (gsi_end_p (gsip))
4578 	    gsi2 = gsi_after_labels (region->entry);
4579 	  else
4580 	    gsi2 = gsip;
4581 	  reductions = null_pointer_node;
4582 	}
4583       if (fd->have_pointer_condtemp)
4584 	{
4585 	  tree type = TREE_TYPE (condtemp);
4586 	  memv = create_tmp_var (type);
4587 	  TREE_ADDRESSABLE (memv) = 1;
4588 	  unsigned HOST_WIDE_INT sz
4589 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4590 	  sz *= fd->lastprivate_conditional;
4591 	  expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4592 				   false);
4593 	  mem = build_fold_addr_expr (memv);
4594 	}
4595       tree t
4596 	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4597 			   9, t1, t2, t2, t3, t1, null_pointer_node,
4598 			   null_pointer_node, reductions, mem);
4599       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4600 				true, GSI_SAME_STMT);
4601       if (fd->have_pointer_condtemp)
4602 	expand_omp_build_assign (&gsi2, condtemp, memv, false);
4603       if (fd->have_reductemp)
4604 	{
4605 	  gsi_remove (&gsi2, true);
4606 	  release_ssa_name (gimple_assign_lhs (g));
4607 	}
4608     }
4609   switch (gimple_omp_for_kind (fd->for_stmt))
4610     {
4611     case GF_OMP_FOR_KIND_FOR:
4612       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4613       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4614       break;
4615     case GF_OMP_FOR_KIND_DISTRIBUTE:
4616       nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4617       threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4618       break;
4619     default:
4620       gcc_unreachable ();
4621     }
4622   nthreads = build_call_expr (nthreads, 0);
4623   nthreads = fold_convert (itype, nthreads);
4624   nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4625 				       true, GSI_SAME_STMT);
4626   threadid = build_call_expr (threadid, 0);
4627   threadid = fold_convert (itype, threadid);
4628   threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4629 				       true, GSI_SAME_STMT);
4630 
4631   n1 = fd->loop.n1;
4632   n2 = fd->loop.n2;
4633   step = fd->loop.step;
4634   if (gimple_omp_for_combined_into_p (fd->for_stmt))
4635     {
4636       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4637 				     OMP_CLAUSE__LOOPTEMP_);
4638       gcc_assert (innerc);
4639       n1 = OMP_CLAUSE_DECL (innerc);
4640       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4641 				OMP_CLAUSE__LOOPTEMP_);
4642       gcc_assert (innerc);
4643       n2 = OMP_CLAUSE_DECL (innerc);
4644     }
4645   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4646 				 true, NULL_TREE, true, GSI_SAME_STMT);
4647   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4648 				 true, NULL_TREE, true, GSI_SAME_STMT);
4649   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4650 				   true, NULL_TREE, true, GSI_SAME_STMT);
4651   tree chunk_size = fold_convert (itype, fd->chunk_size);
4652   chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4653   chunk_size
4654     = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4655 				GSI_SAME_STMT);
4656 
4657   t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4658   t = fold_build2 (PLUS_EXPR, itype, step, t);
4659   t = fold_build2 (PLUS_EXPR, itype, t, n2);
4660   t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4661   if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4662     t = fold_build2 (TRUNC_DIV_EXPR, itype,
4663 		     fold_build1 (NEGATE_EXPR, itype, t),
4664 		     fold_build1 (NEGATE_EXPR, itype, step));
4665   else
4666     t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4667   t = fold_convert (itype, t);
4668   n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4669 				true, GSI_SAME_STMT);
4670 
4671   trip_var = create_tmp_reg (itype, ".trip");
4672   if (gimple_in_ssa_p (cfun))
4673     {
4674       trip_init = make_ssa_name (trip_var);
4675       trip_main = make_ssa_name (trip_var);
4676       trip_back = make_ssa_name (trip_var);
4677     }
4678   else
4679     {
4680       trip_init = trip_var;
4681       trip_main = trip_var;
4682       trip_back = trip_var;
4683     }
4684 
4685   gassign *assign_stmt
4686     = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4687   gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4688 
4689   t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4690   t = fold_build2 (MULT_EXPR, itype, t, step);
4691   if (POINTER_TYPE_P (type))
4692     t = fold_build_pointer_plus (n1, t);
4693   else
4694     t = fold_build2 (PLUS_EXPR, type, t, n1);
4695   vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4696 				     true, GSI_SAME_STMT);
4697 
4698   /* Remove the GIMPLE_OMP_FOR.  */
4699   gsi_remove (&gsi, true);
4700 
4701   gimple_stmt_iterator gsif = gsi;
4702 
4703   /* Iteration space partitioning goes in ITER_PART_BB.  */
4704   gsi = gsi_last_bb (iter_part_bb);
4705 
4706   t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4707   t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4708   t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4709   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4710 				 false, GSI_CONTINUE_LINKING);
4711 
4712   t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4713   t = fold_build2 (MIN_EXPR, itype, t, n);
4714   e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4715 				 false, GSI_CONTINUE_LINKING);
4716 
4717   t = build2 (LT_EXPR, boolean_type_node, s0, n);
4718   gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4719 
4720   /* Setup code for sequential iteration goes in SEQ_START_BB.  */
4721   gsi = gsi_start_bb (seq_start_bb);
4722 
4723   tree startvar = fd->loop.v;
4724   tree endvar = NULL_TREE;
4725 
4726   if (gimple_omp_for_combined_p (fd->for_stmt))
4727     {
4728       tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4729 		     ? gimple_omp_parallel_clauses (inner_stmt)
4730 		     : gimple_omp_for_clauses (inner_stmt);
4731       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4732       gcc_assert (innerc);
4733       startvar = OMP_CLAUSE_DECL (innerc);
4734       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4735 				OMP_CLAUSE__LOOPTEMP_);
4736       gcc_assert (innerc);
4737       endvar = OMP_CLAUSE_DECL (innerc);
4738       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4739 	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4740 	{
4741 	  int i;
4742 	  for (i = 1; i < fd->collapse; i++)
4743 	    {
4744 	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4745 					OMP_CLAUSE__LOOPTEMP_);
4746 	      gcc_assert (innerc);
4747 	    }
4748 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4749 				    OMP_CLAUSE__LOOPTEMP_);
4750 	  if (innerc)
4751 	    {
4752 	      /* If needed (distribute parallel for with lastprivate),
4753 		 propagate down the total number of iterations.  */
4754 	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4755 				     fd->loop.n2);
4756 	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4757 					    GSI_CONTINUE_LINKING);
4758 	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4759 	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4760 	    }
4761 	}
4762     }
4763 
4764   t = fold_convert (itype, s0);
4765   t = fold_build2 (MULT_EXPR, itype, t, step);
4766   if (POINTER_TYPE_P (type))
4767     {
4768       t = fold_build_pointer_plus (n1, t);
4769       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4770 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4771 	t = fold_convert (signed_type_for (type), t);
4772     }
4773   else
4774     t = fold_build2 (PLUS_EXPR, type, t, n1);
4775   t = fold_convert (TREE_TYPE (startvar), t);
4776   t = force_gimple_operand_gsi (&gsi, t,
4777 				DECL_P (startvar)
4778 				&& TREE_ADDRESSABLE (startvar),
4779 				NULL_TREE, false, GSI_CONTINUE_LINKING);
4780   assign_stmt = gimple_build_assign (startvar, t);
4781   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4782   if (cond_var)
4783     {
4784       tree itype = TREE_TYPE (cond_var);
4785       /* For lastprivate(conditional:) itervar, we need an iteration
4786 	 counter that starts at a non-zero unsigned value and increases.
4787 	 Prefer as few IVs as possible, so if we can use startvar
4788 	 itself, use that, or startvar + constant (those would be
4789 	 incremented with step), and as a last resort use s0 + 1,
4790 	 incremented by 1 each iteration.  */
4791       if (POINTER_TYPE_P (type)
4792 	  || TREE_CODE (n1) != INTEGER_CST
4793 	  || fd->loop.cond_code != LT_EXPR)
4794 	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4795 			 build_int_cst (itype, 1));
4796       else if (tree_int_cst_sgn (n1) == 1)
4797 	t = fold_convert (itype, t);
4798       else
4799 	{
4800 	  tree c = fold_convert (itype, n1);
4801 	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4802 	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4803 	}
4804       t = force_gimple_operand_gsi (&gsi, t, false,
4805 				    NULL_TREE, false, GSI_CONTINUE_LINKING);
4806       assign_stmt = gimple_build_assign (cond_var, t);
4807       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4808     }
4809 
4810   t = fold_convert (itype, e0);
4811   t = fold_build2 (MULT_EXPR, itype, t, step);
4812   if (POINTER_TYPE_P (type))
4813     {
4814       t = fold_build_pointer_plus (n1, t);
4815       if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4816 	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4817 	t = fold_convert (signed_type_for (type), t);
4818     }
4819   else
4820     t = fold_build2 (PLUS_EXPR, type, t, n1);
4821   t = fold_convert (TREE_TYPE (startvar), t);
4822   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4823 				false, GSI_CONTINUE_LINKING);
4824   if (endvar)
4825     {
4826       assign_stmt = gimple_build_assign (endvar, e);
4827       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4828       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4829 	assign_stmt = gimple_build_assign (fd->loop.v, e);
4830       else
4831 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4832       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4833     }
4834   /* Handle linear clause adjustments.  */
4835   tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4836   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4837     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4838 	 c; c = OMP_CLAUSE_CHAIN (c))
4839       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4840 	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4841 	{
4842 	  tree d = OMP_CLAUSE_DECL (c);
4843 	  bool is_ref = omp_is_reference (d);
4844 	  tree t = d, a, dest;
4845 	  if (is_ref)
4846 	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4847 	  tree type = TREE_TYPE (t);
4848 	  if (POINTER_TYPE_P (type))
4849 	    type = sizetype;
4850 	  dest = unshare_expr (t);
4851 	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4852 	  expand_omp_build_assign (&gsif, v, t);
4853 	  if (itercnt == NULL_TREE)
4854 	    {
4855 	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4856 		{
4857 		  itercntbias
4858 		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4859 				   fold_convert (itype, fd->loop.n1));
4860 		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4861 					     itercntbias, step);
4862 		  itercntbias
4863 		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
4864 						NULL_TREE, true,
4865 						GSI_SAME_STMT);
4866 		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4867 		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4868 						      NULL_TREE, false,
4869 						      GSI_CONTINUE_LINKING);
4870 		}
4871 	      else
4872 		itercnt = s0;
4873 	    }
4874 	  a = fold_build2 (MULT_EXPR, type,
4875 			   fold_convert (type, itercnt),
4876 			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4877 	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4878 			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4879 	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4880 					false, GSI_CONTINUE_LINKING);
4881 	  assign_stmt = gimple_build_assign (dest, t);
4882 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4883 	}
4884   if (fd->collapse > 1)
4885     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4886 
4887   if (!broken_loop)
4888     {
4889       /* The code controlling the sequential loop goes in CONT_BB,
4890 	 replacing the GIMPLE_OMP_CONTINUE.  */
4891       gsi = gsi_last_nondebug_bb (cont_bb);
4892       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4893       vmain = gimple_omp_continue_control_use (cont_stmt);
4894       vback = gimple_omp_continue_control_def (cont_stmt);
4895 
4896       if (cond_var)
4897 	{
4898 	  tree itype = TREE_TYPE (cond_var);
4899 	  tree t2;
4900 	  if (POINTER_TYPE_P (type)
4901 	      || TREE_CODE (n1) != INTEGER_CST
4902 	      || fd->loop.cond_code != LT_EXPR)
4903 	    t2 = build_int_cst (itype, 1);
4904 	  else
4905 	    t2 = fold_convert (itype, step);
4906 	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4907 	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
4908 					 NULL_TREE, true, GSI_SAME_STMT);
4909 	  assign_stmt = gimple_build_assign (cond_var, t2);
4910 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4911 	}
4912 
4913       if (!gimple_omp_for_combined_p (fd->for_stmt))
4914 	{
4915 	  if (POINTER_TYPE_P (type))
4916 	    t = fold_build_pointer_plus (vmain, step);
4917 	  else
4918 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4919 	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4920 	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4921 					  true, GSI_SAME_STMT);
4922 	  assign_stmt = gimple_build_assign (vback, t);
4923 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4924 
4925 	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4926 	    t = build2 (EQ_EXPR, boolean_type_node,
4927 			build_int_cst (itype, 0),
4928 			build_int_cst (itype, 1));
4929 	  else
4930 	    t = build2 (fd->loop.cond_code, boolean_type_node,
4931 			DECL_P (vback) && TREE_ADDRESSABLE (vback)
4932 			? t : vback, e);
4933 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4934 	}
4935 
4936       /* Remove GIMPLE_OMP_CONTINUE.  */
4937       gsi_remove (&gsi, true);
4938 
4939       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4940 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4941 
4942       /* Trip update code goes into TRIP_UPDATE_BB.  */
4943       gsi = gsi_start_bb (trip_update_bb);
4944 
4945       t = build_int_cst (itype, 1);
4946       t = build2 (PLUS_EXPR, itype, trip_main, t);
4947       assign_stmt = gimple_build_assign (trip_back, t);
4948       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4949     }
4950 
4951   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4952   gsi = gsi_last_nondebug_bb (exit_bb);
4953   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4954     {
4955       t = gimple_omp_return_lhs (gsi_stmt (gsi));
4956       if (fd->have_reductemp || fd->have_pointer_condtemp)
4957 	{
4958 	  tree fn;
4959 	  if (t)
4960 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4961 	  else
4962 	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4963 	  gcall *g = gimple_build_call (fn, 0);
4964 	  if (t)
4965 	    {
4966 	      gimple_call_set_lhs (g, t);
4967 	      if (fd->have_reductemp)
4968 		gsi_insert_after (&gsi, gimple_build_assign (reductions,
4969 							     NOP_EXPR, t),
4970 				  GSI_SAME_STMT);
4971 	    }
4972 	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4973 	}
4974       else
4975 	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4976     }
4977   else if (fd->have_pointer_condtemp)
4978     {
4979       tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4980       gcall *g = gimple_build_call (fn, 0);
4981       gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4982     }
4983   gsi_remove (&gsi, true);
4984 
4985   /* Connect the new blocks.  */
4986   find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4987   find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4988 
4989   if (!broken_loop)
4990     {
4991       se = find_edge (cont_bb, body_bb);
4992       if (se == NULL)
4993 	{
4994 	  se = BRANCH_EDGE (cont_bb);
4995 	  gcc_assert (single_succ (se->dest) == body_bb);
4996 	}
4997       if (gimple_omp_for_combined_p (fd->for_stmt))
4998 	{
4999 	  remove_edge (se);
5000 	  se = NULL;
5001 	}
5002       else if (fd->collapse > 1)
5003 	{
5004 	  remove_edge (se);
5005 	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5006 	}
5007       else
5008 	se->flags = EDGE_TRUE_VALUE;
5009       find_edge (cont_bb, trip_update_bb)->flags
5010 	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5011 
5012       redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5013 				iter_part_bb);
5014     }
5015 
5016   if (gimple_in_ssa_p (cfun))
5017     {
5018       gphi_iterator psi;
5019       gphi *phi;
5020       edge re, ene;
5021       edge_var_map *vm;
5022       size_t i;
5023 
5024       gcc_assert (fd->collapse == 1 && !broken_loop);
5025 
5026       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5027 	 remove arguments of the phi nodes in fin_bb.  We need to create
5028 	 appropriate phi nodes in iter_part_bb instead.  */
5029       se = find_edge (iter_part_bb, fin_bb);
5030       re = single_succ_edge (trip_update_bb);
5031       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5032       ene = single_succ_edge (entry_bb);
5033 
5034       psi = gsi_start_phis (fin_bb);
5035       for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5036 	   gsi_next (&psi), ++i)
5037 	{
5038 	  gphi *nphi;
5039 	  location_t locus;
5040 
5041 	  phi = psi.phi ();
5042 	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5043 			       redirect_edge_var_map_def (vm), 0))
5044 	    continue;
5045 
5046 	  t = gimple_phi_result (phi);
5047 	  gcc_assert (t == redirect_edge_var_map_result (vm));
5048 
5049 	  if (!single_pred_p (fin_bb))
5050 	    t = copy_ssa_name (t, phi);
5051 
5052 	  nphi = create_phi_node (t, iter_part_bb);
5053 
5054 	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5055 	  locus = gimple_phi_arg_location_from_edge (phi, se);
5056 
5057 	  /* A special case -- fd->loop.v is not yet computed in
5058 	     iter_part_bb, we need to use vextra instead.  */
5059 	  if (t == fd->loop.v)
5060 	    t = vextra;
5061 	  add_phi_arg (nphi, t, ene, locus);
5062 	  locus = redirect_edge_var_map_location (vm);
5063 	  tree back_arg = redirect_edge_var_map_def (vm);
5064 	  add_phi_arg (nphi, back_arg, re, locus);
5065 	  edge ce = find_edge (cont_bb, body_bb);
5066 	  if (ce == NULL)
5067 	    {
5068 	      ce = BRANCH_EDGE (cont_bb);
5069 	      gcc_assert (single_succ (ce->dest) == body_bb);
5070 	      ce = single_succ_edge (ce->dest);
5071 	    }
5072 	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5073 	  gcc_assert (inner_loop_phi != NULL);
5074 	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5075 		       find_edge (seq_start_bb, body_bb), locus);
5076 
5077 	  if (!single_pred_p (fin_bb))
5078 	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5079 	}
5080       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5081       redirect_edge_var_map_clear (re);
5082       if (single_pred_p (fin_bb))
5083 	while (1)
5084 	  {
5085 	    psi = gsi_start_phis (fin_bb);
5086 	    if (gsi_end_p (psi))
5087 	      break;
5088 	    remove_phi_node (&psi, false);
5089 	  }
5090 
5091       /* Make phi node for trip.  */
5092       phi = create_phi_node (trip_main, iter_part_bb);
5093       add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5094 		   UNKNOWN_LOCATION);
5095       add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5096 		   UNKNOWN_LOCATION);
5097     }
5098 
5099   if (!broken_loop)
5100     set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5101   set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5102 			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5103   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5104 			   recompute_dominator (CDI_DOMINATORS, fin_bb));
5105   set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5106 			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5107   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5108 			   recompute_dominator (CDI_DOMINATORS, body_bb));
5109 
5110   if (!broken_loop)
5111     {
5112       class loop *loop = body_bb->loop_father;
5113       class loop *trip_loop = alloc_loop ();
5114       trip_loop->header = iter_part_bb;
5115       trip_loop->latch = trip_update_bb;
5116       add_loop (trip_loop, iter_part_bb->loop_father);
5117 
5118       if (loop != entry_bb->loop_father)
5119 	{
5120 	  gcc_assert (loop->header == body_bb);
5121 	  gcc_assert (loop->latch == region->cont
5122 		      || single_pred (loop->latch) == region->cont);
5123 	  trip_loop->inner = loop;
5124 	  return;
5125 	}
5126 
5127       if (!gimple_omp_for_combined_p (fd->for_stmt))
5128 	{
5129 	  loop = alloc_loop ();
5130 	  loop->header = body_bb;
5131 	  if (collapse_bb == NULL)
5132 	    loop->latch = cont_bb;
5133 	  add_loop (loop, trip_loop);
5134 	}
5135     }
5136 }
5137 
5138 /* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
5139    loop.  Given parameters:
5140 
5141 	for (V = N1; V cond N2; V += STEP) BODY;
5142 
5143    where COND is "<" or ">", we generate pseudocode
5144 
5145 	V = N1;
5146 	goto L1;
5147     L0:
5148 	BODY;
5149 	V += STEP;
5150     L1:
5151 	if (V cond N2) goto L0; else goto L2;
5152     L2:
5153 
5154     For collapsed loops, given parameters:
5155       collapse(3)
5156       for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5157 	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5158 	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5159 	    BODY;
5160 
5161     we generate pseudocode
5162 
5163 	if (cond3 is <)
5164 	  adj = STEP3 - 1;
5165 	else
5166 	  adj = STEP3 + 1;
5167 	count3 = (adj + N32 - N31) / STEP3;
5168 	if (cond2 is <)
5169 	  adj = STEP2 - 1;
5170 	else
5171 	  adj = STEP2 + 1;
5172 	count2 = (adj + N22 - N21) / STEP2;
5173 	if (cond1 is <)
5174 	  adj = STEP1 - 1;
5175 	else
5176 	  adj = STEP1 + 1;
5177 	count1 = (adj + N12 - N11) / STEP1;
5178 	count = count1 * count2 * count3;
5179 	V = 0;
5180 	V1 = N11;
5181 	V2 = N21;
5182 	V3 = N31;
5183 	goto L1;
5184     L0:
5185 	BODY;
5186 	V += 1;
5187 	V3 += STEP3;
5188 	V2 += (V3 cond3 N32) ? 0 : STEP2;
5189 	V3 = (V3 cond3 N32) ? V3 : N31;
5190 	V1 += (V2 cond2 N22) ? 0 : STEP1;
5191 	V2 = (V2 cond2 N22) ? V2 : N21;
5192     L1:
5193 	if (V < count) goto L0; else goto L2;
5194     L2:
5195 
5196       */
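
/* Illustrative only (assumed user code, not part of this pass): a simd loop
   such as

     void
     saxpy (int n, float *x, float *y, float a)
     {
     #pragma omp simd safelen(8)
       for (int i = 0; i < n; i++)
         y[i] += a * x[i];
     }

   is expanded here into the plain sequential loop shown above; no runtime
   calls are emitted.  The safelen value (8) and the simduid are merely
   recorded on the resulting loop structure so that the vectorizer can
   exploit them later.  */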
5197 
5198 static void
5199 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
5200 {
5201   tree type, t;
5202   basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
5203   gimple_stmt_iterator gsi;
5204   gimple *stmt;
5205   gcond *cond_stmt;
5206   bool broken_loop = region->cont == NULL;
5207   edge e, ne;
5208   tree *counts = NULL;
5209   int i;
5210   int safelen_int = INT_MAX;
5211   bool dont_vectorize = false;
5212   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5213 				  OMP_CLAUSE_SAFELEN);
5214   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5215 				  OMP_CLAUSE__SIMDUID_);
5216   tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5217 			      OMP_CLAUSE_IF);
5218   tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5219 				  OMP_CLAUSE_SIMDLEN);
5220   tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5221 				   OMP_CLAUSE__CONDTEMP_);
5222   tree n1, n2;
5223   tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
5224 
5225   if (safelen)
5226     {
5227       poly_uint64 val;
5228       safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
5229       if (!poly_int_tree_p (safelen, &val))
5230 	safelen_int = 0;
5231       else
5232 	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
5233       if (safelen_int == 1)
5234 	safelen_int = 0;
5235     }
5236   if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
5237       || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
5238     {
5239       safelen_int = 0;
5240       dont_vectorize = true;
5241     }
5242   type = TREE_TYPE (fd->loop.v);
5243   entry_bb = region->entry;
5244   cont_bb = region->cont;
5245   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5246   gcc_assert (broken_loop
5247 	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5248   l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
5249   if (!broken_loop)
5250     {
5251       gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
5252       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5253       l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
5254       l2_bb = BRANCH_EDGE (entry_bb)->dest;
5255     }
5256   else
5257     {
5258       BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
5259       l1_bb = split_edge (BRANCH_EDGE (entry_bb));
5260       l2_bb = single_succ (l1_bb);
5261     }
5262   exit_bb = region->exit;
5263   l2_dom_bb = NULL;
5264 
5265   gsi = gsi_last_nondebug_bb (entry_bb);
5266 
5267   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5268   /* Not needed in SSA form right now.  */
5269   gcc_assert (!gimple_in_ssa_p (cfun));
5270   if (fd->collapse > 1)
5271     {
5272       int first_zero_iter = -1, dummy = -1;
5273       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
5274 
5275       counts = XALLOCAVEC (tree, fd->collapse);
5276       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5277 				  zero_iter_bb, first_zero_iter,
5278 				  dummy_bb, dummy, l2_dom_bb);
5279     }
5280   if (l2_dom_bb == NULL)
5281     l2_dom_bb = l1_bb;
5282 
5283   n1 = fd->loop.n1;
5284   n2 = fd->loop.n2;
5285   if (gimple_omp_for_combined_into_p (fd->for_stmt))
5286     {
5287       tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5288 				     OMP_CLAUSE__LOOPTEMP_);
5289       gcc_assert (innerc);
5290       n1 = OMP_CLAUSE_DECL (innerc);
5291       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5292 				OMP_CLAUSE__LOOPTEMP_);
5293       gcc_assert (innerc);
5294       n2 = OMP_CLAUSE_DECL (innerc);
5295     }
5296   tree step = fd->loop.step;
5297 
5298   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5299 				  OMP_CLAUSE__SIMT_);
5300   if (is_simt)
5301     {
5302       cfun->curr_properties &= ~PROP_gimple_lomp_dev;
5303       is_simt = safelen_int > 1;
5304     }
5305   tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
5306   if (is_simt)
5307     {
5308       simt_lane = create_tmp_var (unsigned_type_node);
5309       gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
5310       gimple_call_set_lhs (g, simt_lane);
5311       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5312       tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5313 				 fold_convert (TREE_TYPE (step), simt_lane));
5314       n1 = fold_convert (type, n1);
5315       if (POINTER_TYPE_P (type))
5316 	n1 = fold_build_pointer_plus (n1, offset);
5317       else
5318 	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5319 
5320       /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
5321       if (fd->collapse > 1)
5322 	simt_maxlane = build_one_cst (unsigned_type_node);
5323       else if (safelen_int < omp_max_simt_vf ())
5324 	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5325       tree vf
5326 	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5327 					unsigned_type_node, 0);
5328       if (simt_maxlane)
5329 	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5330       vf = fold_convert (TREE_TYPE (step), vf);
5331       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5332     }
5333 
5334   expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5335   if (fd->collapse > 1)
5336     {
5337       if (gimple_omp_for_combined_into_p (fd->for_stmt))
5338 	{
5339 	  gsi_prev (&gsi);
5340 	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5341 	  gsi_next (&gsi);
5342 	}
5343       else
5344 	for (i = 0; i < fd->collapse; i++)
5345 	  {
5346 	    tree itype = TREE_TYPE (fd->loops[i].v);
5347 	    if (POINTER_TYPE_P (itype))
5348 	      itype = signed_type_for (itype);
5349 	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5350 	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5351 	  }
5352     }
5353   if (cond_var)
5354     {
5355       if (POINTER_TYPE_P (type)
5356 	  || TREE_CODE (n1) != INTEGER_CST
5357 	  || fd->loop.cond_code != LT_EXPR
5358 	  || tree_int_cst_sgn (n1) != 1)
5359 	expand_omp_build_assign (&gsi, cond_var,
5360 				 build_one_cst (TREE_TYPE (cond_var)));
5361       else
5362 	expand_omp_build_assign (&gsi, cond_var,
5363 				 fold_convert (TREE_TYPE (cond_var), n1));
5364     }
5365 
5366   /* Remove the GIMPLE_OMP_FOR statement.  */
5367   gsi_remove (&gsi, true);
5368 
5369   if (!broken_loop)
5370     {
5371       /* Code to control the increment goes in the CONT_BB.  */
5372       gsi = gsi_last_nondebug_bb (cont_bb);
5373       stmt = gsi_stmt (gsi);
5374       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5375 
5376       if (POINTER_TYPE_P (type))
5377 	t = fold_build_pointer_plus (fd->loop.v, step);
5378       else
5379 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5380       expand_omp_build_assign (&gsi, fd->loop.v, t);
5381 
5382       if (fd->collapse > 1)
5383 	{
5384 	  i = fd->collapse - 1;
5385 	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5386 	    {
5387 	      t = fold_convert (sizetype, fd->loops[i].step);
5388 	      t = fold_build_pointer_plus (fd->loops[i].v, t);
5389 	    }
5390 	  else
5391 	    {
5392 	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
5393 				fd->loops[i].step);
5394 	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5395 			       fd->loops[i].v, t);
5396 	    }
5397 	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5398 
5399 	  for (i = fd->collapse - 1; i > 0; i--)
5400 	    {
5401 	      tree itype = TREE_TYPE (fd->loops[i].v);
5402 	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5403 	      if (POINTER_TYPE_P (itype2))
5404 		itype2 = signed_type_for (itype2);
5405 	      t = fold_convert (itype2, fd->loops[i - 1].step);
5406 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5407 					    GSI_SAME_STMT);
5408 	      t = build3 (COND_EXPR, itype2,
5409 			  build2 (fd->loops[i].cond_code, boolean_type_node,
5410 				  fd->loops[i].v,
5411 				  fold_convert (itype, fd->loops[i].n2)),
5412 			  build_int_cst (itype2, 0), t);
5413 	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5414 		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5415 	      else
5416 		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5417 	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5418 
5419 	      t = fold_convert (itype, fd->loops[i].n1);
5420 	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5421 					    GSI_SAME_STMT);
5422 	      t = build3 (COND_EXPR, itype,
5423 			  build2 (fd->loops[i].cond_code, boolean_type_node,
5424 				  fd->loops[i].v,
5425 				  fold_convert (itype, fd->loops[i].n2)),
5426 			  fd->loops[i].v, t);
5427 	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5428 	    }
5429 	}
5430       if (cond_var)
5431 	{
5432 	  if (POINTER_TYPE_P (type)
5433 	      || TREE_CODE (n1) != INTEGER_CST
5434 	      || fd->loop.cond_code != LT_EXPR
5435 	      || tree_int_cst_sgn (n1) != 1)
5436 	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5437 			     build_one_cst (TREE_TYPE (cond_var)));
5438 	  else
5439 	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5440 			     fold_convert (TREE_TYPE (cond_var), step));
5441 	  expand_omp_build_assign (&gsi, cond_var, t);
5442 	}
5443 
5444       /* Remove GIMPLE_OMP_CONTINUE.  */
5445       gsi_remove (&gsi, true);
5446     }
5447 
5448   /* Emit the condition in L1_BB.  */
5449   gsi = gsi_start_bb (l1_bb);
5450 
5451   t = fold_convert (type, n2);
5452   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5453 				false, GSI_CONTINUE_LINKING);
5454   tree v = fd->loop.v;
5455   if (DECL_P (v) && TREE_ADDRESSABLE (v))
5456     v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5457 				  false, GSI_CONTINUE_LINKING);
5458   t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5459   cond_stmt = gimple_build_cond_empty (t);
5460   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5461   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5462 		 NULL, NULL)
5463       || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5464 		    NULL, NULL))
5465     {
5466       gsi = gsi_for_stmt (cond_stmt);
5467       gimple_regimplify_operands (cond_stmt, &gsi);
5468     }
5469 
5470   /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
5471   if (is_simt)
5472     {
5473       gsi = gsi_start_bb (l2_bb);
5474       step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5475       if (POINTER_TYPE_P (type))
5476 	t = fold_build_pointer_plus (fd->loop.v, step);
5477       else
5478 	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5479       expand_omp_build_assign (&gsi, fd->loop.v, t);
5480     }
5481 
5482   /* Remove GIMPLE_OMP_RETURN.  */
5483   gsi = gsi_last_nondebug_bb (exit_bb);
5484   gsi_remove (&gsi, true);
5485 
5486   /* Connect the new blocks.  */
5487   remove_edge (FALLTHRU_EDGE (entry_bb));
5488 
5489   if (!broken_loop)
5490     {
5491       remove_edge (BRANCH_EDGE (entry_bb));
5492       make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5493 
5494       e = BRANCH_EDGE (l1_bb);
5495       ne = FALLTHRU_EDGE (l1_bb);
5496       e->flags = EDGE_TRUE_VALUE;
5497     }
5498   else
5499     {
5500       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5501 
5502       ne = single_succ_edge (l1_bb);
5503       e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5504 
5505     }
5506   ne->flags = EDGE_FALSE_VALUE;
5507   e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5508   ne->probability = e->probability.invert ();
5509 
5510   set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5511   set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5512 
5513   if (simt_maxlane)
5514     {
5515       cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5516 				     NULL_TREE, NULL_TREE);
5517       gsi = gsi_last_bb (entry_bb);
5518       gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5519       make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5520       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5521       FALLTHRU_EDGE (entry_bb)->probability
5522 	 = profile_probability::guessed_always ().apply_scale (7, 8);
5523       BRANCH_EDGE (entry_bb)->probability
5524 	 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5525       l2_dom_bb = entry_bb;
5526     }
5527   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5528 
5529   if (!broken_loop)
5530     {
5531       class loop *loop = alloc_loop ();
5532       loop->header = l1_bb;
5533       loop->latch = cont_bb;
5534       add_loop (loop, l1_bb->loop_father);
5535       loop->safelen = safelen_int;
5536       if (simduid)
5537 	{
5538 	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5539 	  cfun->has_simduid_loops = true;
5540 	}
5541       /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5542 	 the loop.  */
5543       if ((flag_tree_loop_vectorize
5544 	   || !global_options_set.x_flag_tree_loop_vectorize)
5545 	  && flag_tree_loop_optimize
5546 	  && loop->safelen > 1)
5547 	{
5548 	  loop->force_vectorize = true;
5549 	  if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5550 	    {
5551 	      unsigned HOST_WIDE_INT v
5552 		= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5553 	      if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5554 		loop->simdlen = v;
5555 	    }
5556 	  cfun->has_force_vectorize_loops = true;
5557 	}
5558       else if (dont_vectorize)
5559 	loop->dont_vectorize = true;
5560     }
5561   else if (simduid)
5562     cfun->has_simduid_loops = true;
5563 }
5564 
5565 /* A taskloop construct is represented after gimplification as
5566    two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5567    between them.  This routine expands the outer GIMPLE_OMP_FOR,
5568    which should just compute all the needed loop temporaries
5569    for the GIMPLE_OMP_TASK.  */
5570 
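/* Illustrative only (assumed user code, not part of this pass): for

     #pragma omp taskloop grainsize(64)
     for (long i = lo; i < hi; i++)
       work (i);

   the outer GIMPLE_OMP_FOR expanded here only stores lo and hi (and, for
   collapsed loops, the computed iteration counts) into the _looptemp_
   variables consumed by the GIMPLE_OMP_TASK; that task in turn calls
   GOMP_taskloop{,_ull} to split [lo, hi) into per-task ranges, and
   expand_omp_taskloop_for_inner below emits the loop each task runs.  */
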
5571 static void
5572 expand_omp_taskloop_for_outer (struct omp_region *region,
5573 			       struct omp_for_data *fd,
5574 			       gimple *inner_stmt)
5575 {
5576   tree type, bias = NULL_TREE;
5577   basic_block entry_bb, cont_bb, exit_bb;
5578   gimple_stmt_iterator gsi;
5579   gassign *assign_stmt;
5580   tree *counts = NULL;
5581   int i;
5582 
5583   gcc_assert (inner_stmt);
5584   gcc_assert (region->cont);
5585   gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5586 	      && gimple_omp_task_taskloop_p (inner_stmt));
5587   type = TREE_TYPE (fd->loop.v);
5588 
5589   /* See if we need to bias by LLONG_MIN.  */
5590   if (fd->iter_type == long_long_unsigned_type_node
5591       && TREE_CODE (type) == INTEGER_TYPE
5592       && !TYPE_UNSIGNED (type))
5593     {
5594       tree n1, n2;
5595 
5596       if (fd->loop.cond_code == LT_EXPR)
5597 	{
5598 	  n1 = fd->loop.n1;
5599 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5600 	}
5601       else
5602 	{
5603 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5604 	  n2 = fd->loop.n1;
5605 	}
5606       if (TREE_CODE (n1) != INTEGER_CST
5607 	  || TREE_CODE (n2) != INTEGER_CST
5608 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5609 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5610     }
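
  /* Worked example of the bias (illustration only): the runtime iterates
     in unsigned long long, so a signed range such as [-3, 5) would wrap if
     converted directly.  Adding bias = LLONG_MIN (2^63 when viewed as
     unsigned) maps each signed value x to x + 2^63 mod 2^64, which is
     monotonic: -3 and 5 become 2^63 - 3 and 2^63 + 5, preserving both the
     ordering and the trip count.  */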
5611 
5612   entry_bb = region->entry;
5613   cont_bb = region->cont;
5614   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5615   gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5616   exit_bb = region->exit;
5617 
5618   gsi = gsi_last_nondebug_bb (entry_bb);
5619   gimple *for_stmt = gsi_stmt (gsi);
5620   gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5621   if (fd->collapse > 1)
5622     {
5623       int first_zero_iter = -1, dummy = -1;
5624       basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5625 
5626       counts = XALLOCAVEC (tree, fd->collapse);
5627       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5628 				  zero_iter_bb, first_zero_iter,
5629 				  dummy_bb, dummy, l2_dom_bb);
5630 
5631       if (zero_iter_bb)
5632 	{
5633 	  /* Some counts[i] vars might be uninitialized if
5634 	     some loop has zero iterations.  But the body shouldn't
5635 	     be executed in that case, so just avoid uninit warnings.  */
5636 	  for (i = first_zero_iter; i < fd->collapse; i++)
5637 	    if (SSA_VAR_P (counts[i]))
5638 	      TREE_NO_WARNING (counts[i]) = 1;
5639 	  gsi_prev (&gsi);
5640 	  edge e = split_block (entry_bb, gsi_stmt (gsi));
5641 	  entry_bb = e->dest;
5642 	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5643 	  gsi = gsi_last_bb (entry_bb);
5644 	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5645 				   get_immediate_dominator (CDI_DOMINATORS,
5646 							    zero_iter_bb));
5647 	}
5648     }
5649 
5650   tree t0, t1;
5651   t1 = fd->loop.n2;
5652   t0 = fd->loop.n1;
5653   if (POINTER_TYPE_P (TREE_TYPE (t0))
5654       && TYPE_PRECISION (TREE_TYPE (t0))
5655 	 != TYPE_PRECISION (fd->iter_type))
5656     {
5657       /* Avoid casting pointers to integer of a different size.  */
5658       tree itype = signed_type_for (type);
5659       t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5660       t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5661     }
5662   else
5663     {
5664       t1 = fold_convert (fd->iter_type, t1);
5665       t0 = fold_convert (fd->iter_type, t0);
5666     }
5667   if (bias)
5668     {
5669       t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5670       t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5671     }
5672 
5673   tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5674 				 OMP_CLAUSE__LOOPTEMP_);
5675   gcc_assert (innerc);
5676   tree startvar = OMP_CLAUSE_DECL (innerc);
5677   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5678   gcc_assert (innerc);
5679   tree endvar = OMP_CLAUSE_DECL (innerc);
5680   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5681     {
5682       gcc_assert (innerc);
5683       for (i = 1; i < fd->collapse; i++)
5684 	{
5685 	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5686 				    OMP_CLAUSE__LOOPTEMP_);
5687 	  gcc_assert (innerc);
5688 	}
5689       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5690 				OMP_CLAUSE__LOOPTEMP_);
5691       if (innerc)
5692 	{
5693 	  /* If needed (inner taskloop has lastprivate clause), propagate
5694 	     down the total number of iterations.  */
5695 	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5696 					     NULL_TREE, false,
5697 					     GSI_CONTINUE_LINKING);
5698 	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5699 	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5700 	}
5701     }
5702 
5703   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5704 				 GSI_CONTINUE_LINKING);
5705   assign_stmt = gimple_build_assign (startvar, t0);
5706   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5707 
5708   t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5709 				 GSI_CONTINUE_LINKING);
5710   assign_stmt = gimple_build_assign (endvar, t1);
5711   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5712   if (fd->collapse > 1)
5713     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5714 
5715   /* Remove the GIMPLE_OMP_FOR statement.  */
5716   gsi = gsi_for_stmt (for_stmt);
5717   gsi_remove (&gsi, true);
5718 
5719   gsi = gsi_last_nondebug_bb (cont_bb);
5720   gsi_remove (&gsi, true);
5721 
5722   gsi = gsi_last_nondebug_bb (exit_bb);
5723   gsi_remove (&gsi, true);
5724 
5725   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5726   remove_edge (BRANCH_EDGE (entry_bb));
5727   FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5728   remove_edge (BRANCH_EDGE (cont_bb));
5729   set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5730   set_immediate_dominator (CDI_DOMINATORS, region->entry,
5731 			   recompute_dominator (CDI_DOMINATORS, region->entry));
5732 }
5733 
5734 /* A taskloop construct is represented after gimplification as
5735    two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5736    between them.  This routine expands the inner GIMPLE_OMP_FOR.
5737    The GOMP_taskloop{,_ull} function arranges for each task to be given
5738    just a single range of iterations.  */
5739 
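/* Roughly (a sketch, not emitted verbatim), each task produced for the
   taskloop ends up running

     for (V = start; V cond end; V += STEP) BODY;

   where start and end are the _looptemp_ values that GOMP_taskloop{,_ull}
   filled in for that particular task, so the loop body itself needs no
   further calls into libgomp.  */
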
5740 static void
5741 expand_omp_taskloop_for_inner (struct omp_region *region,
5742 			       struct omp_for_data *fd,
5743 			       gimple *inner_stmt)
5744 {
5745   tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5746   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5747   basic_block fin_bb;
5748   gimple_stmt_iterator gsi;
5749   edge ep;
5750   bool broken_loop = region->cont == NULL;
5751   tree *counts = NULL;
5752   tree n1, n2, step;
5753 
5754   itype = type = TREE_TYPE (fd->loop.v);
5755   if (POINTER_TYPE_P (type))
5756     itype = signed_type_for (type);
5757 
5758   /* See if we need to bias by LLONG_MIN.  */
5759   if (fd->iter_type == long_long_unsigned_type_node
5760       && TREE_CODE (type) == INTEGER_TYPE
5761       && !TYPE_UNSIGNED (type))
5762     {
5763       tree n1, n2;
5764 
5765       if (fd->loop.cond_code == LT_EXPR)
5766 	{
5767 	  n1 = fd->loop.n1;
5768 	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5769 	}
5770       else
5771 	{
5772 	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5773 	  n2 = fd->loop.n1;
5774 	}
5775       if (TREE_CODE (n1) != INTEGER_CST
5776 	  || TREE_CODE (n2) != INTEGER_CST
5777 	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5778 	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5779     }
5780 
5781   entry_bb = region->entry;
5782   cont_bb = region->cont;
5783   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5784   fin_bb = BRANCH_EDGE (entry_bb)->dest;
5785   gcc_assert (broken_loop
5786 	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5787   body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5788   if (!broken_loop)
5789     {
5790       gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5791       gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5792     }
5793   exit_bb = region->exit;
5794 
5795   /* Iteration space partitioning goes in ENTRY_BB.  */
5796   gsi = gsi_last_nondebug_bb (entry_bb);
5797   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5798 
5799   if (fd->collapse > 1)
5800     {
5801       int first_zero_iter = -1, dummy = -1;
5802       basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5803 
5804       counts = XALLOCAVEC (tree, fd->collapse);
5805       expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5806 				  fin_bb, first_zero_iter,
5807 				  dummy_bb, dummy, l2_dom_bb);
5808       t = NULL_TREE;
5809     }
5810   else
5811     t = integer_one_node;
5812 
5813   step = fd->loop.step;
5814   tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5815 				 OMP_CLAUSE__LOOPTEMP_);
5816   gcc_assert (innerc);
5817   n1 = OMP_CLAUSE_DECL (innerc);
5818   innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5819   gcc_assert (innerc);
5820   n2 = OMP_CLAUSE_DECL (innerc);
5821   if (bias)
5822     {
5823       n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5824       n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5825     }
5826   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5827 				 true, NULL_TREE, true, GSI_SAME_STMT);
5828   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5829 				 true, NULL_TREE, true, GSI_SAME_STMT);
5830   step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5831 				   true, NULL_TREE, true, GSI_SAME_STMT);
5832 
5833   tree startvar = fd->loop.v;
5834   tree endvar = NULL_TREE;
5835 
5836   if (gimple_omp_for_combined_p (fd->for_stmt))
5837     {
5838       tree clauses = gimple_omp_for_clauses (inner_stmt);
5839       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5840       gcc_assert (innerc);
5841       startvar = OMP_CLAUSE_DECL (innerc);
5842       innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5843 				OMP_CLAUSE__LOOPTEMP_);
5844       gcc_assert (innerc);
5845       endvar = OMP_CLAUSE_DECL (innerc);
5846     }
5847   t = fold_convert (TREE_TYPE (startvar), n1);
5848   t = force_gimple_operand_gsi (&gsi, t,
5849 				DECL_P (startvar)
5850 				&& TREE_ADDRESSABLE (startvar),
5851 				NULL_TREE, false, GSI_CONTINUE_LINKING);
5852   gimple *assign_stmt = gimple_build_assign (startvar, t);
5853   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5854 
5855   t = fold_convert (TREE_TYPE (startvar), n2);
5856   e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5857 				false, GSI_CONTINUE_LINKING);
5858   if (endvar)
5859     {
5860       assign_stmt = gimple_build_assign (endvar, e);
5861       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5862       if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5863 	assign_stmt = gimple_build_assign (fd->loop.v, e);
5864       else
5865 	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5866       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5867     }
5868   if (fd->collapse > 1)
5869     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5870 
5871   if (!broken_loop)
5872     {
5873       /* The code controlling the sequential loop replaces the
5874 	 GIMPLE_OMP_CONTINUE.  */
5875       gsi = gsi_last_nondebug_bb (cont_bb);
5876       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5877       gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5878       vmain = gimple_omp_continue_control_use (cont_stmt);
5879       vback = gimple_omp_continue_control_def (cont_stmt);
5880 
5881       if (!gimple_omp_for_combined_p (fd->for_stmt))
5882 	{
5883 	  if (POINTER_TYPE_P (type))
5884 	    t = fold_build_pointer_plus (vmain, step);
5885 	  else
5886 	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5887 	  t = force_gimple_operand_gsi (&gsi, t,
5888 					DECL_P (vback)
5889 					&& TREE_ADDRESSABLE (vback),
5890 					NULL_TREE, true, GSI_SAME_STMT);
5891 	  assign_stmt = gimple_build_assign (vback, t);
5892 	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5893 
5894 	  t = build2 (fd->loop.cond_code, boolean_type_node,
5895 		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5896 		      ? t : vback, e);
5897 	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5898 	}
5899 
5900       /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5901       gsi_remove (&gsi, true);
5902 
5903       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5904 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5905     }
5906 
5907   /* Remove the GIMPLE_OMP_FOR statement.  */
5908   gsi = gsi_for_stmt (fd->for_stmt);
5909   gsi_remove (&gsi, true);
5910 
5911   /* Remove the GIMPLE_OMP_RETURN statement.  */
5912   gsi = gsi_last_nondebug_bb (exit_bb);
5913   gsi_remove (&gsi, true);
5914 
5915   FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5916   if (!broken_loop)
5917     remove_edge (BRANCH_EDGE (entry_bb));
5918   else
5919     {
5920       remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5921       region->outer->cont = NULL;
5922     }
5923 
5924   /* Connect all the blocks.  */
5925   if (!broken_loop)
5926     {
5927       ep = find_edge (cont_bb, body_bb);
5928       if (gimple_omp_for_combined_p (fd->for_stmt))
5929 	{
5930 	  remove_edge (ep);
5931 	  ep = NULL;
5932 	}
5933       else if (fd->collapse > 1)
5934 	{
5935 	  remove_edge (ep);
5936 	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5937 	}
5938       else
5939 	ep->flags = EDGE_TRUE_VALUE;
5940       find_edge (cont_bb, fin_bb)->flags
5941 	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5942     }
5943 
5944   set_immediate_dominator (CDI_DOMINATORS, body_bb,
5945 			   recompute_dominator (CDI_DOMINATORS, body_bb));
5946   if (!broken_loop)
5947     set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5948 			     recompute_dominator (CDI_DOMINATORS, fin_bb));
5949 
5950   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5951     {
5952       class loop *loop = alloc_loop ();
5953       loop->header = body_bb;
5954       if (collapse_bb == NULL)
5955 	loop->latch = cont_bb;
5956       add_loop (loop, body_bb->loop_father);
5957     }
5958 }
5959 
5960 /* A subroutine of expand_omp_for.  Generate code for an OpenACC
5961    partitioned loop.  The lowering here is abstracted, in that the
5962    loop parameters are passed through internal functions, which are
5963    further lowered by oacc_device_lower, once we get to the target
5964    compiler.  The loop is of the form:
5965 
5966    for (V = B; V LTGT E; V += S) {BODY}
5967 
5968    where LTGT is < or >.  We may have a specified chunking size, CHUNK_SIZE
5969    (constant 0 for no chunking), and we will have a GWV partitioning
5970    mask, specifying dimensions over which the loop is to be
5971    partitioned (see note below).  We generate code that looks like
5972    (this ignores tiling):
5973 
5974    <entry_bb> [incoming FALL->body, BRANCH->exit]
5975      typedef signedintify (typeof (V)) T;  // underlying signed integral type
5976      T range = E - B;
5977      T chunk_no = 0;
5978      T DIR = LTGT == '<' ? +1 : -1;
5979      T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5980      T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5981 
5982    <head_bb> [created by splitting end of entry_bb]
5983      T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5984      T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5985      if (!(offset LTGT bound)) goto bottom_bb;
5986 
5987    <body_bb> [incoming]
5988      V = B + offset;
5989      {BODY}
5990 
5991    <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5992      offset += step;
5993      if (offset LTGT bound) goto body_bb; [*]
5994 
5995    <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5996      chunk_no++;
5997      if (chunk_no < chunk_max) goto head_bb;
5998 
5999    <exit_bb> [incoming]
6000      V = B + ((range -/+ 1) / S +/- 1) * S [*]
6001 
6002    [*] Needed if V live at end of loop.  */
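
/* Purely as an illustration (not part of the transformation itself), a
   source-level loop such as

     #pragma acc parallel loop gang
     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   reaches this point with B = 0, E = n, S = 1 and a GWV mask requesting
   gang partitioning; the IFN_GOACC_LOOP_* calls emitted below carry those
   parameters symbolically until oacc_device_lower maps them onto the
   device's actual launch geometry.  */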
6003 
6004 static void
6005 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
6006 {
6007   bool is_oacc_kernels_parallelized
6008     = (lookup_attribute ("oacc kernels parallelized",
6009 			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
6010   {
6011     bool is_oacc_kernels
6012       = (lookup_attribute ("oacc kernels",
6013 			   DECL_ATTRIBUTES (current_function_decl)) != NULL);
6014     if (is_oacc_kernels_parallelized)
6015       gcc_checking_assert (is_oacc_kernels);
6016   }
6017   gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
6018   /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
6019      for SSA specifics, and some are for 'parloops' OpenACC
6020      'kernels'-parallelized specifics.  */
6021 
6022   tree v = fd->loop.v;
6023   enum tree_code cond_code = fd->loop.cond_code;
6024   enum tree_code plus_code = PLUS_EXPR;
6025 
6026   tree chunk_size = integer_minus_one_node;
6027   tree gwv = integer_zero_node;
6028   tree iter_type = TREE_TYPE (v);
6029   tree diff_type = iter_type;
6030   tree plus_type = iter_type;
6031   struct oacc_collapse *counts = NULL;
6032 
6033   gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6034 		       == GF_OMP_FOR_KIND_OACC_LOOP);
6035   gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6036   gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6037 
6038   if (POINTER_TYPE_P (iter_type))
6039     {
6040       plus_code = POINTER_PLUS_EXPR;
6041       plus_type = sizetype;
6042     }
6043   for (int ix = fd->collapse; ix--;)
6044     {
6045       tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
6046       if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
6047 	diff_type = diff_type2;
6048     }
6049   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6050     diff_type = signed_type_for (diff_type);
6051   if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6052     diff_type = integer_type_node;
6053 
6054   basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6055   basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6056   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
6057   basic_block bottom_bb = NULL;
6058 
6059   /* entry_bb has two successors; the branch edge is to the exit
6060      block, fallthrough edge to body.  */
6061   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6062 	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6063 
6064   /* If cont_bb is non-NULL, it has 2 successors.  The branch successor is
6065      body_bb, or a block whose only successor is body_bb.  Its
6066      fallthrough successor is the final block (same as the branch
6067      successor of the entry_bb).  */
6068   if (cont_bb)
6069     {
6070       basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6071       basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6072 
6073       gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6074       gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6075     }
6076   else
6077     gcc_assert (!gimple_in_ssa_p (cfun));
6078 
6079   /* The exit block only has entry_bb and cont_bb as predecessors.  */
6080   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6081 
6082   tree chunk_no;
6083   tree chunk_max = NULL_TREE;
6084   tree bound, offset;
6085   tree step = create_tmp_var (diff_type, ".step");
6086   bool up = cond_code == LT_EXPR;
6087   tree dir = build_int_cst (diff_type, up ? +1 : -1);
6088   bool chunking = !gimple_in_ssa_p (cfun);
6089   bool negating;
6090 
6091   /* Tiling vars.  */
6092   tree tile_size = NULL_TREE;
6093   tree element_s = NULL_TREE;
6094   tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6095   basic_block elem_body_bb = NULL;
6096   basic_block elem_cont_bb = NULL;
6097 
6098   /* SSA instances.  */
6099   tree offset_incr = NULL_TREE;
6100   tree offset_init = NULL_TREE;
6101 
6102   gimple_stmt_iterator gsi;
6103   gassign *ass;
6104   gcall *call;
6105   gimple *stmt;
6106   tree expr;
6107   location_t loc;
6108   edge split, be, fte;
6109 
6110   /* Split the end of entry_bb to create head_bb.  */
6111   split = split_block (entry_bb, last_stmt (entry_bb));
6112   basic_block head_bb = split->dest;
6113   entry_bb = split->src;
6114 
6115   /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
6116   gsi = gsi_last_nondebug_bb (entry_bb);
6117   gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6118   loc = gimple_location (for_stmt);
6119 
6120   if (gimple_in_ssa_p (cfun))
6121     {
6122       offset_init = gimple_omp_for_index (for_stmt, 0);
6123       gcc_assert (integer_zerop (fd->loop.n1));
6124       /* The SSA parallelizer does gang parallelism.  */
6125       gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6126     }
6127 
6128   if (fd->collapse > 1 || fd->tiling)
6129     {
6130       gcc_assert (!gimple_in_ssa_p (cfun) && up);
6131       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6132       tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
6133 					      TREE_TYPE (fd->loop.n2), loc);
6134 
6135       if (SSA_VAR_P (fd->loop.n2))
6136 	{
6137 	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6138 					    true, GSI_SAME_STMT);
6139 	  ass = gimple_build_assign (fd->loop.n2, total);
6140 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6141 	}
6142     }
6143 
6144   tree b = fd->loop.n1;
6145   tree e = fd->loop.n2;
6146   tree s = fd->loop.step;
6147 
6148   b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6149   e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6150 
6151   /* Convert the step, avoiding possible unsigned->signed overflow.  */
6152   negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6153   if (negating)
6154     s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6155   s = fold_convert (diff_type, s);
6156   if (negating)
6157     s = fold_build1 (NEGATE_EXPR, diff_type, s);
6158   s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6159 
6160   if (!chunking)
6161     chunk_size = integer_zero_node;
6162   expr = fold_convert (diff_type, chunk_size);
6163   chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6164 					 NULL_TREE, true, GSI_SAME_STMT);
6165 
6166   if (fd->tiling)
6167     {
6168       /* Determine the tile size and element step,
6169 	 modify the outer loop step size.  */
6170       tile_size = create_tmp_var (diff_type, ".tile_size");
6171       expr = build_int_cst (diff_type, 1);
6172       for (int ix = 0; ix < fd->collapse; ix++)
6173 	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6174       expr = force_gimple_operand_gsi (&gsi, expr, true,
6175 				       NULL_TREE, true, GSI_SAME_STMT);
6176       ass = gimple_build_assign (tile_size, expr);
6177       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6178 
6179       element_s = create_tmp_var (diff_type, ".element_s");
6180       ass = gimple_build_assign (element_s, s);
6181       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6182 
6183       expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6184       s = force_gimple_operand_gsi (&gsi, expr, true,
6185 				    NULL_TREE, true, GSI_SAME_STMT);
6186     }
6187 
6188   /* Determine the range, avoiding possible unsigned->signed overflow.  */
6189   negating = !up && TYPE_UNSIGNED (iter_type);
6190   expr = fold_build2 (MINUS_EXPR, plus_type,
6191 		      fold_convert (plus_type, negating ? b : e),
6192 		      fold_convert (plus_type, negating ? e : b));
6193   expr = fold_convert (diff_type, expr);
6194   if (negating)
6195     expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6196   tree range = force_gimple_operand_gsi (&gsi, expr, true,
6197 					 NULL_TREE, true, GSI_SAME_STMT);
6198 
6199   chunk_no = build_int_cst (diff_type, 0);
6200   if (chunking)
6201     {
6202       gcc_assert (!gimple_in_ssa_p (cfun));
6203 
6204       expr = chunk_no;
6205       chunk_max = create_tmp_var (diff_type, ".chunk_max");
6206       chunk_no = create_tmp_var (diff_type, ".chunk_no");
6207 
6208       ass = gimple_build_assign (chunk_no, expr);
6209       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6210 
6211       call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6212 					 build_int_cst (integer_type_node,
6213 							IFN_GOACC_LOOP_CHUNKS),
6214 					 dir, range, s, chunk_size, gwv);
6215       gimple_call_set_lhs (call, chunk_max);
6216       gimple_set_location (call, loc);
6217       gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6218     }
6219   else
6220     chunk_size = chunk_no;
6221 
6222   call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6223 				     build_int_cst (integer_type_node,
6224 						    IFN_GOACC_LOOP_STEP),
6225 				     dir, range, s, chunk_size, gwv);
6226   gimple_call_set_lhs (call, step);
6227   gimple_set_location (call, loc);
6228   gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6229 
6230   /* Remove the GIMPLE_OMP_FOR.  */
6231   gsi_remove (&gsi, true);
6232 
6233   /* Fixup edges from head_bb.  */
6234   be = BRANCH_EDGE (head_bb);
6235   fte = FALLTHRU_EDGE (head_bb);
6236   be->flags |= EDGE_FALSE_VALUE;
6237   fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6238 
6239   basic_block body_bb = fte->dest;
6240 
6241   if (gimple_in_ssa_p (cfun))
6242     {
6243       gsi = gsi_last_nondebug_bb (cont_bb);
6244       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6245 
6246       offset = gimple_omp_continue_control_use (cont_stmt);
6247       offset_incr = gimple_omp_continue_control_def (cont_stmt);
6248     }
6249   else
6250     {
6251       offset = create_tmp_var (diff_type, ".offset");
6252       offset_init = offset_incr = offset;
6253     }
6254   bound = create_tmp_var (TREE_TYPE (offset), ".bound");
6255 
6256   /* Loop offset & bound go into head_bb.  */
6257   gsi = gsi_start_bb (head_bb);
6258 
6259   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6260 				     build_int_cst (integer_type_node,
6261 						    IFN_GOACC_LOOP_OFFSET),
6262 				     dir, range, s,
6263 				     chunk_size, gwv, chunk_no);
6264   gimple_call_set_lhs (call, offset_init);
6265   gimple_set_location (call, loc);
6266   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6267 
6268   call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6269 				     build_int_cst (integer_type_node,
6270 						    IFN_GOACC_LOOP_BOUND),
6271 				     dir, range, s,
6272 				     chunk_size, gwv, offset_init);
6273   gimple_call_set_lhs (call, bound);
6274   gimple_set_location (call, loc);
6275   gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6276 
6277   expr = build2 (cond_code, boolean_type_node, offset_init, bound);
6278   gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6279 		    GSI_CONTINUE_LINKING);
6280 
6281   /* V assignment goes into body_bb.  */
6282   if (!gimple_in_ssa_p (cfun))
6283     {
6284       gsi = gsi_start_bb (body_bb);
6285 
6286       expr = build2 (plus_code, iter_type, b,
6287 		     fold_convert (plus_type, offset));
6288       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6289 				       true, GSI_SAME_STMT);
6290       ass = gimple_build_assign (v, expr);
6291       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6292 
6293       if (fd->collapse > 1 || fd->tiling)
6294 	expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
6295 
6296       if (fd->tiling)
6297 	{
6298 	  /* Determine the range of the element loop -- usually simply
6299 	     the tile_size, but could be smaller if the final
6300 	     iteration of the outer loop is a partial tile.  */
6301 	  tree e_range = create_tmp_var (diff_type, ".e_range");
6302 
6303 	  expr = build2 (MIN_EXPR, diff_type,
6304 			 build2 (MINUS_EXPR, diff_type, bound, offset),
6305 			 build2 (MULT_EXPR, diff_type, tile_size,
6306 				 element_s));
6307 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6308 					   true, GSI_SAME_STMT);
6309 	  ass = gimple_build_assign (e_range, expr);
6310 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6311 
6312 	  /* Determine bound, offset & step of inner loop. */
6313 	  e_bound = create_tmp_var (diff_type, ".e_bound");
6314 	  e_offset = create_tmp_var (diff_type, ".e_offset");
6315 	  e_step = create_tmp_var (diff_type, ".e_step");
6316 
6317 	  /* Mark these as element loops.  */
6318 	  tree t, e_gwv = integer_minus_one_node;
6319 	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
6320 
6321 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6322 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6323 					     element_s, chunk, e_gwv, chunk);
6324 	  gimple_call_set_lhs (call, e_offset);
6325 	  gimple_set_location (call, loc);
6326 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6327 
6328 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6329 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6330 					     element_s, chunk, e_gwv, e_offset);
6331 	  gimple_call_set_lhs (call, e_bound);
6332 	  gimple_set_location (call, loc);
6333 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6334 
6335 	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6336 	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6337 					     element_s, chunk, e_gwv);
6338 	  gimple_call_set_lhs (call, e_step);
6339 	  gimple_set_location (call, loc);
6340 	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6341 
6342 	  /* Add test and split block.  */
6343 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6344 	  stmt = gimple_build_cond_empty (expr);
6345 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6346 	  split = split_block (body_bb, stmt);
6347 	  elem_body_bb = split->dest;
6348 	  if (cont_bb == body_bb)
6349 	    cont_bb = elem_body_bb;
6350 	  body_bb = split->src;
6351 
6352 	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6353 
6354 	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
6355 	  if (cont_bb == NULL)
6356 	    {
6357 	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6358 	      e->probability = profile_probability::even ();
6359 	      split->probability = profile_probability::even ();
6360 	    }
6361 
6362 	  /* Initialize the user's loop vars.  */
6363 	  gsi = gsi_start_bb (elem_body_bb);
6364 	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
6365 				     diff_type);
6366 	}
6367     }
6368 
6369   /* Loop increment goes into cont_bb.  If this is not a loop, we
6370      will have spawned threads as if it was, and each one will
6371      execute one iteration.  The specification is not explicit about
6372      whether such constructs are ill-formed or not, and they can
6373      occur, especially when noreturn routines are involved.  */
6374   if (cont_bb)
6375     {
6376       gsi = gsi_last_nondebug_bb (cont_bb);
6377       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6378       loc = gimple_location (cont_stmt);
6379 
6380       if (fd->tiling)
6381 	{
6382 	  /* Insert element loop increment and test.  */
6383 	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6384 	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6385 					   true, GSI_SAME_STMT);
6386 	  ass = gimple_build_assign (e_offset, expr);
6387 	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6388 	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6389 
6390 	  stmt = gimple_build_cond_empty (expr);
6391 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6392 	  split = split_block (cont_bb, stmt);
6393 	  elem_cont_bb = split->src;
6394 	  cont_bb = split->dest;
6395 
6396 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6397 	  split->probability = profile_probability::unlikely ().guessed ();
6398 	  edge latch_edge
6399 	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6400 	  latch_edge->probability = profile_probability::likely ().guessed ();
6401 
6402 	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6403 	  skip_edge->probability = profile_probability::unlikely ().guessed ();
6404 	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6405 	  loop_entry_edge->probability
6406 	    = profile_probability::likely ().guessed ();
6407 
6408 	  gsi = gsi_for_stmt (cont_stmt);
6409 	}
6410 
6411       /* Increment offset.  */
6412       if (gimple_in_ssa_p (cfun))
6413 	expr = build2 (plus_code, iter_type, offset,
6414 		       fold_convert (plus_type, step));
6415       else
6416 	expr = build2 (PLUS_EXPR, diff_type, offset, step);
6417       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6418 				       true, GSI_SAME_STMT);
6419       ass = gimple_build_assign (offset_incr, expr);
6420       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6421       expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6422       gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6423 
6424       /*  Remove the GIMPLE_OMP_CONTINUE.  */
6425       gsi_remove (&gsi, true);
6426 
6427       /* Fixup edges from cont_bb.  */
6428       be = BRANCH_EDGE (cont_bb);
6429       fte = FALLTHRU_EDGE (cont_bb);
6430       be->flags |= EDGE_TRUE_VALUE;
6431       fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6432 
6433       if (chunking)
6434 	{
6435 	  /* Split the beginning of exit_bb to make bottom_bb.  We
6436 	     need to insert a nop at the start, because splitting is
6437 	     after a stmt, not before.  */
6438 	  gsi = gsi_start_bb (exit_bb);
6439 	  stmt = gimple_build_nop ();
6440 	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6441 	  split = split_block (exit_bb, stmt);
6442 	  bottom_bb = split->src;
6443 	  exit_bb = split->dest;
6444 	  gsi = gsi_last_bb (bottom_bb);
6445 
6446 	  /* Chunk increment and test goes into bottom_bb.  */
6447 	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6448 			 build_int_cst (diff_type, 1));
6449 	  ass = gimple_build_assign (chunk_no, expr);
6450 	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6451 
6452 	  /* Chunk test at end of bottom_bb.  */
6453 	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6454 	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6455 			    GSI_CONTINUE_LINKING);
6456 
6457 	  /* Fixup edges from bottom_bb.  */
6458 	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6459 	  split->probability = profile_probability::unlikely ().guessed ();
6460 	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6461 	  latch_edge->probability = profile_probability::likely ().guessed ();
6462 	}
6463     }
6464 
6465   gsi = gsi_last_nondebug_bb (exit_bb);
6466   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6467   loc = gimple_location (gsi_stmt (gsi));
6468 
6469   if (!gimple_in_ssa_p (cfun))
6470     {
6471       /* Insert the final value of V, in case it is live.  This is the
6472 	 value for the only thread that survives past the join.  */
6473       expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6474       expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6475       expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6476       expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6477       expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6478       expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6479 				       true, GSI_SAME_STMT);
6480       ass = gimple_build_assign (v, expr);
6481       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6482     }
6483 
6484   /* Remove the OMP_RETURN.  */
6485   gsi_remove (&gsi, true);
6486 
6487   if (cont_bb)
6488     {
6489       /* We now have one, two or three nested loops.  Update the loop
6490 	 structures.  */
6491       class loop *parent = entry_bb->loop_father;
6492       class loop *body = body_bb->loop_father;
6493 
6494       if (chunking)
6495 	{
6496 	  class loop *chunk_loop = alloc_loop ();
6497 	  chunk_loop->header = head_bb;
6498 	  chunk_loop->latch = bottom_bb;
6499 	  add_loop (chunk_loop, parent);
6500 	  parent = chunk_loop;
6501 	}
6502       else if (parent != body)
6503 	{
6504 	  gcc_assert (body->header == body_bb);
6505 	  gcc_assert (body->latch == cont_bb
6506 		      || single_pred (body->latch) == cont_bb);
6507 	  parent = NULL;
6508 	}
6509 
6510       if (parent)
6511 	{
6512 	  class loop *body_loop = alloc_loop ();
6513 	  body_loop->header = body_bb;
6514 	  body_loop->latch = cont_bb;
6515 	  add_loop (body_loop, parent);
6516 
6517 	  if (fd->tiling)
6518 	    {
6519 	      /* Insert tiling's element loop.  */
6520 	      class loop *inner_loop = alloc_loop ();
6521 	      inner_loop->header = elem_body_bb;
6522 	      inner_loop->latch = elem_cont_bb;
6523 	      add_loop (inner_loop, body_loop);
6524 	    }
6525 	}
6526     }
6527 }
6528 
6529 /* Expand the OMP loop defined by REGION.  */
6530 
6531 static void
6532 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6533 {
6534   struct omp_for_data fd;
6535   struct omp_for_data_loop *loops;
6536 
6537   loops
6538     = (struct omp_for_data_loop *)
6539       alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6540 	      * sizeof (struct omp_for_data_loop));
6541   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6542 			&fd, loops);
6543   region->sched_kind = fd.sched_kind;
6544   region->sched_modifiers = fd.sched_modifiers;
6545   region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
6546 
6547   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6548   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6549   FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6550   if (region->cont)
6551     {
6552       gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6553       BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6554       FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6555     }
6556   else
6557     /* If there isn't a continue then this is a degenerate case where
6558        the introduction of abnormal edges during lowering will prevent
6559        original loops from being detected.  Fix that up.  */
6560     loops_state_set (LOOPS_NEED_FIXUP);
6561 
6562   if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
6563     expand_omp_simd (region, &fd);
6564   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6565     {
6566       gcc_assert (!inner_stmt);
6567       expand_oacc_for (region, &fd);
6568     }
6569   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6570     {
6571       if (gimple_omp_for_combined_into_p (fd.for_stmt))
6572 	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6573       else
6574 	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6575     }
6576   else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6577 	   && !fd.have_ordered)
6578     {
6579       if (fd.chunk_size == NULL)
6580 	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6581       else
6582 	expand_omp_for_static_chunk (region, &fd, inner_stmt);
6583     }
6584   else
6585     {
6586       int fn_index, start_ix, next_ix;
6587       unsigned HOST_WIDE_INT sched = 0;
6588       tree sched_arg = NULL_TREE;
6589 
6590       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6591 		  == GF_OMP_FOR_KIND_FOR);
6592       if (fd.chunk_size == NULL
6593 	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6594 	fd.chunk_size = integer_zero_node;
6595       switch (fd.sched_kind)
6596 	{
6597 	case OMP_CLAUSE_SCHEDULE_RUNTIME:
6598 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
6599 	      && fd.lastprivate_conditional == 0)
6600 	    {
6601 	      gcc_assert (!fd.have_ordered);
6602 	      fn_index = 6;
6603 	      sched = 4;
6604 	    }
6605 	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6606 		   && !fd.have_ordered
6607 		   && fd.lastprivate_conditional == 0)
6608 	    fn_index = 7;
6609 	  else
6610 	    {
6611 	      fn_index = 3;
6612 	      sched = (HOST_WIDE_INT_1U << 31);
6613 	    }
6614 	  break;
6615 	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6616 	case OMP_CLAUSE_SCHEDULE_GUIDED:
6617 	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6618 	      && !fd.have_ordered
6619 	      && fd.lastprivate_conditional == 0)
6620 	    {
6621 	      fn_index = 3 + fd.sched_kind;
6622 	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6623 	      break;
6624 	    }
6625 	  fn_index = fd.sched_kind;
6626 	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6627 	  sched += (HOST_WIDE_INT_1U << 31);
6628 	  break;
6629 	case OMP_CLAUSE_SCHEDULE_STATIC:
6630 	  gcc_assert (fd.have_ordered);
6631 	  fn_index = 0;
6632 	  sched = (HOST_WIDE_INT_1U << 31) + 1;
6633 	  break;
6634 	default:
6635 	  gcc_unreachable ();
6636 	}
6637       if (!fd.ordered)
6638 	fn_index += fd.have_ordered * 8;
6639       if (fd.ordered)
6640 	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6641       else
6642 	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6643       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6644       if (fd.have_reductemp || fd.have_pointer_condtemp)
6645 	{
6646 	  if (fd.ordered)
6647 	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6648 	  else if (fd.have_ordered)
6649 	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6650 	  else
6651 	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6652 	  sched_arg = build_int_cstu (long_integer_type_node, sched);
6653 	  if (!fd.chunk_size)
6654 	    fd.chunk_size = integer_zero_node;
6655 	}
6656       if (fd.iter_type == long_long_unsigned_type_node)
6657 	{
6658 	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6659 			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6660 	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6661 		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6662 	}
6663       expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6664 			      (enum built_in_function) next_ix, sched_arg,
6665 			      inner_stmt);
6666     }
6667 
6668   if (gimple_in_ssa_p (cfun))
6669     update_ssa (TODO_update_ssa_only_virtuals);
6670 }
6671 
6672 /* Expand code for an OpenMP sections directive.  In pseudo code, we generate
6673 
6674 	v = GOMP_sections_start (n);
6675     L0:
6676 	switch (v)
6677 	  {
6678 	  case 0:
6679 	    goto L2;
6680 	  case 1:
6681 	    section 1;
6682 	    goto L1;
6683 	  case 2:
6684 	    ...
6685 	  case n:
6686 	    ...
6687 	  default:
6688 	    abort ();
6689 	  }
6690     L1:
6691 	v = GOMP_sections_next ();
6692 	goto L0;
6693     L2:
6694 	reduction;
6695 
6696     If this is a combined parallel sections, replace the call to
6697     GOMP_sections_start with call to GOMP_sections_next.  */
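
/* For example (illustrative only), a directive such as

     #pragma omp sections
     {
       #pragma omp section
	 foo ();
       #pragma omp section
	 bar ();
     }

   yields the switch above with n = 2: case 1 runs foo (), case 2 runs
   bar (), case 0 branches to L2, and the default case traps.  */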
6698 
6699 static void
6700 expand_omp_sections (struct omp_region *region)
6701 {
6702   tree t, u, vin = NULL, vmain, vnext, l2;
6703   unsigned len;
6704   basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6705   gimple_stmt_iterator si, switch_si;
6706   gomp_sections *sections_stmt;
6707   gimple *stmt;
6708   gomp_continue *cont;
6709   edge_iterator ei;
6710   edge e;
6711   struct omp_region *inner;
6712   unsigned i, casei;
6713   bool exit_reachable = region->cont != NULL;
6714 
6715   gcc_assert (region->exit != NULL);
6716   entry_bb = region->entry;
6717   l0_bb = single_succ (entry_bb);
6718   l1_bb = region->cont;
6719   l2_bb = region->exit;
6720   if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6721     l2 = gimple_block_label (l2_bb);
6722   else
6723     {
6724       /* This can happen if there are reductions.  */
6725       len = EDGE_COUNT (l0_bb->succs);
6726       gcc_assert (len > 0);
6727       e = EDGE_SUCC (l0_bb, len - 1);
6728       si = gsi_last_nondebug_bb (e->dest);
6729       l2 = NULL_TREE;
6730       if (gsi_end_p (si)
6731 	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6732 	l2 = gimple_block_label (e->dest);
6733       else
6734 	FOR_EACH_EDGE (e, ei, l0_bb->succs)
6735 	  {
6736 	    si = gsi_last_nondebug_bb (e->dest);
6737 	    if (gsi_end_p (si)
6738 		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6739 	      {
6740 		l2 = gimple_block_label (e->dest);
6741 		break;
6742 	      }
6743 	  }
6744     }
6745   if (exit_reachable)
6746     default_bb = create_empty_bb (l1_bb->prev_bb);
6747   else
6748     default_bb = create_empty_bb (l0_bb);
6749 
6750   /* We will build a switch() with enough cases for all the
6751      GIMPLE_OMP_SECTION regions, a '0' case for when no more work is left,
6752      and a default case to abort if something goes wrong.  */
6753   len = EDGE_COUNT (l0_bb->succs);
6754 
6755   /* Use vec::quick_push on label_vec throughout, since we know the size
6756      in advance.  */
6757   auto_vec<tree> label_vec (len);
6758 
6759   /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6760      GIMPLE_OMP_SECTIONS statement.  */
6761   si = gsi_last_nondebug_bb (entry_bb);
6762   sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6763   gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6764   vin = gimple_omp_sections_control (sections_stmt);
6765   tree clauses = gimple_omp_sections_clauses (sections_stmt);
6766   tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6767   tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6768   tree cond_var = NULL_TREE;
6769   if (reductmp || condtmp)
6770     {
6771       tree reductions = null_pointer_node, mem = null_pointer_node;
6772       tree memv = NULL_TREE, condtemp = NULL_TREE;
6773       gimple_stmt_iterator gsi = gsi_none ();
6774       gimple *g = NULL;
6775       if (reductmp)
6776 	{
6777 	  reductions = OMP_CLAUSE_DECL (reductmp);
6778 	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6779 	  g = SSA_NAME_DEF_STMT (reductions);
6780 	  reductions = gimple_assign_rhs1 (g);
6781 	  OMP_CLAUSE_DECL (reductmp) = reductions;
6782 	  gsi = gsi_for_stmt (g);
6783 	}
6784       else
6785 	gsi = si;
6786       if (condtmp)
6787 	{
6788 	  condtemp = OMP_CLAUSE_DECL (condtmp);
6789 	  tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6790 				    OMP_CLAUSE__CONDTEMP_);
6791 	  cond_var = OMP_CLAUSE_DECL (c);
6792 	  tree type = TREE_TYPE (condtemp);
6793 	  memv = create_tmp_var (type);
6794 	  TREE_ADDRESSABLE (memv) = 1;
6795 	  unsigned cnt = 0;
6796 	  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6797 	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6798 		&& OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6799 	      ++cnt;
6800 	  unsigned HOST_WIDE_INT sz
6801 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6802 	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6803 				   false);
6804 	  mem = build_fold_addr_expr (memv);
6805 	}
6806       t = build_int_cst (unsigned_type_node, len - 1);
6807       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6808       stmt = gimple_build_call (u, 3, t, reductions, mem);
6809       gimple_call_set_lhs (stmt, vin);
6810       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6811       if (condtmp)
6812 	{
6813 	  expand_omp_build_assign (&gsi, condtemp, memv, false);
6814 	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6815 			   vin, build_one_cst (TREE_TYPE (cond_var)));
6816 	  expand_omp_build_assign (&gsi, cond_var, t, false);
6817 	}
6818       if (reductmp)
6819 	{
6820 	  gsi_remove (&gsi, true);
6821 	  release_ssa_name (gimple_assign_lhs (g));
6822 	}
6823     }
6824   else if (!is_combined_parallel (region))
6825     {
6826       /* If we are not inside a combined parallel+sections region,
6827 	 call GOMP_sections_start.  */
6828       t = build_int_cst (unsigned_type_node, len - 1);
6829       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6830       stmt = gimple_build_call (u, 1, t);
6831     }
6832   else
6833     {
6834       /* Otherwise, call GOMP_sections_next.  */
6835       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6836       stmt = gimple_build_call (u, 0);
6837     }
6838   if (!reductmp && !condtmp)
6839     {
6840       gimple_call_set_lhs (stmt, vin);
6841       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6842     }
6843   gsi_remove (&si, true);
6844 
6845   /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6846      L0_BB.  */
6847   switch_si = gsi_last_nondebug_bb (l0_bb);
6848   gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6849   if (exit_reachable)
6850     {
6851       cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6852       gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6853       vmain = gimple_omp_continue_control_use (cont);
6854       vnext = gimple_omp_continue_control_def (cont);
6855     }
6856   else
6857     {
6858       vmain = vin;
6859       vnext = NULL_TREE;
6860     }
6861 
6862   t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6863   label_vec.quick_push (t);
6864   i = 1;
6865 
6866   /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
6867   for (inner = region->inner, casei = 1;
6868        inner;
6869        inner = inner->next, i++, casei++)
6870     {
6871       basic_block s_entry_bb, s_exit_bb;
6872 
6873       /* Skip optional reduction region.  */
6874       if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6875 	{
6876 	  --i;
6877 	  --casei;
6878 	  continue;
6879 	}
6880 
6881       s_entry_bb = inner->entry;
6882       s_exit_bb = inner->exit;
6883 
6884       t = gimple_block_label (s_entry_bb);
6885       u = build_int_cst (unsigned_type_node, casei);
6886       u = build_case_label (u, NULL, t);
6887       label_vec.quick_push (u);
6888 
6889       si = gsi_last_nondebug_bb (s_entry_bb);
6890       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6891       gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6892       gsi_remove (&si, true);
6893       single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6894 
6895       if (s_exit_bb == NULL)
6896 	continue;
6897 
6898       si = gsi_last_nondebug_bb (s_exit_bb);
6899       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6900       gsi_remove (&si, true);
6901 
6902       single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6903     }
6904 
6905   /* Error handling code goes in DEFAULT_BB.  */
6906   t = gimple_block_label (default_bb);
6907   u = build_case_label (NULL, NULL, t);
6908   make_edge (l0_bb, default_bb, 0);
6909   add_bb_to_loop (default_bb, current_loops->tree_root);
6910 
6911   stmt = gimple_build_switch (vmain, u, label_vec);
6912   gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6913   gsi_remove (&switch_si, true);
6914 
6915   si = gsi_start_bb (default_bb);
6916   stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6917   gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6918 
6919   if (exit_reachable)
6920     {
6921       tree bfn_decl;
6922 
6923       /* Code to get the next section goes in L1_BB.  */
6924       si = gsi_last_nondebug_bb (l1_bb);
6925       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6926 
6927       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6928       stmt = gimple_build_call (bfn_decl, 0);
6929       gimple_call_set_lhs (stmt, vnext);
6930       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6931       if (cond_var)
6932 	{
6933 	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6934 			   vnext, build_one_cst (TREE_TYPE (cond_var)));
6935 	  expand_omp_build_assign (&si, cond_var, t, false);
6936 	}
6937       gsi_remove (&si, true);
6938 
6939       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6940     }
6941 
6942   /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
6943   si = gsi_last_nondebug_bb (l2_bb);
6944   if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6945     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6946   else if (gimple_omp_return_lhs (gsi_stmt (si)))
6947     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6948   else
6949     t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6950   stmt = gimple_build_call (t, 0);
6951   if (gimple_omp_return_lhs (gsi_stmt (si)))
6952     gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6953   gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6954   gsi_remove (&si, true);
6955 
6956   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6957 }
6958 
6959 /* Expand code for an OpenMP single directive.  We've already expanded
6960    much of the code, here we simply place the GOMP_barrier call.  */
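
/* Illustration only: for

     #pragma omp single
       foo ();

   the check of which thread runs the body (GOMP_single_start, or the
   copyprivate variant) was already emitted during lowering; all that is
   left here is to drop the entry/exit markers and, unless the construct
   was 'nowait', insert the closing GOMP_barrier.  */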
6961 
6962 static void
6963 expand_omp_single (struct omp_region *region)
6964 {
6965   basic_block entry_bb, exit_bb;
6966   gimple_stmt_iterator si;
6967 
6968   entry_bb = region->entry;
6969   exit_bb = region->exit;
6970 
6971   si = gsi_last_nondebug_bb (entry_bb);
6972   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6973   gsi_remove (&si, true);
6974   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6975 
6976   si = gsi_last_nondebug_bb (exit_bb);
6977   if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6978     {
6979       tree t = gimple_omp_return_lhs (gsi_stmt (si));
6980       gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6981     }
6982   gsi_remove (&si, true);
6983   single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6984 }
6985 
6986 /* Generic expansion for OpenMP synchronization directives: master,
6987    ordered and critical.  All we need to do here is remove the entry
6988    and exit markers for REGION.  */
6989 
6990 static void
6991 expand_omp_synch (struct omp_region *region)
6992 {
6993   basic_block entry_bb, exit_bb;
6994   gimple_stmt_iterator si;
6995 
6996   entry_bb = region->entry;
6997   exit_bb = region->exit;
6998 
6999   si = gsi_last_nondebug_bb (entry_bb);
7000   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
7001 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
7002 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
7003 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
7004 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
7005 	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
7006   if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
7007       && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
7008     {
7009       expand_omp_taskreg (region);
7010       return;
7011     }
7012   gsi_remove (&si, true);
7013   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7014 
7015   if (exit_bb)
7016     {
7017       si = gsi_last_nondebug_bb (exit_bb);
7018       gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
7019       gsi_remove (&si, true);
7020       single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
7021     }
7022 }
7023 
7024 /* Translate enum omp_memory_order to enum memmodel.  The two enums
7025    are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
7026    is 0.  */
7027 
7028 static enum memmodel
7029 omp_memory_order_to_memmodel (enum omp_memory_order mo)
7030 {
7031   switch (mo)
7032     {
7033     case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
7034     case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
7035     case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
7036     case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
7037     case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
7038     default: gcc_unreachable ();
7039     }
7040 }
7041 
7042 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
7043    operation as a normal volatile load.  */
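
/* Roughly (for illustration), given a 4-byte variable x,

     #pragma omp atomic read
       v = x;

   is emitted as v = __atomic_load_4 (&x, mo), where the builtin variant
   (_1/_2/_4/_8/_16) is chosen via INDEX and MO comes from the construct's
   memory-order clause through omp_memory_order_to_memmodel above.  */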
7044 
7045 static bool
7046 expand_omp_atomic_load (basic_block load_bb, tree addr,
7047 			tree loaded_val, int index)
7048 {
7049   enum built_in_function tmpbase;
7050   gimple_stmt_iterator gsi;
7051   basic_block store_bb;
7052   location_t loc;
7053   gimple *stmt;
7054   tree decl, call, type, itype;
7055 
7056   gsi = gsi_last_nondebug_bb (load_bb);
7057   stmt = gsi_stmt (gsi);
7058   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7059   loc = gimple_location (stmt);
7060 
7061   /* ??? If the target does not implement atomic_load_optab[mode], and mode
7062      is smaller than word size, then expand_atomic_load assumes that the load
7063      is atomic.  We could avoid the builtin entirely in this case.  */
7064 
7065   tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7066   decl = builtin_decl_explicit (tmpbase);
7067   if (decl == NULL_TREE)
7068     return false;
7069 
7070   type = TREE_TYPE (loaded_val);
7071   itype = TREE_TYPE (TREE_TYPE (decl));
7072 
7073   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7074   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7075   call = build_call_expr_loc (loc, decl, 2, addr, mo);
7076   if (!useless_type_conversion_p (type, itype))
7077     call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7078   call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7079 
7080   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7081   gsi_remove (&gsi, true);
7082 
7083   store_bb = single_succ (load_bb);
7084   gsi = gsi_last_nondebug_bb (store_bb);
7085   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7086   gsi_remove (&gsi, true);
7087 
7088   if (gimple_in_ssa_p (cfun))
7089     update_ssa (TODO_update_ssa_no_phi);
7090 
7091   return true;
7092 }
7093 
7094 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
7095    operation as a normal volatile store.  */
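
/* Similarly (illustration only), for a 4-byte x,

     #pragma omp atomic write
       x = expr;

   becomes __atomic_store_4 (&x, expr, mo); when the old value is also
   needed (an atomic capture), __atomic_exchange_4 is used instead, as
   selected by the need_value check below.  */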
7096 
7097 static bool
7098 expand_omp_atomic_store (basic_block load_bb, tree addr,
7099 			 tree loaded_val, tree stored_val, int index)
7100 {
7101   enum built_in_function tmpbase;
7102   gimple_stmt_iterator gsi;
7103   basic_block store_bb = single_succ (load_bb);
7104   location_t loc;
7105   gimple *stmt;
7106   tree decl, call, type, itype;
7107   machine_mode imode;
7108   bool exchange;
7109 
7110   gsi = gsi_last_nondebug_bb (load_bb);
7111   stmt = gsi_stmt (gsi);
7112   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7113 
7114   /* If the load value is needed, then this isn't a store but an exchange.  */
7115   exchange = gimple_omp_atomic_need_value_p (stmt);
7116 
7117   gsi = gsi_last_nondebug_bb (store_bb);
7118   stmt = gsi_stmt (gsi);
7119   gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
7120   loc = gimple_location (stmt);
7121 
7122   /* ??? If the target does not implement atomic_store_optab[mode], and mode
7123      is smaller than word size, then expand_atomic_store assumes that the store
7124      is atomic.  We could avoid the builtin entirely in this case.  */
7125 
7126   tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
7127   tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
7128   decl = builtin_decl_explicit (tmpbase);
7129   if (decl == NULL_TREE)
7130     return false;
7131 
7132   type = TREE_TYPE (stored_val);
7133 
7134   /* Dig out the type of the function's second argument.  */
7135   itype = TREE_TYPE (decl);
7136   itype = TYPE_ARG_TYPES (itype);
7137   itype = TREE_CHAIN (itype);
7138   itype = TREE_VALUE (itype);
7139   imode = TYPE_MODE (itype);
7140 
7141   if (exchange && !can_atomic_exchange_p (imode, true))
7142     return false;
7143 
7144   if (!useless_type_conversion_p (itype, type))
7145     stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
7146   enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7147   tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7148   call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
7149   if (exchange)
7150     {
7151       if (!useless_type_conversion_p (type, itype))
7152 	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7153       call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7154     }
7155 
7156   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7157   gsi_remove (&gsi, true);
7158 
7159   /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
7160   gsi = gsi_last_nondebug_bb (load_bb);
7161   gsi_remove (&gsi, true);
7162 
7163   if (gimple_in_ssa_p (cfun))
7164     update_ssa (TODO_update_ssa_no_phi);
7165 
7166   return true;
7167 }
7168 
7169 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
7170    operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
7171    size of the data type, and thus usable to find the index of the builtin
7172    decl.  Returns false if the expression is not of the proper form.  */
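
/* A concrete (illustrative) case: with a 4-byte x,

     #pragma omp atomic
       x += v;

   matches the PLUS_EXPR form handled below and is emitted as
   __atomic_fetch_add_4 (&x, v, mo); INDEX is 2 here since the builtins
   are indexed by log2 of the access size.  */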
7173 
7174 static bool
7175 expand_omp_atomic_fetch_op (basic_block load_bb,
7176 			    tree addr, tree loaded_val,
7177 			    tree stored_val, int index)
7178 {
7179   enum built_in_function oldbase, newbase, tmpbase;
7180   tree decl, itype, call;
7181   tree lhs, rhs;
7182   basic_block store_bb = single_succ (load_bb);
7183   gimple_stmt_iterator gsi;
7184   gimple *stmt;
7185   location_t loc;
7186   enum tree_code code;
7187   bool need_old, need_new;
7188   machine_mode imode;
7189 
7190   /* We expect to find the following sequences:
7191 
7192    load_bb:
7193        GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
7194 
7195    store_bb:
7196        val = tmp OP something; (or: something OP tmp)
7197        GIMPLE_OMP_STORE (val)
7198 
7199   ???FIXME: Allow a more flexible sequence.
7200   Perhaps use data flow to pick the statements.
7201 
7202   */
7203 
7204   gsi = gsi_after_labels (store_bb);
7205   stmt = gsi_stmt (gsi);
7206   if (is_gimple_debug (stmt))
7207     {
7208       gsi_next_nondebug (&gsi);
7209       if (gsi_end_p (gsi))
7210 	return false;
7211       stmt = gsi_stmt (gsi);
7212     }
7213   loc = gimple_location (stmt);
7214   if (!is_gimple_assign (stmt))
7215     return false;
7216   gsi_next_nondebug (&gsi);
7217   if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
7218     return false;
7219   need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
7220   need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
7221   enum omp_memory_order omo
7222     = gimple_omp_atomic_memory_order (last_stmt (load_bb));
7223   enum memmodel mo = omp_memory_order_to_memmodel (omo);
7224   gcc_checking_assert (!need_old || !need_new);
7225 
7226   if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
7227     return false;
7228 
7229   /* Check for one of the supported fetch-op operations.  */
7230   code = gimple_assign_rhs_code (stmt);
7231   switch (code)
7232     {
7233     case PLUS_EXPR:
7234     case POINTER_PLUS_EXPR:
7235       oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
7236       newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
7237       break;
7238     case MINUS_EXPR:
7239       oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
7240       newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
7241       break;
7242     case BIT_AND_EXPR:
7243       oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
7244       newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
7245       break;
7246     case BIT_IOR_EXPR:
7247       oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
7248       newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
7249       break;
7250     case BIT_XOR_EXPR:
7251       oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
7252       newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
7253       break;
7254     default:
7255       return false;
7256     }
7257 
7258   /* Make sure the expression is of the proper form.  */
7259   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
7260     rhs = gimple_assign_rhs2 (stmt);
7261   else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
7262 	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
7263     rhs = gimple_assign_rhs1 (stmt);
7264   else
7265     return false;
7266 
7267   tmpbase = ((enum built_in_function)
7268 	     ((need_new ? newbase : oldbase) + index + 1));
7269   decl = builtin_decl_explicit (tmpbase);
7270   if (decl == NULL_TREE)
7271     return false;
7272   itype = TREE_TYPE (TREE_TYPE (decl));
7273   imode = TYPE_MODE (itype);
7274 
7275   /* We could test all of the various optabs involved, but the fact of the
7276      matter is that (with the exception of i486 vs i586 and xadd) all targets
7277      that support any atomic operation optab also implement compare-and-swap.
7278      Let optabs.c take care of expanding any compare-and-swap loop.  */
7279   if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
7280     return false;
7281 
7282   gsi = gsi_last_nondebug_bb (load_bb);
7283   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
7284 
7285   /* OpenMP does not imply any barrier-like semantics on its atomic ops.
7286      It only requires that the operation happen atomically.  Thus we can
7287      use the RELAXED memory model.  */
7288   call = build_call_expr_loc (loc, decl, 3, addr,
7289 			      fold_convert_loc (loc, itype, rhs),
7290 			      build_int_cst (NULL, mo));
7291 
7292   if (need_old || need_new)
7293     {
7294       lhs = need_old ? loaded_val : stored_val;
7295       call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
7296       call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
7297     }
7298   else
7299     call = fold_convert_loc (loc, void_type_node, call);
7300   force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7301   gsi_remove (&gsi, true);
7302 
7303   gsi = gsi_last_nondebug_bb (store_bb);
7304   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7305   gsi_remove (&gsi, true);
7306   gsi = gsi_last_nondebug_bb (store_bb);
7307   stmt = gsi_stmt (gsi);
7308   gsi_remove (&gsi, true);
7309 
7310   if (gimple_in_ssa_p (cfun))
7311     {
7312       release_defs (stmt);
7313       update_ssa (TODO_update_ssa_no_phi);
7314     }
7315 
7316   return true;
7317 }
7318 
7319 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
7320 
7321       oldval = *addr;
7322       repeat:
7323 	newval = rhs;	 // with oldval replacing *addr in rhs
7324 	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
7325 	if (oldval != newval)
7326 	  goto repeat;
7327 
7328    INDEX is log2 of the size of the data type, and thus usable to find the
7329    index of the builtin decl.  */
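
/* Illustration only: for a 4-byte 'float', the compare-and-swap must be
   performed on a same-sized integer, so the values are punned through
   VIEW_CONVERT_EXPRs, roughly

     int oldi = VIEW_CONVERT (int, f);
     int newi = VIEW_CONVERT (int, f OP rhs);
     __sync_val_compare_and_swap ((int *) &f, oldi, newi);

   which is why the code below sets up integral twins (IADDR, LOADEDI,
   STOREDI) of ADDR, LOADED_VAL and STORED_VAL.  */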
7330 
7331 static bool
7332 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
7333 			    tree addr, tree loaded_val, tree stored_val,
7334 			    int index)
7335 {
7336   tree loadedi, storedi, initial, new_storedi, old_vali;
7337   tree type, itype, cmpxchg, iaddr, atype;
7338   gimple_stmt_iterator si;
7339   basic_block loop_header = single_succ (load_bb);
7340   gimple *phi, *stmt;
7341   edge e;
7342   enum built_in_function fncode;
7343 
7344   /* ??? We need a non-pointer interface to __atomic_compare_exchange in
7345      order to use the RELAXED memory model effectively.  */
7346   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
7347 				    + index + 1);
7348   cmpxchg = builtin_decl_explicit (fncode);
7349   if (cmpxchg == NULL_TREE)
7350     return false;
7351   type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7352   atype = type;
7353   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
7354 
7355   if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
7356       || !can_atomic_load_p (TYPE_MODE (itype)))
7357     return false;
7358 
7359   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
7360   si = gsi_last_nondebug_bb (load_bb);
7361   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7362 
7363   /* For floating-point values, we'll need to view-convert them to integers
7364      so that we can perform the atomic compare and swap.  Simplify the
7365      following code by always setting up the "i"ntegral variables.  */
7366   if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7367     {
7368       tree iaddr_val;
7369 
7370       iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7371 							   true));
7372       atype = itype;
7373       iaddr_val
7374 	= force_gimple_operand_gsi (&si,
7375 				    fold_convert (TREE_TYPE (iaddr), addr),
7376 				    false, NULL_TREE, true, GSI_SAME_STMT);
7377       stmt = gimple_build_assign (iaddr, iaddr_val);
7378       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7379       loadedi = create_tmp_var (itype);
7380       if (gimple_in_ssa_p (cfun))
7381 	loadedi = make_ssa_name (loadedi);
7382     }
7383   else
7384     {
7385       iaddr = addr;
7386       loadedi = loaded_val;
7387     }
7388 
7389   fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7390   tree loaddecl = builtin_decl_explicit (fncode);
7391   if (loaddecl)
7392     initial
7393       = fold_convert (atype,
7394 		      build_call_expr (loaddecl, 2, iaddr,
7395 				       build_int_cst (NULL_TREE,
7396 						      MEMMODEL_RELAXED)));
7397   else
7398     {
7399       tree off
7400 	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7401 						      true), 0);
7402       initial = build2 (MEM_REF, atype, iaddr, off);
7403     }
7404 
7405   initial
7406     = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7407 				GSI_SAME_STMT);
7408 
7409   /* Move the value to the LOADEDI temporary.  */
7410   if (gimple_in_ssa_p (cfun))
7411     {
7412       gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7413       phi = create_phi_node (loadedi, loop_header);
7414       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7415 	       initial);
7416     }
7417   else
7418     gsi_insert_before (&si,
7419 		       gimple_build_assign (loadedi, initial),
7420 		       GSI_SAME_STMT);
7421   if (loadedi != loaded_val)
7422     {
7423       gimple_stmt_iterator gsi2;
7424       tree x;
7425 
7426       x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7427       gsi2 = gsi_start_bb (loop_header);
7428       if (gimple_in_ssa_p (cfun))
7429 	{
7430 	  gassign *stmt;
7431 	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7432 					true, GSI_SAME_STMT);
7433 	  stmt = gimple_build_assign (loaded_val, x);
7434 	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7435 	}
7436       else
7437 	{
7438 	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7439 	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7440 				    true, GSI_SAME_STMT);
7441 	}
7442     }
7443   gsi_remove (&si, true);
7444 
7445   si = gsi_last_nondebug_bb (store_bb);
7446   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7447 
7448   if (iaddr == addr)
7449     storedi = stored_val;
7450   else
7451     storedi
7452       = force_gimple_operand_gsi (&si,
7453 				  build1 (VIEW_CONVERT_EXPR, itype,
7454 					  stored_val), true, NULL_TREE, true,
7455 				  GSI_SAME_STMT);
7456 
7457   /* Build the compare&swap statement.  */
7458   new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7459   new_storedi = force_gimple_operand_gsi (&si,
7460 					  fold_convert (TREE_TYPE (loadedi),
7461 							new_storedi),
7462 					  true, NULL_TREE,
7463 					  true, GSI_SAME_STMT);
7464 
7465   if (gimple_in_ssa_p (cfun))
7466     old_vali = loadedi;
7467   else
7468     {
7469       old_vali = create_tmp_var (TREE_TYPE (loadedi));
7470       stmt = gimple_build_assign (old_vali, loadedi);
7471       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7472 
7473       stmt = gimple_build_assign (loadedi, new_storedi);
7474       gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7475     }
7476 
7477   /* Note that we always perform the comparison as an integer, even for
7478      floating point.  This allows the atomic operation to properly succeed
7479      even with NaNs and -0.0: the compare-and-swap itself compares bits, so
     a floating-point comparison here could spin forever on a NaN (NaN != NaN
     even after a successful swap) or exit early after a failed swap when
     -0.0 compares equal to +0.0.  */
7480   tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7481   stmt = gimple_build_cond_empty (ne);
7482   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7483 
7484   /* Update cfg.  */
7485   e = single_succ_edge (store_bb);
7486   e->flags &= ~EDGE_FALLTHRU;
7487   e->flags |= EDGE_FALSE_VALUE;
7488   /* Expect no looping.  */
7489   e->probability = profile_probability::guessed_always ();
7490 
7491   e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
7492   e->probability = profile_probability::guessed_never ();
7493 
7494   /* Copy the new value to loadedi (we already did that before the condition
7495      if we are not in SSA).  */
7496   if (gimple_in_ssa_p (cfun))
7497     {
7498       phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7499       SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7500     }
7501 
7502   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
7503   gsi_remove (&si, true);
7504 
7505   class loop *loop = alloc_loop ();
7506   loop->header = loop_header;
7507   loop->latch = store_bb;
7508   add_loop (loop, loop_header->loop_father);
7509 
7510   if (gimple_in_ssa_p (cfun))
7511     update_ssa (TODO_update_ssa_no_phi);
7512 
7513   return true;
7514 }
7515 
7516 /* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
7517 
7518 				  GOMP_atomic_start ();
7519 				  *addr = rhs;
7520 				  GOMP_atomic_end ();
7521 
7522    The result is not globally atomic, but works so long as all parallel
7523    references are within #pragma omp atomic directives.  According to
7524    responses received from omp@openmp.org, this appears to be within the
7525    spec, which makes sense given that several other compilers handle
7526    this situation the same way.
7527    LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7528    expanding.  STORED_VAL is the operand of the matching
7529    GIMPLE_OMP_ATOMIC_STORE.
7530 
7531    We replace
7532    GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7533    loaded_val = *addr;
7534 
7535    and replace
7536    GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
7537    *addr = stored_val;
7538 */
7539 
7540 static bool
7541 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7542 			 tree addr, tree loaded_val, tree stored_val)
7543 {
7544   gimple_stmt_iterator si;
7545   gassign *stmt;
7546   tree t;
7547 
7548   si = gsi_last_nondebug_bb (load_bb);
7549   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7550 
7551   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7552   t = build_call_expr (t, 0);
7553   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7554 
7555   tree mem = build_simple_mem_ref (addr);
7556   TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7557   TREE_OPERAND (mem, 1)
7558     = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7559 						 true),
7560 		    TREE_OPERAND (mem, 1));
7561   stmt = gimple_build_assign (loaded_val, mem);
7562   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7563   gsi_remove (&si, true);
7564 
7565   si = gsi_last_nondebug_bb (store_bb);
7566   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7567 
7568   stmt = gimple_build_assign (unshare_expr (mem), stored_val);
7569   gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7570 
7571   t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7572   t = build_call_expr (t, 0);
7573   force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7574   gsi_remove (&si, true);
7575 
7576   if (gimple_in_ssa_p (cfun))
7577     update_ssa (TODO_update_ssa_no_phi);
7578   return true;
7579 }
7580 
7581 /* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
7582    using expand_omp_atomic_fetch_op.  If that fails, we try
7583    expand_omp_atomic_pipeline, and if that fails too, the
7584    ultimate fallback is wrapping the operation in a mutex
7585    (expand_omp_atomic_mutex).  REGION is the atomic region built
7586    by build_omp_regions_1().  */
7587 
7588 static void
7589 expand_omp_atomic (struct omp_region *region)
7590 {
7591   basic_block load_bb = region->entry, store_bb = region->exit;
7592   gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7593   gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7594   tree loaded_val = gimple_omp_atomic_load_lhs (load);
7595   tree addr = gimple_omp_atomic_load_rhs (load);
7596   tree stored_val = gimple_omp_atomic_store_val (store);
7597   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7598   HOST_WIDE_INT index;
7599 
7600   /* Make sure the type is one of the supported sizes.  */
7601   index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7602   index = exact_log2 (index);
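  /* For instance (a sketch assuming the usual type sizes), a 4-byte 'int'
     has TYPE_SIZE_UNIT 4 and thus INDEX 2; the helpers below use INDEX to
     pick the matching builtin, e.g. expand_omp_atomic_pipeline selects
     BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N + INDEX + 1, i.e.
     __sync_val_compare_and_swap_4 in that case.  */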
7603   if (index >= 0 && index <= 4)
7604     {
7605       unsigned int align = TYPE_ALIGN_UNIT (type);
7606 
7607       /* __sync builtins require strict data alignment.  */
7608       if (exact_log2 (align) >= index)
7609 	{
7610 	  /* Atomic load.  */
7611 	  scalar_mode smode;
7612 	  if (loaded_val == stored_val
7613 	      && (is_int_mode (TYPE_MODE (type), &smode)
7614 		  || is_float_mode (TYPE_MODE (type), &smode))
7615 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7616 	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7617 	    return;
7618 
7619 	  /* Atomic store.  */
7620 	  if ((is_int_mode (TYPE_MODE (type), &smode)
7621 	       || is_float_mode (TYPE_MODE (type), &smode))
7622 	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7623 	      && store_bb == single_succ (load_bb)
7624 	      && first_stmt (store_bb) == store
7625 	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
7626 					  stored_val, index))
7627 	    return;
7628 
7629 	  /* When possible, use specialized atomic update functions.  */
7630 	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7631 	      && store_bb == single_succ (load_bb)
7632 	      && expand_omp_atomic_fetch_op (load_bb, addr,
7633 					     loaded_val, stored_val, index))
7634 	    return;
7635 
7636 	  /* If we don't have specialized __sync builtins, try to implement
7637 	     this as a compare-and-swap loop.  */
7638 	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7639 					  loaded_val, stored_val, index))
7640 	    return;
7641 	}
7642     }
7643 
7644   /* The ultimate fallback is wrapping the operation in a mutex.  */
7645   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7646 }
7647 
7648 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7649    at REGION_EXIT.  */
7650 
7651 static void
7652 mark_loops_in_oacc_kernels_region (basic_block region_entry,
7653 				   basic_block region_exit)
7654 {
7655   class loop *outer = region_entry->loop_father;
7656   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7657 
7658   /* Don't parallelize the kernels region if it contains more than one outer
7659      loop.  */
7660   unsigned int nr_outer_loops = 0;
7661   class loop *single_outer = NULL;
7662   for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
7663     {
7664       gcc_assert (loop_outer (loop) == outer);
7665 
7666       if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7667 	continue;
7668 
7669       if (region_exit != NULL
7670 	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7671 	continue;
7672 
7673       nr_outer_loops++;
7674       single_outer = loop;
7675     }
7676   if (nr_outer_loops != 1)
7677     return;
7678 
7679   for (class loop *loop = single_outer->inner;
7680        loop != NULL;
7681        loop = loop->inner)
7682     if (loop->next)
7683       return;
7684 
7685   /* Mark the loops in the region.  */
7686   for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
7687     loop->in_oacc_kernels_region = true;
7688 }
7689 
7690 /* Types used to pass grid and work-group sizes to kernel invocation.  */
7691 
7692 struct GTY(()) grid_launch_attributes_trees
7693 {
7694   tree kernel_dim_array_type;
7695   tree kernel_lattrs_dimnum_decl;
7696   tree kernel_lattrs_grid_decl;
7697   tree kernel_lattrs_group_decl;
7698   tree kernel_launch_attributes_type;
7699 };
7700 
7701 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7702 
7703 /* Create types used to pass kernel launch attributes to target.  */
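/* Roughly, the record built below corresponds to the following C layout
   (a sketch; the three-element arrays come from the 0..2 index type):

       struct __gomp_kernel_launch_attributes
       {
	 uint32_t ndim;
	 uint32_t grid_size[3];
	 uint32_t group_size[3];
       };
*/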
7704 
7705 static void
7706 grid_create_kernel_launch_attr_types (void)
7707 {
7708   if (grid_attr_trees)
7709     return;
7710   grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7711 
7712   tree dim_arr_index_type
7713     = build_index_type (build_int_cst (integer_type_node, 2));
7714   grid_attr_trees->kernel_dim_array_type
7715     = build_array_type (uint32_type_node, dim_arr_index_type);
7716 
7717   grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7718   grid_attr_trees->kernel_lattrs_dimnum_decl
7719     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7720 		  uint32_type_node);
7721   DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7722 
7723   grid_attr_trees->kernel_lattrs_grid_decl
7724     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7725 		  grid_attr_trees->kernel_dim_array_type);
7726   DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7727     = grid_attr_trees->kernel_lattrs_dimnum_decl;
7728   grid_attr_trees->kernel_lattrs_group_decl
7729     = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7730 		  grid_attr_trees->kernel_dim_array_type);
7731   DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7732     = grid_attr_trees->kernel_lattrs_grid_decl;
7733   finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7734 			 "__gomp_kernel_launch_attributes",
7735 			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7736 }
7737 
7738 /* Insert, before the current statement in GSI, a store of VALUE into element
7739    INDEX of the array field FLD_DECL (of type kernel_dim_array_type) of
7740    RANGE_VAR.  VALUE must be of type uint32_type_node.  */
7741 
7742 static void
7743 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7744 			     tree fld_decl, int index, tree value)
7745 {
7746   tree ref = build4 (ARRAY_REF, uint32_type_node,
7747 		     build3 (COMPONENT_REF,
7748 			     grid_attr_trees->kernel_dim_array_type,
7749 			     range_var, fld_decl, NULL_TREE),
7750 		     build_int_cst (integer_type_node, index),
7751 		     NULL_TREE, NULL_TREE);
7752   gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7753 }
7754 
7755 /* Return a tree representation of a pointer to a structure with grid and
7756    work-group size information.  Statements filling in that information will
7757    be inserted before GSI.  TGT_STMT is the target statement which carries
7758    the necessary information.  */
7759 
7760 static tree
7761 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7762 				       gomp_target *tgt_stmt)
7763 {
7764   grid_create_kernel_launch_attr_types ();
7765   tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7766 				"__kernel_launch_attrs");
7767 
7768   unsigned max_dim = 0;
7769   for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7770        clause;
7771        clause = OMP_CLAUSE_CHAIN (clause))
7772     {
7773       if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7774 	continue;
7775 
7776       unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7777       max_dim = MAX (dim, max_dim);
7778 
7779       grid_insert_store_range_dim (gsi, lattrs,
7780 				   grid_attr_trees->kernel_lattrs_grid_decl,
7781 				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7782       grid_insert_store_range_dim (gsi, lattrs,
7783 				   grid_attr_trees->kernel_lattrs_group_decl,
7784 				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7785     }
7786 
7787   tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7788 			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7789   gcc_checking_assert (max_dim <= 2);
7790   tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7791   gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7792 		     GSI_SAME_STMT);
7793   TREE_ADDRESSABLE (lattrs) = 1;
7794   return build_fold_addr_expr (lattrs);
7795 }
7796 
7797 /* Build a target argument identifier from the DEVICE identifier, the value
7798    identifier ID, and whether the element also has a SUBSEQUENT_PARAM.  */
7799 
7800 static tree
7801 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7802 {
7803   tree t = build_int_cst (integer_type_node, device);
7804   if (subseqent_param)
7805     t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7806 		     build_int_cst (integer_type_node,
7807 				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7808   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7809 		   build_int_cst (integer_type_node, id));
7810   return t;
7811 }
7812 
7813 /* Like the above, but return it in a type that can be directly stored as an
7814    element of the argument array.  */
7815 
7816 static tree
7817 get_target_argument_identifier (int device, bool subseqent_param, int id)
7818 {
7819   tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7820   return fold_convert (ptr_type_node, t);
7821 }
7822 
7823 /* Return a target argument consisting of DEVICE identifier, value identifier
7824    ID, and the actual VALUE.  */
7825 
7826 static tree
7827 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7828 			   tree value)
7829 {
7830   tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7831 			fold_convert (integer_type_node, value),
7832 			build_int_cst (unsigned_type_node,
7833 				       GOMP_TARGET_ARG_VALUE_SHIFT));
7834   t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7835 		   get_target_argument_identifier_1 (device, false, id));
7836   t = fold_convert (ptr_type_node, t);
7837   return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7838 }
7839 
7840 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7841    push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
7842    otherwise push an identifier (with DEVICE and ID) and the VALUE as two
7843    separate arguments.  */
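/* For example (a sketch of the encoding used above): a constant value such
   as num_teams == 4 fits in the signed 16-bit range and is pushed as the
   single pointer-sized word (4 << GOMP_TARGET_ARG_VALUE_SHIFT) | DEVICE | ID,
   whereas a non-constant value is pushed as an identifier word with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the value itself
   converted to a pointer.  */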
7844 
7845 static void
7846 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7847 					 int id, tree value, vec <tree> *args)
7848 {
7849   if (tree_fits_shwi_p (value)
7850       && tree_to_shwi (value) > -(1 << 15)
7851       && tree_to_shwi (value) < (1 << 15))
7852     args->quick_push (get_target_argument_value (gsi, device, id, value));
7853   else
7854     {
7855       args->quick_push (get_target_argument_identifier (device, true, id));
7856       value = fold_convert (ptr_type_node, value);
7857       value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7858 					GSI_SAME_STMT);
7859       args->quick_push (value);
7860     }
7861 }
7862 
7863 /* Create an array of arguments that is then passed to GOMP_target.  */
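/* E.g. for a plain 'omp target' without HSA grid dimensions, the array built
   below looks roughly like (a sketch):

     { <num_teams argument(s)>, <thread_limit argument(s)>, NULL }

   where each entry is one or two pointer-sized words as described above and
   the trailing NULL terminates the list.  */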
7864 
7865 static tree
7866 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7867 {
7868   auto_vec <tree, 6> args;
7869   tree clauses = gimple_omp_target_clauses (tgt_stmt);
7870   tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7871   if (c)
7872     t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7873   else
7874     t = integer_minus_one_node;
7875   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7876 					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7877 
7878   c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7879   if (c)
7880     t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7881   else
7882     t = integer_minus_one_node;
7883   push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7884 					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
7885 					   &args);
7886 
7887   /* Add HSA-specific grid sizes, if available.  */
7888   if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7889 		       OMP_CLAUSE__GRIDDIM_))
7890     {
7891       int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7892       t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7893       args.quick_push (t);
7894       args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7895     }
7896 
7897   /* Produce more, perhaps device specific, arguments here.  */
7898 
7899   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7900 							  args.length () + 1),
7901 				  ".omp_target_args");
7902   for (unsigned i = 0; i < args.length (); i++)
7903     {
7904       tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7905 			 build_int_cst (integer_type_node, i),
7906 			 NULL_TREE, NULL_TREE);
7907       gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7908 			 GSI_SAME_STMT);
7909     }
7910   tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7911 		     build_int_cst (integer_type_node, args.length ()),
7912 		     NULL_TREE, NULL_TREE);
7913   gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7914 		     GSI_SAME_STMT);
7915   TREE_ADDRESSABLE (argarray) = 1;
7916   return build_fold_addr_expr (argarray);
7917 }
7918 
7919 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
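/* E.g. for an offloaded 'omp target' region this ends up emitting, roughly
   (a sketch; the exact builtin and argument count depend on START_IX below,
   and the libgomp entry point for BUILT_IN_GOMP_TARGET is typically
   GOMP_target_ext):

     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
		      flags, depend, args);

   while the data/update/enter/exit variants and the OpenACC constructs call
   their respective GOMP_/GOACC_ entry points with a subset of these
   arguments.  */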
7920 
7921 static void
7922 expand_omp_target (struct omp_region *region)
7923 {
7924   basic_block entry_bb, exit_bb, new_bb;
7925   struct function *child_cfun;
7926   tree child_fn, block, t;
7927   gimple_stmt_iterator gsi;
7928   gomp_target *entry_stmt;
7929   gimple *stmt;
7930   edge e;
7931   bool offloaded;
7932   int target_kind;
7933 
7934   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7935   target_kind = gimple_omp_target_kind (entry_stmt);
7936   new_bb = region->entry;
7937 
7938   offloaded = is_gimple_omp_offloaded (entry_stmt);
7939   switch (target_kind)
7940     {
7941     case GF_OMP_TARGET_KIND_REGION:
7942     case GF_OMP_TARGET_KIND_UPDATE:
7943     case GF_OMP_TARGET_KIND_ENTER_DATA:
7944     case GF_OMP_TARGET_KIND_EXIT_DATA:
7945     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7946     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7947     case GF_OMP_TARGET_KIND_OACC_SERIAL:
7948     case GF_OMP_TARGET_KIND_OACC_UPDATE:
7949     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7950     case GF_OMP_TARGET_KIND_OACC_DECLARE:
7951     case GF_OMP_TARGET_KIND_DATA:
7952     case GF_OMP_TARGET_KIND_OACC_DATA:
7953     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7954       break;
7955     default:
7956       gcc_unreachable ();
7957     }
7958 
7959   child_fn = NULL_TREE;
7960   child_cfun = NULL;
7961   if (offloaded)
7962     {
7963       child_fn = gimple_omp_target_child_fn (entry_stmt);
7964       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7965     }
7966 
7967   /* Supported by expand_omp_taskreg, but not here.  */
7968   if (child_cfun != NULL)
7969     gcc_checking_assert (!child_cfun->cfg);
7970   gcc_checking_assert (!gimple_in_ssa_p (cfun));
7971 
7972   entry_bb = region->entry;
7973   exit_bb = region->exit;
7974 
7975   if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
7976     mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7977 
7978   /* From here on, all OpenACC compute constructs are mapped to
7979      'BUILT_IN_GOACC_PARALLEL' and get their compute regions outlined.
7980      To distinguish between them, we attach attributes.  */
7981   switch (target_kind)
7982     {
7983     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7984       DECL_ATTRIBUTES (child_fn)
7985 	= tree_cons (get_identifier ("oacc parallel"),
7986 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
7987       break;
7988     case GF_OMP_TARGET_KIND_OACC_KERNELS:
7989       DECL_ATTRIBUTES (child_fn)
7990 	= tree_cons (get_identifier ("oacc kernels"),
7991 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
7992       break;
7993     case GF_OMP_TARGET_KIND_OACC_SERIAL:
7994       DECL_ATTRIBUTES (child_fn)
7995 	= tree_cons (get_identifier ("oacc serial"),
7996 		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
7997       break;
7998     default:
7999       /* Make sure we don't miss any.  */
8000       gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
8001 			     && is_gimple_omp_offloaded (entry_stmt)));
8002       break;
8003     }
8004 
8005   if (offloaded)
8006     {
8007       unsigned srcidx, dstidx, num;
8008 
8009       /* If the offloading region needs data sent from the parent
8010 	 function, then the very first statement (except possible
8011 	 tree profile counter updates) of the offloading body
8012 	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
8013 	 &.OMP_DATA_O is passed as an argument to the child function,
8014 	 we need to replace it with the argument as seen by the child
8015 	 function.
8016 
8017 	 In most cases, this will end up being the identity assignment
8018 	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
8019 	 a function call that has been inlined, the original PARM_DECL
8020 	 .OMP_DATA_I may have been converted into a different local
8021 	 variable.  In which case, we need to keep the assignment.  */
8022       tree data_arg = gimple_omp_target_data_arg (entry_stmt);
8023       if (data_arg)
8024 	{
8025 	  basic_block entry_succ_bb = single_succ (entry_bb);
8026 	  gimple_stmt_iterator gsi;
8027 	  tree arg;
8028 	  gimple *tgtcopy_stmt = NULL;
8029 	  tree sender = TREE_VEC_ELT (data_arg, 0);
8030 
8031 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
8032 	    {
8033 	      gcc_assert (!gsi_end_p (gsi));
8034 	      stmt = gsi_stmt (gsi);
8035 	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
8036 		continue;
8037 
8038 	      if (gimple_num_ops (stmt) == 2)
8039 		{
8040 		  tree arg = gimple_assign_rhs1 (stmt);
8041 
8042 		  /* We're ignoring the subcode because we're
8043 		     effectively doing a STRIP_NOPS.  */
8044 
8045 		  if (TREE_CODE (arg) == ADDR_EXPR
8046 		      && TREE_OPERAND (arg, 0) == sender)
8047 		    {
8048 		      tgtcopy_stmt = stmt;
8049 		      break;
8050 		    }
8051 		}
8052 	    }
8053 
8054 	  gcc_assert (tgtcopy_stmt != NULL);
8055 	  arg = DECL_ARGUMENTS (child_fn);
8056 
8057 	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
8058 	  gsi_remove (&gsi, true);
8059 	}
8060 
8061       /* Declare local variables needed in CHILD_CFUN.  */
8062       block = DECL_INITIAL (child_fn);
8063       BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
8064       /* The gimplifier could record temporaries in the offloading block
8065 	 rather than in containing function's local_decls chain,
8066 	 which would mean cgraph missed finalizing them.  Do it now.  */
8067       for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
8068 	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
8069 	  varpool_node::finalize_decl (t);
8070       DECL_SAVED_TREE (child_fn) = NULL;
8071       /* We'll create a CFG for child_fn, so no gimple body is needed.  */
8072       gimple_set_body (child_fn, NULL);
8073       TREE_USED (block) = 1;
8074 
8075       /* Reset DECL_CONTEXT on function arguments.  */
8076       for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
8077 	DECL_CONTEXT (t) = child_fn;
8078 
8079       /* Split ENTRY_BB at GIMPLE_*,
8080 	 so that it can be moved to the child function.  */
8081       gsi = gsi_last_nondebug_bb (entry_bb);
8082       stmt = gsi_stmt (gsi);
8083       gcc_assert (stmt
8084 		  && gimple_code (stmt) == gimple_code (entry_stmt));
8085       e = split_block (entry_bb, stmt);
8086       gsi_remove (&gsi, true);
8087       entry_bb = e->dest;
8088       single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8089 
8090       /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
8091       if (exit_bb)
8092 	{
8093 	  gsi = gsi_last_nondebug_bb (exit_bb);
8094 	  gcc_assert (!gsi_end_p (gsi)
8095 		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8096 	  stmt = gimple_build_return (NULL);
8097 	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
8098 	  gsi_remove (&gsi, true);
8099 	}
8100 
8101       /* Move the offloading region into CHILD_CFUN.  */
8102 
8103       block = gimple_block (entry_stmt);
8104 
8105       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
8106       if (exit_bb)
8107 	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
8108       /* When the OMP expansion process cannot guarantee an up-to-date
8109 	 loop tree, arrange for the child function to fix up its loops.  */
8110       if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8111 	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
8112 
8113       /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
8114       num = vec_safe_length (child_cfun->local_decls);
8115       for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
8116 	{
8117 	  t = (*child_cfun->local_decls)[srcidx];
8118 	  if (DECL_CONTEXT (t) == cfun->decl)
8119 	    continue;
8120 	  if (srcidx != dstidx)
8121 	    (*child_cfun->local_decls)[dstidx] = t;
8122 	  dstidx++;
8123 	}
8124       if (dstidx != num)
8125 	vec_safe_truncate (child_cfun->local_decls, dstidx);
8126 
8127       /* Inform the callgraph about the new function.  */
8128       child_cfun->curr_properties = cfun->curr_properties;
8129       child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
8130       child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
8131       cgraph_node *node = cgraph_node::get_create (child_fn);
8132       node->parallelized_function = 1;
8133       cgraph_node::add_new_function (child_fn, true);
8134 
8135       /* Add the new function to the offload table.  */
8136       if (ENABLE_OFFLOADING)
8137 	{
8138 	  if (in_lto_p)
8139 	    DECL_PRESERVE_P (child_fn) = 1;
8140 	  vec_safe_push (offload_funcs, child_fn);
8141 	}
8142 
8143       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
8144 		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
8145 
8146       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
8147 	 fixed in a following pass.  */
8148       push_cfun (child_cfun);
8149       if (need_asm)
8150 	assign_assembler_name_if_needed (child_fn);
8151       cgraph_edge::rebuild_edges ();
8152 
8153       /* Some EH regions might become dead, see PR34608.  If
8154 	 pass_cleanup_cfg isn't the first pass to happen with the
8155 	 new child, these dead EH edges might cause problems.
8156 	 Clean them up now.  */
8157       if (flag_exceptions)
8158 	{
8159 	  basic_block bb;
8160 	  bool changed = false;
8161 
8162 	  FOR_EACH_BB_FN (bb, cfun)
8163 	    changed |= gimple_purge_dead_eh_edges (bb);
8164 	  if (changed)
8165 	    cleanup_tree_cfg ();
8166 	}
8167       if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8168 	verify_loop_structure ();
8169       pop_cfun ();
8170 
8171       if (dump_file && !gimple_in_ssa_p (cfun))
8172 	{
8173 	  omp_any_child_fn_dumped = true;
8174 	  dump_function_header (dump_file, child_fn, dump_flags);
8175 	  dump_function_to_file (child_fn, dump_file, dump_flags);
8176 	}
8177 
8178       adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
8179     }
8180 
8181   /* Emit a library call to launch the offloading region, or do data
8182      transfers.  */
8183   tree t1, t2, t3, t4, depend, c, clauses;
8184   enum built_in_function start_ix;
8185   unsigned int flags_i = 0;
8186 
8187   switch (gimple_omp_target_kind (entry_stmt))
8188     {
8189     case GF_OMP_TARGET_KIND_REGION:
8190       start_ix = BUILT_IN_GOMP_TARGET;
8191       break;
8192     case GF_OMP_TARGET_KIND_DATA:
8193       start_ix = BUILT_IN_GOMP_TARGET_DATA;
8194       break;
8195     case GF_OMP_TARGET_KIND_UPDATE:
8196       start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
8197       break;
8198     case GF_OMP_TARGET_KIND_ENTER_DATA:
8199       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8200       break;
8201     case GF_OMP_TARGET_KIND_EXIT_DATA:
8202       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8203       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
8204       break;
8205     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8206     case GF_OMP_TARGET_KIND_OACC_KERNELS:
8207     case GF_OMP_TARGET_KIND_OACC_SERIAL:
8208       start_ix = BUILT_IN_GOACC_PARALLEL;
8209       break;
8210     case GF_OMP_TARGET_KIND_OACC_DATA:
8211     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8212       start_ix = BUILT_IN_GOACC_DATA_START;
8213       break;
8214     case GF_OMP_TARGET_KIND_OACC_UPDATE:
8215       start_ix = BUILT_IN_GOACC_UPDATE;
8216       break;
8217     case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8218       start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
8219       break;
8220     case GF_OMP_TARGET_KIND_OACC_DECLARE:
8221       start_ix = BUILT_IN_GOACC_DECLARE;
8222       break;
8223     default:
8224       gcc_unreachable ();
8225     }
8226 
8227   clauses = gimple_omp_target_clauses (entry_stmt);
8228 
8229   tree device = NULL_TREE;
8230   location_t device_loc = UNKNOWN_LOCATION;
8231   tree goacc_flags = NULL_TREE;
8232   if (is_gimple_omp_oacc (entry_stmt))
8233     {
8234       /* By default, no GOACC_FLAGs are set.  */
8235       goacc_flags = integer_zero_node;
8236     }
8237   else
8238     {
8239       c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
8240       if (c)
8241 	{
8242 	  device = OMP_CLAUSE_DEVICE_ID (c);
8243 	  device_loc = OMP_CLAUSE_LOCATION (c);
8244 	}
8245       else
8246 	{
8247 	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
8248 	     library choose).  */
8249 	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
8250 	  device_loc = gimple_location (entry_stmt);
8251 	}
8252 
8253       c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
8254       if (c)
8255 	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
8256     }
8257 
8258   /* By default, there is no conditional.  */
8259   tree cond = NULL_TREE;
8260   c = omp_find_clause (clauses, OMP_CLAUSE_IF);
8261   if (c)
8262     cond = OMP_CLAUSE_IF_EXPR (c);
8263   /* If we found the clause 'if (cond)', build:
8264      OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
8265      OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
8266   if (cond)
8267     {
8268       tree *tp;
8269       if (is_gimple_omp_oacc (entry_stmt))
8270 	tp = &goacc_flags;
8271       else
8272 	{
8273 	  /* Ensure 'device' is of the correct type.  */
8274 	  device = fold_convert_loc (device_loc, integer_type_node, device);
8275 
8276 	  tp = &device;
8277 	}
8278 
8279       cond = gimple_boolify (cond);
8280 
8281       basic_block cond_bb, then_bb, else_bb;
8282       edge e;
8283       tree tmp_var;
8284 
8285       tmp_var = create_tmp_var (TREE_TYPE (*tp));
8286       if (offloaded)
8287 	e = split_block_after_labels (new_bb);
8288       else
8289 	{
8290 	  gsi = gsi_last_nondebug_bb (new_bb);
8291 	  gsi_prev (&gsi);
8292 	  e = split_block (new_bb, gsi_stmt (gsi));
8293 	}
8294       cond_bb = e->src;
8295       new_bb = e->dest;
8296       remove_edge (e);
8297 
8298       then_bb = create_empty_bb (cond_bb);
8299       else_bb = create_empty_bb (then_bb);
8300       set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
8301       set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
8302 
8303       stmt = gimple_build_cond_empty (cond);
8304       gsi = gsi_last_bb (cond_bb);
8305       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8306 
8307       gsi = gsi_start_bb (then_bb);
8308       stmt = gimple_build_assign (tmp_var, *tp);
8309       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8310 
8311       gsi = gsi_start_bb (else_bb);
8312       if (is_gimple_omp_oacc (entry_stmt))
8313 	stmt = gimple_build_assign (tmp_var,
8314 				    BIT_IOR_EXPR,
8315 				    *tp,
8316 				    build_int_cst (integer_type_node,
8317 						   GOACC_FLAG_HOST_FALLBACK));
8318       else
8319 	stmt = gimple_build_assign (tmp_var,
8320 				    build_int_cst (integer_type_node,
8321 						   GOMP_DEVICE_HOST_FALLBACK));
8322       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8323 
8324       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
8325       make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
8326       add_bb_to_loop (then_bb, cond_bb->loop_father);
8327       add_bb_to_loop (else_bb, cond_bb->loop_father);
8328       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
8329       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
8330 
8331       *tp = tmp_var;
8332 
8333       gsi = gsi_last_nondebug_bb (new_bb);
8334     }
8335   else
8336     {
8337       gsi = gsi_last_nondebug_bb (new_bb);
8338 
8339       if (device != NULL_TREE)
8340 	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
8341 					   true, GSI_SAME_STMT);
8342     }
8343 
8344   t = gimple_omp_target_data_arg (entry_stmt);
8345   if (t == NULL)
8346     {
8347       t1 = size_zero_node;
8348       t2 = build_zero_cst (ptr_type_node);
8349       t3 = t2;
8350       t4 = t2;
8351     }
8352   else
8353     {
8354       t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
8355       t1 = size_binop (PLUS_EXPR, t1, size_int (1));
8356       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
8357       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
8358       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
8359     }
8360 
8361   gimple *g;
8362   bool tagging = false;
8363   /* The maximum number used by any start_ix, without varargs.  */
8364   auto_vec<tree, 11> args;
8365   if (is_gimple_omp_oacc (entry_stmt))
8366     {
8367       tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
8368 					TREE_TYPE (goacc_flags), goacc_flags);
8369       goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
8370 						NULL_TREE, true,
8371 						GSI_SAME_STMT);
8372       args.quick_push (goacc_flags_m);
8373     }
8374   else
8375     args.quick_push (device);
8376   if (offloaded)
8377     args.quick_push (build_fold_addr_expr (child_fn));
8378   args.quick_push (t1);
8379   args.quick_push (t2);
8380   args.quick_push (t3);
8381   args.quick_push (t4);
8382   switch (start_ix)
8383     {
8384     case BUILT_IN_GOACC_DATA_START:
8385     case BUILT_IN_GOACC_DECLARE:
8386     case BUILT_IN_GOMP_TARGET_DATA:
8387       break;
8388     case BUILT_IN_GOMP_TARGET:
8389     case BUILT_IN_GOMP_TARGET_UPDATE:
8390     case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8391       args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8392       c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8393       if (c)
8394 	depend = OMP_CLAUSE_DECL (c);
8395       else
8396 	depend = build_int_cst (ptr_type_node, 0);
8397       args.quick_push (depend);
8398       if (start_ix == BUILT_IN_GOMP_TARGET)
8399 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
8400       break;
8401     case BUILT_IN_GOACC_PARALLEL:
8402       if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
8403 	{
8404 	  tree dims = NULL_TREE;
8405 	  unsigned int ix;
8406 
8407 	  /* For serial constructs we set all dimensions to 1.  */
8408 	  for (ix = GOMP_DIM_MAX; ix--;)
8409 	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
8410 	  oacc_replace_fn_attrib (child_fn, dims);
8411 	}
8412       else
8413 	oacc_set_fn_attrib (child_fn, clauses, &args);
8414       tagging = true;
8415       /* FALLTHRU */
8416     case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8417     case BUILT_IN_GOACC_UPDATE:
8418       {
8419 	tree t_async = NULL_TREE;
8420 
8421 	/* If present, use the value specified by the respective
8422 	   clause, making sure that is of the correct type.  */
8423 	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8424 	if (c)
8425 	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8426 				      integer_type_node,
8427 				      OMP_CLAUSE_ASYNC_EXPR (c));
8428 	else if (!tagging)
8429 	  /* Default values for t_async.  */
8430 	  t_async = fold_convert_loc (gimple_location (entry_stmt),
8431 				      integer_type_node,
8432 				      build_int_cst (integer_type_node,
8433 						     GOMP_ASYNC_SYNC));
8434 	if (tagging && t_async)
8435 	  {
8436 	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8437 
8438 	    if (TREE_CODE (t_async) == INTEGER_CST)
8439 	      {
8440 		/* See if we can pack the async arg in to the tag's
8441 		   operand.  */
8442 		i_async = TREE_INT_CST_LOW (t_async);
8443 		if (i_async < GOMP_LAUNCH_OP_MAX)
8444 		  t_async = NULL_TREE;
8445 		else
8446 		  i_async = GOMP_LAUNCH_OP_MAX;
8447 	      }
8448 	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8449 					      i_async));
8450 	  }
8451 	if (t_async)
8452 	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
8453 						    NULL_TREE, true,
8454 						    GSI_SAME_STMT));
8455 
8456 	/* Save the argument index, and ... */
8457 	unsigned t_wait_idx = args.length ();
8458 	unsigned num_waits = 0;
8459 	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8460 	if (!tagging || c)
8461 	  /* ... push a placeholder.  */
8462 	  args.safe_push (integer_zero_node);
8463 
8464 	for (; c; c = OMP_CLAUSE_CHAIN (c))
8465 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8466 	    {
8467 	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8468 					   integer_type_node,
8469 					   OMP_CLAUSE_WAIT_EXPR (c));
8470 	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
8471 					      GSI_SAME_STMT);
8472 	      args.safe_push (arg);
8473 	      num_waits++;
8474 	    }
8475 
8476 	if (!tagging || num_waits)
8477 	  {
8478 	    tree len;
8479 
8480 	    /* Now that we know the number, update the placeholder.  */
8481 	    if (tagging)
8482 	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8483 	    else
8484 	      len = build_int_cst (integer_type_node, num_waits);
8485 	    len = fold_convert_loc (gimple_location (entry_stmt),
8486 				    unsigned_type_node, len);
8487 	    args[t_wait_idx] = len;
8488 	  }
8489       }
8490       break;
8491     default:
8492       gcc_unreachable ();
8493     }
8494   if (tagging)
8495     /*  Push terminal marker - zero.  */
8496     args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8497 
8498   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8499   gimple_set_location (g, gimple_location (entry_stmt));
8500   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8501   if (!offloaded)
8502     {
8503       g = gsi_stmt (gsi);
8504       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8505       gsi_remove (&gsi, true);
8506     }
8507 }
8508 
8509 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
8510    the iteration variable derived from the thread number.  INTRA_GROUP means
8511    this is an expansion of a loop iterating over work-items within a separate
8512    iteration over groups.  */
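/* Roughly (a sketch), for each collapsed dimension the loop

       for (V = N1; V cond N2; V += STEP)
	 BODY;

   becomes just

       V = N1 + <thread-or-group-id (dim)> * STEP;
       BODY;

   with the id taken from the HSA work-group, work-item or absolute work-item
   builtin, depending on INTRA_GROUP and the group-iteration flag.  */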
8513 
8514 static void
8515 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8516 {
8517   gimple_stmt_iterator gsi;
8518   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8519   gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8520 		       == GF_OMP_FOR_KIND_GRID_LOOP);
8521   size_t collapse = gimple_omp_for_collapse (for_stmt);
8522   struct omp_for_data_loop *loops
8523     = XALLOCAVEC (struct omp_for_data_loop,
8524 		  gimple_omp_for_collapse (for_stmt));
8525   struct omp_for_data fd;
8526 
8527   remove_edge (BRANCH_EDGE (kfor->entry));
8528   basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8529 
8530   gcc_assert (kfor->cont);
8531   omp_extract_for_data (for_stmt, &fd, loops);
8532 
8533   gsi = gsi_start_bb (body_bb);
8534 
8535   for (size_t dim = 0; dim < collapse; dim++)
8536     {
8537       tree type, itype;
8538       itype = type = TREE_TYPE (fd.loops[dim].v);
8539       if (POINTER_TYPE_P (type))
8540 	itype = signed_type_for (type);
8541 
8542       tree n1 = fd.loops[dim].n1;
8543       tree step = fd.loops[dim].step;
8544       n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8545 				     true, NULL_TREE, true, GSI_SAME_STMT);
8546       step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8547 				       true, NULL_TREE, true, GSI_SAME_STMT);
8548       tree threadid;
8549       if (gimple_omp_for_grid_group_iter (for_stmt))
8550 	{
8551 	  gcc_checking_assert (!intra_group);
8552 	  threadid = build_call_expr (builtin_decl_explicit
8553 				      (BUILT_IN_HSA_WORKGROUPID), 1,
8554 				      build_int_cstu (unsigned_type_node, dim));
8555 	}
8556       else if (intra_group)
8557 	threadid = build_call_expr (builtin_decl_explicit
8558 				    (BUILT_IN_HSA_WORKITEMID), 1,
8559 				    build_int_cstu (unsigned_type_node, dim));
8560       else
8561 	threadid = build_call_expr (builtin_decl_explicit
8562 				    (BUILT_IN_HSA_WORKITEMABSID), 1,
8563 				    build_int_cstu (unsigned_type_node, dim));
8564       threadid = fold_convert (itype, threadid);
8565       threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8566 					   true, GSI_SAME_STMT);
8567 
8568       tree startvar = fd.loops[dim].v;
8569       tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8570       if (POINTER_TYPE_P (type))
8571 	t = fold_build_pointer_plus (n1, t);
8572       else
8573 	t = fold_build2 (PLUS_EXPR, type, t, n1);
8574       t = fold_convert (type, t);
8575       t = force_gimple_operand_gsi (&gsi, t,
8576 				    DECL_P (startvar)
8577 				    && TREE_ADDRESSABLE (startvar),
8578 				    NULL_TREE, true, GSI_SAME_STMT);
8579       gassign *assign_stmt = gimple_build_assign (startvar, t);
8580       gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8581     }
8582   /* Remove the omp for statement.  */
8583   gsi = gsi_last_nondebug_bb (kfor->entry);
8584   gsi_remove (&gsi, true);
8585 
8586   /* Remove the GIMPLE_OMP_CONTINUE statement.  */
8587   gsi = gsi_last_nondebug_bb (kfor->cont);
8588   gcc_assert (!gsi_end_p (gsi)
8589 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8590   gsi_remove (&gsi, true);
8591 
8592   /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
8593   gsi = gsi_last_nondebug_bb (kfor->exit);
8594   gcc_assert (!gsi_end_p (gsi)
8595 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8596   if (intra_group)
8597     gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8598   gsi_remove (&gsi, true);
8599 
8600   /* Fixup the much simpler CFG.  */
8601   remove_edge (find_edge (kfor->cont, body_bb));
8602 
8603   if (kfor->cont != body_bb)
8604     set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8605   set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
8606 }
8607 
8608 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8609    argument_decls.  */
8610 
8611 struct grid_arg_decl_map
8612 {
8613   tree old_arg;
8614   tree new_arg;
8615 };
8616 
8617 /* Invoked through walk_gimple_op; remaps all PARM_DECLs to the ones
8618    pertaining to the kernel function.  */
8619 
8620 static tree
8621 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8622 {
8623   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8624   struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8625   tree t = *tp;
8626 
8627   if (t == adm->old_arg)
8628     *tp = adm->new_arg;
8629   *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8630   return NULL_TREE;
8631 }
8632 
8633 /* If the TARGET region contains a kernel body `for' loop, remove its region
8634    from TARGET and expand it in HSA gridified kernel fashion.  */
8635 
8636 static void
8637 grid_expand_target_grid_body (struct omp_region *target)
8638 {
8639   if (!hsa_gen_requested_p ())
8640     return;
8641 
8642   gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8643   struct omp_region **pp;
8644 
8645   for (pp = &target->inner; *pp; pp = &(*pp)->next)
8646     if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8647       break;
8648 
8649   struct omp_region *gpukernel = *pp;
8650 
8651   tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8652   if (!gpukernel)
8653     {
8654       /* HSA cannot handle OACC stuff.  */
8655       if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8656 	return;
8657       gcc_checking_assert (orig_child_fndecl);
8658       gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8659 				    OMP_CLAUSE__GRIDDIM_));
8660       cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8661 
8662       hsa_register_kernel (n);
8663       return;
8664     }
8665 
8666   gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8667 			       OMP_CLAUSE__GRIDDIM_));
8668   tree inside_block
8669     = gimple_block (first_stmt (single_succ (gpukernel->entry)));
8670   *pp = gpukernel->next;
8671   for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8672     if ((*pp)->type == GIMPLE_OMP_FOR)
8673       break;
8674 
8675   struct omp_region *kfor = *pp;
8676   gcc_assert (kfor);
8677   gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8678   gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8679   *pp = kfor->next;
8680   if (kfor->inner)
8681     {
8682       if (gimple_omp_for_grid_group_iter (for_stmt))
8683 	{
8684 	  struct omp_region **next_pp;
8685 	  for (pp = &kfor->inner; *pp; pp = next_pp)
8686 	    {
8687 	      next_pp = &(*pp)->next;
8688 	      if ((*pp)->type != GIMPLE_OMP_FOR)
8689 		continue;
8690 	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8691 	      gcc_assert (gimple_omp_for_kind (inner)
8692 			  == GF_OMP_FOR_KIND_GRID_LOOP);
8693 	      grid_expand_omp_for_loop (*pp, true);
8694 	      *pp = (*pp)->next;
8695 	      next_pp = pp;
8696 	    }
8697 	}
8698       expand_omp (kfor->inner);
8699     }
8700   if (gpukernel->inner)
8701     expand_omp (gpukernel->inner);
8702 
8703   tree kern_fndecl = copy_node (orig_child_fndecl);
8704   DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8705 							  "kernel");
8706   SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8707   tree tgtblock = gimple_block (tgt_stmt);
8708   tree fniniblock = make_node (BLOCK);
8709   BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8710   BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8711   BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8712   BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8713   DECL_INITIAL (kern_fndecl) = fniniblock;
8714   push_struct_function (kern_fndecl);
8715   cfun->function_end_locus = gimple_location (tgt_stmt);
8716   init_tree_ssa (cfun);
8717   pop_cfun ();
8718 
8719   tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8720   gcc_assert (!DECL_CHAIN (old_parm_decl));
8721   tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8722   DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8723   DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8724   gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8725   DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8726   DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8727   struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8728   kern_cfun->curr_properties = cfun->curr_properties;
8729 
8730   grid_expand_omp_for_loop (kfor, false);
8731 
8732   /* Remove the omp for statement.  */
8733   gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8734   gsi_remove (&gsi, true);
8735   /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8736      return.  */
8737   gsi = gsi_last_nondebug_bb (gpukernel->exit);
8738   gcc_assert (!gsi_end_p (gsi)
8739 	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8740   gimple *ret_stmt = gimple_build_return (NULL);
8741   gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8742   gsi_remove (&gsi, true);
8743 
8744   /* Statements in the first BB in the target construct have been produced by
8745      target lowering and must be copied inside the GPUKERNEL, with the two
8746      exceptions of the first OMP statement and the OMP_DATA assignment
8747      statement.  */
8748   gsi = gsi_start_bb (single_succ (gpukernel->entry));
8749   tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8750   tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8751   for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8752        !gsi_end_p (tsi); gsi_next (&tsi))
8753     {
8754       gimple *stmt = gsi_stmt (tsi);
8755       if (is_gimple_omp (stmt))
8756 	break;
8757       if (sender
8758 	  && is_gimple_assign (stmt)
8759 	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8760 	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8761 	continue;
8762       gimple *copy = gimple_copy (stmt);
8763       gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8764       gimple_set_block (copy, fniniblock);
8765     }
8766 
8767   move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8768 			  gpukernel->exit, inside_block);
8769 
8770   cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8771   kcn->mark_force_output ();
8772   cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8773 
8774   hsa_register_kernel (kcn, orig_child);
8775 
8776   cgraph_node::add_new_function (kern_fndecl, true);
8777   push_cfun (kern_cfun);
8778   cgraph_edge::rebuild_edges ();
8779 
8780   /* Re-map any mention of the PARM_DECL of the original function to the
8781      PARM_DECL of the new one.
8782 
8783      TODO: It would be great if lowering produced references into the GPU
8784      kernel decl straight away and we did not have to do this.  */
8785   struct grid_arg_decl_map adm;
8786   adm.old_arg = old_parm_decl;
8787   adm.new_arg = new_parm_decl;
8788   basic_block bb;
8789   FOR_EACH_BB_FN (bb, kern_cfun)
8790     {
8791       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8792 	{
8793 	  gimple *stmt = gsi_stmt (gsi);
8794 	  struct walk_stmt_info wi;
8795 	  memset (&wi, 0, sizeof (wi));
8796 	  wi.info = &adm;
8797 	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8798 	}
8799     }
8800   pop_cfun ();
8801 
8802   return;
8803 }
8804 
8805 /* Expand the parallel region tree rooted at REGION.  Expansion
8806    proceeds in depth-first order.  Innermost regions are expanded
8807    first.  This way, parallel regions that require a new function to
8808    be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8809    internal dependencies in their body.  */
8810 
8811 static void
8812 expand_omp (struct omp_region *region)
8813 {
8814   omp_any_child_fn_dumped = false;
8815   while (region)
8816     {
8817       location_t saved_location;
8818       gimple *inner_stmt = NULL;
8819 
8820       /* First, determine whether this is a combined parallel+workshare
8821 	 region.  */
8822       if (region->type == GIMPLE_OMP_PARALLEL)
8823 	determine_parallel_type (region);
8824       else if (region->type == GIMPLE_OMP_TARGET)
8825 	grid_expand_target_grid_body (region);
8826 
8827       if (region->type == GIMPLE_OMP_FOR
8828 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
8829 	inner_stmt = last_stmt (region->inner->entry);
8830 
8831       if (region->inner)
8832 	expand_omp (region->inner);
8833 
8834       saved_location = input_location;
8835       if (gimple_has_location (last_stmt (region->entry)))
8836 	input_location = gimple_location (last_stmt (region->entry));
8837 
8838       switch (region->type)
8839 	{
8840 	case GIMPLE_OMP_PARALLEL:
8841 	case GIMPLE_OMP_TASK:
8842 	  expand_omp_taskreg (region);
8843 	  break;
8844 
8845 	case GIMPLE_OMP_FOR:
8846 	  expand_omp_for (region, inner_stmt);
8847 	  break;
8848 
8849 	case GIMPLE_OMP_SECTIONS:
8850 	  expand_omp_sections (region);
8851 	  break;
8852 
8853 	case GIMPLE_OMP_SECTION:
8854 	  /* Individual omp sections are handled together with their
8855 	     parent GIMPLE_OMP_SECTIONS region.  */
8856 	  break;
8857 
8858 	case GIMPLE_OMP_SINGLE:
8859 	  expand_omp_single (region);
8860 	  break;
8861 
8862 	case GIMPLE_OMP_ORDERED:
8863 	  {
8864 	    gomp_ordered *ord_stmt
8865 	      = as_a <gomp_ordered *> (last_stmt (region->entry));
8866 	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8867 				 OMP_CLAUSE_DEPEND))
8868 	      {
8869 		/* We'll expand these when expanding corresponding
8870 		   worksharing region with ordered(n) clause.  */
                gcc_assert (region->outer
                            && region->outer->type == GIMPLE_OMP_FOR);
                region->ord_stmt = ord_stmt;
                break;
              }
          }
          /* FALLTHRU */
        case GIMPLE_OMP_MASTER:
        case GIMPLE_OMP_TASKGROUP:
        case GIMPLE_OMP_CRITICAL:
        case GIMPLE_OMP_TEAMS:
          expand_omp_synch (region);
          break;

        case GIMPLE_OMP_ATOMIC_LOAD:
          expand_omp_atomic (region);
          break;

        case GIMPLE_OMP_TARGET:
          expand_omp_target (region);
          break;

        default:
          gcc_unreachable ();
        }

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
        dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, the
   whole forest of OMP constructs may be built).  */
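
/* A rough illustration (hypothetical, not actual dump output): for

     #pragma omp parallel
       {
         #pragma omp single
           ;
       }

   the scan creates a GIMPLE_OMP_PARALLEL region at the block holding
   the parallel directive and, while walking the blocks it dominates,
   a GIMPLE_OMP_SINGLE region as its child; each GIMPLE_OMP_RETURN
   encountered records the exit block of the innermost open region and
   pops back to that region's parent.  */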

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
                     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
        {
          /* STMT is the return point out of region PARENT.  Mark it
             as the exit point and make PARENT the immediately
             enclosing region.  */
          gcc_assert (parent);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
        {
          /* GIMPLE_OMP_ATOMIC_STORE is analogous to
             GIMPLE_OMP_RETURN, but matches with
             GIMPLE_OMP_ATOMIC_LOAD.  */
          gcc_assert (parent);
          gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_CONTINUE)
        {
          gcc_assert (parent);
          parent->cont = bb;
        }
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
        {
          /* GIMPLE_OMP_SECTIONS_SWITCH is part of
             GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
        }
      else
        {
          region = new_omp_region (bb, code, parent);
          /* Otherwise...  */
          if (code == GIMPLE_OMP_TARGET)
            {
              switch (gimple_omp_target_kind (stmt))
                {
                case GF_OMP_TARGET_KIND_REGION:
                case GF_OMP_TARGET_KIND_OACC_PARALLEL:
                case GF_OMP_TARGET_KIND_OACC_KERNELS:
                case GF_OMP_TARGET_KIND_OACC_SERIAL:
                  break;
                case GF_OMP_TARGET_KIND_UPDATE:
                case GF_OMP_TARGET_KIND_ENTER_DATA:
                case GF_OMP_TARGET_KIND_EXIT_DATA:
                case GF_OMP_TARGET_KIND_DATA:
                case GF_OMP_TARGET_KIND_OACC_DATA:
                case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
                case GF_OMP_TARGET_KIND_OACC_UPDATE:
                case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
                case GF_OMP_TARGET_KIND_OACC_DECLARE:
                  /* ..., other than for those stand-alone directives...
                     To be precise, target data isn't stand-alone, but the
                     gimplifier puts the end API call into a try/finally
                     block for it, so omp expansion can treat it as such.  */
                  region = NULL;
                  break;
                default:
                  gcc_unreachable ();
                }
            }
          else if (code == GIMPLE_OMP_ORDERED
                   && omp_find_clause (gimple_omp_ordered_clauses
                                         (as_a <gomp_ordered *> (stmt)),
                                       OMP_CLAUSE_DEPEND))
            /* #pragma omp ordered depend is also just a stand-alone
               directive.  */
            region = NULL;
          else if (code == GIMPLE_OMP_TASK
                   && gimple_omp_task_taskwait_p (stmt))
            /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
            region = NULL;
          else if (code == GIMPLE_OMP_TASKGROUP)
            /* #pragma omp taskgroup isn't a stand-alone directive, but the
               gimplifier puts the end API call into a try/finally block
               for it, so omp expansion can treat it as such.  */
            region = NULL;
          /* ..., this directive becomes the parent for a new region.  */
          if (region)
            parent = region;
        }
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions.  The root of the
   tree is stored in root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
                    || flag_openmp_simd != 0)
                   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
         But often, there is nothing to do.  */
      if (!gate)
        return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */
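
/* Rough orientation (informal; the switch below holds the precise
   rules): blocks ending in a directive such as GIMPLE_OMP_PARALLEL or
   GIMPLE_OMP_FOR open a new region and fall through into its body;
   stand-alone directives (e.g. target update, ordered depend) open and
   immediately close a region; GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN
   add the loopback and exit edges that tie a region together.  */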

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
                       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASKGROUP:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
        cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
                             (as_a <gomp_ordered *> (last)),
                           OMP_CLAUSE_DEPEND))
        cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
        {
        case GF_OMP_TARGET_KIND_REGION:
        case GF_OMP_TARGET_KIND_OACC_PARALLEL:
        case GF_OMP_TARGET_KIND_OACC_KERNELS:
        case GF_OMP_TARGET_KIND_OACC_SERIAL:
          break;
        case GF_OMP_TARGET_KIND_UPDATE:
        case GF_OMP_TARGET_KIND_ENTER_DATA:
        case GF_OMP_TARGET_KIND_EXIT_DATA:
        case GF_OMP_TARGET_KIND_DATA:
        case GF_OMP_TARGET_KIND_OACC_DATA:
        case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
        case GF_OMP_TARGET_KIND_OACC_UPDATE:
        case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_DECLARE:
          cur_region = cur_region->outer;
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
         somewhere other than the next block.  This will be
         created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
        /* Add an edge corresponding to not scheduling the task
           immediately.  */
        make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
        {
        case GIMPLE_OMP_FOR:
          /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
             successor edges as abnormal to prevent splitting
             them.  */
          single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
          /* Make the loopback edge.  */
          make_edge (bb, single_succ (cur_region->entry),
                     EDGE_ABNORMAL);

          /* Create an edge from GIMPLE_OMP_FOR to exit, which
             corresponds to the case that the body of the loop
             is not executed at all.  */
          make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
          make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
          fallthru = false;
          break;
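
          /* Summarizing the GIMPLE_OMP_FOR case above (informal, for
             orientation only): the edges now in place are
               - entry -> body                  (pre-existing, now abnormal),
               - cont  -> body                  (the loopback edge),
               - entry -> block following cont  (loop body never executed),
               - cont  -> block following cont  (normal loop exit),
             all flagged EDGE_ABNORMAL so that later CFG manipulation
             does not split them.  */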

        case GIMPLE_OMP_SECTIONS:
          /* Wire up the edges into and out of the nested sections.  */
          {
            basic_block switch_bb = single_succ (cur_region->entry);

            struct omp_region *i;
            for (i = cur_region->inner; i ; i = i->next)
              {
                gcc_assert (i->type == GIMPLE_OMP_SECTION);
                make_edge (switch_bb, i->entry, 0);
                make_edge (i->exit, bb, EDGE_FALLTHRU);
              }

            /* Make the loopback edge to the block with
               GIMPLE_OMP_SECTIONS_SWITCH.  */
            make_edge (bb, switch_bb, 0);

            /* Make the edge from the switch to exit.  */
            make_edge (switch_bb, bb->next_bb, 0);
            fallthru = false;
          }
          break;

        case GIMPLE_OMP_TASK:
          fallthru = true;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
        *region_idx = cur_region->entry->index;
      else
        *region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"