1 /* Decompose OpenACC 'kernels' constructs into parts, a sequence of compute
2    constructs
3 
4    Copyright (C) 2020-2022 Free Software Foundation, Inc.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "tree.h"
28 #include "langhooks.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "cgraph.h"
32 #include "fold-const.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimple-walk.h"
36 #include "gomp-constants.h"
37 #include "omp-general.h"
38 #include "diagnostic-core.h"
39 
40 
41 /* This preprocessing pass is run immediately before lower_omp.  It decomposes
42    OpenACC 'kernels' constructs into parts, a sequence of compute constructs.
43 
44    The translation is as follows:
45      - The entire 'kernels' region is turned into a 'data' region with clauses
46        taken from the 'kernels' region.  New 'create' clauses are added for all
47        variables declared at the top level in the kernels region.
48      - Any loop nests annotated with an OpenACC 'loop' directive are wrapped in
49        a new compute construct.
50 	 - 'loop' directives without an explicit 'independent' or 'seq' clause
51 	   get an 'auto' clause added; other clauses are preserved on the loop
52 	   or moved to the new surrounding compute construct, as applicable.
53      - Any sequences of other code (non-loops, non-OpenACC 'loop's) are wrapped
54        in new "gang-single" compute construct: 'worker'/'vector' parallelism is
55        preserved, but 'num_gangs (1)' is enforced.
56      - Both points above only apply at the topmost level in the region, that
57        is, the transformation does not introduce new compute constructs inside
58        nested statement bodies.  In particular, this means that a
59        gang-parallelizable loop inside an 'if' statement is made "gang-single".
60      - In order to make the host wait only once for the whole region instead
61        of once per device kernel launch, the new compute constructs are
62        annotated 'async'.  Unless the original 'kernels' construct already was
63        marked 'async', the entire region ends with a 'wait' directive.  If the
64        original 'kernels' construct was marked 'async', the synthesized 'async'
65        clauses use the original 'kernels' construct's 'async' argument
66        (possibly implicit).
67 */
68 
69 
70 /*TODO Things are conceptually wrong here: 'loop' clauses may be hidden behind
71   'device_type', so we have to defer a lot of processing until we're in the
72   offloading compilation.  "Fortunately", GCC doesn't support the OpenACC
73   'device_type' clause yet, so we get away that.  */
74 
75 
76 /* Helper function for decompose_kernels_region_body.  If STMT contains a
77    "top-level" OMP_FOR statement, returns a pointer to that statement;
78    returns NULL otherwise.
79 
80    A "top-level" OMP_FOR statement is one that is possibly accompanied by
81    small snippets of setup code.  Specifically, this function accepts an
82    OMP_FOR possibly wrapped in a singleton bind and a singleton try
83    statement to allow for a local loop variable, but not an OMP_FOR
84    statement nested in any other constructs.  Alternatively, it accepts a
85    non-singleton bind containing only assignments and then an OMP_FOR
86    statement at the very end.  The former style can be generated by the C
87    frontend, the latter by the Fortran frontend.  */
88 
89 static gimple *
top_level_omp_for_in_stmt(gimple * stmt)90 top_level_omp_for_in_stmt (gimple *stmt)
91 {
92   if (gimple_code (stmt) == GIMPLE_OMP_FOR)
93     return stmt;
94 
95   if (gimple_code (stmt) == GIMPLE_BIND)
96     {
97       gimple_seq body = gimple_bind_body (as_a <gbind *> (stmt));
98       if (gimple_seq_singleton_p (body))
99 	{
100 	  /* Accept an OMP_FOR statement, or a try statement containing only
101 	     a single OMP_FOR.  */
102 	  gimple *maybe_for_or_try = gimple_seq_first_stmt (body);
103 	  if (gimple_code (maybe_for_or_try) == GIMPLE_OMP_FOR)
104 	    return maybe_for_or_try;
105 	  else if (gimple_code (maybe_for_or_try) == GIMPLE_TRY)
106 	    {
107 	      gimple_seq try_body = gimple_try_eval (maybe_for_or_try);
108 	      if (!gimple_seq_singleton_p (try_body))
109 		return NULL;
110 	      gimple *maybe_omp_for_stmt = gimple_seq_first_stmt (try_body);
111 	      if (gimple_code (maybe_omp_for_stmt) == GIMPLE_OMP_FOR)
112 		return maybe_omp_for_stmt;
113 	    }
114 	}
115       else
116 	{
117 	  gimple_stmt_iterator gsi;
118 	  /* Accept only a block of optional assignments followed by an
119 	     OMP_FOR at the end.  No other kinds of statements allowed.  */
120 	  for (gsi = gsi_start (body); !gsi_end_p (gsi); gsi_next (&gsi))
121 	    {
122 	      gimple *body_stmt = gsi_stmt (gsi);
123 	      if (gimple_code (body_stmt) == GIMPLE_ASSIGN)
124 		continue;
125 	      else if (gimple_code (body_stmt) == GIMPLE_OMP_FOR
126 		       && gsi_one_before_end_p (gsi))
127 		return body_stmt;
128 	      else
129 		return NULL;
130 	    }
131 	}
132     }
133 
134   return NULL;
135 }
136 
137 /* Helper for adjust_region_code: evaluate the statement at GSI_P.  */
138 
139 static tree
adjust_region_code_walk_stmt_fn(gimple_stmt_iterator * gsi_p,bool * handled_ops_p,struct walk_stmt_info * wi)140 adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p,
141 				 bool *handled_ops_p,
142 				 struct walk_stmt_info *wi)
143 {
144   int *region_code = (int *) wi->info;
145 
146   gimple *stmt = gsi_stmt (*gsi_p);
147   switch (gimple_code (stmt))
148     {
149     case GIMPLE_OMP_FOR:
150       {
151 	tree clauses = gimple_omp_for_clauses (stmt);
152 	if (omp_find_clause (clauses, OMP_CLAUSE_INDEPENDENT))
153 	  {
154 	    /* Explicit 'independent' clause.  */
155 	    /* Keep going; recurse into loop body.  */
156 	    break;
157 	  }
158 	else if (omp_find_clause (clauses, OMP_CLAUSE_SEQ))
159 	  {
160 	    /* Explicit 'seq' clause.  */
161 	    /* We'll "parallelize" if at some level a loop construct has been
162 	       marked up by the user as unparallelizable ('seq' clause; we'll
163 	       respect that in the later processing).  Given that the user has
164 	       explicitly marked it up, this loop construct cannot be
165 	       performance-critical, and in this case it's also fine to
166 	       "parallelize" instead of "gang-single", because any outer or
167 	       inner loops may still exploit the available parallelism.  */
168 	    /* Keep going; recurse into loop body.  */
169 	    break;
170 	  }
171 	else
172 	  {
173 	    /* Explicit or implicit 'auto' clause.  */
174 	    /* The user would like this loop analyzed ('auto' clause) and
175 	       typically parallelized, but we don't have available yet the
176 	       compiler logic to analyze this, so can't parallelize it here, so
177 	       we'd very likely be running into a performance problem if we
178 	       were to execute this unparallelized, thus forward the whole loop
179 	       nest to 'parloops'.  */
180 	    *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
181 	    /* Terminate: final decision for this region.  */
182 	    *handled_ops_p = true;
183 	    return integer_zero_node;
184 	  }
185 	gcc_unreachable ();
186       }
187 
188     case GIMPLE_COND:
189     case GIMPLE_GOTO:
190     case GIMPLE_SWITCH:
191     case GIMPLE_ASM:
192     case GIMPLE_TRANSACTION:
193     case GIMPLE_RETURN:
194       /* Statement that might constitute some looping/control flow pattern.  */
195       /* The user would like this code analyzed (implicit inside a 'kernels'
196 	 region) and typically parallelized, but we don't have available yet
197 	 the compiler logic to analyze this, so can't parallelize it here, so
198 	 we'd very likely be running into a performance problem if we were to
199 	 execute this unparallelized, thus forward the whole thing to
200 	 'parloops'.  */
201       *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
202       /* Terminate: final decision for this region.  */
203       *handled_ops_p = true;
204       return integer_zero_node;
205 
206     default:
207       /* Keep going.  */
208       break;
209     }
210 
211   return NULL;
212 }
213 
214 /* Adjust the REGION_CODE for the region in GS.  */
215 
216 static void
adjust_region_code(gimple_seq gs,int * region_code)217 adjust_region_code (gimple_seq gs, int *region_code)
218 {
219   struct walk_stmt_info wi;
220   memset (&wi, 0, sizeof (wi));
221   wi.info = region_code;
222   walk_gimple_seq (gs, adjust_region_code_walk_stmt_fn, NULL, &wi);
223 }
224 
225 /* Helper function for make_loops_gang_single for walking the tree.  If the
226    statement indicated by GSI_P is an OpenACC for loop with a gang clause,
227    issue a warning and remove the clause.  */
228 
229 static tree
visit_loops_in_gang_single_region(gimple_stmt_iterator * gsi_p,bool * handled_ops_p,struct walk_stmt_info *)230 visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p,
231 				   bool *handled_ops_p,
232 				   struct walk_stmt_info *)
233 {
234   *handled_ops_p = false;
235 
236   gimple *stmt = gsi_stmt (*gsi_p);
237   switch (gimple_code (stmt))
238     {
239     case GIMPLE_OMP_FOR:
240       /*TODO Given the current 'adjust_region_code' algorithm, this is
241 	actually...  */
242 #if 0
243       gcc_unreachable ();
244 #else
245       /* ..., but due to bugs (PR100400), we may actually come here.
246 	 Reliably catch this, regardless of checking level.  */
247       internal_error ("PR100400");
248 #endif
249 
250       {
251 	tree clauses = gimple_omp_for_clauses (stmt);
252 	tree prev_clause = NULL;
253 	for (tree clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause))
254 	  {
255 	    if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_GANG)
256 	      {
257 		/* It makes no sense to have a 'gang' clause in a "gang-single"
258 		   region, so warn and remove it.  */
259 		warning_at (gimple_location (stmt), 0,
260 			    "conditionally executed loop in %<kernels%> region"
261 			    " will be executed by a single gang;"
262 			    " ignoring %<gang%> clause");
263 		if (prev_clause != NULL)
264 		  OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (clause);
265 		else
266 		  clauses = OMP_CLAUSE_CHAIN (clause);
267 
268 		break;
269 	      }
270 	    prev_clause = clause;
271 	  }
272 	gimple_omp_for_set_clauses (stmt, clauses);
273       }
274       /* No need to recurse into nested statements; no loop nested inside
275 	 this loop can be gang-partitioned.  */
276       sorry ("%<gang%> loop in %<gang-single%> region");
277       *handled_ops_p = true;
278       break;
279 
280     default:
281       break;
282     }
283 
284   return NULL;
285 }
286 
287 /* Visit all nested OpenACC loops in the sequence indicated by GS.  This
288    statement is expected to be inside a gang-single region.  Issue a warning
289    for any loops inside it that have gang clauses and remove the clauses.  */
290 
291 static void
make_loops_gang_single(gimple_seq gs)292 make_loops_gang_single (gimple_seq gs)
293 {
294   struct walk_stmt_info wi;
295   memset (&wi, 0, sizeof (wi));
296   walk_gimple_seq (gs, visit_loops_in_gang_single_region, NULL, &wi);
297 }
298 
299 /* Construct a "gang-single" compute construct at LOC containing the STMTS.
300    Annotate with CLAUSES, which must not contain a 'num_gangs' clause, and an
301    additional 'num_gangs (1)' clause to force "gang-single" execution.  */
302 
303 static gimple *
make_region_seq(location_t loc,gimple_seq stmts,tree num_gangs_clause,tree num_workers_clause,tree vector_length_clause,tree clauses)304 make_region_seq (location_t loc, gimple_seq stmts,
305 		 tree num_gangs_clause,
306 		 tree num_workers_clause,
307 		 tree vector_length_clause,
308 		 tree clauses)
309 {
310   /* This correctly unshares the entire clause chain rooted here.  */
311   clauses = unshare_expr (clauses);
312 
313   dump_user_location_t loc_stmts_first = gimple_seq_first (stmts);
314 
315   /* Figure out the region code for this region.  */
316   /* Optimistic default: assume "setup code", no looping; thus not
317      performance-critical.  */
318   int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE;
319   adjust_region_code (stmts, &region_code);
320 
321   if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)
322     {
323       if (dump_enabled_p ())
324 	/*TODO MSG_MISSED_OPTIMIZATION? */
325 	dump_printf_loc (MSG_NOTE, loc_stmts_first,
326 			 "beginning %<gang-single%> part"
327 			 " in OpenACC %<kernels%> region\n");
328 
329       /* Synthesize a 'num_gangs (1)' clause.  */
330       tree gang_single_clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
331       OMP_CLAUSE_OPERAND (gang_single_clause, 0) = integer_one_node;
332       OMP_CLAUSE_CHAIN (gang_single_clause) = clauses;
333       clauses = gang_single_clause;
334 
335       /* Remove and issue warnings about gang clauses on any OpenACC
336 	 loops nested inside this sequentially executed statement.  */
337       make_loops_gang_single (stmts);
338     }
339   else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
340     {
341       if (dump_enabled_p ())
342 	dump_printf_loc (MSG_NOTE, loc_stmts_first,
343 			 "beginning %<parloops%> part"
344 			 " in OpenACC %<kernels%> region\n");
345 
346       /* As we're transforming a 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
347 	 'GF_OMP_TARGET_KIND_OACC_KERNELS', this isn't doing any of the clauses
348 	 mangling that 'make_region_loop_nest' is doing.  */
349       /* Re-assemble the clauses stripped off earlier.  */
350       if (num_gangs_clause != NULL)
351 	{
352 	  tree c = unshare_expr (num_gangs_clause);
353 	  OMP_CLAUSE_CHAIN (c) = clauses;
354 	  clauses = c;
355 	}
356       if (num_workers_clause != NULL)
357 	{
358 	  tree c = unshare_expr (num_workers_clause);
359 	  OMP_CLAUSE_CHAIN (c) = clauses;
360 	  clauses = c;
361 	}
362       if (vector_length_clause != NULL)
363 	{
364 	  tree c = unshare_expr (vector_length_clause);
365 	  OMP_CLAUSE_CHAIN (c) = clauses;
366 	  clauses = c;
367 	}
368     }
369   else
370     gcc_unreachable ();
371 
372   /* Build the gang-single region.  */
373   gimple *single_region = gimple_build_omp_target (NULL, region_code, clauses);
374   gimple_set_location (single_region, loc);
375   gbind *single_body = gimple_build_bind (NULL, stmts, make_node (BLOCK));
376   gimple_omp_set_body (single_region, single_body);
377 
378   return single_region;
379 }
380 
381 /* Helper function for make_region_loop_nest.  Adds a 'num_gangs'
382    ('num_workers', 'vector_length') clause to the given CLAUSES, either the one
383    from the parent compute construct (PARENT_CLAUSE) or a new one based on the
384    loop's own LOOP_CLAUSE ('gang (num: N)' or similar for 'worker' or 'vector'
385    clauses) with the given CLAUSE_CODE.  Does nothing if neither PARENT_CLAUSE
386    nor LOOP_CLAUSE exist.  Returns the new clauses.  */
387 
388 static tree
add_parent_or_loop_num_clause(tree parent_clause,tree loop_clause,omp_clause_code clause_code,tree clauses)389 add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause,
390 			       omp_clause_code clause_code, tree clauses)
391 {
392   if (parent_clause != NULL)
393     {
394       tree num_clause = unshare_expr (parent_clause);
395       OMP_CLAUSE_CHAIN (num_clause) = clauses;
396       clauses = num_clause;
397     }
398   else if (loop_clause != NULL)
399     {
400       /* The kernels region does not have a 'num_gangs' clause, but the loop
401 	 itself had a 'gang (num: N)' clause.  Honor it by adding a
402 	 'num_gangs (N)' clause on the compute construct.  */
403       tree num = OMP_CLAUSE_OPERAND (loop_clause, 0);
404       tree new_num_clause
405 	= build_omp_clause (OMP_CLAUSE_LOCATION (loop_clause), clause_code);
406       OMP_CLAUSE_OPERAND (new_num_clause, 0) = num;
407       OMP_CLAUSE_CHAIN (new_num_clause) = clauses;
408       clauses = new_num_clause;
409     }
410   return clauses;
411 }
412 
413 /* Helper for make_region_loop_nest, looking for 'worker (num: N)' or 'vector
414    (length: N)' clauses in nested loops.  Removes the argument, transferring it
415    to the enclosing compute construct (via WI->INFO).  If arguments within the
416    same loop nest conflict, emits a warning.
417 
418    This function also decides whether to add an 'auto' clause on each of these
419    nested loops.  */
420 
421 struct adjust_nested_loop_clauses_wi_info
422 {
423   tree *loop_gang_clause_ptr;
424   tree *loop_worker_clause_ptr;
425   tree *loop_vector_clause_ptr;
426 };
427 
428 static tree
adjust_nested_loop_clauses(gimple_stmt_iterator * gsi_p,bool *,struct walk_stmt_info * wi)429 adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *,
430 			    struct walk_stmt_info *wi)
431 {
432   struct adjust_nested_loop_clauses_wi_info *wi_info
433     = (struct adjust_nested_loop_clauses_wi_info *) wi->info;
434   gimple *stmt = gsi_stmt (*gsi_p);
435 
436   if (gimple_code (stmt) == GIMPLE_OMP_FOR)
437     {
438       bool add_auto_clause = true;
439       tree loop_clauses = gimple_omp_for_clauses (stmt);
440       tree loop_clause = loop_clauses;
441       for (; loop_clause; loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
442 	{
443 	  tree *outer_clause_ptr = NULL;
444 	  switch (OMP_CLAUSE_CODE (loop_clause))
445 	    {
446 	    case OMP_CLAUSE_GANG:
447 	      outer_clause_ptr = wi_info->loop_gang_clause_ptr;
448 	      break;
449 	    case OMP_CLAUSE_WORKER:
450 	      outer_clause_ptr = wi_info->loop_worker_clause_ptr;
451 	      break;
452 	    case OMP_CLAUSE_VECTOR:
453 	      outer_clause_ptr = wi_info->loop_vector_clause_ptr;
454 	      break;
455 	    case OMP_CLAUSE_SEQ:
456 	    case OMP_CLAUSE_INDEPENDENT:
457 	    case OMP_CLAUSE_AUTO:
458 	      add_auto_clause = false;
459 	    default:
460 	      break;
461 	    }
462 	  if (outer_clause_ptr != NULL)
463 	    {
464 	      if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL
465 		  && *outer_clause_ptr == NULL)
466 		{
467 		  /* Transfer the clause to the enclosing compute construct and
468 		     remove the numerical argument from the 'loop'.  */
469 		  *outer_clause_ptr = unshare_expr (loop_clause);
470 		  OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
471 		}
472 	      else if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL &&
473 		       OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0) != NULL)
474 		{
475 		  /* See if both of these are the same constant.  If they
476 		     aren't, emit a warning.  */
477 		  tree old_op = OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0);
478 		  tree new_op = OMP_CLAUSE_OPERAND (loop_clause, 0);
479 		  if (!(cst_and_fits_in_hwi (old_op) &&
480 			cst_and_fits_in_hwi (new_op) &&
481 			int_cst_value (old_op) == int_cst_value (new_op)))
482 		    {
483 		      const char *clause_name
484 			= omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
485 		      error_at (gimple_location (stmt),
486 				"cannot honor conflicting %qs clause",
487 				clause_name);
488 		      inform (OMP_CLAUSE_LOCATION (*outer_clause_ptr),
489 			      "location of the previous clause"
490 			      " in the same loop nest");
491 		    }
492 		  OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
493 		}
494 	    }
495 	}
496       if (add_auto_clause)
497 	{
498 	  tree auto_clause
499 	    = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_AUTO);
500 	  OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
501 	  gimple_omp_for_set_clauses (stmt, auto_clause);
502 	}
503     }
504 
505   return NULL;
506 }
507 
508 /* Helper for make_region_loop_nest.  Transform OpenACC 'kernels'/'loop'
509    construct clauses into OpenACC 'parallel'/'loop' construct ones.  */
510 
511 static tree
transform_kernels_loop_clauses(gimple * omp_for,tree num_gangs_clause,tree num_workers_clause,tree vector_length_clause,tree clauses)512 transform_kernels_loop_clauses (gimple *omp_for,
513 				tree num_gangs_clause,
514 				tree num_workers_clause,
515 				tree vector_length_clause,
516 				tree clauses)
517 {
518   /* If this loop in a kernels region does not have an explicit 'seq',
519      'independent', or 'auto' clause, we must give it an explicit 'auto'
520      clause.
521      We also check for 'gang (num: N)' clauses.  These must not appear in
522      kernels regions that have their own 'num_gangs' clause.  Otherwise, they
523      must be converted and put on the region; similarly for 'worker' and
524      'vector' clauses.  */
525   bool add_auto_clause = true;
526   tree loop_gang_clause = NULL, loop_worker_clause = NULL,
527        loop_vector_clause = NULL;
528   tree loop_clauses = gimple_omp_for_clauses (omp_for);
529   for (tree loop_clause = loop_clauses;
530        loop_clause;
531        loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
532     {
533       bool found_num_clause = false;
534       tree *clause_ptr, clause_to_check;
535       switch (OMP_CLAUSE_CODE (loop_clause))
536 	{
537 	case OMP_CLAUSE_GANG:
538 	  found_num_clause = true;
539 	  clause_ptr = &loop_gang_clause;
540 	  clause_to_check = num_gangs_clause;
541 	  break;
542 	case OMP_CLAUSE_WORKER:
543 	  found_num_clause = true;
544 	  clause_ptr = &loop_worker_clause;
545 	  clause_to_check = num_workers_clause;
546 	  break;
547 	case OMP_CLAUSE_VECTOR:
548 	  found_num_clause = true;
549 	  clause_ptr = &loop_vector_clause;
550 	  clause_to_check = vector_length_clause;
551 	  break;
552 	case OMP_CLAUSE_INDEPENDENT:
553 	case OMP_CLAUSE_SEQ:
554 	case OMP_CLAUSE_AUTO:
555 	  add_auto_clause = false;
556 	default:
557 	  break;
558 	}
559       if (found_num_clause && OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL)
560 	{
561 	  if (clause_to_check)
562 	    {
563 	      const char *clause_name
564 		= omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
565 	      const char *parent_clause_name
566 		= omp_clause_code_name[OMP_CLAUSE_CODE (clause_to_check)];
567 	      error_at (OMP_CLAUSE_LOCATION (loop_clause),
568 			"argument not permitted on %qs clause"
569 			" in OpenACC %<kernels%> region with a %qs clause",
570 			clause_name, parent_clause_name);
571 	      inform (OMP_CLAUSE_LOCATION (clause_to_check),
572 		      "location of OpenACC %<kernels%>");
573 	    }
574 	  /* Copy the 'gang (N)'/'worker (N)'/'vector (N)' clause to the
575 	     enclosing compute construct.  */
576 	  *clause_ptr = unshare_expr (loop_clause);
577 	  OMP_CLAUSE_CHAIN (*clause_ptr) = NULL;
578 	  /* Leave a 'gang'/'worker'/'vector' clause on the 'loop', but without
579 	     argument.  */
580 	  OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
581 	}
582     }
583   if (add_auto_clause)
584     {
585       tree auto_clause = build_omp_clause (gimple_location (omp_for),
586 					   OMP_CLAUSE_AUTO);
587       OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
588       loop_clauses = auto_clause;
589     }
590   gimple_omp_for_set_clauses (omp_for, loop_clauses);
591   /* We must also recurse into the loop; it might contain nested loops having
592      their own 'worker (num: W)' or 'vector (length: V)' clauses.  Turn these
593      into 'worker'/'vector' clauses on the compute construct.  */
594   struct walk_stmt_info wi;
595   memset (&wi, 0, sizeof (wi));
596   struct adjust_nested_loop_clauses_wi_info wi_info;
597   wi_info.loop_gang_clause_ptr = &loop_gang_clause;
598   wi_info.loop_worker_clause_ptr = &loop_worker_clause;
599   wi_info.loop_vector_clause_ptr = &loop_vector_clause;
600   wi.info = &wi_info;
601   gimple *body = gimple_omp_body (omp_for);
602   walk_gimple_seq (body, adjust_nested_loop_clauses, NULL, &wi);
603   /* Check if there were conflicting numbers of workers or vector length.  */
604   if (loop_gang_clause != NULL &&
605       OMP_CLAUSE_OPERAND (loop_gang_clause, 0) == NULL)
606     loop_gang_clause = NULL;
607   if (loop_worker_clause != NULL &&
608       OMP_CLAUSE_OPERAND (loop_worker_clause, 0) == NULL)
609     loop_worker_clause = NULL;
610   if (loop_vector_clause != NULL &&
611       OMP_CLAUSE_OPERAND (loop_vector_clause, 0) == NULL)
612     vector_length_clause = NULL;
613 
614   /* If the kernels region had 'num_gangs', 'num_worker', 'vector_length'
615      clauses, add these to this new compute construct.  */
616   clauses
617     = add_parent_or_loop_num_clause (num_gangs_clause, loop_gang_clause,
618 				     OMP_CLAUSE_NUM_GANGS, clauses);
619   clauses
620     = add_parent_or_loop_num_clause (num_workers_clause, loop_worker_clause,
621 				     OMP_CLAUSE_NUM_WORKERS, clauses);
622   clauses
623     = add_parent_or_loop_num_clause (vector_length_clause, loop_vector_clause,
624 				     OMP_CLAUSE_VECTOR_LENGTH, clauses);
625 
626   return clauses;
627 }
628 
629 /* Construct a possibly gang-parallel compute construct containing the STMT,
630    which must be identical to, or a bind containing, the loop OMP_FOR.
631 
632    The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are
633    optional clauses from the original kernels region and must not be contained
634    in the other CLAUSES. The newly created compute construct is annotated with
635    the optional NUM_GANGS_CLAUSE as well as the other CLAUSES.  If there is no
636    NUM_GANGS_CLAUSE but the loop has a 'gang (num: N)' clause, that is
637    converted to a 'num_gangs (N)' clause on the new compute construct, and
638    similarly for 'worker' and 'vector' clauses.
639 
640    The outermost loop gets an 'auto' clause unless there already is an
641    'seq'/'independent'/'auto' clause.  Nested loops inside OMP_FOR are treated
642    similarly by the adjust_nested_loop_clauses function.  */
643 
644 static gimple *
make_region_loop_nest(gimple * omp_for,gimple_seq stmts,tree num_gangs_clause,tree num_workers_clause,tree vector_length_clause,tree clauses)645 make_region_loop_nest (gimple *omp_for, gimple_seq stmts,
646 		       tree num_gangs_clause,
647 		       tree num_workers_clause,
648 		       tree vector_length_clause,
649 		       tree clauses)
650 {
651   /* This correctly unshares the entire clause chain rooted here.  */
652   clauses = unshare_expr (clauses);
653 
654   /* Figure out the region code for this region.  */
655   /* Optimistic default: assume that the loop nest is parallelizable
656      (essentially, no GIMPLE_OMP_FOR with (explicit or implicit) 'auto' clause,
657      and no un-annotated loops).  */
658   int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED;
659   adjust_region_code (stmts, &region_code);
660 
661   if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
662     {
663       if (dump_enabled_p ())
664 	/* This is not MSG_OPTIMIZED_LOCATIONS, as we're just doing what the
665 	   user asked us to.  */
666 	dump_printf_loc (MSG_NOTE, omp_for,
667 			 "parallelized loop nest"
668 			 " in OpenACC %<kernels%> region\n");
669 
670       clauses = transform_kernels_loop_clauses (omp_for,
671 						num_gangs_clause,
672 						num_workers_clause,
673 						vector_length_clause,
674 						clauses);
675     }
676   else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
677     {
678       if (dump_enabled_p ())
679 	dump_printf_loc (MSG_NOTE, omp_for,
680 			 "forwarded loop nest"
681 			 " in OpenACC %<kernels%> region"
682 			 " to %<parloops%> for analysis\n");
683 
684       /* We're transforming one 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
685 	 'GF_OMP_TARGET_KIND_OACC_KERNELS', so don't have to
686 	 'transform_kernels_loop_clauses'.  */
687       /* Re-assemble the clauses stripped off earlier.  */
688       clauses
689 	= add_parent_or_loop_num_clause (num_gangs_clause, NULL,
690 					 OMP_CLAUSE_NUM_GANGS, clauses);
691       clauses
692 	= add_parent_or_loop_num_clause (num_workers_clause, NULL,
693 					 OMP_CLAUSE_NUM_WORKERS, clauses);
694       clauses
695 	= add_parent_or_loop_num_clause (vector_length_clause, NULL,
696 					 OMP_CLAUSE_VECTOR_LENGTH, clauses);
697     }
698   else
699     gcc_unreachable ();
700 
701   gimple *parallel_body_bind
702     = gimple_build_bind (NULL, stmts, make_node (BLOCK));
703   gimple *parallel_region
704     = gimple_build_omp_target (parallel_body_bind, region_code, clauses);
705   gimple_set_location (parallel_region, gimple_location (omp_for));
706 
707   return parallel_region;
708 }
709 
710 /* Eliminate any binds directly inside BIND by adding their statements to
711    BIND (i.e., modifying it in place), excluding binds that hold only an
712    OMP_FOR loop and associated setup/cleanup code.  Recurse into binds but
713    not other statements.  Return a chain of the local variables of eliminated
714    binds, i.e., the local variables found in nested binds.  If
715    INCLUDE_TOPLEVEL_VARS is true, this also includes the variables belonging
716    to BIND itself. */
717 
718 static tree
flatten_binds(gbind * bind,bool include_toplevel_vars=false)719 flatten_binds (gbind *bind, bool include_toplevel_vars = false)
720 {
721   tree vars = NULL, last_var = NULL;
722 
723   if (include_toplevel_vars)
724     {
725       vars = gimple_bind_vars (bind);
726       last_var = vars;
727     }
728 
729   gimple_seq new_body = NULL;
730   gimple_seq body_sequence = gimple_bind_body (bind);
731   gimple_stmt_iterator gsi, gsi_n;
732   for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n)
733     {
734       /* Advance the iterator here because otherwise it would be invalidated
735 	 by moving statements below.  */
736       gsi_n = gsi;
737       gsi_next (&gsi_n);
738 
739       gimple *stmt = gsi_stmt (gsi);
740       /* Flatten bind statements, except the ones that contain only an
741 	 OpenACC for loop.  */
742       if (gimple_code (stmt) == GIMPLE_BIND
743 	  && !top_level_omp_for_in_stmt (stmt))
744 	{
745 	  gbind *inner_bind = as_a <gbind *> (stmt);
746 	  /* Flatten recursively, and collect all variables.  */
747 	  tree inner_vars = flatten_binds (inner_bind, true);
748 	  gimple_seq inner_sequence = gimple_bind_body (inner_bind);
749 	  if (flag_checking)
750 	    {
751 	      for (gimple_stmt_iterator inner_gsi = gsi_start (inner_sequence);
752 		   !gsi_end_p (inner_gsi);
753 		   gsi_next (&inner_gsi))
754 		{
755 		  gimple *inner_stmt = gsi_stmt (inner_gsi);
756 		  gcc_assert (gimple_code (inner_stmt) != GIMPLE_BIND
757 			      || top_level_omp_for_in_stmt (inner_stmt));
758 		}
759 	    }
760 	  gimple_seq_add_seq (&new_body, inner_sequence);
761 	  /* Find the last variable; we will append others to it.  */
762 	  while (last_var != NULL && TREE_CHAIN (last_var) != NULL)
763 	    last_var = TREE_CHAIN (last_var);
764 	  if (last_var != NULL)
765 	    {
766 	      TREE_CHAIN (last_var) = inner_vars;
767 	      last_var = inner_vars;
768 	    }
769 	  else
770 	    {
771 	      vars = inner_vars;
772 	      last_var = vars;
773 	    }
774 	}
775       else
776 	gimple_seq_add_stmt (&new_body, stmt);
777     }
778 
779   /* Put the possibly transformed body back into the bind.  */
780   gimple_bind_set_body (bind, new_body);
781   return vars;
782 }
783 
784 /* Helper function for places where we construct data regions.  Wraps the BODY
785    inside a try-finally construct at LOC that calls __builtin_GOACC_data_end
786    in its cleanup block.  Returns this try statement.  */
787 
788 static gimple *
make_data_region_try_statement(location_t loc,gimple * body)789 make_data_region_try_statement (location_t loc, gimple *body)
790 {
791   tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END);
792   gimple *call = gimple_build_call (data_end_fn, 0);
793   gimple_seq cleanup = NULL;
794   gimple_seq_add_stmt (&cleanup, call);
795   gimple *try_stmt = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY);
796   gimple_set_location (body, loc);
797   return try_stmt;
798 }
799 
800 /* If INNER_BIND_VARS holds variables, build an OpenACC data region with
801    location LOC containing BODY and having 'create (var)' clauses for each
802    variable (as a side effect, such variables also get TREE_ADDRESSABLE set).
803    If INNER_CLEANUP is present, add a try-finally statement with
804    this cleanup code in the finally block.  Return the new data region, or
805    the original BODY if no data region was needed.  */
806 
807 static gimple *
maybe_build_inner_data_region(location_t loc,gimple * body,tree inner_bind_vars,gimple * inner_cleanup)808 maybe_build_inner_data_region (location_t loc, gimple *body,
809 			       tree inner_bind_vars, gimple *inner_cleanup)
810 {
811   /* Is this an instantiation of a template?  (In this case, we don't care what
812      the generic decl is - just whether the function decl has one.)  */
813   bool generic_inst_p
814     = (lang_hooks.decls.get_generic_function_decl (current_function_decl)
815        != NULL);
816 
817   /* Build data 'create (var)' clauses for these local variables.
818      Below we will add these to a data region enclosing the entire body
819      of the decomposed kernels region.  */
820   tree prev_mapped_var = NULL, next = NULL, artificial_vars = NULL,
821        inner_data_clauses = NULL;
822   for (tree v = inner_bind_vars; v; v = next)
823     {
824       next = TREE_CHAIN (v);
825       if (DECL_ARTIFICIAL (v)
826 	  || TREE_CODE (v) == CONST_DECL
827 	  || generic_inst_p)
828 	{
829 	  /* If this is an artificial temporary, it need not be mapped.  We
830 	     move its declaration into the bind inside the data region.
831 	     Also avoid mapping variables if we are inside a template
832 	     instantiation; the code does not contain all the copies to
833 	     temporaries that would make this legal.  */
834 	  TREE_CHAIN (v) = artificial_vars;
835 	  artificial_vars = v;
836 	  if (prev_mapped_var != NULL)
837 	    TREE_CHAIN (prev_mapped_var) = next;
838 	  else
839 	    inner_bind_vars = next;
840 	}
841       else
842 	{
843 	  /* Otherwise, build the map clause.  */
844 	  tree new_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
845 	  OMP_CLAUSE_SET_MAP_KIND (new_clause, GOMP_MAP_ALLOC);
846 	  OMP_CLAUSE_DECL (new_clause) = v;
847 	  OMP_CLAUSE_SIZE (new_clause) = DECL_SIZE_UNIT (v);
848 	  OMP_CLAUSE_CHAIN (new_clause) = inner_data_clauses;
849 	  inner_data_clauses = new_clause;
850 
851 	  prev_mapped_var = v;
852 
853 	  /* See <https://gcc.gnu.org/PR100280>.  */
854 	  if (!TREE_ADDRESSABLE (v))
855 	    {
856 	      /* Request that OMP lowering make 'v' addressable.  */
857 	      OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
858 
859 	      if (dump_enabled_p ())
860 		{
861 		  const dump_user_location_t d_u_loc
862 		    = dump_user_location_t::from_location_t (loc);
863 		  /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
864 #if __GNUC__ >= 10
865 # pragma GCC diagnostic push
866 # pragma GCC diagnostic ignored "-Wformat"
867 #endif
868 		  dump_printf_loc (MSG_NOTE, d_u_loc,
869 				   "OpenACC %<kernels%> decomposition:"
870 				   " variable %<%T%> declared in block"
871 				   " requested to be made addressable\n",
872 				   v);
873 #if __GNUC__ >= 10
874 # pragma GCC diagnostic pop
875 #endif
876 		}
877 	    }
878 	}
879     }
880 
881   if (artificial_vars)
882     body = gimple_build_bind (artificial_vars, body, make_node (BLOCK));
883 
884   /* If we determined above that there are variables that need to be created
885      on the device, construct a data region for them and wrap the body
886      inside that.  */
887   if (inner_data_clauses != NULL)
888     {
889       gcc_assert (inner_bind_vars != NULL);
890       gimple *inner_data_region
891 	= gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
892 				   inner_data_clauses);
893       gimple_set_location (inner_data_region, loc);
894       /* Make sure __builtin_GOACC_data_end is called at the end.  */
895       gimple *try_stmt = make_data_region_try_statement (loc, body);
896       gimple_omp_set_body (inner_data_region, try_stmt);
897       gimple *bind_body;
898       if (inner_cleanup != NULL)
899 	/* Clobber all the inner variables that need to be clobbered.  */
900 	bind_body = gimple_build_try (inner_data_region, inner_cleanup,
901 				      GIMPLE_TRY_FINALLY);
902       else
903 	bind_body = inner_data_region;
904       body = gimple_build_bind (inner_bind_vars, bind_body, make_node (BLOCK));
905     }
906 
907   return body;
908 }
909 
910 static void
add_wait(location_t loc,gimple_seq * region_body)911 add_wait (location_t loc, gimple_seq *region_body)
912 {
913   /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0).  */
914   tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
915   tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
916   gimple *wait_call = gimple_build_call (wait_fn, 2,
917 					 sync_arg, integer_zero_node);
918   gimple_set_location (wait_call, loc);
919   gimple_seq_add_stmt (region_body, wait_call);
920 }
921 
922 /* Helper function of decompose_kernels_region_body.  The statements in
923    REGION_BODY are expected to be decomposed parts; add an 'async' clause to
924    each.  Also add a 'wait' directive at the end of the sequence.  */
925 
926 static void
add_async_clauses_and_wait(location_t loc,gimple_seq * region_body)927 add_async_clauses_and_wait (location_t loc, gimple_seq *region_body)
928 {
929   tree default_async_queue
930     = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL);
931   for (gimple_stmt_iterator gsi = gsi_start (*region_body);
932        !gsi_end_p (gsi);
933        gsi_next (&gsi))
934     {
935       gimple *stmt = gsi_stmt (gsi);
936       tree target_clauses = gimple_omp_target_clauses (stmt);
937       tree new_async_clause = build_omp_clause (loc, OMP_CLAUSE_ASYNC);
938       OMP_CLAUSE_OPERAND (new_async_clause, 0) = default_async_queue;
939       OMP_CLAUSE_CHAIN (new_async_clause) = target_clauses;
940       target_clauses = new_async_clause;
941       gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt),
942 				     target_clauses);
943     }
944   add_wait (loc, region_body);
945 }
946 
947 /* Auxiliary analysis of the body of a kernels region, to determine for each
948    OpenACC loop whether it is control-dependent (i.e., not necessarily
949    executed every time the kernels region is entered) or not.
950    We say that a loop is control-dependent if there is some cond, switch, or
951    goto statement that jumps over it, forwards or backwards.  For example,
952    if the loop is controlled by an if statement, then a jump to the true
953    block, the false block, or from one of those blocks to the control flow
954    join point will necessarily jump over the loop.
955    This analysis implements an ad-hoc union-find data structure classifying
956    statements into "control-flow regions" as follows: Most statements are in
957    the same region as their predecessor, except that each OpenACC loop is in
958    a region of its own, and each OpenACC loop's successor starts a new
959    region.  We then unite the regions of any statements linked by jumps,
960    placing any cond, switch, or goto statement in the same region as its
961    target label(s).
962    In the end, control dependence of OpenACC loops can be determined by
963    comparing their immediate predecessor and successor statements' regions.
964    A jump crosses the loop if and only if the predecessor and successor are
965    in the same region.  (If there is no predecessor or successor, the loop
966    is executed unconditionally.)
967    The methods in this class identify statements by their index in the
968    kernels region's body.  */
969 
970 class control_flow_regions
971 {
972   public:
973     /* Initialize an instance and pre-compute the control-flow region
974        information for the statement sequence SEQ.  */
975     control_flow_regions (gimple_seq seq);
976 
977     /* Return true if the statement with the given index IDX in the analyzed
978        statement sequence is an unconditionally executed OpenACC loop.  */
979     bool is_unconditional_oacc_for_loop (size_t idx);
980 
981   private:
982     /* Find the region representative for the statement identified by index
983        STMT_IDX.  */
984     size_t find_rep (size_t stmt_idx);
985 
986     /* Union the regions containing the statements represented by
987        representatives A and B.  */
988     void union_reps (size_t a, size_t b);
989 
990     /* Helper for the constructor.  Performs the actual computation of the
991        control-flow regions in the statement sequence SEQ.  */
992     void compute_regions (gimple_seq seq);
993 
994     /* The mapping from statement indices to region representatives.  */
995     vec <size_t> representatives;
996 
997     /* A cache mapping statement indices to a flag indicating whether the
998        statement is a top level OpenACC for loop.  */
999     vec <bool> omp_for_loops;
1000 };
1001 
control_flow_regions(gimple_seq seq)1002 control_flow_regions::control_flow_regions (gimple_seq seq)
1003 {
1004   representatives.create (1);
1005   omp_for_loops.create (1);
1006   compute_regions (seq);
1007 }
1008 
1009 bool
is_unconditional_oacc_for_loop(size_t idx)1010 control_flow_regions::is_unconditional_oacc_for_loop (size_t idx)
1011 {
1012   if (idx == 0 || idx == representatives.length () - 1)
1013     /* The first or last statement in the kernels region.  This means that
1014        there is no room before or after it for a jump or a label.  Thus
1015        there cannot be a jump across it, so it is unconditional.  */
1016     return true;
1017   /* Otherwise, the loop is unconditional if the statements before and after
1018      it are in different control flow regions.  Scan forward and backward,
1019      skipping over neighboring OpenACC for loops, to find these preceding
1020      statements.  */
1021   size_t prev_index = idx - 1;
1022   while (prev_index > 0 && omp_for_loops [prev_index] == true)
1023     prev_index--;
1024   /* If all preceding statements are also OpenACC loops, all of these are
1025      unconditional.  */
1026   if (prev_index == 0)
1027     return true;
1028   size_t succ_index = idx + 1;
1029   while (succ_index < omp_for_loops.length ()
1030 	 && omp_for_loops [succ_index] == true)
1031     succ_index++;
1032   /* If all following statements are also OpenACC loops, all of these are
1033      unconditional.  */
1034   if (succ_index == omp_for_loops.length ())
1035     return true;
1036   return (find_rep (prev_index) != find_rep (succ_index));
1037 }
1038 
1039 size_t
find_rep(size_t stmt_idx)1040 control_flow_regions::find_rep (size_t stmt_idx)
1041 {
1042   size_t rep = stmt_idx, aux = stmt_idx;
1043   /* Find the root representative of this statement.  */
1044   while (representatives[rep] != rep)
1045     rep = representatives[rep];
1046   /* Compress the path from the original statement to the representative.  */
1047   while (representatives[aux] != rep)
1048     {
1049       size_t tmp = representatives[aux];
1050       representatives[aux] = rep;
1051       aux = tmp;
1052     }
1053   return rep;
1054 }
1055 
1056 void
union_reps(size_t a,size_t b)1057 control_flow_regions::union_reps (size_t a, size_t b)
1058 {
1059   a = find_rep (a);
1060   b = find_rep (b);
1061   representatives[b] = a;
1062 }
1063 
1064 void
compute_regions(gimple_seq seq)1065 control_flow_regions::compute_regions (gimple_seq seq)
1066 {
1067   hash_map <gimple *, size_t> control_flow_reps;
1068   hash_map <tree, size_t> label_reps;
1069   size_t current_region = 0, idx = 0;
1070 
1071   /* In a first pass, assign an initial region to each statement.  Except in
1072      the case of OpenACC loops, each statement simply gets the same region
1073      representative as its predecessor.  */
1074   for (gimple_stmt_iterator gsi = gsi_start (seq);
1075        !gsi_end_p (gsi);
1076        gsi_next (&gsi))
1077     {
1078       gimple *stmt = gsi_stmt (gsi);
1079       gimple *omp_for = top_level_omp_for_in_stmt (stmt);
1080       omp_for_loops.safe_push (omp_for != NULL);
1081       if (omp_for != NULL)
1082 	{
1083 	  /* Assign a new region to this loop and to its successor.  */
1084 	  current_region = idx;
1085 	  representatives.safe_push (current_region);
1086 	  current_region++;
1087 	}
1088       else
1089 	{
1090 	  representatives.safe_push (current_region);
1091 	  /* Remember any jumps and labels for the second pass below.  */
1092 	  if (gimple_code (stmt) == GIMPLE_COND
1093 	      || gimple_code (stmt) == GIMPLE_SWITCH
1094 	      || gimple_code (stmt) == GIMPLE_GOTO)
1095 	    control_flow_reps.put (stmt, current_region);
1096 	  else if (gimple_code (stmt) == GIMPLE_LABEL)
1097 	    label_reps.put (gimple_label_label (as_a <glabel *> (stmt)),
1098 			    current_region);
1099 	}
1100       idx++;
1101     }
1102   gcc_assert (representatives.length () == omp_for_loops.length ());
1103 
1104   /* Revisit all the control flow statements and union the region of each
1105      cond, switch, or goto statement with the target labels' regions.  */
1106   for (hash_map <gimple *, size_t>::iterator it = control_flow_reps.begin ();
1107        it != control_flow_reps.end ();
1108        ++it)
1109     {
1110       gimple *stmt = (*it).first;
1111       size_t stmt_rep = (*it).second;
1112       switch (gimple_code (stmt))
1113 	{
1114 	  tree label;
1115 	  unsigned int n;
1116 
1117 	case GIMPLE_COND:
1118 	  label = gimple_cond_true_label (as_a <gcond *> (stmt));
1119 	  union_reps (stmt_rep, *label_reps.get (label));
1120 	  label = gimple_cond_false_label (as_a <gcond *> (stmt));
1121 	  union_reps (stmt_rep, *label_reps.get (label));
1122 	  break;
1123 
1124 	case GIMPLE_SWITCH:
1125 	  n = gimple_switch_num_labels (as_a <gswitch *> (stmt));
1126 	  for (unsigned int i = 0; i < n; i++)
1127 	    {
1128 	      tree switch_case
1129 		= gimple_switch_label (as_a <gswitch *> (stmt), i);
1130 	      label = CASE_LABEL (switch_case);
1131 	      union_reps (stmt_rep, *label_reps.get (label));
1132 	    }
1133 	  break;
1134 
1135 	case GIMPLE_GOTO:
1136 	  label = gimple_goto_dest (stmt);
1137 	  union_reps (stmt_rep, *label_reps.get (label));
1138 	  break;
1139 
1140 	default:
1141 	  gcc_unreachable ();
1142 	}
1143     }
1144 }
1145 
1146 /* Decompose the body of the KERNELS_REGION, which was originally annotated
1147    with the KERNELS_CLAUSES, into a series of compute constructs.  */
1148 
1149 static gimple *
decompose_kernels_region_body(gimple * kernels_region,tree kernels_clauses)1150 decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
1151 {
1152   location_t loc = gimple_location (kernels_region);
1153 
1154   /* The kernels clauses will be propagated to the child clauses unmodified,
1155      except that the 'num_gangs', 'num_workers', and 'vector_length' clauses
1156      will only be added to loop regions.  The other regions are "gang-single"
1157      and get an explicit 'num_gangs (1)' clause.  So separate out the
1158      'num_gangs', 'num_workers', and 'vector_length' clauses here.
1159      Also check for the presence of an 'async' clause but do not remove it from
1160      the 'kernels' clauses.  */
1161   tree num_gangs_clause = NULL, num_workers_clause = NULL,
1162        vector_length_clause = NULL;
1163   tree async_clause = NULL;
1164   tree prev_clause = NULL, next_clause = NULL;
1165   tree parallel_clauses = kernels_clauses;
1166   for (tree c = parallel_clauses; c; c = next_clause)
1167     {
1168       /* Preserve this here, as we might NULL it later.  */
1169       next_clause = OMP_CLAUSE_CHAIN (c);
1170 
1171       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS
1172 	  || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS
1173 	  || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH)
1174 	{
1175 	  /* Cut this clause out of the chain.  */
1176 	  if (prev_clause != NULL)
1177 	    OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (c);
1178 	  else
1179 	    kernels_clauses = OMP_CLAUSE_CHAIN (c);
1180 	  OMP_CLAUSE_CHAIN (c) = NULL;
1181 	  switch (OMP_CLAUSE_CODE (c))
1182 	    {
1183 	    case OMP_CLAUSE_NUM_GANGS:
1184 	      num_gangs_clause = c;
1185 	      break;
1186 	    case OMP_CLAUSE_NUM_WORKERS:
1187 	      num_workers_clause = c;
1188 	      break;
1189 	    case OMP_CLAUSE_VECTOR_LENGTH:
1190 	      vector_length_clause = c;
1191 	      break;
1192 	    default:
1193 	      gcc_unreachable ();
1194 	    }
1195 	}
1196       else
1197 	prev_clause = c;
1198       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
1199 	async_clause = c;
1200     }
1201 
1202   gimple *kernels_body = gimple_omp_body (kernels_region);
1203   gbind *kernels_bind = as_a <gbind *> (kernels_body);
1204 
1205   /* The body of the region may contain other nested binds declaring inner
1206      local variables.  Collapse all these binds into one to ensure that we
1207      have a single sequence of statements to iterate over; also, collect all
1208      inner variables.  */
1209   tree inner_bind_vars = flatten_binds (kernels_bind);
1210   gimple_seq body_sequence = gimple_bind_body (kernels_bind);
1211 
1212   /* All these inner variables will get allocated on the device (below, by
1213      calling maybe_build_inner_data_region).  Here we create 'present'
1214      clauses for them and add these clauses to the list of clauses to be
1215      attached to each inner compute construct.  */
1216   tree present_clauses = kernels_clauses;
1217   for (tree var = inner_bind_vars; var; var = TREE_CHAIN (var))
1218     {
1219       if (!DECL_ARTIFICIAL (var) && TREE_CODE (var) != CONST_DECL)
1220 	{
1221 	  tree present_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
1222 	  OMP_CLAUSE_SET_MAP_KIND (present_clause, GOMP_MAP_FORCE_PRESENT);
1223 	  OMP_CLAUSE_DECL (present_clause) = var;
1224 	  OMP_CLAUSE_SIZE (present_clause) = DECL_SIZE_UNIT (var);
1225 	  OMP_CLAUSE_CHAIN (present_clause) = present_clauses;
1226 	  present_clauses = present_clause;
1227 	}
1228     }
1229   kernels_clauses = present_clauses;
1230 
1231   /* In addition to nested binds, the "real" body of the region may be
1232      nested inside a try-finally block.  Find its cleanup block, which
1233      contains code to clobber the local variables that must be clobbered.  */
1234   gimple *inner_cleanup = NULL;
1235   if (body_sequence != NULL && gimple_code (body_sequence) == GIMPLE_TRY)
1236     {
1237       if (gimple_seq_singleton_p (body_sequence))
1238 	{
1239 	  /* The try statement is the only thing inside the bind.  */
1240 	  inner_cleanup = gimple_try_cleanup (body_sequence);
1241 	  body_sequence = gimple_try_eval (body_sequence);
1242 	}
1243       else
1244 	{
1245 	  /* The bind's body starts with a try statement, but it is followed
1246 	     by other things.  */
1247 	  gimple_stmt_iterator gsi = gsi_start (body_sequence);
1248 	  gimple *try_stmt = gsi_stmt (gsi);
1249 	  inner_cleanup = gimple_try_cleanup (try_stmt);
1250 	  gimple *try_body = gimple_try_eval (try_stmt);
1251 
1252 	  gsi_remove (&gsi, false);
1253 	  /* Now gsi indicates the sequence of statements after the try
1254 	     statement in the bind.  Append the statement in the try body and
1255 	     the trailing statements from gsi.  */
1256 	  gsi_insert_seq_before (&gsi, try_body, GSI_CONTINUE_LINKING);
1257 	  body_sequence = gsi_stmt (gsi);
1258 	}
1259     }
1260 
1261   /* This sequence will collect all the top-level statements in the body of
1262      the data region we are about to construct.  */
1263   gimple_seq region_body = NULL;
1264   /* This sequence will collect consecutive statements to be put into a
1265      gang-single region.  */
1266   gimple_seq gang_single_seq = NULL;
1267   /* Flag recording whether the gang_single_seq only contains copies to
1268      local variables.  These may be loop setup code that should not be
1269      separated from the loop.  */
1270   bool only_simple_assignments = true;
1271 
1272   /* Precompute the control flow region information to determine whether an
1273      OpenACC loop is executed conditionally or unconditionally.  */
1274   control_flow_regions cf_regions (body_sequence);
1275 
1276   /* Iterate over the statements in the kernels region's body.  */
1277   size_t idx = 0;
1278   gimple_stmt_iterator gsi, gsi_n;
1279   for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n, idx++)
1280     {
1281       /* Advance the iterator here because otherwise it would be invalidated
1282 	 by moving statements below.  */
1283       gsi_n = gsi;
1284       gsi_next (&gsi_n);
1285 
1286       gimple *stmt = gsi_stmt (gsi);
1287       if (gimple_code (stmt) == GIMPLE_DEBUG)
1288 	{
1289 	  if (flag_compare_debug_opt || flag_compare_debug)
1290 	    /* Let the usual '-fcompare-debug' analysis bail out, as
1291 	       necessary.  */
1292 	    ;
1293 	  else
1294 	    sorry_at (loc, "%qs not yet supported",
1295 		      gimple_code_name[gimple_code (stmt)]);
1296 	}
1297       gimple *omp_for = top_level_omp_for_in_stmt (stmt);
1298       bool is_unconditional_oacc_for_loop = false;
1299       if (omp_for != NULL)
1300 	is_unconditional_oacc_for_loop
1301 	  = cf_regions.is_unconditional_oacc_for_loop (idx);
1302       if (omp_for != NULL
1303 	  && is_unconditional_oacc_for_loop)
1304 	{
1305 	  /* This is an OMP for statement, put it into a separate region.
1306 	     But first, construct a gang-single region containing any
1307 	     complex sequential statements we may have seen.  */
1308 	  if (gang_single_seq != NULL && !only_simple_assignments)
1309 	    {
1310 	      gimple *single_region
1311 		= make_region_seq (loc, gang_single_seq,
1312 				   num_gangs_clause,
1313 				   num_workers_clause,
1314 				   vector_length_clause,
1315 				   kernels_clauses);
1316 	      gimple_seq_add_stmt (&region_body, single_region);
1317 	    }
1318 	  else if (gang_single_seq != NULL && only_simple_assignments)
1319 	    {
1320 	      /* There is a sequence of sequential statements preceding this
1321 		 loop, but they are all simple assignments.  This is
1322 		 probably setup code for the loop; in particular, Fortran DO
1323 		 loops are preceded by code to copy the loop limit variable
1324 		 to a temporary.  Group this code together with the loop
1325 		 itself.  */
1326 	      gimple_seq_add_stmt (&gang_single_seq, stmt);
1327 	      stmt = gimple_build_bind (NULL, gang_single_seq,
1328 					make_node (BLOCK));
1329 	    }
1330 	  gang_single_seq = NULL;
1331 	  only_simple_assignments = true;
1332 
1333 	  gimple_seq parallel_seq = NULL;
1334 	  gimple_seq_add_stmt (&parallel_seq, stmt);
1335 	  gimple *parallel_region
1336 	    = make_region_loop_nest (omp_for, parallel_seq,
1337 				     num_gangs_clause,
1338 				     num_workers_clause,
1339 				     vector_length_clause,
1340 				     kernels_clauses);
1341 	  gimple_seq_add_stmt (&region_body, parallel_region);
1342 	}
1343       else
1344 	{
1345 	  if (omp_for != NULL)
1346 	    {
1347 	      gcc_checking_assert (!is_unconditional_oacc_for_loop);
1348 	      if (dump_enabled_p ())
1349 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, omp_for,
1350 				 "unparallelized loop nest"
1351 				 " in OpenACC %<kernels%> region:"
1352 				 " it's executed conditionally\n");
1353 	    }
1354 
1355 	  /* This is not an unconditional OMP for statement, so it will be
1356 	     put into a gang-single region.  */
1357 	  gimple_seq_add_stmt (&gang_single_seq, stmt);
1358 	  /* Is this a simple assignment? We call it simple if it is an
1359 	     assignment to an artificial local variable.  This captures
1360 	     Fortran loop setup code computing loop bounds and offsets.  */
1361 	  bool is_simple_assignment
1362 	    = (gimple_code (stmt) == GIMPLE_ASSIGN
1363 	       && TREE_CODE (gimple_assign_lhs (stmt)) == VAR_DECL
1364 	       && DECL_ARTIFICIAL (gimple_assign_lhs (stmt)));
1365 	  if (!is_simple_assignment)
1366 	    only_simple_assignments = false;
1367 	}
1368     }
1369 
1370   /* If we did not emit a new region, and are not going to emit one now
1371      (that is, the original region was empty), prepare to emit a dummy so as
1372      to preserve the original construct, which other processing (at least
1373      test cases) depend on.  */
1374   if (region_body == NULL && gang_single_seq == NULL)
1375     {
1376       gimple *stmt = gimple_build_nop ();
1377       gimple_set_location (stmt, loc);
1378       gimple_seq_add_stmt (&gang_single_seq, stmt);
1379     }
1380 
1381   /* Gather up any remaining gang-single statements.  */
1382   if (gang_single_seq != NULL)
1383     {
1384       gimple *single_region
1385 	= make_region_seq (loc, gang_single_seq,
1386 			   num_gangs_clause,
1387 			   num_workers_clause,
1388 			   vector_length_clause,
1389 			   kernels_clauses);
1390       gimple_seq_add_stmt (&region_body, single_region);
1391     }
1392 
1393   /* We want to launch these kernels asynchronously.  If the original
1394      kernels region had an async clause, this is done automatically because
1395      that async clause was copied to the individual regions we created.
1396      Otherwise, add an async clause to each newly created region, as well as
1397      a wait directive at the end.  */
1398   if (async_clause == NULL)
1399     add_async_clauses_and_wait (loc, &region_body);
1400   else
1401     /* !!! If we have asynchronous parallel blocks inside a (synchronous) data
1402        region, then target memory will get unmapped at the point the data
1403        region ends, even if the inner asynchronous parallels have not yet
1404        completed.  For kernels marked "async", we might want to use "enter data
1405        async(...)" and "exit data async(...)" instead, or asynchronous data
1406        regions (see also <https://gcc.gnu.org/PR97390>
1407        "[OpenACC] 'async' clause on 'data' construct",
1408        which is to share the same implementation).
1409        For now, insert a (synchronous) wait at the end of the block.  */
1410     add_wait (loc, &region_body);
1411 
1412   tree kernels_locals = gimple_bind_vars (as_a <gbind *> (kernels_body));
1413   gimple *body = gimple_build_bind (kernels_locals, region_body,
1414 				    make_node (BLOCK));
1415 
1416   /* If we found variables declared in nested scopes, build a data region to
1417      map them to the device.  */
1418   body = maybe_build_inner_data_region (loc, body, inner_bind_vars,
1419 					inner_cleanup);
1420 
1421   return body;
1422 }
1423 
1424 /* Decompose one OpenACC 'kernels' construct into an OpenACC 'data' construct
1425    containing the original OpenACC 'kernels' construct's region cut up into a
1426    sequence of compute constructs.  */
1427 
1428 static gimple *
omp_oacc_kernels_decompose_1(gimple * kernels_stmt)1429 omp_oacc_kernels_decompose_1 (gimple *kernels_stmt)
1430 {
1431   gcc_checking_assert (gimple_omp_target_kind (kernels_stmt)
1432 		       == GF_OMP_TARGET_KIND_OACC_KERNELS);
1433   location_t loc = gimple_location (kernels_stmt);
1434 
1435   /* Collect the data clauses of the OpenACC 'kernels' directive and create a
1436      new OpenACC 'data' construct with those clauses.  */
1437   tree kernels_clauses = gimple_omp_target_clauses (kernels_stmt);
1438   tree data_clauses = NULL;
1439   for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c))
1440     {
1441       /* Certain clauses are copied to the enclosing OpenACC 'data'.  Other
1442 	 clauses remain on the OpenACC 'kernels'.  */
1443       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
1444 	{
1445 	  tree decl = OMP_CLAUSE_DECL (c);
1446 	  HOST_WIDE_INT map_kind = OMP_CLAUSE_MAP_KIND (c);
1447 	  switch (map_kind)
1448 	    {
1449 	    default:
1450 	      if (map_kind == GOMP_MAP_ALLOC
1451 		  && integer_zerop (OMP_CLAUSE_SIZE (c)))
1452 		/* ??? This is an alloc clause for mapping a pointer whose
1453 		   target is already mapped.  We leave these on the inner
1454 		   compute constructs because moving them to the outer data
1455 		   region causes runtime errors.  */
1456 		break;
1457 
1458 	      /* For non-artificial variables, and for non-declaration
1459 		 expressions like A[0:n], copy the clause to the data
1460 		 region.  */
1461 	      if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl))
1462 		  || !DECL_P (decl))
1463 		{
1464 		  tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c),
1465 						      OMP_CLAUSE_MAP);
1466 		  OMP_CLAUSE_SET_MAP_KIND (new_clause, map_kind);
1467 		  /* This must be unshared here to avoid "incorrect sharing
1468 		     of tree nodes" errors from verify_gimple.  */
1469 		  OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl);
1470 		  OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c);
1471 		  OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
1472 		  data_clauses = new_clause;
1473 
1474 		  /* Now that this data is mapped, turn the data clause on the
1475 		     inner OpenACC 'kernels' into a 'present' clause.  */
1476 		  OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT);
1477 
1478 		  /* See <https://gcc.gnu.org/PR100280>,
1479 		     <https://gcc.gnu.org/PR104086>.  */
1480 		  if (DECL_P (decl)
1481 		      && !TREE_ADDRESSABLE (decl))
1482 		    {
1483 		      /* Request that OMP lowering make 'decl' addressable.  */
1484 		      OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
1485 
1486 		      if (dump_enabled_p ())
1487 			{
1488 			  location_t loc = OMP_CLAUSE_LOCATION (new_clause);
1489 			  const dump_user_location_t d_u_loc
1490 			    = dump_user_location_t::from_location_t (loc);
1491 			  /* PR100695 "Format decoder, quoting in 'dump_printf'
1492 			     etc." */
1493 #if __GNUC__ >= 10
1494 # pragma GCC diagnostic push
1495 # pragma GCC diagnostic ignored "-Wformat"
1496 #endif
1497 			  dump_printf_loc
1498 			    (MSG_NOTE, d_u_loc,
1499 			     "OpenACC %<kernels%> decomposition:"
1500 			     " variable %<%T%> in %qs clause"
1501 			     " requested to be made addressable\n",
1502 			     decl,
1503 			     user_omp_clause_code_name (new_clause, true));
1504 #if __GNUC__ >= 10
1505 # pragma GCC diagnostic pop
1506 #endif
1507 			}
1508 		    }
1509 		}
1510 	      break;
1511 
1512 	    case GOMP_MAP_POINTER:
1513 	    case GOMP_MAP_TO_PSET:
1514 	    case GOMP_MAP_FIRSTPRIVATE_POINTER:
1515 	    case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
1516 	      /* ??? Copying these map kinds leads to internal compiler
1517 		 errors in later passes.  */
1518 	      break;
1519 	    }
1520 	}
1521       else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
1522 	{
1523 	  /* If there is an 'if' clause, it must be duplicated to the
1524 	     enclosing data region.  Temporarily remove the if clause's
1525 	     chain to avoid copying it.  */
1526 	  tree saved_chain = OMP_CLAUSE_CHAIN (c);
1527 	  OMP_CLAUSE_CHAIN (c) = NULL;
1528 	  tree new_if_clause = unshare_expr (c);
1529 	  OMP_CLAUSE_CHAIN (c) = saved_chain;
1530 	  OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses;
1531 	  data_clauses = new_if_clause;
1532 	}
1533     }
1534   /* Restore the original order of the clauses.  */
1535   data_clauses = nreverse (data_clauses);
1536 
1537   gimple *data_region
1538     = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
1539 			       data_clauses);
1540   gimple_set_location (data_region, loc);
1541 
1542   /* Transform the body of the kernels region into a sequence of compute
1543      constructs.  */
1544   gimple *body = decompose_kernels_region_body (kernels_stmt,
1545 						kernels_clauses);
1546 
1547   /* Put the transformed pieces together.  The entire body of the region is
1548      wrapped in a try-finally statement that calls __builtin_GOACC_data_end
1549      for cleanup.  */
1550   gimple *try_stmt = make_data_region_try_statement (loc, body);
1551   gimple_omp_set_body (data_region, try_stmt);
1552 
1553   return data_region;
1554 }
1555 
1556 
1557 /* Decompose OpenACC 'kernels' constructs in the current function.  */
1558 
1559 static tree
omp_oacc_kernels_decompose_callback_stmt(gimple_stmt_iterator * gsi_p,bool * handled_ops_p,struct walk_stmt_info *)1560 omp_oacc_kernels_decompose_callback_stmt (gimple_stmt_iterator *gsi_p,
1561 					  bool *handled_ops_p,
1562 					  struct walk_stmt_info *)
1563 {
1564   gimple *stmt = gsi_stmt (*gsi_p);
1565 
1566   if ((gimple_code (stmt) == GIMPLE_OMP_TARGET)
1567       && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
1568     {
1569       gimple *stmt_new = omp_oacc_kernels_decompose_1 (stmt);
1570       gsi_replace (gsi_p, stmt_new, false);
1571       *handled_ops_p = true;
1572     }
1573   else
1574     *handled_ops_p = false;
1575 
1576   return NULL;
1577 }
1578 
1579 static unsigned int
omp_oacc_kernels_decompose(void)1580 omp_oacc_kernels_decompose (void)
1581 {
1582   gimple_seq body = gimple_body (current_function_decl);
1583 
1584   struct walk_stmt_info wi;
1585   memset (&wi, 0, sizeof (wi));
1586   walk_gimple_seq_mod (&body, omp_oacc_kernels_decompose_callback_stmt, NULL,
1587 		       &wi);
1588 
1589   gimple_set_body (current_function_decl, body);
1590 
1591   return 0;
1592 }
1593 
1594 
1595 namespace {
1596 
1597 const pass_data pass_data_omp_oacc_kernels_decompose =
1598 {
1599   GIMPLE_PASS, /* type */
1600   "omp_oacc_kernels_decompose", /* name */
1601   OPTGROUP_OMP, /* optinfo_flags */
1602   TV_NONE, /* tv_id */
1603   PROP_gimple_any, /* properties_required */
1604   0, /* properties_provided */
1605   0, /* properties_destroyed */
1606   0, /* todo_flags_start */
1607   0, /* todo_flags_finish */
1608 };
1609 
1610 class pass_omp_oacc_kernels_decompose : public gimple_opt_pass
1611 {
1612 public:
pass_omp_oacc_kernels_decompose(gcc::context * ctxt)1613   pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
1614     : gimple_opt_pass (pass_data_omp_oacc_kernels_decompose, ctxt)
1615   {}
1616 
1617   /* opt_pass methods: */
gate(function *)1618   virtual bool gate (function *)
1619   {
1620     return (flag_openacc
1621 	    && param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE);
1622   }
execute(function *)1623   virtual unsigned int execute (function *)
1624   {
1625     return omp_oacc_kernels_decompose ();
1626   }
1627 
1628 }; // class pass_omp_oacc_kernels_decompose
1629 
1630 } // anon namespace
1631 
1632 gimple_opt_pass *
make_pass_omp_oacc_kernels_decompose(gcc::context * ctxt)1633 make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
1634 {
1635   return new pass_omp_oacc_kernels_decompose (ctxt);
1636 }
1637