1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2    and a lowering pass for OpenACC device directives.
3 
4    Copyright (C) 2005-2021 Free Software Foundation, Inc.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "langhooks.h"
37 #include "gimplify.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "gimple-walk.h"
41 #include "tree-cfg.h"
42 #include "tree-into-ssa.h"
43 #include "tree-nested.h"
44 #include "stor-layout.h"
45 #include "common/common-target.h"
46 #include "omp-general.h"
47 #include "omp-offload.h"
48 #include "lto-section-names.h"
49 #include "gomp-constants.h"
50 #include "gimple-pretty-print.h"
51 #include "intl.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "cfgloop.h"
55 #include "context.h"
56 
57 /* Describe the OpenACC looping structure of a function.  The entire
58    function is held in a 'NULL' loop.  */
59 
struct oacc_loop
{
  oacc_loop *parent; /* Containing loop.  */

  oacc_loop *child; /* First inner loop.  */

  oacc_loop *sibling; /* Next loop within same parent.  */

  location_t loc; /* Location of the loop start.  */

  gcall *marker; /* Initial head marker.  */

  /* Head/tail marker calls, one slot per partitioning dimension
     (gang/worker/vector).  */
  gcall *heads[GOMP_DIM_MAX];  /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX];  /* Tail marker functions.  */

  tree routine;  /* Pseudo-loop enclosing a routine.  */

  unsigned mask;   /* Partitioning mask.  */
  unsigned e_mask; /* Partitioning of element loops (when tiling).  */
  unsigned inner;  /* Partitioning of inner loops.  */
  unsigned flags;  /* Partitioning flags.  */
  vec<gcall *> ifns;  /* Contained loop abstraction functions.  */
  tree chunk_size; /* Chunk size.  */
  gcall *head_end; /* Final marker of head sequence.  */
};
85 
/* Holds offload tables with decls: the outlined offload functions and the
   "omp declare target" variables, emitted into the offload tables by
   omp_finish_file.  */
vec<tree, va_gc> *offload_funcs, *offload_vars;
88 
89 /* Return level at which oacc routine may spawn a partitioned loop, or
90    -1 if it is not a routine (i.e. is an offload fn).  */
91 
92 int
oacc_fn_attrib_level(tree attr)93 oacc_fn_attrib_level (tree attr)
94 {
95   tree pos = TREE_VALUE (attr);
96 
97   if (!TREE_PURPOSE (pos))
98     return -1;
99 
100   int ix = 0;
101   for (ix = 0; ix != GOMP_DIM_MAX;
102        ix++, pos = TREE_CHAIN (pos))
103     if (!integer_zerop (TREE_PURPOSE (pos)))
104       break;
105 
106   return ix;
107 }
108 
109 /* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
110    adds their addresses and sizes to constructor-vector V_CTOR.  */
111 
static void
add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
					 vec<constructor_elt, va_gc> *v_ctor)
{
  unsigned len = vec_safe_length (v_decls);
  for (unsigned i = 0; i < len; i++)
    {
      tree it = (*v_decls)[i];
      bool is_var = VAR_P (it);
      /* "omp declare target link" variables are entered into the table via
	 a pointer; on the accelerator side they additionally carry a
	 DECL_VALUE_EXPR pointing at that link pointer.  */
      bool is_link_var
	= is_var
#ifdef ACCEL_COMPILER
	  && DECL_HAS_VALUE_EXPR_P (it)
#endif
	  && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));

      /* See also omp_finish_file and output_offload_tables in lto-cgraph.c.  */
      if (!in_lto_p && !symtab_node::get (it))
	continue;

      tree size = NULL_TREE;
      if (is_var)
	size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));

      tree addr;
      if (!is_link_var)
	addr = build_fold_addr_expr (it);
      else
	{
#ifdef ACCEL_COMPILER
	  /* For "omp declare target link" vars add address of the pointer to
	     the target table, instead of address of the var.  */
	  tree value_expr = DECL_VALUE_EXPR (it);
	  tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
	  varpool_node::finalize_decl (link_ptr_decl);
	  addr = build_fold_addr_expr (link_ptr_decl);
#else
	  addr = build_fold_addr_expr (it);
#endif

	  /* Most significant bit of the size marks "omp declare target link"
	     vars in host and target tables.  */
	  unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
	  isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
			    * BITS_PER_UNIT - 1);
	  size = wide_int_to_tree (const_ptr_type_node, isize);
	}

      /* Functions contribute one table entry (the address); variables
	 contribute two (address, size).  */
      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
      if (is_var)
	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
    }
}
165 
166 /* Return true if DECL is a function for which its references should be
167    analyzed.  */
168 
169 static bool
omp_declare_target_fn_p(tree decl)170 omp_declare_target_fn_p (tree decl)
171 {
172   return (TREE_CODE (decl) == FUNCTION_DECL
173 	  && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
174 	  && !lookup_attribute ("omp declare target host",
175 				DECL_ATTRIBUTES (decl))
176 	  && (!flag_openacc
177 	      || oacc_get_fn_attrib (decl) == NULL_TREE));
178 }
179 
/* Return true if DECL is a variable for which its initializer references
   should be analyzed.  */
182 
183 static bool
omp_declare_target_var_p(tree decl)184 omp_declare_target_var_p (tree decl)
185 {
186   return (VAR_P (decl)
187 	  && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
188 	  && !lookup_attribute ("omp declare target link",
189 				DECL_ATTRIBUTES (decl)));
190 }
191 
192 /* Helper function for omp_discover_implicit_declare_target, called through
193    walk_tree.  Mark referenced FUNCTION_DECLs implicitly as
194    declare target to.  */
195 
static tree
omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == CALL_EXPR
      && CALL_EXPR_FN (*tp)
      && TREE_CODE (CALL_EXPR_FN (*tp)) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (CALL_EXPR_FN (*tp), 0)) == FUNCTION_DECL
      && lookup_attribute ("omp declare variant base",
			   DECL_ATTRIBUTES (TREE_OPERAND (CALL_EXPR_FN (*tp),
							  0))))
    {
      /* A call to a function with "omp declare variant base" also
	 references each of its variants; recurse on every variant
	 FUNCTION_DECL recorded in the attribute chain.  */
      tree fn = TREE_OPERAND (CALL_EXPR_FN (*tp), 0);
      for (tree attr = DECL_ATTRIBUTES (fn); attr; attr = TREE_CHAIN (attr))
	{
	  attr = lookup_attribute ("omp declare variant base", attr);
	  if (attr == NULL_TREE)
	    break;
	  tree purpose = TREE_PURPOSE (TREE_VALUE (attr));
	  if (TREE_CODE (purpose) == FUNCTION_DECL)
	    omp_discover_declare_target_tgt_fn_r (&purpose, walk_subtrees, data);
	}
    }
  else if (TREE_CODE (*tp) == FUNCTION_DECL)
    {
      tree decl = *tp;
      tree id = get_identifier ("omp declare target");
      symtab_node *node = symtab_node::get (*tp);
      if (node != NULL)
	{
	  /* First follow FUNCTION_DECL alias targets, marking each node on
	     the way as offloadable declare-target unless already marked
	     (or explicitly host-only).  */
	  while (node->alias_target
		 && TREE_CODE (node->alias_target) == FUNCTION_DECL)
	    {
	      if (!omp_declare_target_fn_p (node->decl)
		  && !lookup_attribute ("omp declare target host",
					DECL_ATTRIBUTES (node->decl)))
		{
		  node->offloadable = 1;
		  DECL_ATTRIBUTES (node->decl)
		    = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
		}
	      node = symtab_node::get (node->alias_target);
	    }
	  /* Then walk the remaining symtab alias chain up to the ultimate
	     alias target, marking every intermediate node the same way.  */
	  symtab_node *new_node = node->ultimate_alias_target ();
	  decl = new_node->decl;
	  while (node != new_node)
	    {
	      if (!omp_declare_target_fn_p (node->decl)
		  && !lookup_attribute ("omp declare target host",
					DECL_ATTRIBUTES (node->decl)))
		{
		  node->offloadable = 1;
		  DECL_ATTRIBUTES (node->decl)
		    = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
		}
	      gcc_assert (node->alias && node->analyzed);
	      node = node->get_alias_target ();
	    }
	  node->offloadable = 1;
	  if (ENABLE_OFFLOADING)
	    g->have_offload = true;
	}
      /* Already declare target (explicitly, or from an earlier visit), or
	 explicitly host-only: nothing further to discover here.  */
      if (omp_declare_target_fn_p (decl)
	  || lookup_attribute ("omp declare target host",
			       DECL_ATTRIBUTES (decl)))
	return NULL_TREE;

      /* Queue the function body for scanning (DATA is the worklist vec),
	 then mark the decl itself as implicit declare target.  */
      if (!DECL_EXTERNAL (decl) && DECL_SAVED_TREE (decl))
	((vec<tree> *) data)->safe_push (decl);
      DECL_ATTRIBUTES (decl) = tree_cons (id, NULL_TREE,
					  DECL_ATTRIBUTES (decl));
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  /* else if (TREE_CODE (*tp) == OMP_TARGET)
       {
	 if (tree dev = omp_find_clause (OMP_TARGET_CLAUSES (*tp)))
	   if (OMP_DEVICE_ANCESTOR (dev))
	     *walk_subtrees = 0;
       } */
  return NULL_TREE;
}
277 
278 /* Similarly, but ignore references outside of OMP_TARGET regions.  */
279 
280 static tree
omp_discover_declare_target_fn_r(tree * tp,int * walk_subtrees,void * data)281 omp_discover_declare_target_fn_r (tree *tp, int *walk_subtrees, void *data)
282 {
283   if (TREE_CODE (*tp) == OMP_TARGET)
284     {
285       /* And not OMP_DEVICE_ANCESTOR.  */
286       walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
287 				    omp_discover_declare_target_tgt_fn_r,
288 				    data);
289       *walk_subtrees = 0;
290     }
291   else if (TYPE_P (*tp))
292     *walk_subtrees = 0;
293   return NULL_TREE;
294 }
295 
296 /* Helper function for omp_discover_implicit_declare_target, called through
297    walk_tree.  Mark referenced FUNCTION_DECLs implicitly as
298    declare target to.  */
299 
static tree
omp_discover_declare_target_var_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == FUNCTION_DECL)
    /* Function references are handled by the target-function walker.  */
    return omp_discover_declare_target_tgt_fn_r (tp, walk_subtrees, data);
  else if (VAR_P (*tp)
	   && is_global_var (*tp)
	   && !omp_declare_target_var_p (*tp))
    {
      tree id = get_identifier ("omp declare target");
      /* A "link" variable reached implicitly conflicts with its explicit
	 declare target link clause; diagnose and drop the link attribute.  */
      if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp)))
	{
	  error_at (DECL_SOURCE_LOCATION (*tp),
		    "%qD specified both in declare target %<link%> and "
		    "implicitly in %<to%> clauses", *tp);
	  DECL_ATTRIBUTES (*tp)
	    = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp));
	}
      /* Queue the initializer for scanning (DATA is the worklist vec).  */
      if (TREE_STATIC (*tp) && lang_hooks.decls.omp_get_decl_init (*tp))
	((vec<tree> *) data)->safe_push (*tp);
      DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
      symtab_node *node = symtab_node::get (*tp);
      if (node != NULL && !node->offloadable)
	{
	  node->offloadable = 1;
	  if (ENABLE_OFFLOADING)
	    {
	      g->have_offload = true;
	      if (is_a <varpool_node *> (node))
		vec_safe_push (offload_vars, node->decl);
	    }
	}
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  return NULL_TREE;
}
337 
338 /* Perform the OpenMP implicit declare target to discovery.  */
339 
void
omp_discover_implicit_declare_target (void)
{
  cgraph_node *node;
  varpool_node *vnode;
  auto_vec<tree> worklist;

  /* Seed the worklist with all defined functions (and their nested
     functions) that are either explicit declare target or contain an
     omp target region.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    if (DECL_SAVED_TREE (node->decl))
      {
	struct cgraph_node *cgn;
        if (omp_declare_target_fn_p (node->decl))
	  worklist.safe_push (node->decl);
	else if (DECL_STRUCT_FUNCTION (node->decl)
		 && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
	  worklist.safe_push (node->decl);
	for (cgn = first_nested_function (node);
	     cgn; cgn = next_nested_function (cgn))
	  if (omp_declare_target_fn_p (cgn->decl))
	    worklist.safe_push (cgn->decl);
	  else if (DECL_STRUCT_FUNCTION (cgn->decl)
		   && DECL_STRUCT_FUNCTION (cgn->decl)->has_omp_target)
	    worklist.safe_push (cgn->decl);
      }
  /* Also seed with declare target variables that have initializers.  */
  FOR_EACH_VARIABLE (vnode)
    if (lang_hooks.decls.omp_get_decl_init (vnode->decl)
	&& omp_declare_target_var_p (vnode->decl))
      worklist.safe_push (vnode->decl);
  /* Transitively discover references; the walkers push newly-marked
     decls back onto the worklist until a fixed point is reached.  */
  while (!worklist.is_empty ())
    {
      tree decl = worklist.pop ();
      if (VAR_P (decl))
	walk_tree_without_duplicates (lang_hooks.decls.omp_get_decl_init (decl),
				      omp_discover_declare_target_var_r,
				      &worklist);
      else if (omp_declare_target_fn_p (decl))
	/* Declare target functions: scan the whole body.  */
	walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
				      omp_discover_declare_target_tgt_fn_r,
				      &worklist);
      else
	/* Other functions: scan only their omp target regions.  */
	walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
				      omp_discover_declare_target_fn_r,
				      &worklist);
    }

  lang_hooks.decls.omp_finish_decl_inits ();
}
387 
388 
389 /* Create new symbols containing (address, size) pairs for global variables,
390    marked with "omp declare target" attribute, as well as addresses for the
391    functions, which are outlined offloading regions.  */
void
omp_finish_file (void)
{
  unsigned num_funcs = vec_safe_length (offload_funcs);
  unsigned num_vars = vec_safe_length (offload_vars);

  if (num_funcs == 0 && num_vars == 0)
    return;

  if (targetm_common.have_named_sections)
    {
      /* Emit the tables as arrays placed in dedicated sections; the linker
	 concatenates these across object files.  Each variable needs two
	 slots (address, size), hence num_vars * 2.  */
      vec<constructor_elt, va_gc> *v_f, *v_v;
      vec_alloc (v_f, num_funcs);
      vec_alloc (v_v, num_vars * 2);

      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
      add_decls_addresses_to_decl_constructor (offload_vars, v_v);

      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
						    vec_safe_length (v_v));
      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
						     num_funcs);
      SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      tree ctor_v = build_constructor (vars_decl_type, v_v);
      tree ctor_f = build_constructor (funcs_decl_type, v_f);
      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				    get_identifier (".offload_func_table"),
				    funcs_decl_type);
      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				   get_identifier (".offload_var_table"),
				   vars_decl_type);
      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
	 otherwise a joint table in a binary will contain padding between
	 tables from multiple object files.  */
      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
      SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
      SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
      DECL_INITIAL (funcs_decl) = ctor_f;
      DECL_INITIAL (vars_decl) = ctor_v;
      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);

      varpool_node::finalize_decl (vars_decl);
      varpool_node::finalize_decl (funcs_decl);
    }
  else
    {
      /* No named-section support: hand each symbol to the target hook
	 individually.  */
      for (unsigned i = 0; i < num_funcs; i++)
	{
	  tree it = (*offload_funcs)[i];
	  /* See also add_decls_addresses_to_decl_constructor
	     and output_offload_tables in lto-cgraph.c.  */
	  if (!in_lto_p && !symtab_node::get (it))
	    continue;
	  targetm.record_offload_symbol (it);
	}
      for (unsigned i = 0; i < num_vars; i++)
	{
	  tree it = (*offload_vars)[i];
	  if (!in_lto_p && !symtab_node::get (it))
	    continue;
#ifdef ACCEL_COMPILER
	  /* For "omp declare target link" vars record the link pointer
	     instead of the variable itself.  */
	  if (DECL_HAS_VALUE_EXPR_P (it)
	      && lookup_attribute ("omp declare target link",
				   DECL_ATTRIBUTES (it)))
	    {
	      tree value_expr = DECL_VALUE_EXPR (it);
	      tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
	      targetm.record_offload_symbol (link_ptr_decl);
	      varpool_node::finalize_decl (link_ptr_decl);
	    }
	  else
#endif
	    targetm.record_offload_symbol (it);
	}
    }
}
473 
474 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
475    axis DIM.  Return a tmp var holding the result.  */
476 
477 static tree
oacc_dim_call(bool pos,int dim,gimple_seq * seq)478 oacc_dim_call (bool pos, int dim, gimple_seq *seq)
479 {
480   tree arg = build_int_cst (unsigned_type_node, dim);
481   tree size = create_tmp_var (integer_type_node);
482   enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
483   gimple *call = gimple_build_call_internal (fn, 1, arg);
484 
485   gimple_call_set_lhs (call, size);
486   gimple_seq_add_stmt (seq, call);
487 
488   return size;
489 }
490 
491 /* Find the number of threads (POS = false), or thread number (POS =
492    true) for an OpenACC region partitioned as MASK.  Setup code
493    required for the calculation is added to SEQ.  */
494 
495 static tree
oacc_thread_numbers(bool pos,int mask,gimple_seq * seq)496 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
497 {
498   tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
499   unsigned ix;
500 
501   /* Start at gang level, and examine relevant dimension indices.  */
502   for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
503     if (GOMP_DIM_MASK (ix) & mask)
504       {
505 	if (res)
506 	  {
507 	    /* We had an outer index, so scale that by the size of
508 	       this dimension.  */
509 	    tree n = oacc_dim_call (false, ix, seq);
510 	    res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
511 	  }
512 	if (pos)
513 	  {
514 	    /* Determine index in this dimension.  */
515 	    tree id = oacc_dim_call (true, ix, seq);
516 	    if (res)
517 	      res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
518 	    else
519 	      res = id;
520 	  }
521       }
522 
523   if (res == NULL_TREE)
524     res = integer_zero_node;
525 
526   return res;
527 }
528 
529 /* Transform IFN_GOACC_LOOP calls to actual code.  See
530    expand_oacc_for for where these are generated.  At the vector
531    level, we stride loops, such that each member of a warp will
532    operate on adjacent iterations.  At the worker and gang level,
533    each gang/warp executes a set of contiguous iterations.  Chunking
534    can override this such that each iteration engine executes a
535    contiguous chunk, and then moves on to stride to the next chunk.  */
536 
static void
oacc_xform_loop (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  /* Arg 0 selects which loop quantity this call computes (CHUNKS, STEP,
     OFFSET or BOUND).  */
  enum ifn_goacc_loop_kind code
    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  tree dir = gimple_call_arg (call, 1);
  tree range = gimple_call_arg (call, 2);
  tree step = gimple_call_arg (call, 3);
  tree chunk_size = NULL_TREE;
  unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
  tree lhs = gimple_call_lhs (call);
  tree type = NULL_TREE;
  tree diff_type = TREE_TYPE (range);
  tree r = NULL_TREE;
  gimple_seq seq = NULL;
  bool chunking = false, striding = true;
  unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
  unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)

  /* Skip lowering if return value of IFN_GOACC_LOOP call is not used.  */
  if (!lhs)
    {
      gsi_replace_with_seq (&gsi, seq, true);
      return;
    }

  type = TREE_TYPE (lhs);

#ifdef ACCEL_COMPILER
  /* Only the accelerator compiler honors the chunk-size argument; the
     host compiler keeps the default striding=true, chunking=false.  */
  chunk_size = gimple_call_arg (call, 4);
  if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
      || integer_zerop (chunk_size))   /* Default (also static).  */
    {
      /* If we're at the gang level, we want each to execute a
	 contiguous run of iterations.  Otherwise we want each element
	 to stride.  */
      striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
      chunking = false;
    }
  else
    {
      /* Chunk of size 1 is striding.  */
      striding = integer_onep (chunk_size);
      chunking = !striding;
    }
#endif

  /* striding=true, chunking=true
       -> invalid.
     striding=true, chunking=false
       -> chunks=1
     striding=false,chunking=true
       -> chunks=ceil (range/(chunksize*threads*step))
     striding=false,chunking=false
       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
  push_gimplify_context (true);

  switch (code)
    {
    default: gcc_unreachable ();

    case IFN_GOACC_LOOP_CHUNKS:
      if (!chunking)
	r = build_int_cst (type, 1);
      else
	{
	  /* chunk_max
	     = (range - dir) / (chunks * step * num_threads) + dir  */
	  tree per = oacc_thread_numbers (false, mask, &seq);
	  per = fold_convert (type, per);
	  chunk_size = fold_convert (type, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, step);
	  r = build2 (MINUS_EXPR, type, range, dir);
	  r = build2 (PLUS_EXPR, type, r, per);
	  r = build2 (TRUNC_DIV_EXPR, type, r, per);
	}
      break;

    case IFN_GOACC_LOOP_STEP:
      {
	/* If striding, step by the entire compute volume, otherwise
	   step by the inner volume.  */
	unsigned volume = striding ? mask : inner_mask;

	r = oacc_thread_numbers (false, volume, &seq);
	r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
      }
      break;

    case IFN_GOACC_LOOP_OFFSET:
      /* Enable vectorization on non-SIMT targets.  */
      if (!targetm.simt.vf
	  && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
	  /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	     the loop.  */
	  && (flag_tree_loop_vectorize
	      || !global_options_set.x_flag_tree_loop_vectorize))
	{
	  basic_block bb = gsi_bb (gsi);
	  class loop *parent = bb->loop_father;
	  class loop *body = parent->inner;

	  parent->force_vectorize = true;
	  parent->safelen = INT_MAX;

	  /* "Chunking loops" may have inner loops.  */
	  if (parent->inner)
	    {
	      body->force_vectorize = true;
	      body->safelen = INT_MAX;
	    }

	  cfun->has_force_vectorize_loops = true;
	}
      if (striding)
	{
	  /* Striding: the offset is simply the thread number.  */
	  r = oacc_thread_numbers (true, mask, &seq);
	  r = fold_convert (diff_type, r);
	}
      else
	{
	  /* Contiguous chunks: compute this thread's chunk start from the
	     outer position scaled by the chunk span, plus the inner
	     position; with explicit chunking, also advance by the chunk
	     index (arg 6) times the whole compute volume.  */
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      /* Implicit chunk: ceil (range / (volume * step)).  */
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));
	  r = oacc_thread_numbers (true, outer_mask, &seq);
	  r = fold_convert (diff_type, r);
	  r = build2 (MULT_EXPR, diff_type, r, span);

	  tree inner = oacc_thread_numbers (true, inner_mask, &seq);
	  inner = fold_convert (diff_type, inner);
	  r = fold_build2 (PLUS_EXPR, diff_type, r, inner);

	  if (chunking)
	    {
	      tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
	      tree per
		= fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
	      per = build2 (MULT_EXPR, diff_type, per, chunk);

	      r = build2 (PLUS_EXPR, diff_type, r, per);
	    }
	}
      r = fold_build2 (MULT_EXPR, diff_type, r, step);
      if (type != diff_type)
	r = fold_convert (type, r);
      break;

    case IFN_GOACC_LOOP_BOUND:
      if (striding)
	r = range;
      else
	{
	  /* Bound is the chunk span past this thread's offset (arg 6),
	     clamped to the loop range.  */
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      /* Implicit chunk: ceil (range / (volume * step)).  */
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));

	  r = fold_build2 (MULT_EXPR, diff_type, span, step);

	  tree offset = gimple_call_arg (call, 6);
	  r = build2 (PLUS_EXPR, diff_type, r,
		      fold_convert (diff_type, offset));
	  /* MIN for an upward loop, MAX for a downward one.  */
	  r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
		      diff_type, r, range);
	}
      if (diff_type != type)
	r = fold_convert (type, r);
      break;
    }

  gimplify_assign (lhs, r, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}
746 
747 /* Transform a GOACC_TILE call.  Determines the element loop span for
748    the specified loop of the nest.  This is 1 if we're not tiling.
749 
750    GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element);  */
751 
static void
oacc_xform_tile (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
  /* Inner loops have higher loop_nos.  */
  unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
  tree tile_size = gimple_call_arg (call, 2);
  unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  gimple_seq seq = NULL;
  tree span = build_int_cst (type, 1);

  /* Element loops may only be worker- and/or vector-partitioned.  */
  gcc_assert (!(e_mask
		& ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
		    | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
  push_gimplify_context (!seen_error ());

#ifndef ACCEL_COMPILER
  /* Partitioning disabled on host compilers.  */
  e_mask = 0;
#endif
  if (!e_mask)
    /* Not partitioning.  */
    span = integer_one_node;
  else if (!integer_zerop (tile_size))
    /* User explicitly specified size.  */
    span = tile_size;
  else
    {
      /* Pick a size based on the partitioning of the element loop and
	 the number of loop nests.  */
      tree first_size = NULL_TREE;
      tree second_size = NULL_TREE;

      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
	first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
	second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);

      /* If only worker-partitioned, treat that as the primary size.  */
      if (!first_size)
	{
	  first_size = second_size;
	  second_size = NULL_TREE;
	}

      if (loop_no + 1 == collapse)
	{
	  span = first_size;
	  if (!loop_no && second_size)
	    span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
				span, second_size);
	}
      else if (loop_no + 2 == collapse)
	span = second_size;
      else
	span = NULL_TREE;

      if (!span)
	/* There's no obvious element size for this loop.  Options
	   are 1, first_size or some non-unity constant (32 is my
	   favourite).   We should gather some statistics.  */
	span = first_size;
    }

  span = fold_convert (type, span);
  gimplify_assign (lhs, span, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}
825 
/* Default partitioned and minimum partitioned dimensions, indexed by
   GOMP_DIM_*.  Filled in by oacc_parse_default_dims.  */

static int oacc_default_dims[GOMP_DIM_MAX];
static int oacc_min_dims[GOMP_DIM_MAX];
830 
831 int
oacc_get_default_dim(int dim)832 oacc_get_default_dim (int dim)
833 {
834   gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
835   return oacc_default_dims[dim];
836 }
837 
838 int
oacc_get_min_dim(int dim)839 oacc_get_min_dim (int dim)
840 {
841   gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
842   return oacc_min_dims[dim];
843 }
844 
845 /* Parse the default dimension parameter.  This is a set of
846    :-separated optional compute dimensions.  Each specified dimension
847    is a positive integer.  When device type support is added, it is
848    planned to be a comma separated list of such compute dimensions,
849    with all but the first prefixed by the colon-terminated device
850    type.  */
851 
static void
oacc_parse_default_dims (const char *dims)
{
  int ix;

  /* Reset: default dims unset (-1), minimum dims 1.  */
  for (ix = GOMP_DIM_MAX; ix--;)
    {
      oacc_default_dims[ix] = -1;
      oacc_min_dims[ix] = 1;
    }

#ifndef ACCEL_COMPILER
  /* Cannot be overridden on the host.  */
  dims = NULL;
#endif
  if (dims)
    {
      const char *pos = dims;

      /* Parse up to GOMP_DIM_MAX colon-separated fields; an empty field
	 leaves that dimension at its default of -1.  */
      for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
	{
	  if (ix)
	    {
	      if (*pos != ':')
		goto malformed;
	      pos++;
	    }

	  if (*pos != ':')
	    {
	      long val;
	      const char *eptr;

	      errno = 0;
	      val = strtol (pos, CONST_CAST (char **, &eptr), 10);
	      /* Reject parse errors, non-positive values, and values that
		 do not fit in an int.  */
	      if (errno || val <= 0 || (int) val != val)
		goto malformed;
	      pos = eptr;
	      oacc_default_dims[ix] = (int) val;
	    }
	}
      /* Trailing characters after the last field are also malformed.  */
      if (*pos)
	{
	malformed:
	  error_at (UNKNOWN_LOCATION,
		    "%<-fopenacc-dim%> operand is malformed at %qs", pos);
	}
    }

  /* Allow the backend to validate the dimensions.  */
  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
}
905 
906 /* Validate and update the dimensions for offloaded FN.  ATTRS is the
907    raw attribute.  DIMS is an array of dimensions, which is filled in.
908    LEVEL is the partitioning level of a routine, or -1 for an offload
909    region itself.  USED is the mask of partitioned execution in the
910    function.  */
911 
static void
oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
{
  tree purpose[GOMP_DIM_MAX];
  unsigned ix;
  tree pos = TREE_VALUE (attrs);

  /* Make sure the attribute creator attached the dimension
     information.  */
  gcc_assert (pos);

  /* Extract the per-axis values from the attribute's TREE_LIST chain;
     a missing value means "unspecified" (-1).  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    {
      purpose[ix] = TREE_PURPOSE (pos);
      tree val = TREE_VALUE (pos);
      dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
      pos = TREE_CHAIN (pos);
    }

  /* Let the backend adjust/validate; it reports whether it changed
     anything.  */
  bool changed = targetm.goacc.validate_dims (fn, dims, level, used);

  /* Default anything left to 1 or a partitioned default.  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    if (dims[ix] < 0)
      {
	/* The OpenACC spec says 'If the [num_gangs] clause is not
	   specified, an implementation-defined default will be used;
	   the default may depend on the code within the construct.'
	   (2.5.6).  Thus an implementation is free to choose
	   non-unity default for a parallel region that doesn't have
	   any gang-partitioned loops.  However, it appears that there
	   is a sufficient body of user code that expects non-gang
	   partitioned regions to not execute in gang-redundant mode.
	   So we (a) don't warn about the non-portability and (b) pick
	   the minimum permissible dimension size when there is no
	   partitioned execution.  Otherwise we pick the global
	   default for the dimension, which the user can control.  The
	   same wording and logic applies to num_workers and
	   vector_length, however the worker- or vector- single
	   execution doesn't have the same impact as gang-redundant
	   execution.  (If the minimum gang-level partitioning is not 1,
	   the target is probably too confusing.)  */
	dims[ix] = (used & GOMP_DIM_MASK (ix)
		    ? oacc_default_dims[ix] : oacc_min_dims[ix]);
	changed = true;
      }

  if (changed)
    {
      /* Replace the attribute with new values.  Building the list back
	 to front preserves the original axis order.  */
      pos = NULL_TREE;
      for (ix = GOMP_DIM_MAX; ix--;)
	pos = tree_cons (purpose[ix],
			 build_int_cst (integer_type_node, dims[ix]), pos);
      oacc_replace_fn_attrib (fn, pos);
    }
}
969 
970 /* Create an empty OpenACC loop structure at LOC.  */
971 
972 static oacc_loop *
new_oacc_loop_raw(oacc_loop * parent,location_t loc)973 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
974 {
975   oacc_loop *loop = XCNEW (oacc_loop);
976 
977   loop->parent = parent;
978 
979   if (parent)
980     {
981       loop->sibling = parent->child;
982       parent->child = loop;
983     }
984 
985   loop->loc = loc;
986   return loop;
987 }
988 
989 /* Create an outermost, dummy OpenACC loop for offloaded function
990    DECL.  */
991 
992 static oacc_loop *
new_oacc_loop_outer(tree decl)993 new_oacc_loop_outer (tree decl)
994 {
995   return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
996 }
997 
/* Start a new OpenACC loop structure beginning at head marker HEAD.
   Link into PARENT loop.  Return the new loop.  */
1000 
1001 static oacc_loop *
new_oacc_loop(oacc_loop * parent,gcall * marker)1002 new_oacc_loop (oacc_loop *parent, gcall *marker)
1003 {
1004   oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
1005 
1006   loop->marker = marker;
1007 
1008   /* TODO: This is where device_type flattening would occur for the loop
1009      flags.  */
1010 
1011   loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
1012 
1013   tree chunk_size = integer_zero_node;
1014   if (loop->flags & OLF_GANG_STATIC)
1015     chunk_size = gimple_call_arg (marker, 4);
1016   loop->chunk_size = chunk_size;
1017 
1018   return loop;
1019 }
1020 
/* Create a dummy loop encompassing a call to an OpenACC routine.
   Extract the routine's partitioning requirements.  */
1023 
1024 static void
new_oacc_loop_routine(oacc_loop * parent,gcall * call,tree decl,tree attrs)1025 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
1026 {
1027   oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
1028   int level = oacc_fn_attrib_level (attrs);
1029 
1030   gcc_assert (level >= 0);
1031 
1032   loop->marker = call;
1033   loop->routine = decl;
1034   loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
1035 		^ (GOMP_DIM_MASK (level) - 1));
1036 }
1037 
1038 /* Finish off the current OpenACC loop ending at tail marker TAIL.
1039    Return the parent loop.  */
1040 
1041 static oacc_loop *
finish_oacc_loop(oacc_loop * loop)1042 finish_oacc_loop (oacc_loop *loop)
1043 {
1044   /* If the loop has been collapsed, don't partition it.  */
1045   if (loop->ifns.is_empty ())
1046     loop->mask = loop->flags = 0;
1047   return loop->parent;
1048 }
1049 
1050 /* Free all OpenACC loop structures within LOOP (inclusive).  */
1051 
1052 static void
free_oacc_loop(oacc_loop * loop)1053 free_oacc_loop (oacc_loop *loop)
1054 {
1055   if (loop->sibling)
1056     free_oacc_loop (loop->sibling);
1057   if (loop->child)
1058     free_oacc_loop (loop->child);
1059 
1060   loop->ifns.release ();
1061   free (loop);
1062 }
1063 
1064 /* Dump out the OpenACC loop head or tail beginning at FROM.  */
1065 
static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
		     const char *title, int level)
{
  /* FROM is a head or tail marker; KIND records which, so we can stop
     at the next marker of the same kind.  */
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));

  fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind) TREE_INT_CST_LOW
	       (gimple_call_arg (stmt, 0)));

	  /* Another marker of the same kind ends this part.  */
	  if (k == kind && stmt != from)
	    break;
	}
      print_gimple_stmt (file, stmt, depth * 2 + 2);

      gsi_next (&gsi);
      /* The sequence may span several basic blocks, each with a single
	 successor; follow the chain when we run off the end of one.  */
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
1094 
1095 /* Dump OpenACC loop LOOP, its children, and its siblings.  */
1096 
static void
dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
{
  int ix;

  /* Loop header: flags, partitioning mask and source location.  */
  fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
	   loop->flags, loop->mask,
	   LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));

  if (loop->marker)
    print_gimple_stmt (file, loop->marker, depth * 2);

  if (loop->routine)
    fprintf (file, "%*sRoutine %s:%u:%s\n",
	     depth * 2, "", DECL_SOURCE_FILE (loop->routine),
	     DECL_SOURCE_LINE (loop->routine),
	     IDENTIFIER_POINTER (DECL_NAME (loop->routine)));

  /* Heads outermost-first, tails innermost-first, mirroring their
     nesting in the IL.  */
  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (loop->heads[ix])
      dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
  for (ix = GOMP_DIM_MAX; ix--;)
    if (loop->tails[ix])
      dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);

  /* Recurse into children (indented) and then siblings (same depth).  */
  if (loop->child)
    dump_oacc_loop (file, loop->child, depth + 1);
  if (loop->sibling)
    dump_oacc_loop (file, loop->sibling, depth);
}
1127 
1128 void debug_oacc_loop (oacc_loop *);
1129 
1130 /* Dump loops to stderr.  */
1131 
DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
  /* Convenience entry point intended to be called from the debugger.  */
  dump_oacc_loop (stderr, loop, 0);
}
1137 
1138 /* Provide diagnostics on OpenACC loop LOOP, its children, and its
1139    siblings.  */
1140 
1141 static void
inform_oacc_loop(const oacc_loop * loop)1142 inform_oacc_loop (const oacc_loop *loop)
1143 {
1144   const char *gang
1145     = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
1146   const char *worker
1147     = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
1148   const char *vector
1149     = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
1150   const char *seq = loop->mask == 0 ? " seq" : "";
1151   const dump_user_location_t loc
1152     = dump_user_location_t::from_location_t (loop->loc);
1153   dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
1154 		   "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
1155 		   vector, seq);
1156 
1157   if (loop->child)
1158     inform_oacc_loop (loop->child);
1159   if (loop->sibling)
1160     inform_oacc_loop (loop->sibling);
1161 }
1162 
1163 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
1164    structures as we go.  By construction these loops are properly
1165    nested.  */
1166 
static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  /* MARKER counts the head/tail markers seen in the current marker
     sequence; REMAINING counts how many are still expected.  Both zero
     means we are between sequences.  */
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
	continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
	if (tree attrs = oacc_get_fn_attrib (decl))
	  {
	    /* Routine calls must not appear inside a marker sequence.  */
	    gcc_assert (!marker);
	    new_oacc_loop_routine (loop, call, decl, attrs);
	  }

      if (!gimple_call_internal_p (call))
	continue;

      switch (gimple_call_internal_fn (call))
	{
	default:
	  break;

	case IFN_GOACC_LOOP:
	case IFN_GOACC_TILE:
	  /* Record the abstraction function, so we can manipulate it
	     later.  */
	  loop->ifns.safe_push (call);
	  break;

	case IFN_UNIQUE:
	  enum ifn_unique_kind kind
	    = (enum ifn_unique_kind) (TREE_INT_CST_LOW
				      (gimple_call_arg (call, 0)));
	  if (kind == IFN_UNIQUE_OACC_HEAD_MARK
	      || kind == IFN_UNIQUE_OACC_TAIL_MARK)
	    {
	      /* A 2-argument marker terminates the sequence; the longer
		 form carries the sequence length in argument 2.  */
	      if (gimple_call_num_args (call) == 2)
		{
		  gcc_assert (marker && !remaining);
		  marker = 0;
		  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
		    loop = finish_oacc_loop (loop);
		  else
		    loop->head_end = call;
		}
	      else
		{
		  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

		  /* First marker of a sequence: open a new loop for a
		     head, and latch the expected count.  */
		  if (!marker)
		    {
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop = new_oacc_loop (loop, call);
		      remaining = count;
		    }
		  /* All markers of one sequence carry the same count.  */
		  gcc_assert (count == remaining);
		  if (remaining)
		    {
		      remaining--;
		      /* Heads fill outermost-first, tails fill
			 innermost-first.  */
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop->heads[marker] = call;
		      else
			loop->tails[remaining] = call;
		    }
		  marker++;
		}
	    }
	}
    }
  /* An unterminated marker sequence continues in the (unique)
     fallthrough successor block.  */
  if (remaining || marker)
    {
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}
1267 
1268 /* LOOP is the first sibling.  Reverse the order in place and return
1269    the new first sibling.  Recurse to child loops.  */
1270 
1271 static oacc_loop *
oacc_loop_sibling_nreverse(oacc_loop * loop)1272 oacc_loop_sibling_nreverse (oacc_loop *loop)
1273 {
1274   oacc_loop *last = NULL;
1275   do
1276     {
1277       if (loop->child)
1278 	loop->child = oacc_loop_sibling_nreverse (loop->child);
1279 
1280       oacc_loop *next = loop->sibling;
1281       loop->sibling = last;
1282       last = loop;
1283       loop = next;
1284     }
1285   while (loop);
1286 
1287   return last;
1288 }
1289 
1290 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
1291    the current function.  */
1292 
1293 static oacc_loop *
oacc_loop_discovery()1294 oacc_loop_discovery ()
1295 {
1296   /* Clear basic block flags, in particular BB_VISITED which we're going to use
1297      in the following.  */
1298   clear_bb_flags ();
1299 
1300   oacc_loop *top = new_oacc_loop_outer (current_function_decl);
1301   oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
1302 
1303   /* The siblings were constructed in reverse order, reverse them so
1304      that diagnostics come out in an unsurprising order.  */
1305   top = oacc_loop_sibling_nreverse (top);
1306 
1307   return top;
1308 }
1309 
1310 /* Transform the abstract internal function markers starting at FROM
1311    to be for partitioning level LEVEL.  Stop when we meet another HEAD
1312    or TAIL  marker.  */
1313 
static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  /* FROM is a head or tail marker; KIND tells us which, so we know
     when the sequence ends.  */
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind)
	       TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	  /* Patch the axis argument of fork/join; stop at the next
	     marker of the same kind as FROM.  */
	  if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
	    *gimple_call_arg_ptr (stmt, 2) = replacement;
	  else if (k == kind && stmt != from)
	    break;
	}
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
	/* Reductions carry their partitioning level in argument 3.  */
	*gimple_call_arg_ptr (stmt, 3) = replacement;

      gsi_next (&gsi);
      /* The marker sequence may span blocks with single successors;
	 follow the chain when we run off the end of one.  */
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
1344 
1345 /* Process the discovered OpenACC loops, setting the correct
1346    partitioning level etc.  */
1347 
static void
oacc_loop_process (oacc_loop *loop)
{
  /* Process innermost loops first.  */
  if (loop->child)
    oacc_loop_process (loop->child);

  /* Routines were already assigned their mask at discovery time;
     only partitioned non-routine loops need rewriting here.  */
  if (loop->mask && !loop->routine)
    {
      int ix;
      tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
      tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
      tree chunk_arg = loop->chunk_size;
      gcall *call;

      /* Fill in the now-known mask and chunk arguments of the
	 recorded abstraction functions.  */
      for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
	switch (gimple_call_internal_fn (call))
	  {
	  case IFN_GOACC_LOOP:
	    {
	      /* A -1 placeholder in argument 5 flags the element-loop
		 variant, which takes the element mask instead.  */
	      bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
	      gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
	      if (!is_e)
		gimple_call_set_arg (call, 4, chunk_arg);
	    }
	    break;

	  case IFN_GOACC_TILE:
	    gimple_call_set_arg (call, 3, mask_arg);
	    gimple_call_set_arg (call, 4, e_mask_arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }

      /* Assign the concrete axis to each head/tail pair, outermost
	 axis to the outermost pair.  */
      unsigned dim = GOMP_DIM_GANG;
      unsigned mask = loop->mask | loop->e_mask;
      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
	{
	  while (!(GOMP_DIM_MASK (dim) & mask))
	    dim++;

	  oacc_loop_xform_head_tail (loop->heads[ix], dim);
	  oacc_loop_xform_head_tail (loop->tails[ix], dim);

	  mask ^= GOMP_DIM_MASK (dim);
	}
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling);
}
1400 
/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */
1406 
static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;
      bool tiling = (loop->flags & OLF_TILE) != 0;

      /* The explicitly requested axes live in the OLF_DIM_BASE bits
	 of the loop flags.  */
      this_mask = ((loop->flags >> OLF_DIM_BASE)
		   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      /* Apply auto partitioning if this is a non-partitioned regular
	 loop, or (no more than) single axis tiled loop.  */
      bool maybe_auto
	= !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

      /* At most one of: explicit axes, 'auto', 'seq'.  */
      if ((this_mask != 0) + auto_par + seq_par > 1)
	{
	  if (noisy)
	    error_at (loop->loc,
		      seq_par
		      ? G_("%<seq%> overrides other OpenACC loop specifiers")
		      : G_("%<auto%> conflicts with other OpenACC loop "
			   "specifiers"));
	  maybe_auto = false;
	  loop->flags &= ~OLF_AUTO;
	  /* 'seq' wins: drop the explicit axes.  */
	  if (seq_par)
	    {
	      loop->flags
		&= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
	      this_mask = 0;
	    }
	}

      /* An independent loop with no (conflicting) explicit axes is a
	 candidate for auto partitioning; flag that with the
	 GOMP_DIM_MAX bit for the caller.  */
      if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
	{
	  loop->flags |= OLF_AUTO;
	  mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
	}
    }

  /* Reusing an axis already claimed by a containing loop/routine is an
     error; find the offender for the diagnostic, then drop the
     conflicting axes.  */
  if (this_mask & outer_mask)
    {
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
	if ((outer->mask | outer->e_mask) & this_mask)
	  break;

      if (noisy)
	{
	  if (outer)
	    {
	      error_at (loop->loc,
			loop->routine
			? G_("routine call uses same OpenACC parallelism"
			     " as containing loop")
			: G_("inner loop uses same OpenACC parallelism"
			     " as containing loop"));
	      inform (outer->loc, "containing loop here");
	    }
	  else
	    /* No containing loop claims the axis, so the conflict comes
	       from the partitioning level of the containing routine.  */
	    error_at (loop->loc,
		      loop->routine
		      ? G_("routine call uses OpenACC parallelism disallowed"
			   " by containing routine")
		      : G_("loop uses OpenACC parallelism disallowed"
			   " by containing routine"));

	  if (loop->routine)
	    inform (DECL_SOURCE_LOCATION (loop->routine),
		    "routine %qD declared here", loop->routine);
	}
      this_mask &= ~outer_mask;
    }
  else
    {
      /* Check nesting order: this loop's outermost axis must be inner
	 to everything already claimed outside.  */
      unsigned outermost = least_bit_hwi (this_mask);

      if (outermost && outermost <= outer_mask)
	{
	  if (noisy)
	    {
	      error_at (loop->loc,
			"incorrectly nested OpenACC loop parallelism");

	      const oacc_loop *outer;
	      for (outer = loop->parent;
		   outer->flags && outer->flags < outermost;
		   outer = outer->parent)
		continue;
	      inform (outer->loc, "containing loop here");
	    }

	  this_mask &= ~outermost;
	}
    }

  mask_all |= this_mask;

  if (loop->flags & OLF_TILE)
    {
      /* When tiling, vector goes to the element loop, and failing
	 that we put worker there.  The std doesn't contemplate
	 specifying all three.  We choose to put worker and vector on
	 the element loops in that case.  */
      unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
      if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
	this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);

      loop->e_mask = this_e_mask;
      this_mask ^= this_e_mask;
    }

  loop->mask = this_mask;

  if (dump_file)
    fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  /* Children see our axes as taken; record what they claim in
     loop->inner for the auto-partitioner.  */
  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
      loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}
1550 
/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */
1556 
static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
			   bool outer_assign)
{
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;
  bool tiling = loop->flags & OLF_TILE;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && (!outer_assign || loop->inner))
    {
      /* Allocate outermost and non-innermost loops at the outermost
	 non-innermost available level.  */
      unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);

      /* Find the first outermost available partition. */
      while (this_mask <= outer_mask)
	this_mask <<= 1;

      /* Grab two axes if tiling, and we've not assigned anything  */
      if (tiling && !(loop->mask | loop->e_mask))
	this_mask |= this_mask << 1;

      /* Prohibit the innermost partitioning at the moment.  */
      this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;

      /* Don't use any dimension explicitly claimed by an inner loop. */
      this_mask &= ~loop->inner;

      if (tiling && !loop->e_mask)
	{
	  /* If we got two axes, allocate the inner one to the element
	     loop.  */
	  loop->e_mask = this_mask & (this_mask << 1);
	  this_mask ^= loop->e_mask;
	}

      loop->mask |= this_mask;
    }

  /* Recurse into children with our (possibly newly assigned) axes
     excluded from their choices.  */
  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
      loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
					       outer_assign | assign);
    }

  if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
    {
      /* Allocate the loop at the innermost available level.  Note
	 that we do this even if we already assigned this loop the
	 outermost available level above.  That way we'll partition
	 this along 2 axes, if they are available.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one use by an outer loop.  */
      this_mask &= ~outer_mask;

      /* If tiling and we failed completely above, grab the next one
	 too.  Making sure it doesn't hit an outer loop.  */
      if (tiling)
	{
	  this_mask &= ~(loop->e_mask | loop->mask);
	  unsigned tile_mask = ((this_mask >> 1)
				& ~(outer_mask | loop->e_mask | loop->mask));

	  if (tile_mask || loop->mask)
	    {
	      loop->e_mask |= this_mask;
	      this_mask = tile_mask;
	    }
	  if (!loop->e_mask && noisy)
	    warning_at (loop->loc, 0,
			"insufficient partitioning available"
			" to parallelize element loop");
	}

      loop->mask |= this_mask;
      if (!loop->mask && noisy)
	warning_at (loop->loc, 0,
		    tiling
		    ? G_("insufficient partitioning available"
			 " to parallelize tile loop")
		    : G_("insufficient partitioning available"
			 " to parallelize loop"));
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  /* Accumulate everything used by this loop, its children and its
     siblings for the caller.  */
  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling,
					     outer_mask, outer_assign);

  inner_mask |= loop->inner | loop->mask | loop->e_mask;

  return inner_mask;
}
1671 
/* Walk the OpenACC loop hierarchy to check and assign partitioning
   axes.  Return mask of partitioning.  */
1674 
1675 static unsigned
oacc_loop_partition(oacc_loop * loop,unsigned outer_mask)1676 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1677 {
1678   unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1679 
1680   if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1681     {
1682       mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
1683       mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
1684     }
1685   return mask_all;
1686 }
1687 
1688 /* Default fork/join early expander.  Delete the function calls if
1689    there is no RTL expander.  */
1690 
1691 bool
default_goacc_fork_join(gcall * ARG_UNUSED (call),const int * ARG_UNUSED (dims),bool is_fork)1692 default_goacc_fork_join (gcall *ARG_UNUSED (call),
1693 			 const int *ARG_UNUSED (dims), bool is_fork)
1694 {
1695   if (is_fork)
1696     return targetm.have_oacc_fork ();
1697   else
1698     return targetm.have_oacc_join ();
1699 }
1700 
1701 /* Default goacc.reduction early expander.
1702 
1703    LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1704    If RES_PTR is not integer-zerop:
1705        SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1706        TEARDOWN - emit '*RES_PTR = VAR'
1707    If LHS is not NULL
1708        emit 'LHS = VAR'   */
1709 
void
default_goacc_reduction (gcall *call)
{
  unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
	 if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
	{
	  tree dst = build_simple_mem_ref (ref_to_res);
	  tree src = var;

	  /* SETUP copies the other direction: *RES_PTR into LHS, and
	     the final 'LHS = VAR' below is then suppressed.  */
	  if (code == IFN_GOACC_REDUCTION_SETUP)
	    {
	      src = dst;
	      dst = lhs;
	      lhs = NULL;
	    }
	  gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
	}
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  /* Replace the IFN call with the (possibly empty) assignment
     sequence.  */
  gsi_replace_with_seq (&gsi, seq, true);
}
1747 
1748 /* Main entry point for oacc transformations which run on the device
1749    compiler after LTO, so we know what the target device is at this
1750    point (including the host fallback).  */
1751 
1752 static unsigned int
execute_oacc_device_lower()1753 execute_oacc_device_lower ()
1754 {
1755   tree attrs = oacc_get_fn_attrib (current_function_decl);
1756 
1757   if (!attrs)
1758     /* Not an offloaded function.  */
1759     return 0;
1760 
1761   /* Parse the default dim argument exactly once.  */
1762   if ((const void *)flag_openacc_dims != &flag_openacc_dims)
1763     {
1764       oacc_parse_default_dims (flag_openacc_dims);
1765       flag_openacc_dims = (char *)&flag_openacc_dims;
1766     }
1767 
1768   bool is_oacc_parallel
1769     = (lookup_attribute ("oacc parallel",
1770 			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1771   bool is_oacc_kernels
1772     = (lookup_attribute ("oacc kernels",
1773 			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1774   bool is_oacc_serial
1775     = (lookup_attribute ("oacc serial",
1776 			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1777   bool is_oacc_parallel_kernels_parallelized
1778     = (lookup_attribute ("oacc parallel_kernels_parallelized",
1779 			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1780   bool is_oacc_parallel_kernels_gang_single
1781     = (lookup_attribute ("oacc parallel_kernels_gang_single",
1782 			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1783   int fn_level = oacc_fn_attrib_level (attrs);
1784   bool is_oacc_routine = (fn_level >= 0);
1785   gcc_checking_assert (is_oacc_parallel
1786 		       + is_oacc_kernels
1787 		       + is_oacc_serial
1788 		       + is_oacc_parallel_kernels_parallelized
1789 		       + is_oacc_parallel_kernels_gang_single
1790 		       + is_oacc_routine
1791 		       == 1);
1792 
1793   bool is_oacc_kernels_parallelized
1794     = (lookup_attribute ("oacc kernels parallelized",
1795 			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1796   if (is_oacc_kernels_parallelized)
1797     gcc_checking_assert (is_oacc_kernels);
1798 
1799   if (dump_file)
1800     {
1801       if (is_oacc_parallel)
1802 	fprintf (dump_file, "Function is OpenACC parallel offload\n");
1803       else if (is_oacc_kernels)
1804 	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1805 		 (is_oacc_kernels_parallelized
1806 		  ? "parallelized" : "unparallelized"));
1807       else if (is_oacc_serial)
1808 	fprintf (dump_file, "Function is OpenACC serial offload\n");
1809       else if (is_oacc_parallel_kernels_parallelized)
1810 	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1811 		 "parallel_kernels_parallelized");
1812       else if (is_oacc_parallel_kernels_gang_single)
1813 	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1814 		 "parallel_kernels_gang_single");
1815       else if (is_oacc_routine)
1816 	fprintf (dump_file, "Function is OpenACC routine level %d\n",
1817 		 fn_level);
1818       else
1819 	gcc_unreachable ();
1820     }
1821 
1822   /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
1823      kernels, so remove the parallelism dimensions function attributes
1824      potentially set earlier on.  */
1825   if (is_oacc_kernels && !is_oacc_kernels_parallelized)
1826     {
1827       oacc_set_fn_attrib (current_function_decl, NULL, NULL);
1828       attrs = oacc_get_fn_attrib (current_function_decl);
1829     }
1830 
1831   /* Discover, partition and process the loops.  */
1832   oacc_loop *loops = oacc_loop_discovery ();
1833 
1834   unsigned outer_mask = 0;
1835   if (is_oacc_routine)
1836     outer_mask = GOMP_DIM_MASK (fn_level) - 1;
1837   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
1838   /* OpenACC kernels constructs are special: they currently don't use the
1839      generic oacc_loop infrastructure and attribute/dimension processing.  */
1840   if (is_oacc_kernels && is_oacc_kernels_parallelized)
1841     {
1842       /* Parallelized OpenACC kernels constructs use gang parallelism.  See
1843 	 also tree-parloops.c:create_parallel_loop.  */
1844       used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
1845     }
1846 
1847   int dims[GOMP_DIM_MAX];
1848   oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
1849 
1850   if (dump_file)
1851     {
1852       const char *comma = "Compute dimensions [";
1853       for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
1854 	fprintf (dump_file, "%s%d", comma, dims[ix]);
1855       fprintf (dump_file, "]\n");
1856     }
1857 
1858   /* Verify that for OpenACC 'kernels' decomposed "gang-single" parts we launch
1859      a single gang only.  */
1860   if (is_oacc_parallel_kernels_gang_single)
1861     gcc_checking_assert (dims[GOMP_DIM_GANG] == 1);
1862 
1863   oacc_loop_process (loops);
1864   if (dump_file)
1865     {
1866       fprintf (dump_file, "OpenACC loops\n");
1867       dump_oacc_loop (dump_file, loops, 0);
1868       fprintf (dump_file, "\n");
1869     }
1870   if (dump_enabled_p ())
1871     {
1872       oacc_loop *l = loops;
1873       /* OpenACC kernels constructs are special: they currently don't use the
1874 	 generic oacc_loop infrastructure.  */
1875       if (is_oacc_kernels)
1876 	{
1877 	  /* Create a fake oacc_loop for diagnostic purposes.  */
1878 	  l = new_oacc_loop_raw (NULL,
1879 				 DECL_SOURCE_LOCATION (current_function_decl));
1880 	  l->mask = used_mask;
1881 	}
1882       else
1883 	{
1884 	  /* Skip the outermost, dummy OpenACC loop  */
1885 	  l = l->child;
1886 	}
1887       if (l)
1888 	inform_oacc_loop (l);
1889       if (is_oacc_kernels)
1890 	free_oacc_loop (l);
1891     }
1892 
1893   /* Offloaded targets may introduce new basic blocks, which require
1894      dominance information to update SSA.  */
1895   calculate_dominance_info (CDI_DOMINATORS);
1896 
1897   /* Now lower internal loop functions to target-specific code
1898      sequences.  */
1899   basic_block bb;
1900   FOR_ALL_BB_FN (bb, cfun)
1901     for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1902       {
1903 	gimple *stmt = gsi_stmt (gsi);
1904 	if (!is_gimple_call (stmt))
1905 	  {
1906 	    gsi_next (&gsi);
1907 	    continue;
1908 	  }
1909 
1910 	gcall *call = as_a <gcall *> (stmt);
1911 	if (!gimple_call_internal_p (call))
1912 	  {
1913 	    gsi_next (&gsi);
1914 	    continue;
1915 	  }
1916 
1917 	/* Rewind to allow rescan.  */
1918 	gsi_prev (&gsi);
1919 	bool rescan = false, remove = false;
1920 	enum  internal_fn ifn_code = gimple_call_internal_fn (call);
1921 
1922 	switch (ifn_code)
1923 	  {
1924 	  default: break;
1925 
1926 	  case IFN_GOACC_TILE:
1927 	    oacc_xform_tile (call);
1928 	    rescan = true;
1929 	    break;
1930 
1931 	  case IFN_GOACC_LOOP:
1932 	    oacc_xform_loop (call);
1933 	    rescan = true;
1934 	    break;
1935 
1936 	  case IFN_GOACC_REDUCTION:
1937 	    /* Mark the function for SSA renaming.  */
1938 	    mark_virtual_operands_for_renaming (cfun);
1939 
1940 	    /* If the level is -1, this ended up being an unused
1941 	       axis.  Handle as a default.  */
1942 	    if (integer_minus_onep (gimple_call_arg (call, 3)))
1943 	      default_goacc_reduction (call);
1944 	    else
1945 	      targetm.goacc.reduction (call);
1946 	    rescan = true;
1947 	    break;
1948 
1949 	  case IFN_UNIQUE:
1950 	    {
1951 	      enum ifn_unique_kind kind
1952 		= ((enum ifn_unique_kind)
1953 		   TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
1954 
1955 	      switch (kind)
1956 		{
1957 		default:
1958 		  break;
1959 
1960 		case IFN_UNIQUE_OACC_FORK:
1961 		case IFN_UNIQUE_OACC_JOIN:
1962 		  if (integer_minus_onep (gimple_call_arg (call, 2)))
1963 		    remove = true;
1964 		  else if (!targetm.goacc.fork_join
1965 			   (call, dims, kind == IFN_UNIQUE_OACC_FORK))
1966 		    remove = true;
1967 		  break;
1968 
1969 		case IFN_UNIQUE_OACC_HEAD_MARK:
1970 		case IFN_UNIQUE_OACC_TAIL_MARK:
1971 		  remove = true;
1972 		  break;
1973 		}
1974 	      break;
1975 	    }
1976 	  }
1977 
1978 	if (gsi_end_p (gsi))
1979 	  /* We rewound past the beginning of the BB.  */
1980 	  gsi = gsi_start_bb (bb);
1981 	else
1982 	  /* Undo the rewind.  */
1983 	  gsi_next (&gsi);
1984 
1985 	if (remove)
1986 	  {
1987 	    if (gimple_vdef (call))
1988 	      replace_uses_by (gimple_vdef (call), gimple_vuse (call));
1989 	    if (gimple_call_lhs (call))
1990 	      {
1991 		/* Propagate the data dependency var.  */
1992 		gimple *ass = gimple_build_assign (gimple_call_lhs (call),
1993 						   gimple_call_arg (call, 1));
1994 		gsi_replace (&gsi, ass,  false);
1995 	      }
1996 	    else
1997 	      gsi_remove (&gsi, true);
1998 	  }
1999 	else if (!rescan)
2000 	  /* If not rescanning, advance over the call.  */
2001 	  gsi_next (&gsi);
2002       }
2003 
2004   free_oacc_loop (loops);
2005 
2006   return 0;
2007 }
2008 
2009 /* Default launch dimension validator.  Force everything to 1.  A
2010    backend that wants to provide larger dimensions must override this
2011    hook.  */
2012 
2013 bool
default_goacc_validate_dims(tree ARG_UNUSED (decl),int * dims,int ARG_UNUSED (fn_level),unsigned ARG_UNUSED (used))2014 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
2015 			     int ARG_UNUSED (fn_level),
2016 			     unsigned ARG_UNUSED (used))
2017 {
2018   bool changed = false;
2019 
2020   for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
2021     {
2022       if (dims[ix] != 1)
2023 	{
2024 	  dims[ix] = 1;
2025 	  changed = true;
2026 	}
2027     }
2028 
2029   return changed;
2030 }
2031 
2032 /* Default dimension bound is unknown on accelerator and 1 on host.  */
2033 
2034 int
default_goacc_dim_limit(int ARG_UNUSED (axis))2035 default_goacc_dim_limit (int ARG_UNUSED (axis))
2036 {
2037 #ifdef ACCEL_COMPILER
2038   return 0;
2039 #else
2040   return 1;
2041 #endif
2042 }
2043 
2044 namespace {
2045 
/* Pass descriptor for the OpenACC device lowering pass ("oaccdevlow").  */

const pass_data pass_data_oacc_device_lower =
{
  GIMPLE_PASS, /* type */
  "oaccdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};
2058 
2059 class pass_oacc_device_lower : public gimple_opt_pass
2060 {
2061 public:
pass_oacc_device_lower(gcc::context * ctxt)2062   pass_oacc_device_lower (gcc::context *ctxt)
2063     : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
2064   {}
2065 
2066   /* opt_pass methods: */
gate(function *)2067   virtual bool gate (function *) { return flag_openacc; };
2068 
execute(function *)2069   virtual unsigned int execute (function *)
2070     {
2071       return execute_oacc_device_lower ();
2072     }
2073 
2074 }; // class pass_oacc_device_lower
2075 
2076 } // anon namespace
2077 
/* Instantiate the OpenACC device lowering pass.  */

gimple_opt_pass *
make_pass_oacc_device_lower (gcc::context *ctxt)
{
  return new pass_oacc_device_lower (ctxt);
}
2083 
2084 
2085 /* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
2086    GOMP_SIMT_ENTER call identifying the privatized variables, which are
2087    turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
2088    Set *REGIMPLIFY to true, except if no privatized variables were seen.  */
2089 
static void
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
{
  gimple *alloc_stmt = gsi_stmt (*gsi);
  /* LHS of the allocation call: pointer to the privatization record.  */
  tree simtrec = gimple_call_lhs (alloc_stmt);
  tree simduid = gimple_call_arg (alloc_stmt, 0);
  gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
  gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
  /* Build an artificial record type; each privatized variable becomes a
     field of it.  */
  tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
  TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
  TREE_ADDRESSABLE (rectype) = 1;
  TREE_TYPE (simtrec) = build_pointer_type (rectype);
  /* Arguments 1..N of the GOMP_SIMT_ENTER call are the addresses of the
     privatized variables; NULL entries are skipped.  */
  for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
    {
      tree *argp = gimple_call_arg_ptr (enter_stmt, i);
      if (*argp == null_pointer_node)
	continue;
      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
		  && VAR_P (TREE_OPERAND (*argp, 0)));
      tree var = TREE_OPERAND (*argp, 0);

      /* Mirror the variable's name, alignment and volatility on the
	 corresponding field.  */
      tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
			       DECL_NAME (var), TREE_TYPE (var));
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);

      insert_field_into_struct (rectype, field);

      /* Redirect uses of VAR to SIMTREC->FIELD via a DECL_VALUE_EXPR;
	 the caller must regimplify to materialize the substitution.  */
      tree t = build_simple_mem_ref (simtrec);
      t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
      TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
      SET_DECL_VALUE_EXPR (var, t);
      DECL_HAS_VALUE_EXPR_P (var) = 1;
      *regimplify = true;
    }
  layout_type (rectype);
  tree size = TYPE_SIZE_UNIT (rectype);
  tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));

  /* Rebuild the allocation call so it receives the now-known size and
     alignment of the record.  */
  alloc_stmt
    = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
  gimple_call_set_lhs (alloc_stmt, simtrec);
  gsi_replace (gsi, alloc_stmt, false);
  /* The GOMP_SIMT_ENTER call has served its purpose; reduce it to a plain
     copy of its first argument into SIMDUID.  */
  gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
  enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
  gsi_replace (&enter_gsi, enter_stmt, false);

  use_operand_p use;
  gimple *exit_stmt;
  if (single_imm_use (simtrec, &use, &exit_stmt))
    {
      /* Clobber the record just before the matching GOMP_SIMT_EXIT call,
	 marking the end of its lifetime.  */
      gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
      gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
      tree clobber = build_clobber (rectype);
      exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
      gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
    }
  else
    /* No exit use: the record pointer must then be entirely unused.  */
    gcc_checking_assert (has_zero_uses (simtrec));
}
2151 
2152 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables.  */
2153 
2154 static tree
find_simtpriv_var_op(tree * tp,int * walk_subtrees,void *)2155 find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
2156 {
2157   tree t = *tp;
2158 
2159   if (VAR_P (t)
2160       && DECL_HAS_VALUE_EXPR_P (t)
2161       && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
2162     {
2163       *walk_subtrees = 0;
2164       return t;
2165     }
2166   return NULL_TREE;
2167 }
2168 
2169 /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
2170    VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
2171    LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
2172    internal functions on non-SIMT targets, and likewise some SIMD internal
2173    functions on SIMT targets.  */
2174 
static unsigned int
execute_omp_device_lower ()
{
  /* SIMT vectorization factor; 1 when the target provides no SIMT hook.  */
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  bool regimplify = false;
  basic_block bb;
  gimple_stmt_iterator gsi;
  bool calls_declare_variant_alt
    = cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  continue;
	if (!gimple_call_internal_p (stmt))
	  {
	    /* For regular calls, resolve 'declare variant' calls to the
	       variant selected for this compilation.  */
	    if (calls_declare_variant_alt)
	      if (tree fndecl = gimple_call_fndecl (stmt))
		{
		  tree new_fndecl = omp_resolve_declare_variant (fndecl);
		  if (new_fndecl != fndecl)
		    {
		      gimple_call_set_fndecl (stmt, new_fndecl);
		      update_stmt (stmt);
		    }
		}
	    continue;
	  }
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	/* Fold each SIMT/SIMD internal function according to VF; a non-NULL
	   RHS replaces the whole call with a plain assignment below.  */
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_ENTER:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_ENTER_ALLOC:
	    /* On SIMT targets, rewrite the allocation to use the laid-out
	       privatization record (see ompdevlow_adjust_simt_enter).  */
	    if (vf != 1)
	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_EXIT:
	  simtreg_enter_exit:
	    /* Keep enter/exit calls on SIMT targets; on non-SIMT targets
	       drop them, detaching their virtual operands first.  */
	    if (vf != 1)
	      continue;
	    unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    /* SIMD (not SIMT) lane queries fold to 0 on SIMT targets.  */
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  /* If any DECL_VALUE_EXPRs were installed, regimplify all statements that
     mention the privatized variables; walk backwards so clobbers of those
     variables can simply be removed.  */
  if (regimplify)
    FOR_EACH_BB_REVERSE_FN (bb, cfun)
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
	  {
	    if (gimple_clobber_p (gsi_stmt (gsi)))
	      gsi_remove (&gsi, true);
	    else
	      gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
	  }
  /* NOTE(review): on SIMT targets, force-vectorize loops are presumably
     handled by SIMT lowering itself, so clear the flag — confirm.  */
  if (vf != 1)
    cfun->has_force_vectorize_loops = false;
  return 0;
}
2271 
2272 namespace {
2273 
/* Pass descriptor for the OpenMP device lowering pass ("ompdevlow").  */

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
2286 
class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      /* Run when device lowering has not yet been performed for this
	 function, or when OpenMP 'declare variant' calls remain to be
	 resolved.  */
      return (!(fun->curr_properties & PROP_gimple_lomp_dev)
	      || (flag_openmp
		  && cgraph_node::get (fun->decl)->calls_declare_variant_alt));
    }
  virtual unsigned int execute (function *)
    {
      return execute_omp_device_lower ();
    }

}; // class pass_omp_device_lower
2307 
2308 } // anon namespace
2309 
/* Instantiate the OpenMP device lowering pass.  */

gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
  return new pass_omp_device_lower (ctxt);
}
2315 
2316 /* "omp declare target link" handling pass.  */
2317 
2318 namespace {
2319 
/* Pass descriptor for the "omp declare target link" pass
   ("omptargetlink").  */

const pass_data pass_data_omp_target_link =
{
  GIMPLE_PASS,			/* type */
  "omptargetlink",		/* name */
  OPTGROUP_OMP,			/* optinfo_flags */
  TV_NONE,			/* tv_id */
  PROP_ssa,			/* properties_required */
  0,				/* properties_provided */
  0,				/* properties_destroyed */
  0,				/* todo_flags_start */
  TODO_update_ssa,		/* todo_flags_finish */
};
2332 
class pass_omp_target_link : public gimple_opt_pass
{
public:
  pass_omp_target_link (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_target_link, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
#ifdef ACCEL_COMPILER
      /* Only run in the offload (accelerator) compiler, and only for
	 functions that are actually offloaded.  */
      return offloading_function_p (fun->decl);
#else
      (void) fun;
      return false;
#endif
    }

  virtual unsigned execute (function *);
};
2353 
2354 /* Callback for walk_gimple_stmt used to scan for link var operands.  */
2355 
2356 static tree
find_link_var_op(tree * tp,int * walk_subtrees,void *)2357 find_link_var_op (tree *tp, int *walk_subtrees, void *)
2358 {
2359   tree t = *tp;
2360 
2361   if (VAR_P (t)
2362       && DECL_HAS_VALUE_EXPR_P (t)
2363       && is_global_var (t)
2364       && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
2365     {
2366       *walk_subtrees = 0;
2367       return t;
2368     }
2369 
2370   return NULL_TREE;
2371 }
2372 
2373 unsigned
execute(function * fun)2374 pass_omp_target_link::execute (function *fun)
2375 {
2376   basic_block bb;
2377   FOR_EACH_BB_FN (bb, fun)
2378     {
2379       gimple_stmt_iterator gsi;
2380       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2381 	if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
2382 	  gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2383     }
2384 
2385   return 0;
2386 }
2387 
2388 } // anon namespace
2389 
/* Instantiate the "omp declare target link" pass.  */

gimple_opt_pass *
make_pass_omp_target_link (gcc::context *ctxt)
{
  return new pass_omp_target_link (ctxt);
}
2395