1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
4
5 Copyright (C) 2005-2020 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
60 #include "stringpool.h"
61 #include "attribs.h"
62
/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers (OMP_CLAUSE_SCHEDULE_MONOTONIC etc.), only
     meaningful for GIMPLE_OMP_FOR type regions.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};
111
/* Root of the forest of toplevel OMP regions discovered in the current
   function; built by new_omp_region and released by omp_free_regions.  */
static struct omp_region *root_omp_region;
/* Set when the body of any outlined child function has been dumped, so
   the caller knows dump output was produced.  */
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);
119
/* Return true if REGION is a combined parallel+workshare region, as
   marked by determine_parallel_type.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}
127
/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  /* Sections regions never need extra header computations.  */
  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  /* Collapsed loops need a constant total iteration count, and the
     combined library entry points work on long iterators only.  */
  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}
203
204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
205 presence (SIMD_SCHEDULE). */
206
207 static tree
omp_adjust_chunk_size(tree chunk_size,bool simd_schedule)208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
209 {
210 if (!simd_schedule || integer_zerop (chunk_size))
211 return chunk_size;
212
213 poly_uint64 vf = omp_max_vf ();
214 if (known_eq (vf, 1U))
215 return chunk_size;
216
217 tree type = TREE_TYPE (chunk_size);
218 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
219 build_int_cst (type, vf - 1));
220 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
221 build_int_cst (type, -vf));
222 }
223
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded; PAR_STMT is the enclosing parallel directive, consulted
   for _looptemp_ clauses of combined-into loops.  Returns a GC'd
   vector of trees in the order the libgomp entry point expects.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  /* For a loop combined into the parallel, the bounds live in
	     the first two _looptemp_ clauses on the parallel.  */
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      /* Arguments are N1, N2, STEP and optionally CHUNK_SIZE, all as
	 long, matching the GOMP_parallel_loop_* prototypes.  */
      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
291
/* Discover whether REGION is a combined parallel+workshare region.
   On success, marks REGION and its inner region as combined and
   records the extra library-call arguments in REGION->ws_args.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  /* Need a parallel region with a complete inner workshare region.  */
  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel, while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
369
370 /* Debugging dumps for parallel regions. */
371 void dump_omp_region (FILE *, struct omp_region *, int);
372 void debug_omp_region (struct omp_region *);
373 void debug_all_omp_regions (void);
374
375 /* Dump the parallel region tree rooted at REGION. */
376
377 void
dump_omp_region(FILE * file,struct omp_region * region,int indent)378 dump_omp_region (FILE *file, struct omp_region *region, int indent)
379 {
380 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
381 gimple_code_name[region->type]);
382
383 if (region->inner)
384 dump_omp_region (file, region->inner, indent + 4);
385
386 if (region->cont)
387 {
388 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
389 region->cont->index);
390 }
391
392 if (region->exit)
393 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
394 region->exit->index);
395 else
396 fprintf (file, "%*s[no exit marker]\n", indent, "");
397
398 if (region->next)
399 dump_omp_region (file, region->next, indent);
400 }
401
/* Dump the region tree rooted at REGION to stderr; for use from a
   debugger.  */

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}
407
/* Dump all toplevel OMP regions of the current function to stderr;
   for use from a debugger.  */

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}
413
414 /* Create a new parallel region starting at STMT inside region PARENT. */
415
416 static struct omp_region *
new_omp_region(basic_block bb,enum gimple_code type,struct omp_region * parent)417 new_omp_region (basic_block bb, enum gimple_code type,
418 struct omp_region *parent)
419 {
420 struct omp_region *region = XCNEW (struct omp_region);
421
422 region->outer = parent;
423 region->entry = bb;
424 region->type = type;
425
426 if (parent)
427 {
428 /* This is a nested region. Add it to the list of inner
429 regions in PARENT. */
430 region->next = parent->inner;
431 parent->inner = region;
432 }
433 else
434 {
435 /* This is a toplevel region. Add it to the list of toplevel
436 regions in ROOT_OMP_REGION. */
437 region->next = root_omp_region;
438 root_omp_region = region;
439 }
440
441 return region;
442 }
443
444 /* Release the memory associated with the region tree rooted at REGION. */
445
446 static void
free_omp_region_1(struct omp_region * region)447 free_omp_region_1 (struct omp_region *region)
448 {
449 struct omp_region *i, *n;
450
451 for (i = region->inner; i ; i = n)
452 {
453 n = i->next;
454 free_omp_region_1 (i);
455 }
456
457 free (region);
458 }
459
460 /* Release the memory for the entire omp region tree. */
461
462 void
omp_free_regions(void)463 omp_free_regions (void)
464 {
465 struct omp_region *r, *n;
466 for (r = root_omp_region; r ; r = n)
467 {
468 n = r->next;
469 free_omp_region_1 (r);
470 }
471 root_omp_region = NULL;
472 }
473
/* A convenience function to build an empty GIMPLE_COND with just the
   condition COND; both of its destination labels are left NULL for
   the caller's CFG edges to supply.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
486
/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  /* Walk outward looking for an enclosing target region.  */
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  /* A gridified target (_griddim_ clause) only needs a kernel
	     for parallels reached indirectly through another one.  */
	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  /* No enclosing target region: need a kernel only inside a declare
     target function.  */
  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}
517
/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  /* No enclosing outlined region: the original function is the
     context.  */
  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  /* Prepend CHILD_FNDECL to the vars of the enclosing lexical
	     block so debug info finds it in the right scope.  */
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}
570
/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  /* START_IX2 selects among the GOMP_parallel_loop_* entry
	     points, offset from BUILT_IN_GOMP_PARALLEL_LOOP_STATIC.  */
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  /* Explicit num_threads: materialize (cond ? val : 1u) as a
	     diamond in the CFG, since VAL may need to be gimplified.  */
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  /* Build the argument list for the GOMP_parallel* call: child
     function, data block address, thread count, any workshare args,
     then the flags word.  */
  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      /* GOMP_parallel_reductions returns a value; store it into the
	 _reductemp_ decl via a pointer-sized integer conversion.  */
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}
783
/* Build the function call to GOMP_task (or GOMP_taskloop{,_ull}) to
   actually generate the task operation.  BB is the block where to
   insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  /* IFLAGS accumulates the GOMP_TASK_FLAG_* bits passed to libgomp.  */
  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      /* For a taskloop, the enclosing region's GIMPLE_OMP_FOR supplies
	 the iteration space; the bounds live in _looptemp_ clauses.  */
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      /* NUM_TASKS carries either num_tasks or grainsize (flagged via
	 GOMP_TASK_FLAG_GRAINSIZE), zero when neither is present.  */
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  /* An if clause is passed to GOMP_task as COND, but is folded into
     the flags word for taskloop.  */
  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  /* T1 = child function, T2 = data block address (or NULL), T3 = copy
     constructor function (or NULL).  */
  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
924
925 /* Build the function call to GOMP_taskwait_depend to actually
926 generate the taskwait operation. BB is the block where to insert the
927 code. */
928
929 static void
expand_taskwait_call(basic_block bb,gomp_task * entry_stmt)930 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
931 {
932 tree clauses = gimple_omp_task_clauses (entry_stmt);
933 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
934 if (depend == NULL_TREE)
935 return;
936
937 depend = OMP_CLAUSE_DECL (depend);
938
939 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
940 tree t
941 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
942 1, depend);
943
944 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
945 false, GSI_CONTINUE_LINKING);
946 }
947
/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  /* Zero means "choose at runtime" for both num_teams and
     thread_limit.  */
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  /* Argument order: child function, data block address, num_teams,
     thread_limit, reserved flags word.  */
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
998
999 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
1000
1001 static tree
vec2chain(vec<tree,va_gc> * v)1002 vec2chain (vec<tree, va_gc> *v)
1003 {
1004 tree chain = NULL_TREE, t;
1005 unsigned ix;
1006
1007 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
1008 {
1009 DECL_CHAIN (t) = chain;
1010 chain = t;
1011 }
1012
1013 return chain;
1014 }
1015
1016 /* Remove barriers in REGION->EXIT's block. Note that this is only
1017 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1018 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1019 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1020 removed. */
1021
static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  /* Lazily computed tri-state: -1 unknown, 0 none, 1 some addressable
     variable exists that a queued task might still reference.  */
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  /* Look at each workshare GIMPLE_OMP_RETURN feeding the exit block;
     those without the nowait flag carry an implicit barrier that may be
     redundant with the parallel's own closing barrier.  */
  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      /* First scan the outlined child function's local decls...  */
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      /* ...then the BLOCK vars from this statement's block
		 outwards, stopping at the parallel's own block.  */
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}
1105
1106 static void
remove_exit_barriers(struct omp_region * region)1107 remove_exit_barriers (struct omp_region *region)
1108 {
1109 if (region->type == GIMPLE_OMP_PARALLEL)
1110 remove_exit_barrier (region);
1111
1112 if (region->inner)
1113 {
1114 region = region->inner;
1115 remove_exit_barriers (region);
1116 while (region->next)
1117 {
1118 region = region->next;
1119 remove_exit_barriers (region);
1120 }
1121 }
1122 }
1123
1124 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1125 calls. These can't be declared as const functions, but
1126 within one parallel body they are constant, so they can be
1127 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1128 which are declared const. Similarly for task body, except
1129 that in untied task omp_get_thread_num () can change at any task
1130 scheduling point. */
1131
static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  /* In an untied task omp_get_thread_num () may change at task
     scheduling points, so it must not be treated as constant there.  */
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	/* Candidates are calls to external public functions with no
	   body in this TU, i.e. plain library entry points.  */
	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    /* The declaration must really be the library function
	       (same assembler name) and be called with no arguments.  */
	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    /* Don't change a potentially-throwing call into a nothrow
	       builtin when exceptions are enabled.  */
	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    /* Return types must agree as well.  */
	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
1188
1189 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1190 regimplified. */
1191
1192 static tree
expand_omp_regimplify_p(tree * tp,int * walk_subtrees,void *)1193 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1194 {
1195 tree t = *tp;
1196
1197 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1198 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1199 return t;
1200
1201 if (TREE_CODE (t) == ADDR_EXPR)
1202 recompute_tree_invariant_for_addr_expr (t);
1203
1204 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1205 return NULL_TREE;
1206 }
1207
1208 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1209
1210 static void
expand_omp_build_assign(gimple_stmt_iterator * gsi_p,tree to,tree from,bool after)1211 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1212 bool after)
1213 {
1214 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1215 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1216 !after, after ? GSI_CONTINUE_LINKING
1217 : GSI_SAME_STMT);
1218 gimple *stmt = gimple_build_assign (to, from);
1219 if (after)
1220 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1221 else
1222 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1223 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1224 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1225 {
1226 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1227 gimple_regimplify_operands (stmt, &gsi);
1228 }
1229 }
1230
1231 /* Expand the OpenMP parallel or task directive starting at REGION. */
1232
static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  /* A taskwait with depend clause is represented as a GIMPLE_OMP_TASK
     with the taskwait_p flag set; it has no body to outline, so just
     replace the statement with the runtime call and return.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  /* Find the .OMP_DATA_I = &.OMP_DATA_O copy assignment; it
	     must exist, hence the assert inside the loop.  */
	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      /* Drop the copy when it is the identity assignment;
		 otherwise redirect its rhs to the child's PARM_DECL.  */
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  /* For tasks, route an abnormal edge around the outlined body
	     to the region exit and drop the now-dead return.  */
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  /* Finally emit the runtime call that launches the outlined region.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}
1532
1533 /* Information about members of an OpenACC collapsed loop nest. */
1534
struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (NULL if not tiled).  */
  tree outer; /* Tile iterator var.  */
};
1543
1544 /* Helper for expand_oacc_for. Determine collapsed loop information.
1545 Fill in COUNTS array. Emit any initialization code before GSI.
1546 Return the calculated outer loop bound of BOUND_TYPE. */
1547
static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  /* Running product of the member-loop iteration counts.  */
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      /* Pointer iteration vars advance by a sizetype offset.  */
      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;

      if (tiling)
	{
	  /* Ask the target via IFN_GOACC_TILE for this loop's tile
	     increment, and create a separate tile iterator var.  */
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations: (range - dir + step) / step.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
1652
/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */
1658
static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar,
			   tree diff_type)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      /* For the element loop of a tile assign the member loop var
	 itself; otherwise the tile's outer iterator.  */
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      /* Pointer iteration vars are advanced by POINTER_PLUS_EXPR
	 with a sizetype offset.  */
      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  /* Peel this loop's index off IVAR: index = IVAR % iters,
	     then divide IVAR down for the next-outer loop.  */
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      /* v = base + index * step.  */
      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  fold_convert (diff_type, collapse->step));
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}
1706
1707 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1708 of the combined collapse > 1 loop constructs, generate code like:
1709 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1710 if (cond3 is <)
1711 adj = STEP3 - 1;
1712 else
1713 adj = STEP3 + 1;
1714 count3 = (adj + N32 - N31) / STEP3;
1715 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1716 if (cond2 is <)
1717 adj = STEP2 - 1;
1718 else
1719 adj = STEP2 + 1;
1720 count2 = (adj + N22 - N21) / STEP2;
1721 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1722 if (cond1 is <)
1723 adj = STEP1 - 1;
1724 else
1725 adj = STEP1 + 1;
1726 count1 = (adj + N12 - N11) / STEP1;
1727 count = count1 * count2 * count3;
1728 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1729 count = 0;
1730 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1731 of the combined loop constructs, just initialize COUNTS array
1732 from the _looptemp_ clauses. */
1733
1734 /* NOTE: It *could* be better to moosh all of the BBs together,
1735 creating one larger BB with all the computation and the unexpected
1736 jump at the end. I.e.
1737
1738 bool zero3, zero2, zero1, zero;
1739
1740 zero3 = N32 c3 N31;
1741 count3 = (N32 - N31) /[cl] STEP3;
1742 zero2 = N22 c2 N21;
1743 count2 = (N22 - N21) /[cl] STEP2;
1744 zero1 = N12 c1 N11;
1745 count1 = (N12 - N11) /[cl] STEP1;
1746 zero = zero3 || zero2 || zero1;
1747 count = count1 * count2 * count3;
1748 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1749
1750 After all, we expect the zero=false, and thus we expect to have to
1751 evaluate all of the comparison expressions, so short-circuiting
1752 oughtn't be a win. Since the condition isn't protecting a
1753 denominator, we're not concerned about divide-by-zero, so we can
1754 fully evaluate count even if a numerator turned out to be wrong.
1755
1756 It seems like putting this all together would create much better
1757 scheduling opportunities, and less pressure on the chip's branch
1758 predictor. */
1759
static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      return;
    }

  /* For doacross (ordered) loops beyond the collapse depth, if any
     loop provably runs zero times, all those counts are zero.  */
  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      /* Unless the bounds fold to a known nonzero iteration count,
	 emit a runtime zero-iterations check branching to the shared
	 ZERO_ITER block (creating it on first use).  */
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
					 NULL_TREE, NULL_TREE);
	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      *gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, gsi);
	    }
	  e = split_block (entry_bb, cond_stmt);
	  /* Collapse-depth loops and doacross-only loops use separate
	     zero-iteration blocks/indices.  */
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::very_unlikely ();
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = ne->probability.invert ();
	  if (l2_dom_bb == NULL)
	    l2_dom_bb = entry_bb;
	  entry_bb = e->dest;
	  *gsi = gsi_last_nondebug_bb (entry_bb);
	}

      if (POINTER_TYPE_P (itype))
	itype = signed_type_for (itype);
      /* count = (step + adj + n2 - n1) / step, where adj is -1 for
	 LT and +1 for GT loops.  */
      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
				 ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
		       fold_convert (itype, fd->loops[i].step), t);
      t = fold_build2 (PLUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n1));
      /* ?? We could probably use CEIL_DIV_EXPR instead of
	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
	 generate the same code in the end because generically we
	 don't know that the values involved must be negative for
	 GT??  */
      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype,
						    fd->loops[i].step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, fd->loops[i].step));
      t = fold_convert (type, t);
      if (TREE_CODE (t) == INTEGER_CST)
	counts[i] = t;
      else
	{
	  if (i < fd->collapse || i != first_zero_iter2)
	    counts[i] = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, counts[i], t);
	}
      /* Accumulate the total iteration count into fd->loop.n2.  */
      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
	{
	  if (i == 0)
	    t = counts[0];
	  else
	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
	  expand_omp_build_assign (gsi, fd->loop.n2, t);
	}
    }
}
1920
1921 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1922 T = V;
1923 V3 = N31 + (T % count3) * STEP3;
1924 T = T / count3;
1925 V2 = N21 + (T % count2) * STEP2;
1926 T = T / count2;
1927 V1 = N11 + T * STEP1;
1928 if this loop doesn't have an inner loop construct combined with it.
1929 If it does have an inner loop construct combined with it and the
1930 iteration count isn't known constant, store values from counts array
1931 into its _looptemp_ temporaries instead. */
1932
1933 static void
expand_omp_for_init_vars(struct omp_for_data * fd,gimple_stmt_iterator * gsi,tree * counts,gimple * inner_stmt,tree startvar)1934 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1935 tree *counts, gimple *inner_stmt, tree startvar)
1936 {
1937 int i;
1938 if (gimple_omp_for_combined_p (fd->for_stmt))
1939 {
1940 /* If fd->loop.n2 is constant, then no propagation of the counts
1941 is needed, they are constant. */
1942 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1943 return;
1944
1945 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1946 ? gimple_omp_taskreg_clauses (inner_stmt)
1947 : gimple_omp_for_clauses (inner_stmt);
1948 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1949 isn't supposed to be handled, as the inner loop doesn't
1950 use it. */
1951 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1952 gcc_assert (innerc);
1953 for (i = 0; i < fd->collapse; i++)
1954 {
1955 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1956 OMP_CLAUSE__LOOPTEMP_);
1957 gcc_assert (innerc);
1958 if (i)
1959 {
1960 tree tem = OMP_CLAUSE_DECL (innerc);
1961 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1962 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1963 false, GSI_CONTINUE_LINKING);
1964 gassign *stmt = gimple_build_assign (tem, t);
1965 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1966 }
1967 }
1968 return;
1969 }
1970
1971 tree type = TREE_TYPE (fd->loop.v);
1972 tree tem = create_tmp_reg (type, ".tem");
1973 gassign *stmt = gimple_build_assign (tem, startvar);
1974 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1975
1976 for (i = fd->collapse - 1; i >= 0; i--)
1977 {
1978 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1979 itype = vtype;
1980 if (POINTER_TYPE_P (vtype))
1981 itype = signed_type_for (vtype);
1982 if (i != 0)
1983 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1984 else
1985 t = tem;
1986 t = fold_convert (itype, t);
1987 t = fold_build2 (MULT_EXPR, itype, t,
1988 fold_convert (itype, fd->loops[i].step));
1989 if (POINTER_TYPE_P (vtype))
1990 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1991 else
1992 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1993 t = force_gimple_operand_gsi (gsi, t,
1994 DECL_P (fd->loops[i].v)
1995 && TREE_ADDRESSABLE (fd->loops[i].v),
1996 NULL_TREE, false,
1997 GSI_CONTINUE_LINKING);
1998 stmt = gimple_build_assign (fd->loops[i].v, t);
1999 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2000 if (i != 0)
2001 {
2002 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2003 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2004 false, GSI_CONTINUE_LINKING);
2005 stmt = gimple_build_assign (tem, t);
2006 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2007 }
2008 }
2009 }
2010
2011 /* Helper function for expand_omp_for_*. Generate code like:
2012 L10:
2013 V3 += STEP3;
2014 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2015 L11:
2016 V3 = N31;
2017 V2 += STEP2;
2018 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2019 L12:
2020 V2 = N21;
2021 V1 += STEP1;
2022 goto BODY_BB; */
2023
2024 static basic_block
extract_omp_for_update_vars(struct omp_for_data * fd,basic_block cont_bb,basic_block body_bb)2025 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2026 basic_block body_bb)
2027 {
2028 basic_block last_bb, bb, collapse_bb = NULL;
2029 int i;
2030 gimple_stmt_iterator gsi;
2031 edge e;
2032 tree t;
2033 gimple *stmt;
2034
2035 last_bb = cont_bb;
2036 for (i = fd->collapse - 1; i >= 0; i--)
2037 {
2038 tree vtype = TREE_TYPE (fd->loops[i].v);
2039
2040 bb = create_empty_bb (last_bb);
2041 add_bb_to_loop (bb, last_bb->loop_father);
2042 gsi = gsi_start_bb (bb);
2043
2044 if (i < fd->collapse - 1)
2045 {
2046 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2047 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2048
2049 t = fd->loops[i + 1].n1;
2050 t = force_gimple_operand_gsi (&gsi, t,
2051 DECL_P (fd->loops[i + 1].v)
2052 && TREE_ADDRESSABLE (fd->loops[i
2053 + 1].v),
2054 NULL_TREE, false,
2055 GSI_CONTINUE_LINKING);
2056 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2057 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2058 }
2059 else
2060 collapse_bb = bb;
2061
2062 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2063
2064 if (POINTER_TYPE_P (vtype))
2065 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2066 else
2067 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2068 t = force_gimple_operand_gsi (&gsi, t,
2069 DECL_P (fd->loops[i].v)
2070 && TREE_ADDRESSABLE (fd->loops[i].v),
2071 NULL_TREE, false, GSI_CONTINUE_LINKING);
2072 stmt = gimple_build_assign (fd->loops[i].v, t);
2073 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2074
2075 if (i > 0)
2076 {
2077 t = fd->loops[i].n2;
2078 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2079 false, GSI_CONTINUE_LINKING);
2080 tree v = fd->loops[i].v;
2081 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2082 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2083 false, GSI_CONTINUE_LINKING);
2084 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2085 stmt = gimple_build_cond_empty (t);
2086 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2087 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2088 expand_omp_regimplify_p, NULL, NULL)
2089 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2090 expand_omp_regimplify_p, NULL, NULL))
2091 gimple_regimplify_operands (stmt, &gsi);
2092 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2093 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2094 }
2095 else
2096 make_edge (bb, body_bb, EDGE_FALLTHRU);
2097 last_bb = bb;
2098 }
2099
2100 return collapse_bb;
2101 }
2102
2103 /* Expand #pragma omp ordered depend(source). */
2104
2105 static void
expand_omp_ordered_source(gimple_stmt_iterator * gsi,struct omp_for_data * fd,tree * counts,location_t loc)2106 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2107 tree *counts, location_t loc)
2108 {
2109 enum built_in_function source_ix
2110 = fd->iter_type == long_integer_type_node
2111 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2112 gimple *g
2113 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2114 build_fold_addr_expr (counts[fd->ordered]));
2115 gimple_set_location (g, loc);
2116 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2117 }
2118
/* Expand a single depend from #pragma omp ordered depend(sink:...).
   FD describes the enclosing ordered(N) loop nest, COUNTS the
   per-dimension counter vars/array set up by
   expand_omp_ordered_source_sink, C the depend(sink) clause and LOC its
   location.  Emits a call to GOMP_doacross{,_ull}_wait, guarded by a
   condition that the referenced iteration lies inside the iteration
   space.  *GSI is repositioned after the join block on return.  */

static void
expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			 tree *counts, tree c, location_t loc)
{
  /* Arguments collected for the GOMP_doacross_*wait call.  */
  auto_vec<tree, 10> args;
  enum built_in_function sink_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
  int i;
  gimple_stmt_iterator gsi2 = *gsi;
  bool warned_step = false;

  /* First pass over the sink offsets: warn when the first non-zero
     offset waits on a lexically later iteration.  */
  for (i = 0; i < fd->ordered; i++)
    {
      tree step = NULL_TREE;
      off = TREE_PURPOSE (deps);
      /* A TRUNC_DIV_EXPR wraps offsets of non-simple Fortran DO loops;
	 operand 1 is the (possibly variable) step.  */
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	}
      if (!integer_zerop (off))
	{
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      || fd->loops[i].cond_code == GT_EXPR);
	  bool forward = fd->loops[i].cond_code == LT_EXPR;
	  if (step)
	    {
	      /* Non-simple Fortran DO loops.  If step is variable,
		 we don't know at compile even the direction, so can't
		 warn.  */
	      if (TREE_CODE (step) != INTEGER_CST)
		break;
	      forward = tree_int_cst_sgn (step) != -1;
	    }
	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
			"waiting for lexically later iteration");
	  break;
	}
      deps = TREE_CHAIN (deps);
    }
  /* If all offsets corresponding to the collapsed loops are zero,
     this depend clause can be ignored.  FIXME: but there is still a
     flush needed.  We need to emit one __sync_synchronize () for it
     though (perhaps conditionally)?  Solve this together with the
     conservative dependence folding optimization.
  if (i >= fd->collapse)
    return;  */

  deps = OMP_CLAUSE_DECL (c);
  gsi_prev (&gsi2);
  /* Carve out a block for the wait call so it can be guarded:
     e1->src holds the guard condition, e1->dest receives the call,
     e2->dest is the join block.  */
  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
  edge e2 = split_block_after_labels (e1->dest);

  gsi2 = gsi_after_labels (e1->dest);
  *gsi = gsi_last_bb (e1->src);
  /* Second pass: for each dimension accumulate the in-iteration-space
     guard into COND and push the offset argument for the call.  */
  for (i = 0; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      tree step = NULL_TREE;
      tree orig_off = NULL_TREE;
      if (POINTER_TYPE_P (itype))
	itype = sizetype;
      if (i)
	deps = TREE_CHAIN (deps);
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      && integer_onep (fd->loops[i].step)
		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
	}
      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
      if (step)
	{
	  /* Pre-divide the offset by the explicit step; ORIG_OFF keeps
	     the undivided value for the divisibility check below.  */
	  off = fold_convert_loc (loc, itype, off);
	  orig_off = off;
	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
	}

      if (integer_zerop (off))
	t = boolean_true_node;
      else
	{
	  /* A is the iteration the sink offset refers to in this
	     dimension.  */
	  tree a;
	  tree co = fold_convert_loc (loc, itype, off);
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
				   co);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  else
	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  if (step)
	    {
	      /* Variable step: which loop bound to test A against
		 depends on the runtime sign of STEP, so compute both
		 tests and select with a COND_EXPR.  */
	      tree t1, t2;
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      else
		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      else
		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
				   step, build_int_cst (TREE_TYPE (step), 0));
	      if (TREE_CODE (step) != INTEGER_CST)
		{
		  /* Both arms are evaluated anyway when gimplified, so
		     force them into separate statements first.  */
		  t1 = unshare_expr (t1);
		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		  t2 = unshare_expr (t2);
		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		}
	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
				   t, t2, t1);
	    }
	  else if (fd->loops[i].cond_code == LT_EXPR)
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				     fd->loops[i].n1);
	      else
		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				     fd->loops[i].n2);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
				 fd->loops[i].n2);
	  else
	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
				 fd->loops[i].n1);
	}
      /* AND this dimension's test into the overall guard.  */
      if (cond)
	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
      else
	cond = t;

      off = fold_convert_loc (loc, itype, off);

      /* For non-unit steps check the offset is a multiple of the step;
	 otherwise the sink names an iteration that never executes.  */
      if (step
	  || (fd->loops[i].cond_code == LT_EXPR
	      ? !integer_onep (fd->loops[i].step)
	      : !integer_minus_onep (fd->loops[i].step)))
	{
	  if (step == NULL_TREE
	      && TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
				 fold_build1_loc (loc, NEGATE_EXPR, itype,
						  s));
	  else
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
				 orig_off ? orig_off : off, s);
	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
			       build_int_cst (itype, 0));
	  if (integer_zerop (t) && !warned_step)
	    {
	      warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
			  "refers to iteration never in the iteration "
			  "space");
	      warned_step = true;
	    }
	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
				  cond, t);
	}

      /* T is the base iteration counter for this dimension.  */
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	t = fd->loop.v;
      else if (counts[i])
	t = counts[i];
      else
	{
	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	  t = fold_convert_loc (loc, fd->iter_type, t);
	}
      if (step)
	/* We have divided off by step already earlier.  */;
      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
			       fold_build1_loc (loc, NEGATE_EXPR, itype,
						s));
      else
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
      off = fold_convert_loc (loc, fd->iter_type, off);
      /* Collapsed dimensions are folded into a single call argument,
	 with COFF carrying the running multiplier.  */
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	{
	  if (i)
	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
				   off);
	  if (i < fd->collapse - 1)
	    {
	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
				      counts[i]);
	      continue;
	    }
	}
      off = unshare_expr (off);
      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      args.safe_push (t);
    }
  /* Emit the wait call inside the guarded block.  */
  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);

  /* Wire up the guard: only reach the call block when COND holds,
     otherwise skip straight to the join block.  */
  cond = unshare_expr (cond);
  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
				   GSI_CONTINUE_LINKING);
  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
  e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
  e1->probability = e3->probability.invert ();
  e1->flags = EDGE_TRUE_VALUE;
  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);

  *gsi = gsi_after_labels (e2->dest);
}
2359
2360 /* Expand all #pragma omp ordered depend(source) and
2361 #pragma omp ordered depend(sink:...) constructs in the current
2362 #pragma omp for ordered(n) region. */
2363
2364 static void
expand_omp_ordered_source_sink(struct omp_region * region,struct omp_for_data * fd,tree * counts,basic_block cont_bb)2365 expand_omp_ordered_source_sink (struct omp_region *region,
2366 struct omp_for_data *fd, tree *counts,
2367 basic_block cont_bb)
2368 {
2369 struct omp_region *inner;
2370 int i;
2371 for (i = fd->collapse - 1; i < fd->ordered; i++)
2372 if (i == fd->collapse - 1 && fd->collapse > 1)
2373 counts[i] = NULL_TREE;
2374 else if (i >= fd->collapse && !cont_bb)
2375 counts[i] = build_zero_cst (fd->iter_type);
2376 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2377 && integer_onep (fd->loops[i].step))
2378 counts[i] = NULL_TREE;
2379 else
2380 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2381 tree atype
2382 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2383 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2384 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2385
2386 for (inner = region->inner; inner; inner = inner->next)
2387 if (inner->type == GIMPLE_OMP_ORDERED)
2388 {
2389 gomp_ordered *ord_stmt = inner->ord_stmt;
2390 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2391 location_t loc = gimple_location (ord_stmt);
2392 tree c;
2393 for (c = gimple_omp_ordered_clauses (ord_stmt);
2394 c; c = OMP_CLAUSE_CHAIN (c))
2395 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2396 break;
2397 if (c)
2398 expand_omp_ordered_source (&gsi, fd, counts, loc);
2399 for (c = gimple_omp_ordered_clauses (ord_stmt);
2400 c; c = OMP_CLAUSE_CHAIN (c))
2401 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2402 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2403 gsi_remove (&gsi, true);
2404 }
2405 }
2406
/* Wrap the body into fd->ordered - fd->collapse loops that aren't
   collapsed.  Returns the (possibly updated) continue block, or NULL
   when CONT_BB is NULL (broken loop body).  */

static basic_block
expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
			      basic_block cont_bb, basic_block body_bb,
			      basic_block l0_bb, bool ordered_lastprivate)
{
  if (fd->ordered == fd->collapse)
    return cont_bb;

  if (!cont_bb)
    {
      /* Broken loop: no looping needed, just initialize the iteration
	 variables and zero the corresponding .orditera slots.  */
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      for (int i = fd->collapse; i < fd->ordered; i++)
	{
	  tree type = TREE_TYPE (fd->loops[i].v);
	  tree n1 = fold_convert (type, fd->loops[i].n1);
	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			      size_int (i - fd->collapse + 1),
			      NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
	}
      return NULL;
    }

  /* Build one wrapping loop per non-collapsed dimension, innermost
     first.  */
  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
    {
      tree t, type = TREE_TYPE (fd->loops[i].v);
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      /* Preheader: V = N1; zero the counter and the .orditera slot.  */
      expand_omp_build_assign (&gsi, fd->loops[i].v,
			       fold_convert (type, fd->loops[i].n1));
      if (counts[i])
	expand_omp_build_assign (&gsi, counts[i],
				 build_zero_cst (fd->iter_type));
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_int (i - fd->collapse + 1),
			  NULL_TREE, NULL_TREE);
      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
      if (!gsi_end_p (gsi))
	gsi_prev (&gsi);
      else
	gsi = gsi_last_bb (body_bb);
      /* Split after the initializations; E1->dest becomes the loop
	 body.  */
      edge e1 = split_block (body_bb, gsi_stmt (gsi));
      basic_block new_body = e1->dest;
      if (body_bb == cont_bb)
	cont_bb = new_body;
      edge e2 = NULL;
      basic_block new_header;
      if (EDGE_COUNT (cont_bb->preds) > 0)
	{
	  /* Latch: V += STEP; update the counter and the .orditera
	     slot, then split so E2->dest becomes the loop header.  */
	  gsi = gsi_last_bb (cont_bb);
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (fd->loops[i].v,
					 fold_convert (sizetype,
						       fd->loops[i].step));
	  else
	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
			     fold_convert (type, fd->loops[i].step));
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  if (counts[i])
	    {
	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
			       build_int_cst (fd->iter_type, 1));
	      expand_omp_build_assign (&gsi, counts[i], t);
	      t = counts[i];
	    }
	  else
	    {
	      /* Unit-step loop: the count is simply V - N1.  */
	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	      t = fold_convert (fd->iter_type, t);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			 size_int (i - fd->collapse + 1),
			 NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, t);
	  gsi_prev (&gsi);
	  e2 = split_block (cont_bb, gsi_stmt (gsi));
	  new_header = e2->dest;
	}
      else
	new_header = cont_bb;
      /* Header: if (V cond N2) goto body; else exit.  */
      gsi = gsi_after_labels (new_header);
      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
					 true, GSI_SAME_STMT);
      tree n2
	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
      edge e3 = split_block (new_header, gsi_stmt (gsi));
      cont_bb = e3->dest;
      remove_edge (e1);
      make_edge (body_bb, new_header, EDGE_FALLTHRU);
      e3->flags = EDGE_FALSE_VALUE;
      e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
      e1->probability = e3->probability.invert ();

      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);

      if (e2)
	{
	  /* Register the newly created natural loop in the loop
	     tree.  */
	  class loop *loop = alloc_loop ();
	  loop->header = new_header;
	  loop->latch = e2->src;
	  add_loop (loop, l0_bb->loop_father);
	}
    }

  /* If there are any lastprivate clauses and it is possible some loops
     might have zero iterations, ensure all the decls are initialized,
     otherwise we could crash evaluating C++ class iterators with lastprivate
     clauses.  */
  bool need_inits = false;
  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
    if (need_inits)
      {
	/* An outer dimension may run zero iterations, so force
	   V = N1 up front for this one.  */
	tree type = TREE_TYPE (fd->loops[i].v);
	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
	expand_omp_build_assign (&gsi, fd->loops[i].v,
				 fold_convert (type, fd->loops[i].n1));
      }
    else
      {
	/* Once a dimension isn't provably non-empty at compile time,
	   every inner dimension needs the explicit initialization.  */
	tree type = TREE_TYPE (fd->loops[i].v);
	tree this_cond = fold_build2 (fd->loops[i].cond_code,
				      boolean_type_node,
				      fold_convert (type, fd->loops[i].n1),
				      fold_convert (type, fd->loops[i].n2));
	if (!integer_onep (this_cond))
	  need_inits = true;
      }

  return cont_bb;
}
2548
2549 /* A subroutine of expand_omp_for. Generate code for a parallel
2550 loop with any schedule. Given parameters:
2551
2552 for (V = N1; V cond N2; V += STEP) BODY;
2553
2554 where COND is "<" or ">", we generate pseudocode
2555
2556 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2557 if (more) goto L0; else goto L3;
2558 L0:
2559 V = istart0;
2560 iend = iend0;
2561 L1:
2562 BODY;
2563 V += STEP;
2564 if (V cond iend) goto L1; else goto L2;
2565 L2:
2566 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2567 L3:
2568
2569 If this is a combined omp parallel loop, instead of the call to
2570 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2571 If this is gimple_omp_for_combined_p loop, then instead of assigning
2572 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2573 inner GIMPLE_OMP_FOR and V += STEP; and
2574 if (V cond iend) goto L1; else goto L2; are removed.
2575
2576 For collapsed loops, given parameters:
2577 collapse(3)
2578 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2579 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2580 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2581 BODY;
2582
2583 we generate pseudocode
2584
2585 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2586 if (cond3 is <)
2587 adj = STEP3 - 1;
2588 else
2589 adj = STEP3 + 1;
2590 count3 = (adj + N32 - N31) / STEP3;
2591 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2592 if (cond2 is <)
2593 adj = STEP2 - 1;
2594 else
2595 adj = STEP2 + 1;
2596 count2 = (adj + N22 - N21) / STEP2;
2597 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2598 if (cond1 is <)
2599 adj = STEP1 - 1;
2600 else
2601 adj = STEP1 + 1;
2602 count1 = (adj + N12 - N11) / STEP1;
2603 count = count1 * count2 * count3;
2604 goto Z1;
2605 Z0:
2606 count = 0;
2607 Z1:
2608 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2609 if (more) goto L0; else goto L3;
2610 L0:
2611 V = istart0;
2612 T = V;
2613 V3 = N31 + (T % count3) * STEP3;
2614 T = T / count3;
2615 V2 = N21 + (T % count2) * STEP2;
2616 T = T / count2;
2617 V1 = N11 + T * STEP1;
2618 iend = iend0;
2619 L1:
2620 BODY;
2621 V += 1;
2622 if (V < iend) goto L10; else goto L2;
2623 L10:
2624 V3 += STEP3;
2625 if (V3 cond3 N32) goto L1; else goto L11;
2626 L11:
2627 V3 = N31;
2628 V2 += STEP2;
2629 if (V2 cond2 N22) goto L1; else goto L12;
2630 L12:
2631 V2 = N21;
2632 V1 += STEP1;
2633 goto L1;
2634 L2:
2635 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2636 L3:
2637
2638 */
2639
2640 static void
expand_omp_for_generic(struct omp_region * region,struct omp_for_data * fd,enum built_in_function start_fn,enum built_in_function next_fn,tree sched_arg,gimple * inner_stmt)2641 expand_omp_for_generic (struct omp_region *region,
2642 struct omp_for_data *fd,
2643 enum built_in_function start_fn,
2644 enum built_in_function next_fn,
2645 tree sched_arg,
2646 gimple *inner_stmt)
2647 {
2648 tree type, istart0, iend0, iend;
2649 tree t, vmain, vback, bias = NULL_TREE;
2650 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2651 basic_block l2_bb = NULL, l3_bb = NULL;
2652 gimple_stmt_iterator gsi;
2653 gassign *assign_stmt;
2654 bool in_combined_parallel = is_combined_parallel (region);
2655 bool broken_loop = region->cont == NULL;
2656 edge e, ne;
2657 tree *counts = NULL;
2658 int i;
2659 bool ordered_lastprivate = false;
2660
2661 gcc_assert (!broken_loop || !in_combined_parallel);
2662 gcc_assert (fd->iter_type == long_integer_type_node
2663 || !in_combined_parallel);
2664
2665 entry_bb = region->entry;
2666 cont_bb = region->cont;
2667 collapse_bb = NULL;
2668 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2669 gcc_assert (broken_loop
2670 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2671 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2672 l1_bb = single_succ (l0_bb);
2673 if (!broken_loop)
2674 {
2675 l2_bb = create_empty_bb (cont_bb);
2676 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2677 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2678 == l1_bb));
2679 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2680 }
2681 else
2682 l2_bb = NULL;
2683 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2684 exit_bb = region->exit;
2685
2686 gsi = gsi_last_nondebug_bb (entry_bb);
2687
2688 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2689 if (fd->ordered
2690 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2691 OMP_CLAUSE_LASTPRIVATE))
2692 ordered_lastprivate = false;
2693 tree reductions = NULL_TREE;
2694 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2695 tree memv = NULL_TREE;
2696 if (fd->lastprivate_conditional)
2697 {
2698 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2699 OMP_CLAUSE__CONDTEMP_);
2700 if (fd->have_pointer_condtemp)
2701 condtemp = OMP_CLAUSE_DECL (c);
2702 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2703 cond_var = OMP_CLAUSE_DECL (c);
2704 }
2705 if (sched_arg)
2706 {
2707 if (fd->have_reductemp)
2708 {
2709 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2710 OMP_CLAUSE__REDUCTEMP_);
2711 reductions = OMP_CLAUSE_DECL (c);
2712 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2713 gimple *g = SSA_NAME_DEF_STMT (reductions);
2714 reductions = gimple_assign_rhs1 (g);
2715 OMP_CLAUSE_DECL (c) = reductions;
2716 entry_bb = gimple_bb (g);
2717 edge e = split_block (entry_bb, g);
2718 if (region->entry == entry_bb)
2719 region->entry = e->dest;
2720 gsi = gsi_last_bb (entry_bb);
2721 }
2722 else
2723 reductions = null_pointer_node;
2724 if (fd->have_pointer_condtemp)
2725 {
2726 tree type = TREE_TYPE (condtemp);
2727 memv = create_tmp_var (type);
2728 TREE_ADDRESSABLE (memv) = 1;
2729 unsigned HOST_WIDE_INT sz
2730 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2731 sz *= fd->lastprivate_conditional;
2732 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2733 false);
2734 mem = build_fold_addr_expr (memv);
2735 }
2736 else
2737 mem = null_pointer_node;
2738 }
2739 if (fd->collapse > 1 || fd->ordered)
2740 {
2741 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2742 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2743
2744 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2745 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2746 zero_iter1_bb, first_zero_iter1,
2747 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2748
2749 if (zero_iter1_bb)
2750 {
2751 /* Some counts[i] vars might be uninitialized if
2752 some loop has zero iterations. But the body shouldn't
2753 be executed in that case, so just avoid uninit warnings. */
2754 for (i = first_zero_iter1;
2755 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2756 if (SSA_VAR_P (counts[i]))
2757 TREE_NO_WARNING (counts[i]) = 1;
2758 gsi_prev (&gsi);
2759 e = split_block (entry_bb, gsi_stmt (gsi));
2760 entry_bb = e->dest;
2761 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2762 gsi = gsi_last_nondebug_bb (entry_bb);
2763 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2764 get_immediate_dominator (CDI_DOMINATORS,
2765 zero_iter1_bb));
2766 }
2767 if (zero_iter2_bb)
2768 {
2769 /* Some counts[i] vars might be uninitialized if
2770 some loop has zero iterations. But the body shouldn't
2771 be executed in that case, so just avoid uninit warnings. */
2772 for (i = first_zero_iter2; i < fd->ordered; i++)
2773 if (SSA_VAR_P (counts[i]))
2774 TREE_NO_WARNING (counts[i]) = 1;
2775 if (zero_iter1_bb)
2776 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2777 else
2778 {
2779 gsi_prev (&gsi);
2780 e = split_block (entry_bb, gsi_stmt (gsi));
2781 entry_bb = e->dest;
2782 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2783 gsi = gsi_last_nondebug_bb (entry_bb);
2784 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2785 get_immediate_dominator
2786 (CDI_DOMINATORS, zero_iter2_bb));
2787 }
2788 }
2789 if (fd->collapse == 1)
2790 {
2791 counts[0] = fd->loop.n2;
2792 fd->loop = fd->loops[0];
2793 }
2794 }
2795
2796 type = TREE_TYPE (fd->loop.v);
2797 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2798 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2799 TREE_ADDRESSABLE (istart0) = 1;
2800 TREE_ADDRESSABLE (iend0) = 1;
2801
2802 /* See if we need to bias by LLONG_MIN. */
2803 if (fd->iter_type == long_long_unsigned_type_node
2804 && TREE_CODE (type) == INTEGER_TYPE
2805 && !TYPE_UNSIGNED (type)
2806 && fd->ordered == 0)
2807 {
2808 tree n1, n2;
2809
2810 if (fd->loop.cond_code == LT_EXPR)
2811 {
2812 n1 = fd->loop.n1;
2813 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2814 }
2815 else
2816 {
2817 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2818 n2 = fd->loop.n1;
2819 }
2820 if (TREE_CODE (n1) != INTEGER_CST
2821 || TREE_CODE (n2) != INTEGER_CST
2822 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2823 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2824 }
2825
2826 gimple_stmt_iterator gsif = gsi;
2827 gsi_prev (&gsif);
2828
2829 tree arr = NULL_TREE;
2830 if (in_combined_parallel)
2831 {
2832 gcc_assert (fd->ordered == 0);
2833 /* In a combined parallel loop, emit a call to
2834 GOMP_loop_foo_next. */
2835 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2836 build_fold_addr_expr (istart0),
2837 build_fold_addr_expr (iend0));
2838 }
2839 else
2840 {
2841 tree t0, t1, t2, t3, t4;
2842 /* If this is not a combined parallel loop, emit a call to
2843 GOMP_loop_foo_start in ENTRY_BB. */
2844 t4 = build_fold_addr_expr (iend0);
2845 t3 = build_fold_addr_expr (istart0);
2846 if (fd->ordered)
2847 {
2848 t0 = build_int_cst (unsigned_type_node,
2849 fd->ordered - fd->collapse + 1);
2850 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2851 fd->ordered
2852 - fd->collapse + 1),
2853 ".omp_counts");
2854 DECL_NAMELESS (arr) = 1;
2855 TREE_ADDRESSABLE (arr) = 1;
2856 TREE_STATIC (arr) = 1;
2857 vec<constructor_elt, va_gc> *v;
2858 vec_alloc (v, fd->ordered - fd->collapse + 1);
2859 int idx;
2860
2861 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2862 {
2863 tree c;
2864 if (idx == 0 && fd->collapse > 1)
2865 c = fd->loop.n2;
2866 else
2867 c = counts[idx + fd->collapse - 1];
2868 tree purpose = size_int (idx);
2869 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2870 if (TREE_CODE (c) != INTEGER_CST)
2871 TREE_STATIC (arr) = 0;
2872 }
2873
2874 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2875 if (!TREE_STATIC (arr))
2876 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2877 void_type_node, arr),
2878 true, NULL_TREE, true, GSI_SAME_STMT);
2879 t1 = build_fold_addr_expr (arr);
2880 t2 = NULL_TREE;
2881 }
2882 else
2883 {
2884 t2 = fold_convert (fd->iter_type, fd->loop.step);
2885 t1 = fd->loop.n2;
2886 t0 = fd->loop.n1;
2887 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2888 {
2889 tree innerc
2890 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2891 OMP_CLAUSE__LOOPTEMP_);
2892 gcc_assert (innerc);
2893 t0 = OMP_CLAUSE_DECL (innerc);
2894 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2895 OMP_CLAUSE__LOOPTEMP_);
2896 gcc_assert (innerc);
2897 t1 = OMP_CLAUSE_DECL (innerc);
2898 }
2899 if (POINTER_TYPE_P (TREE_TYPE (t0))
2900 && TYPE_PRECISION (TREE_TYPE (t0))
2901 != TYPE_PRECISION (fd->iter_type))
2902 {
2903 /* Avoid casting pointers to integer of a different size. */
2904 tree itype = signed_type_for (type);
2905 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2906 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2907 }
2908 else
2909 {
2910 t1 = fold_convert (fd->iter_type, t1);
2911 t0 = fold_convert (fd->iter_type, t0);
2912 }
2913 if (bias)
2914 {
2915 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2916 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2917 }
2918 }
2919 if (fd->iter_type == long_integer_type_node || fd->ordered)
2920 {
2921 if (fd->chunk_size)
2922 {
2923 t = fold_convert (fd->iter_type, fd->chunk_size);
2924 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2925 if (sched_arg)
2926 {
2927 if (fd->ordered)
2928 t = build_call_expr (builtin_decl_explicit (start_fn),
2929 8, t0, t1, sched_arg, t, t3, t4,
2930 reductions, mem);
2931 else
2932 t = build_call_expr (builtin_decl_explicit (start_fn),
2933 9, t0, t1, t2, sched_arg, t, t3, t4,
2934 reductions, mem);
2935 }
2936 else if (fd->ordered)
2937 t = build_call_expr (builtin_decl_explicit (start_fn),
2938 5, t0, t1, t, t3, t4);
2939 else
2940 t = build_call_expr (builtin_decl_explicit (start_fn),
2941 6, t0, t1, t2, t, t3, t4);
2942 }
2943 else if (fd->ordered)
2944 t = build_call_expr (builtin_decl_explicit (start_fn),
2945 4, t0, t1, t3, t4);
2946 else
2947 t = build_call_expr (builtin_decl_explicit (start_fn),
2948 5, t0, t1, t2, t3, t4);
2949 }
2950 else
2951 {
2952 tree t5;
2953 tree c_bool_type;
2954 tree bfn_decl;
2955
2956 /* The GOMP_loop_ull_*start functions have additional boolean
2957 argument, true for < loops and false for > loops.
2958 In Fortran, the C bool type can be different from
2959 boolean_type_node. */
2960 bfn_decl = builtin_decl_explicit (start_fn);
2961 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2962 t5 = build_int_cst (c_bool_type,
2963 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2964 if (fd->chunk_size)
2965 {
2966 tree bfn_decl = builtin_decl_explicit (start_fn);
2967 t = fold_convert (fd->iter_type, fd->chunk_size);
2968 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2969 if (sched_arg)
2970 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2971 t, t3, t4, reductions, mem);
2972 else
2973 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2974 }
2975 else
2976 t = build_call_expr (builtin_decl_explicit (start_fn),
2977 6, t5, t0, t1, t2, t3, t4);
2978 }
2979 }
2980 if (TREE_TYPE (t) != boolean_type_node)
2981 t = fold_build2 (NE_EXPR, boolean_type_node,
2982 t, build_int_cst (TREE_TYPE (t), 0));
2983 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2984 true, GSI_SAME_STMT);
2985 if (arr && !TREE_STATIC (arr))
2986 {
2987 tree clobber = build_clobber (TREE_TYPE (arr));
2988 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2989 GSI_SAME_STMT);
2990 }
2991 if (fd->have_pointer_condtemp)
2992 expand_omp_build_assign (&gsi, condtemp, memv, false);
2993 if (fd->have_reductemp)
2994 {
2995 gimple *g = gsi_stmt (gsi);
2996 gsi_remove (&gsi, true);
2997 release_ssa_name (gimple_assign_lhs (g));
2998
2999 entry_bb = region->entry;
3000 gsi = gsi_last_nondebug_bb (entry_bb);
3001
3002 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3003 }
3004 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3005
3006 /* Remove the GIMPLE_OMP_FOR statement. */
3007 gsi_remove (&gsi, true);
3008
3009 if (gsi_end_p (gsif))
3010 gsif = gsi_after_labels (gsi_bb (gsif));
3011 gsi_next (&gsif);
3012
3013 /* Iteration setup for sequential loop goes in L0_BB. */
3014 tree startvar = fd->loop.v;
3015 tree endvar = NULL_TREE;
3016
3017 if (gimple_omp_for_combined_p (fd->for_stmt))
3018 {
3019 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3020 && gimple_omp_for_kind (inner_stmt)
3021 == GF_OMP_FOR_KIND_SIMD);
3022 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3023 OMP_CLAUSE__LOOPTEMP_);
3024 gcc_assert (innerc);
3025 startvar = OMP_CLAUSE_DECL (innerc);
3026 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3027 OMP_CLAUSE__LOOPTEMP_);
3028 gcc_assert (innerc);
3029 endvar = OMP_CLAUSE_DECL (innerc);
3030 }
3031
3032 gsi = gsi_start_bb (l0_bb);
3033 t = istart0;
3034 if (fd->ordered && fd->collapse == 1)
3035 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3036 fold_convert (fd->iter_type, fd->loop.step));
3037 else if (bias)
3038 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3039 if (fd->ordered && fd->collapse == 1)
3040 {
3041 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3042 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3043 fd->loop.n1, fold_convert (sizetype, t));
3044 else
3045 {
3046 t = fold_convert (TREE_TYPE (startvar), t);
3047 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3048 fd->loop.n1, t);
3049 }
3050 }
3051 else
3052 {
3053 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3054 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3055 t = fold_convert (TREE_TYPE (startvar), t);
3056 }
3057 t = force_gimple_operand_gsi (&gsi, t,
3058 DECL_P (startvar)
3059 && TREE_ADDRESSABLE (startvar),
3060 NULL_TREE, false, GSI_CONTINUE_LINKING);
3061 assign_stmt = gimple_build_assign (startvar, t);
3062 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3063 if (cond_var)
3064 {
3065 tree itype = TREE_TYPE (cond_var);
3066 /* For lastprivate(conditional:) itervar, we need some iteration
3067 counter that starts at unsigned non-zero and increases.
3068 Prefer as few IVs as possible, so if we can use startvar
3069 itself, use that, or startvar + constant (those would be
3070 incremented with step), and as last resort use the s0 + 1
3071 incremented by 1. */
3072 if ((fd->ordered && fd->collapse == 1)
3073 || bias
3074 || POINTER_TYPE_P (type)
3075 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3076 || fd->loop.cond_code != LT_EXPR)
3077 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3078 build_int_cst (itype, 1));
3079 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3080 t = fold_convert (itype, t);
3081 else
3082 {
3083 tree c = fold_convert (itype, fd->loop.n1);
3084 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3085 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3086 }
3087 t = force_gimple_operand_gsi (&gsi, t, false,
3088 NULL_TREE, false, GSI_CONTINUE_LINKING);
3089 assign_stmt = gimple_build_assign (cond_var, t);
3090 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3091 }
3092
3093 t = iend0;
3094 if (fd->ordered && fd->collapse == 1)
3095 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3096 fold_convert (fd->iter_type, fd->loop.step));
3097 else if (bias)
3098 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3099 if (fd->ordered && fd->collapse == 1)
3100 {
3101 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3102 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3103 fd->loop.n1, fold_convert (sizetype, t));
3104 else
3105 {
3106 t = fold_convert (TREE_TYPE (startvar), t);
3107 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3108 fd->loop.n1, t);
3109 }
3110 }
3111 else
3112 {
3113 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3114 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3115 t = fold_convert (TREE_TYPE (startvar), t);
3116 }
3117 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3118 false, GSI_CONTINUE_LINKING);
3119 if (endvar)
3120 {
3121 assign_stmt = gimple_build_assign (endvar, iend);
3122 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3123 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3124 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3125 else
3126 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3127 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3128 }
3129 /* Handle linear clause adjustments. */
3130 tree itercnt = NULL_TREE;
3131 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3132 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3133 c; c = OMP_CLAUSE_CHAIN (c))
3134 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3135 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3136 {
3137 tree d = OMP_CLAUSE_DECL (c);
3138 bool is_ref = omp_is_reference (d);
3139 tree t = d, a, dest;
3140 if (is_ref)
3141 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3142 tree type = TREE_TYPE (t);
3143 if (POINTER_TYPE_P (type))
3144 type = sizetype;
3145 dest = unshare_expr (t);
3146 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3147 expand_omp_build_assign (&gsif, v, t);
3148 if (itercnt == NULL_TREE)
3149 {
3150 itercnt = startvar;
3151 tree n1 = fd->loop.n1;
3152 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3153 {
3154 itercnt
3155 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3156 itercnt);
3157 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3158 }
3159 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3160 itercnt, n1);
3161 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3162 itercnt, fd->loop.step);
3163 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3164 NULL_TREE, false,
3165 GSI_CONTINUE_LINKING);
3166 }
3167 a = fold_build2 (MULT_EXPR, type,
3168 fold_convert (type, itercnt),
3169 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3170 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3171 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3172 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3173 false, GSI_CONTINUE_LINKING);
3174 assign_stmt = gimple_build_assign (dest, t);
3175 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3176 }
3177 if (fd->collapse > 1)
3178 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3179
3180 if (fd->ordered)
3181 {
3182 /* Until now, counts array contained number of iterations or
3183 variable containing it for ith loop. From now on, we need
3184 those counts only for collapsed loops, and only for the 2nd
3185 till the last collapsed one. Move those one element earlier,
3186 we'll use counts[fd->collapse - 1] for the first source/sink
3187 iteration counter and so on and counts[fd->ordered]
3188 as the array holding the current counter values for
3189 depend(source). */
3190 if (fd->collapse > 1)
3191 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3192 if (broken_loop)
3193 {
3194 int i;
3195 for (i = fd->collapse; i < fd->ordered; i++)
3196 {
3197 tree type = TREE_TYPE (fd->loops[i].v);
3198 tree this_cond
3199 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3200 fold_convert (type, fd->loops[i].n1),
3201 fold_convert (type, fd->loops[i].n2));
3202 if (!integer_onep (this_cond))
3203 break;
3204 }
3205 if (i < fd->ordered)
3206 {
3207 if (entry_bb->loop_father != l0_bb->loop_father)
3208 {
3209 remove_bb_from_loops (l0_bb);
3210 add_bb_to_loop (l0_bb, entry_bb->loop_father);
3211 gcc_assert (single_succ (l0_bb) == l1_bb);
3212 }
3213 cont_bb
3214 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3215 add_bb_to_loop (cont_bb, l0_bb->loop_father);
3216 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3217 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3218 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3219 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3220 make_edge (cont_bb, l1_bb, 0);
3221 l2_bb = create_empty_bb (cont_bb);
3222 broken_loop = false;
3223 }
3224 }
3225 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3226 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3227 l0_bb, ordered_lastprivate);
3228 if (counts[fd->collapse - 1])
3229 {
3230 gcc_assert (fd->collapse == 1);
3231 gsi = gsi_last_bb (l0_bb);
3232 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3233 istart0, true);
3234 if (cont_bb)
3235 {
3236 gsi = gsi_last_bb (cont_bb);
3237 t = fold_build2 (PLUS_EXPR, fd->iter_type,
3238 counts[fd->collapse - 1],
3239 build_int_cst (fd->iter_type, 1));
3240 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3241 tree aref = build4 (ARRAY_REF, fd->iter_type,
3242 counts[fd->ordered], size_zero_node,
3243 NULL_TREE, NULL_TREE);
3244 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3245 }
3246 t = counts[fd->collapse - 1];
3247 }
3248 else if (fd->collapse > 1)
3249 t = fd->loop.v;
3250 else
3251 {
3252 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3253 fd->loops[0].v, fd->loops[0].n1);
3254 t = fold_convert (fd->iter_type, t);
3255 }
3256 gsi = gsi_last_bb (l0_bb);
3257 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3258 size_zero_node, NULL_TREE, NULL_TREE);
3259 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3260 false, GSI_CONTINUE_LINKING);
3261 expand_omp_build_assign (&gsi, aref, t, true);
3262 }
3263
3264 if (!broken_loop)
3265 {
3266 /* Code to control the increment and predicate for the sequential
3267 loop goes in the CONT_BB. */
3268 gsi = gsi_last_nondebug_bb (cont_bb);
3269 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3270 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3271 vmain = gimple_omp_continue_control_use (cont_stmt);
3272 vback = gimple_omp_continue_control_def (cont_stmt);
3273
3274 if (cond_var)
3275 {
3276 tree itype = TREE_TYPE (cond_var);
3277 tree t2;
3278 if ((fd->ordered && fd->collapse == 1)
3279 || bias
3280 || POINTER_TYPE_P (type)
3281 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3282 || fd->loop.cond_code != LT_EXPR)
3283 t2 = build_int_cst (itype, 1);
3284 else
3285 t2 = fold_convert (itype, fd->loop.step);
3286 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3287 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3288 NULL_TREE, true, GSI_SAME_STMT);
3289 assign_stmt = gimple_build_assign (cond_var, t2);
3290 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3291 }
3292
3293 if (!gimple_omp_for_combined_p (fd->for_stmt))
3294 {
3295 if (POINTER_TYPE_P (type))
3296 t = fold_build_pointer_plus (vmain, fd->loop.step);
3297 else
3298 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3299 t = force_gimple_operand_gsi (&gsi, t,
3300 DECL_P (vback)
3301 && TREE_ADDRESSABLE (vback),
3302 NULL_TREE, true, GSI_SAME_STMT);
3303 assign_stmt = gimple_build_assign (vback, t);
3304 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3305
3306 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3307 {
3308 tree tem;
3309 if (fd->collapse > 1)
3310 tem = fd->loop.v;
3311 else
3312 {
3313 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3314 fd->loops[0].v, fd->loops[0].n1);
3315 tem = fold_convert (fd->iter_type, tem);
3316 }
3317 tree aref = build4 (ARRAY_REF, fd->iter_type,
3318 counts[fd->ordered], size_zero_node,
3319 NULL_TREE, NULL_TREE);
3320 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3321 true, GSI_SAME_STMT);
3322 expand_omp_build_assign (&gsi, aref, tem);
3323 }
3324
3325 t = build2 (fd->loop.cond_code, boolean_type_node,
3326 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3327 iend);
3328 gcond *cond_stmt = gimple_build_cond_empty (t);
3329 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3330 }
3331
3332 /* Remove GIMPLE_OMP_CONTINUE. */
3333 gsi_remove (&gsi, true);
3334
3335 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3336 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3337
3338 /* Emit code to get the next parallel iteration in L2_BB. */
3339 gsi = gsi_start_bb (l2_bb);
3340
3341 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3342 build_fold_addr_expr (istart0),
3343 build_fold_addr_expr (iend0));
3344 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3345 false, GSI_CONTINUE_LINKING);
3346 if (TREE_TYPE (t) != boolean_type_node)
3347 t = fold_build2 (NE_EXPR, boolean_type_node,
3348 t, build_int_cst (TREE_TYPE (t), 0));
3349 gcond *cond_stmt = gimple_build_cond_empty (t);
3350 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3351 }
3352
3353 /* Add the loop cleanup function. */
3354 gsi = gsi_last_nondebug_bb (exit_bb);
3355 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3356 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3357 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3358 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3359 else
3360 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3361 gcall *call_stmt = gimple_build_call (t, 0);
3362 if (fd->ordered)
3363 {
3364 tree arr = counts[fd->ordered];
3365 tree clobber = build_clobber (TREE_TYPE (arr));
3366 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3367 GSI_SAME_STMT);
3368 }
3369 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3370 {
3371 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3372 if (fd->have_reductemp)
3373 {
3374 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3375 gimple_call_lhs (call_stmt));
3376 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3377 }
3378 }
3379 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3380 gsi_remove (&gsi, true);
3381
3382 /* Connect the new blocks. */
3383 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3384 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3385
3386 if (!broken_loop)
3387 {
3388 gimple_seq phis;
3389
3390 e = find_edge (cont_bb, l3_bb);
3391 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3392
3393 phis = phi_nodes (l3_bb);
3394 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3395 {
3396 gimple *phi = gsi_stmt (gsi);
3397 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3398 PHI_ARG_DEF_FROM_EDGE (phi, e));
3399 }
3400 remove_edge (e);
3401
3402 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3403 e = find_edge (cont_bb, l1_bb);
3404 if (e == NULL)
3405 {
3406 e = BRANCH_EDGE (cont_bb);
3407 gcc_assert (single_succ (e->dest) == l1_bb);
3408 }
3409 if (gimple_omp_for_combined_p (fd->for_stmt))
3410 {
3411 remove_edge (e);
3412 e = NULL;
3413 }
3414 else if (fd->collapse > 1)
3415 {
3416 remove_edge (e);
3417 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3418 }
3419 else
3420 e->flags = EDGE_TRUE_VALUE;
3421 if (e)
3422 {
3423 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3424 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3425 }
3426 else
3427 {
3428 e = find_edge (cont_bb, l2_bb);
3429 e->flags = EDGE_FALLTHRU;
3430 }
3431 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3432
3433 if (gimple_in_ssa_p (cfun))
3434 {
3435 /* Add phis to the outer loop that connect to the phis in the inner,
3436 original loop, and move the loop entry value of the inner phi to
3437 the loop entry value of the outer phi. */
3438 gphi_iterator psi;
3439 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3440 {
3441 location_t locus;
3442 gphi *nphi;
3443 gphi *exit_phi = psi.phi ();
3444
3445 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3446 continue;
3447
3448 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3449 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3450
3451 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3452 edge latch_to_l1 = find_edge (latch, l1_bb);
3453 gphi *inner_phi
3454 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3455
3456 tree t = gimple_phi_result (exit_phi);
3457 tree new_res = copy_ssa_name (t, NULL);
3458 nphi = create_phi_node (new_res, l0_bb);
3459
3460 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3461 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3462 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3463 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3464 add_phi_arg (nphi, t, entry_to_l0, locus);
3465
3466 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3467 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3468
3469 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3470 }
3471 }
3472
3473 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3474 recompute_dominator (CDI_DOMINATORS, l2_bb));
3475 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3476 recompute_dominator (CDI_DOMINATORS, l3_bb));
3477 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3478 recompute_dominator (CDI_DOMINATORS, l0_bb));
3479 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3480 recompute_dominator (CDI_DOMINATORS, l1_bb));
3481
3482 /* We enter expand_omp_for_generic with a loop. This original loop may
3483 have its own loop struct, or it may be part of an outer loop struct
3484 (which may be the fake loop). */
3485 class loop *outer_loop = entry_bb->loop_father;
3486 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3487
3488 add_bb_to_loop (l2_bb, outer_loop);
3489
3490 /* We've added a new loop around the original loop. Allocate the
3491 corresponding loop struct. */
3492 class loop *new_loop = alloc_loop ();
3493 new_loop->header = l0_bb;
3494 new_loop->latch = l2_bb;
3495 add_loop (new_loop, outer_loop);
3496
3497 /* Allocate a loop structure for the original loop unless we already
3498 had one. */
3499 if (!orig_loop_has_loop_struct
3500 && !gimple_omp_for_combined_p (fd->for_stmt))
3501 {
3502 class loop *orig_loop = alloc_loop ();
3503 orig_loop->header = l1_bb;
3504 /* The loop may have multiple latches. */
3505 add_loop (orig_loop, new_loop);
3506 }
3507 }
3508 }
3509
3510 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
3511 compute needed allocation size.  If !ALLOC, handle team allocations;
3512 if ALLOC, handle thread allocations.  SZ is the initial needed size for
3513 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
3514 CNT number of elements of each array, for !ALLOC this is
3515 omp_get_num_threads (), for ALLOC number of iterations handled by the
3516 current thread. If PTR is non-NULL, it is the start of the allocation
3517 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
3518 clauses pointers to the corresponding arrays. */
3519
3520 static tree
expand_omp_scantemp_alloc(tree clauses,tree ptr,unsigned HOST_WIDE_INT sz,unsigned HOST_WIDE_INT alloc_align,tree cnt,gimple_stmt_iterator * gsi,bool alloc)3521 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
3522 unsigned HOST_WIDE_INT alloc_align, tree cnt,
3523 gimple_stmt_iterator *gsi, bool alloc)
3524 {
/* Sum of one element's size of every handled array; only accumulated in
   the sizing pass, i.e. when PTR is NULL.  */
3525 tree eltsz = NULL_TREE;
/* Alignment in bytes known to hold at the current layout position;
   0 until the first array has been placed.  */
3526 unsigned HOST_WIDE_INT preval = 0;
/* Skip the SZ bytes reserved at the start of the allocation for other
   purposes.  */
3527 if (ptr && sz)
3528 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3529 ptr, size_int (sz));
/* Walk the _scantemp_ clauses whose allocation kind matches ALLOC,
   skipping the control clauses.  */
3530 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3531 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3532 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
3533 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
3534 {
/* The clause decl is a pointer; POINTEE_TYPE is the array element
   type it points to.  */
3535 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
/* Alignment this array needs: the element type's alignment, capped
   by the lowest set bit of its size when the size is a known
   constant (a stride of that many bytes can't preserve more).  */
3536 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
3537 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3538 {
3539 unsigned HOST_WIDE_INT szl
3540 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
3541 szl = least_bit_hwi (szl);
3542 if (szl)
3543 al = MIN (al, szl);
3544 }
/* Sizing pass: accumulate this array's per-element size.  */
3545 if (ptr == NULL_TREE)
3546 {
3547 if (eltsz == NULL_TREE)
3548 eltsz = TYPE_SIZE_UNIT (pointee_type);
3549 else
3550 eltsz = size_binop (PLUS_EXPR, eltsz,
3551 TYPE_SIZE_UNIT (pointee_type));
3552 }
/* Pad up to AL alignment.  Before any array has been placed
   (PREVAL == 0) the position is the ALLOC_ALIGN-aligned allocation
   start plus SZ bytes, so for AL <= ALLOC_ALIGN the padding is a
   compile-time constant.  */
3553 if (preval == 0 && al <= alloc_align)
3554 {
3555 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
3556 sz += diff;
3557 if (diff && ptr)
3558 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3559 ptr, size_int (diff));
3560 }
/* Otherwise, if the alignment guaranteed so far is insufficient,
   round the pointer up at runtime, or in the sizing pass reserve
   AL - 1 extra bytes for the worst case.  */
3561 else if (al > preval)
3562 {
3563 if (ptr)
3564 {
/* ptr = (ptr + al - 1) & -al, computed in a pointer-sized
   integer type.  */
3565 ptr = fold_convert (pointer_sized_int_node, ptr);
3566 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
3567 build_int_cst (pointer_sized_int_node,
3568 al - 1));
3569 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
3570 build_int_cst (pointer_sized_int_node,
3571 -(HOST_WIDE_INT) al));
3572 ptr = fold_convert (ptr_type_node, ptr);
3573 }
3574 else
3575 sz += al - 1;
3576 }
/* After this array, positions stay AL-aligned only if the element
   size is a compile-time constant; otherwise guarantee nothing
   beyond byte alignment.  */
3577 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3578 preval = al;
3579 else
3580 preval = 1;
/* Assignment pass: store the array's start address into the clause
   decl, then advance PTR past CNT elements to where the next array
   will begin.  */
3581 if (ptr)
3582 {
3583 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
3584 ptr = OMP_CLAUSE_DECL (c);
3585 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
3586 size_binop (MULT_EXPR, cnt,
3587 TYPE_SIZE_UNIT (pointee_type)));
3588 }
3589 }
3590
/* Sizing pass result: CNT copies of all elements, plus the SZ-byte
   prefix (which already includes any padding added above).  */
3591 if (ptr == NULL_TREE)
3592 {
3593 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
3594 if (sz)
3595 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
3596 return eltsz;
3597 }
/* Assignment pass result: pointer just past the last laid-out array.  */
3598 else
3599 return ptr;
3600 }
3601
3602 /* A subroutine of expand_omp_for. Generate code for a parallel
3603 loop with static schedule and no specified chunk size. Given
3604 parameters:
3605
3606 for (V = N1; V cond N2; V += STEP) BODY;
3607
3608 where COND is "<" or ">", we generate pseudocode
3609
3610 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3611 if (cond is <)
3612 adj = STEP - 1;
3613 else
3614 adj = STEP + 1;
3615 if ((__typeof (V)) -1 > 0 && cond is >)
3616 n = -(adj + N2 - N1) / -STEP;
3617 else
3618 n = (adj + N2 - N1) / STEP;
3619 q = n / nthreads;
3620 tt = n % nthreads;
3621 if (threadid < tt) goto L3; else goto L4;
3622 L3:
3623 tt = 0;
3624 q = q + 1;
3625 L4:
3626 s0 = q * threadid + tt;
3627 e0 = s0 + q;
3628 V = s0 * STEP + N1;
3629 if (s0 >= e0) goto L2; else goto L0;
3630 L0:
3631 e = e0 * STEP + N1;
3632 L1:
3633 BODY;
3634 V += STEP;
3635 if (V cond e) goto L1;
3636 L2:
3637 */
3638
3639 static void
expand_omp_for_static_nochunk(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)3640 expand_omp_for_static_nochunk (struct omp_region *region,
3641 struct omp_for_data *fd,
3642 gimple *inner_stmt)
3643 {
3644 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
3645 tree type, itype, vmain, vback;
3646 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3647 basic_block body_bb, cont_bb, collapse_bb = NULL;
3648 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
3649 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
3650 gimple_stmt_iterator gsi, gsip;
3651 edge ep;
3652 bool broken_loop = region->cont == NULL;
3653 tree *counts = NULL;
3654 tree n1, n2, step;
3655 tree reductions = NULL_TREE;
3656 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
3657
3658 itype = type = TREE_TYPE (fd->loop.v);
3659 if (POINTER_TYPE_P (type))
3660 itype = signed_type_for (type);
3661
3662 entry_bb = region->entry;
3663 cont_bb = region->cont;
3664 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3665 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3666 gcc_assert (broken_loop
3667 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3668 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3669 body_bb = single_succ (seq_start_bb);
3670 if (!broken_loop)
3671 {
3672 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3673 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3674 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3675 }
3676 exit_bb = region->exit;
3677
3678 /* Iteration space partitioning goes in ENTRY_BB. */
3679 gsi = gsi_last_nondebug_bb (entry_bb);
3680 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3681 gsip = gsi;
3682 gsi_prev (&gsip);
3683
3684 if (fd->collapse > 1)
3685 {
3686 int first_zero_iter = -1, dummy = -1;
3687 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3688
3689 counts = XALLOCAVEC (tree, fd->collapse);
3690 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3691 fin_bb, first_zero_iter,
3692 dummy_bb, dummy, l2_dom_bb);
3693 t = NULL_TREE;
3694 }
3695 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3696 t = integer_one_node;
3697 else
3698 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3699 fold_convert (type, fd->loop.n1),
3700 fold_convert (type, fd->loop.n2));
3701 if (fd->collapse == 1
3702 && TYPE_UNSIGNED (type)
3703 && (t == NULL_TREE || !integer_onep (t)))
3704 {
3705 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3706 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3707 true, GSI_SAME_STMT);
3708 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3709 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3710 true, GSI_SAME_STMT);
3711 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3712 NULL_TREE, NULL_TREE);
3713 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3714 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3715 expand_omp_regimplify_p, NULL, NULL)
3716 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3717 expand_omp_regimplify_p, NULL, NULL))
3718 {
3719 gsi = gsi_for_stmt (cond_stmt);
3720 gimple_regimplify_operands (cond_stmt, &gsi);
3721 }
3722 ep = split_block (entry_bb, cond_stmt);
3723 ep->flags = EDGE_TRUE_VALUE;
3724 entry_bb = ep->dest;
3725 ep->probability = profile_probability::very_likely ();
3726 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3727 ep->probability = profile_probability::very_unlikely ();
3728 if (gimple_in_ssa_p (cfun))
3729 {
3730 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3731 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3732 !gsi_end_p (gpi); gsi_next (&gpi))
3733 {
3734 gphi *phi = gpi.phi ();
3735 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3736 ep, UNKNOWN_LOCATION);
3737 }
3738 }
3739 gsi = gsi_last_bb (entry_bb);
3740 }
3741
3742 if (fd->lastprivate_conditional)
3743 {
3744 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3745 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3746 if (fd->have_pointer_condtemp)
3747 condtemp = OMP_CLAUSE_DECL (c);
3748 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3749 cond_var = OMP_CLAUSE_DECL (c);
3750 }
3751 if (fd->have_reductemp
3752 /* For scan, we don't want to reinitialize condtemp before the
3753 second loop. */
3754 || (fd->have_pointer_condtemp && !fd->have_scantemp)
3755 || fd->have_nonctrl_scantemp)
3756 {
3757 tree t1 = build_int_cst (long_integer_type_node, 0);
3758 tree t2 = build_int_cst (long_integer_type_node, 1);
3759 tree t3 = build_int_cstu (long_integer_type_node,
3760 (HOST_WIDE_INT_1U << 31) + 1);
3761 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3762 gimple_stmt_iterator gsi2 = gsi_none ();
3763 gimple *g = NULL;
3764 tree mem = null_pointer_node, memv = NULL_TREE;
3765 unsigned HOST_WIDE_INT condtemp_sz = 0;
3766 unsigned HOST_WIDE_INT alloc_align = 0;
3767 if (fd->have_reductemp)
3768 {
3769 gcc_assert (!fd->have_nonctrl_scantemp);
3770 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3771 reductions = OMP_CLAUSE_DECL (c);
3772 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3773 g = SSA_NAME_DEF_STMT (reductions);
3774 reductions = gimple_assign_rhs1 (g);
3775 OMP_CLAUSE_DECL (c) = reductions;
3776 gsi2 = gsi_for_stmt (g);
3777 }
3778 else
3779 {
3780 if (gsi_end_p (gsip))
3781 gsi2 = gsi_after_labels (region->entry);
3782 else
3783 gsi2 = gsip;
3784 reductions = null_pointer_node;
3785 }
3786 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
3787 {
3788 tree type;
3789 if (fd->have_pointer_condtemp)
3790 type = TREE_TYPE (condtemp);
3791 else
3792 type = ptr_type_node;
3793 memv = create_tmp_var (type);
3794 TREE_ADDRESSABLE (memv) = 1;
3795 unsigned HOST_WIDE_INT sz = 0;
3796 tree size = NULL_TREE;
3797 if (fd->have_pointer_condtemp)
3798 {
3799 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3800 sz *= fd->lastprivate_conditional;
3801 condtemp_sz = sz;
3802 }
3803 if (fd->have_nonctrl_scantemp)
3804 {
3805 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3806 gimple *g = gimple_build_call (nthreads, 0);
3807 nthreads = create_tmp_var (integer_type_node);
3808 gimple_call_set_lhs (g, nthreads);
3809 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3810 nthreads = fold_convert (sizetype, nthreads);
3811 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
3812 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
3813 alloc_align, nthreads, NULL,
3814 false);
3815 size = fold_convert (type, size);
3816 }
3817 else
3818 size = build_int_cst (type, sz);
3819 expand_omp_build_assign (&gsi2, memv, size, false);
3820 mem = build_fold_addr_expr (memv);
3821 }
3822 tree t
3823 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3824 9, t1, t2, t2, t3, t1, null_pointer_node,
3825 null_pointer_node, reductions, mem);
3826 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3827 true, GSI_SAME_STMT);
3828 if (fd->have_pointer_condtemp)
3829 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3830 if (fd->have_nonctrl_scantemp)
3831 {
3832 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
3833 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
3834 alloc_align, nthreads, &gsi2, false);
3835 }
3836 if (fd->have_reductemp)
3837 {
3838 gsi_remove (&gsi2, true);
3839 release_ssa_name (gimple_assign_lhs (g));
3840 }
3841 }
3842 switch (gimple_omp_for_kind (fd->for_stmt))
3843 {
3844 case GF_OMP_FOR_KIND_FOR:
3845 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3846 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3847 break;
3848 case GF_OMP_FOR_KIND_DISTRIBUTE:
3849 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3850 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3851 break;
3852 default:
3853 gcc_unreachable ();
3854 }
3855 nthreads = build_call_expr (nthreads, 0);
3856 nthreads = fold_convert (itype, nthreads);
3857 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3858 true, GSI_SAME_STMT);
3859 threadid = build_call_expr (threadid, 0);
3860 threadid = fold_convert (itype, threadid);
3861 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3862 true, GSI_SAME_STMT);
3863
3864 n1 = fd->loop.n1;
3865 n2 = fd->loop.n2;
3866 step = fd->loop.step;
3867 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3868 {
3869 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3870 OMP_CLAUSE__LOOPTEMP_);
3871 gcc_assert (innerc);
3872 n1 = OMP_CLAUSE_DECL (innerc);
3873 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3874 OMP_CLAUSE__LOOPTEMP_);
3875 gcc_assert (innerc);
3876 n2 = OMP_CLAUSE_DECL (innerc);
3877 }
3878 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3879 true, NULL_TREE, true, GSI_SAME_STMT);
3880 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3881 true, NULL_TREE, true, GSI_SAME_STMT);
3882 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3883 true, NULL_TREE, true, GSI_SAME_STMT);
3884
3885 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3886 t = fold_build2 (PLUS_EXPR, itype, step, t);
3887 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3888 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3889 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3890 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3891 fold_build1 (NEGATE_EXPR, itype, t),
3892 fold_build1 (NEGATE_EXPR, itype, step));
3893 else
3894 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3895 t = fold_convert (itype, t);
3896 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3897
3898 q = create_tmp_reg (itype, "q");
3899 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3900 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3901 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3902
3903 tt = create_tmp_reg (itype, "tt");
3904 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3905 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3906 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3907
3908 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3909 gcond *cond_stmt = gimple_build_cond_empty (t);
3910 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3911
3912 second_bb = split_block (entry_bb, cond_stmt)->dest;
3913 gsi = gsi_last_nondebug_bb (second_bb);
3914 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3915
3916 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3917 GSI_SAME_STMT);
3918 gassign *assign_stmt
3919 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3920 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3921
3922 third_bb = split_block (second_bb, assign_stmt)->dest;
3923 gsi = gsi_last_nondebug_bb (third_bb);
3924 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3925
3926 if (fd->have_nonctrl_scantemp)
3927 {
3928 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3929 tree controlp = NULL_TREE, controlb = NULL_TREE;
3930 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3931 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3932 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
3933 {
3934 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
3935 controlb = OMP_CLAUSE_DECL (c);
3936 else
3937 controlp = OMP_CLAUSE_DECL (c);
3938 if (controlb && controlp)
3939 break;
3940 }
3941 gcc_assert (controlp && controlb);
3942 tree cnt = create_tmp_var (sizetype);
3943 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
3944 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3945 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
3946 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
3947 alloc_align, cnt, NULL, true);
3948 tree size = create_tmp_var (sizetype);
3949 expand_omp_build_assign (&gsi, size, sz, false);
3950 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
3951 size, size_int (16384));
3952 expand_omp_build_assign (&gsi, controlb, cmp);
3953 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
3954 NULL_TREE, NULL_TREE);
3955 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3956 fourth_bb = split_block (third_bb, g)->dest;
3957 gsi = gsi_last_nondebug_bb (fourth_bb);
3958 /* FIXME: Once we have allocators, this should use allocator. */
3959 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
3960 gimple_call_set_lhs (g, controlp);
3961 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3962 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
3963 &gsi, true);
3964 gsi_prev (&gsi);
3965 g = gsi_stmt (gsi);
3966 fifth_bb = split_block (fourth_bb, g)->dest;
3967 gsi = gsi_last_nondebug_bb (fifth_bb);
3968
3969 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
3970 gimple_call_set_lhs (g, controlp);
3971 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3972 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
3973 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3974 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3975 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
3976 {
3977 tree tmp = create_tmp_var (sizetype);
3978 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3979 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
3980 TYPE_SIZE_UNIT (pointee_type));
3981 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3982 g = gimple_build_call (alloca_decl, 2, tmp,
3983 size_int (TYPE_ALIGN (pointee_type)));
3984 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
3985 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3986 }
3987
3988 sixth_bb = split_block (fifth_bb, g)->dest;
3989 gsi = gsi_last_nondebug_bb (sixth_bb);
3990 }
3991
3992 t = build2 (MULT_EXPR, itype, q, threadid);
3993 t = build2 (PLUS_EXPR, itype, t, tt);
3994 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3995
3996 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3997 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3998
3999 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
4000 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4001
4002 /* Remove the GIMPLE_OMP_FOR statement. */
4003 gsi_remove (&gsi, true);
4004
4005 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4006 gsi = gsi_start_bb (seq_start_bb);
4007
4008 tree startvar = fd->loop.v;
4009 tree endvar = NULL_TREE;
4010
4011 if (gimple_omp_for_combined_p (fd->for_stmt))
4012 {
4013 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4014 ? gimple_omp_parallel_clauses (inner_stmt)
4015 : gimple_omp_for_clauses (inner_stmt);
4016 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4017 gcc_assert (innerc);
4018 startvar = OMP_CLAUSE_DECL (innerc);
4019 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4020 OMP_CLAUSE__LOOPTEMP_);
4021 gcc_assert (innerc);
4022 endvar = OMP_CLAUSE_DECL (innerc);
4023 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4024 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4025 {
4026 int i;
4027 for (i = 1; i < fd->collapse; i++)
4028 {
4029 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4030 OMP_CLAUSE__LOOPTEMP_);
4031 gcc_assert (innerc);
4032 }
4033 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4034 OMP_CLAUSE__LOOPTEMP_);
4035 if (innerc)
4036 {
4037 /* If needed (distribute parallel for with lastprivate),
4038 propagate down the total number of iterations. */
4039 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4040 fd->loop.n2);
4041 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4042 GSI_CONTINUE_LINKING);
4043 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4044 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4045 }
4046 }
4047 }
4048 t = fold_convert (itype, s0);
4049 t = fold_build2 (MULT_EXPR, itype, t, step);
4050 if (POINTER_TYPE_P (type))
4051 {
4052 t = fold_build_pointer_plus (n1, t);
4053 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4054 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4055 t = fold_convert (signed_type_for (type), t);
4056 }
4057 else
4058 t = fold_build2 (PLUS_EXPR, type, t, n1);
4059 t = fold_convert (TREE_TYPE (startvar), t);
4060 t = force_gimple_operand_gsi (&gsi, t,
4061 DECL_P (startvar)
4062 && TREE_ADDRESSABLE (startvar),
4063 NULL_TREE, false, GSI_CONTINUE_LINKING);
4064 assign_stmt = gimple_build_assign (startvar, t);
4065 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4066 if (cond_var)
4067 {
4068 tree itype = TREE_TYPE (cond_var);
4069 /* For lastprivate(conditional:) itervar, we need some iteration
4070 counter that starts at unsigned non-zero and increases.
4071 Prefer as few IVs as possible, so if we can use startvar
4072 itself, use that, or startvar + constant (those would be
4073 incremented with step), and as last resort use the s0 + 1
4074 incremented by 1. */
4075 if (POINTER_TYPE_P (type)
4076 || TREE_CODE (n1) != INTEGER_CST
4077 || fd->loop.cond_code != LT_EXPR)
4078 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4079 build_int_cst (itype, 1));
4080 else if (tree_int_cst_sgn (n1) == 1)
4081 t = fold_convert (itype, t);
4082 else
4083 {
4084 tree c = fold_convert (itype, n1);
4085 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4086 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4087 }
4088 t = force_gimple_operand_gsi (&gsi, t, false,
4089 NULL_TREE, false, GSI_CONTINUE_LINKING);
4090 assign_stmt = gimple_build_assign (cond_var, t);
4091 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4092 }
4093
4094 t = fold_convert (itype, e0);
4095 t = fold_build2 (MULT_EXPR, itype, t, step);
4096 if (POINTER_TYPE_P (type))
4097 {
4098 t = fold_build_pointer_plus (n1, t);
4099 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4100 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4101 t = fold_convert (signed_type_for (type), t);
4102 }
4103 else
4104 t = fold_build2 (PLUS_EXPR, type, t, n1);
4105 t = fold_convert (TREE_TYPE (startvar), t);
4106 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4107 false, GSI_CONTINUE_LINKING);
4108 if (endvar)
4109 {
4110 assign_stmt = gimple_build_assign (endvar, e);
4111 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4112 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4113 assign_stmt = gimple_build_assign (fd->loop.v, e);
4114 else
4115 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4116 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4117 }
4118 /* Handle linear clause adjustments. */
4119 tree itercnt = NULL_TREE;
4120 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4121 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4122 c; c = OMP_CLAUSE_CHAIN (c))
4123 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4124 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4125 {
4126 tree d = OMP_CLAUSE_DECL (c);
4127 bool is_ref = omp_is_reference (d);
4128 tree t = d, a, dest;
4129 if (is_ref)
4130 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4131 if (itercnt == NULL_TREE)
4132 {
4133 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4134 {
4135 itercnt = fold_build2 (MINUS_EXPR, itype,
4136 fold_convert (itype, n1),
4137 fold_convert (itype, fd->loop.n1));
4138 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
4139 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
4140 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4141 NULL_TREE, false,
4142 GSI_CONTINUE_LINKING);
4143 }
4144 else
4145 itercnt = s0;
4146 }
4147 tree type = TREE_TYPE (t);
4148 if (POINTER_TYPE_P (type))
4149 type = sizetype;
4150 a = fold_build2 (MULT_EXPR, type,
4151 fold_convert (type, itercnt),
4152 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4153 dest = unshare_expr (t);
4154 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4155 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
4156 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4157 false, GSI_CONTINUE_LINKING);
4158 assign_stmt = gimple_build_assign (dest, t);
4159 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4160 }
4161 if (fd->collapse > 1)
4162 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4163
4164 if (!broken_loop)
4165 {
4166 /* The code controlling the sequential loop replaces the
4167 GIMPLE_OMP_CONTINUE. */
4168 gsi = gsi_last_nondebug_bb (cont_bb);
4169 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4170 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4171 vmain = gimple_omp_continue_control_use (cont_stmt);
4172 vback = gimple_omp_continue_control_def (cont_stmt);
4173
4174 if (cond_var)
4175 {
4176 tree itype = TREE_TYPE (cond_var);
4177 tree t2;
4178 if (POINTER_TYPE_P (type)
4179 || TREE_CODE (n1) != INTEGER_CST
4180 || fd->loop.cond_code != LT_EXPR)
4181 t2 = build_int_cst (itype, 1);
4182 else
4183 t2 = fold_convert (itype, step);
4184 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4185 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4186 NULL_TREE, true, GSI_SAME_STMT);
4187 assign_stmt = gimple_build_assign (cond_var, t2);
4188 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4189 }
4190
4191 if (!gimple_omp_for_combined_p (fd->for_stmt))
4192 {
4193 if (POINTER_TYPE_P (type))
4194 t = fold_build_pointer_plus (vmain, step);
4195 else
4196 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4197 t = force_gimple_operand_gsi (&gsi, t,
4198 DECL_P (vback)
4199 && TREE_ADDRESSABLE (vback),
4200 NULL_TREE, true, GSI_SAME_STMT);
4201 assign_stmt = gimple_build_assign (vback, t);
4202 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4203
4204 t = build2 (fd->loop.cond_code, boolean_type_node,
4205 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4206 ? t : vback, e);
4207 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4208 }
4209
4210 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4211 gsi_remove (&gsi, true);
4212
4213 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4214 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4215 }
4216
4217 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4218 gsi = gsi_last_nondebug_bb (exit_bb);
4219 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4220 {
4221 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4222 if (fd->have_reductemp
4223 || ((fd->have_pointer_condtemp || fd->have_scantemp)
4224 && !fd->have_nonctrl_scantemp))
4225 {
4226 tree fn;
4227 if (t)
4228 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4229 else
4230 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4231 gcall *g = gimple_build_call (fn, 0);
4232 if (t)
4233 {
4234 gimple_call_set_lhs (g, t);
4235 if (fd->have_reductemp)
4236 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4237 NOP_EXPR, t),
4238 GSI_SAME_STMT);
4239 }
4240 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4241 }
4242 else
4243 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4244 }
4245 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
4246 && !fd->have_nonctrl_scantemp)
4247 {
4248 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4249 gcall *g = gimple_build_call (fn, 0);
4250 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4251 }
4252 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
4253 {
4254 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4255 tree controlp = NULL_TREE, controlb = NULL_TREE;
4256 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4257 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4258 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4259 {
4260 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4261 controlb = OMP_CLAUSE_DECL (c);
4262 else
4263 controlp = OMP_CLAUSE_DECL (c);
4264 if (controlb && controlp)
4265 break;
4266 }
4267 gcc_assert (controlp && controlb);
4268 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4269 NULL_TREE, NULL_TREE);
4270 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4271 exit1_bb = split_block (exit_bb, g)->dest;
4272 gsi = gsi_after_labels (exit1_bb);
4273 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
4274 controlp);
4275 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4276 exit2_bb = split_block (exit1_bb, g)->dest;
4277 gsi = gsi_after_labels (exit2_bb);
4278 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
4279 controlp);
4280 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4281 exit3_bb = split_block (exit2_bb, g)->dest;
4282 gsi = gsi_after_labels (exit3_bb);
4283 }
4284 gsi_remove (&gsi, true);
4285
4286 /* Connect all the blocks. */
4287 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
4288 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4289 ep = find_edge (entry_bb, second_bb);
4290 ep->flags = EDGE_TRUE_VALUE;
4291 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4292 if (fourth_bb)
4293 {
4294 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
4295 ep->probability
4296 = profile_probability::guessed_always ().apply_scale (1, 2);
4297 ep = find_edge (third_bb, fourth_bb);
4298 ep->flags = EDGE_TRUE_VALUE;
4299 ep->probability
4300 = profile_probability::guessed_always ().apply_scale (1, 2);
4301 ep = find_edge (fourth_bb, fifth_bb);
4302 redirect_edge_and_branch (ep, sixth_bb);
4303 }
4304 else
4305 sixth_bb = third_bb;
4306 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4307 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4308 if (exit1_bb)
4309 {
4310 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
4311 ep->probability
4312 = profile_probability::guessed_always ().apply_scale (1, 2);
4313 ep = find_edge (exit_bb, exit1_bb);
4314 ep->flags = EDGE_TRUE_VALUE;
4315 ep->probability
4316 = profile_probability::guessed_always ().apply_scale (1, 2);
4317 ep = find_edge (exit1_bb, exit2_bb);
4318 redirect_edge_and_branch (ep, exit3_bb);
4319 }
4320
4321 if (!broken_loop)
4322 {
4323 ep = find_edge (cont_bb, body_bb);
4324 if (ep == NULL)
4325 {
4326 ep = BRANCH_EDGE (cont_bb);
4327 gcc_assert (single_succ (ep->dest) == body_bb);
4328 }
4329 if (gimple_omp_for_combined_p (fd->for_stmt))
4330 {
4331 remove_edge (ep);
4332 ep = NULL;
4333 }
4334 else if (fd->collapse > 1)
4335 {
4336 remove_edge (ep);
4337 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4338 }
4339 else
4340 ep->flags = EDGE_TRUE_VALUE;
4341 find_edge (cont_bb, fin_bb)->flags
4342 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4343 }
4344
4345 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4346 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4347 if (fourth_bb)
4348 {
4349 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
4350 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
4351 }
4352 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
4353
4354 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4355 recompute_dominator (CDI_DOMINATORS, body_bb));
4356 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4357 recompute_dominator (CDI_DOMINATORS, fin_bb));
4358 if (exit1_bb)
4359 {
4360 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
4361 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
4362 }
4363
4364 class loop *loop = body_bb->loop_father;
4365 if (loop != entry_bb->loop_father)
4366 {
4367 gcc_assert (broken_loop || loop->header == body_bb);
4368 gcc_assert (broken_loop
4369 || loop->latch == region->cont
4370 || single_pred (loop->latch) == region->cont);
4371 return;
4372 }
4373
4374 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4375 {
4376 loop = alloc_loop ();
4377 loop->header = body_bb;
4378 if (collapse_bb == NULL)
4379 loop->latch = cont_bb;
4380 add_loop (loop, body_bb->loop_father);
4381 }
4382 }
4383
4384 /* Return phi in E->DEST with ARG on edge E. */
4385
4386 static gphi *
find_phi_with_arg_on_edge(tree arg,edge e)4387 find_phi_with_arg_on_edge (tree arg, edge e)
4388 {
4389 basic_block bb = e->dest;
4390
4391 for (gphi_iterator gpi = gsi_start_phis (bb);
4392 !gsi_end_p (gpi);
4393 gsi_next (&gpi))
4394 {
4395 gphi *phi = gpi.phi ();
4396 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4397 return phi;
4398 }
4399
4400 return NULL;
4401 }
4402
4403 /* A subroutine of expand_omp_for. Generate code for a parallel
4404 loop with static schedule and a specified chunk size. Given
4405 parameters:
4406
4407 for (V = N1; V cond N2; V += STEP) BODY;
4408
4409 where COND is "<" or ">", we generate pseudocode
4410
4411 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4412 if (cond is <)
4413 adj = STEP - 1;
4414 else
4415 adj = STEP + 1;
4416 if ((__typeof (V)) -1 > 0 && cond is >)
4417 n = -(adj + N2 - N1) / -STEP;
4418 else
4419 n = (adj + N2 - N1) / STEP;
4420 trip = 0;
4421 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4422 here so that V is defined
4423 if the loop is not entered
4424 L0:
4425 s0 = (trip * nthreads + threadid) * CHUNK;
4426 e0 = min (s0 + CHUNK, n);
4427 if (s0 < n) goto L1; else goto L4;
4428 L1:
4429 V = s0 * STEP + N1;
4430 e = e0 * STEP + N1;
4431 L2:
4432 BODY;
4433 V += STEP;
4434 if (V cond e) goto L2; else goto L3;
4435 L3:
4436 trip += 1;
4437 goto L0;
4438 L4:
4439 */
4440
4441 static void
expand_omp_for_static_chunk(struct omp_region * region,struct omp_for_data * fd,gimple * inner_stmt)4442 expand_omp_for_static_chunk (struct omp_region *region,
4443 struct omp_for_data *fd, gimple *inner_stmt)
4444 {
4445 tree n, s0, e0, e, t;
4446 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4447 tree type, itype, vmain, vback, vextra;
4448 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4449 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4450 gimple_stmt_iterator gsi, gsip;
4451 edge se;
4452 bool broken_loop = region->cont == NULL;
4453 tree *counts = NULL;
4454 tree n1, n2, step;
4455 tree reductions = NULL_TREE;
4456 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4457
4458 itype = type = TREE_TYPE (fd->loop.v);
4459 if (POINTER_TYPE_P (type))
4460 itype = signed_type_for (type);
4461
4462 entry_bb = region->entry;
4463 se = split_block (entry_bb, last_stmt (entry_bb));
4464 entry_bb = se->src;
4465 iter_part_bb = se->dest;
4466 cont_bb = region->cont;
4467 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4468 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4469 gcc_assert (broken_loop
4470 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4471 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4472 body_bb = single_succ (seq_start_bb);
4473 if (!broken_loop)
4474 {
4475 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4476 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4477 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4478 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4479 }
4480 exit_bb = region->exit;
4481
4482 /* Trip and adjustment setup goes in ENTRY_BB. */
4483 gsi = gsi_last_nondebug_bb (entry_bb);
4484 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4485 gsip = gsi;
4486 gsi_prev (&gsip);
4487
4488 if (fd->collapse > 1)
4489 {
4490 int first_zero_iter = -1, dummy = -1;
4491 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4492
4493 counts = XALLOCAVEC (tree, fd->collapse);
4494 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4495 fin_bb, first_zero_iter,
4496 dummy_bb, dummy, l2_dom_bb);
4497 t = NULL_TREE;
4498 }
4499 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4500 t = integer_one_node;
4501 else
4502 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4503 fold_convert (type, fd->loop.n1),
4504 fold_convert (type, fd->loop.n2));
4505 if (fd->collapse == 1
4506 && TYPE_UNSIGNED (type)
4507 && (t == NULL_TREE || !integer_onep (t)))
4508 {
4509 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4510 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4511 true, GSI_SAME_STMT);
4512 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4513 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4514 true, GSI_SAME_STMT);
4515 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4516 NULL_TREE, NULL_TREE);
4517 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4518 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4519 expand_omp_regimplify_p, NULL, NULL)
4520 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4521 expand_omp_regimplify_p, NULL, NULL))
4522 {
4523 gsi = gsi_for_stmt (cond_stmt);
4524 gimple_regimplify_operands (cond_stmt, &gsi);
4525 }
4526 se = split_block (entry_bb, cond_stmt);
4527 se->flags = EDGE_TRUE_VALUE;
4528 entry_bb = se->dest;
4529 se->probability = profile_probability::very_likely ();
4530 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4531 se->probability = profile_probability::very_unlikely ();
4532 if (gimple_in_ssa_p (cfun))
4533 {
4534 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4535 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4536 !gsi_end_p (gpi); gsi_next (&gpi))
4537 {
4538 gphi *phi = gpi.phi ();
4539 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4540 se, UNKNOWN_LOCATION);
4541 }
4542 }
4543 gsi = gsi_last_bb (entry_bb);
4544 }
4545
4546 if (fd->lastprivate_conditional)
4547 {
4548 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4549 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4550 if (fd->have_pointer_condtemp)
4551 condtemp = OMP_CLAUSE_DECL (c);
4552 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4553 cond_var = OMP_CLAUSE_DECL (c);
4554 }
4555 if (fd->have_reductemp || fd->have_pointer_condtemp)
4556 {
4557 tree t1 = build_int_cst (long_integer_type_node, 0);
4558 tree t2 = build_int_cst (long_integer_type_node, 1);
4559 tree t3 = build_int_cstu (long_integer_type_node,
4560 (HOST_WIDE_INT_1U << 31) + 1);
4561 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4562 gimple_stmt_iterator gsi2 = gsi_none ();
4563 gimple *g = NULL;
4564 tree mem = null_pointer_node, memv = NULL_TREE;
4565 if (fd->have_reductemp)
4566 {
4567 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4568 reductions = OMP_CLAUSE_DECL (c);
4569 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4570 g = SSA_NAME_DEF_STMT (reductions);
4571 reductions = gimple_assign_rhs1 (g);
4572 OMP_CLAUSE_DECL (c) = reductions;
4573 gsi2 = gsi_for_stmt (g);
4574 }
4575 else
4576 {
4577 if (gsi_end_p (gsip))
4578 gsi2 = gsi_after_labels (region->entry);
4579 else
4580 gsi2 = gsip;
4581 reductions = null_pointer_node;
4582 }
4583 if (fd->have_pointer_condtemp)
4584 {
4585 tree type = TREE_TYPE (condtemp);
4586 memv = create_tmp_var (type);
4587 TREE_ADDRESSABLE (memv) = 1;
4588 unsigned HOST_WIDE_INT sz
4589 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4590 sz *= fd->lastprivate_conditional;
4591 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4592 false);
4593 mem = build_fold_addr_expr (memv);
4594 }
4595 tree t
4596 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4597 9, t1, t2, t2, t3, t1, null_pointer_node,
4598 null_pointer_node, reductions, mem);
4599 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4600 true, GSI_SAME_STMT);
4601 if (fd->have_pointer_condtemp)
4602 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4603 if (fd->have_reductemp)
4604 {
4605 gsi_remove (&gsi2, true);
4606 release_ssa_name (gimple_assign_lhs (g));
4607 }
4608 }
4609 switch (gimple_omp_for_kind (fd->for_stmt))
4610 {
4611 case GF_OMP_FOR_KIND_FOR:
4612 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4613 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4614 break;
4615 case GF_OMP_FOR_KIND_DISTRIBUTE:
4616 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4617 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4618 break;
4619 default:
4620 gcc_unreachable ();
4621 }
4622 nthreads = build_call_expr (nthreads, 0);
4623 nthreads = fold_convert (itype, nthreads);
4624 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4625 true, GSI_SAME_STMT);
4626 threadid = build_call_expr (threadid, 0);
4627 threadid = fold_convert (itype, threadid);
4628 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4629 true, GSI_SAME_STMT);
4630
4631 n1 = fd->loop.n1;
4632 n2 = fd->loop.n2;
4633 step = fd->loop.step;
4634 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4635 {
4636 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4637 OMP_CLAUSE__LOOPTEMP_);
4638 gcc_assert (innerc);
4639 n1 = OMP_CLAUSE_DECL (innerc);
4640 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4641 OMP_CLAUSE__LOOPTEMP_);
4642 gcc_assert (innerc);
4643 n2 = OMP_CLAUSE_DECL (innerc);
4644 }
4645 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4646 true, NULL_TREE, true, GSI_SAME_STMT);
4647 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4648 true, NULL_TREE, true, GSI_SAME_STMT);
4649 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4650 true, NULL_TREE, true, GSI_SAME_STMT);
4651 tree chunk_size = fold_convert (itype, fd->chunk_size);
4652 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4653 chunk_size
4654 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4655 GSI_SAME_STMT);
4656
4657 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4658 t = fold_build2 (PLUS_EXPR, itype, step, t);
4659 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4660 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4661 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4662 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4663 fold_build1 (NEGATE_EXPR, itype, t),
4664 fold_build1 (NEGATE_EXPR, itype, step));
4665 else
4666 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4667 t = fold_convert (itype, t);
4668 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4669 true, GSI_SAME_STMT);
4670
4671 trip_var = create_tmp_reg (itype, ".trip");
4672 if (gimple_in_ssa_p (cfun))
4673 {
4674 trip_init = make_ssa_name (trip_var);
4675 trip_main = make_ssa_name (trip_var);
4676 trip_back = make_ssa_name (trip_var);
4677 }
4678 else
4679 {
4680 trip_init = trip_var;
4681 trip_main = trip_var;
4682 trip_back = trip_var;
4683 }
4684
4685 gassign *assign_stmt
4686 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4687 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4688
4689 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4690 t = fold_build2 (MULT_EXPR, itype, t, step);
4691 if (POINTER_TYPE_P (type))
4692 t = fold_build_pointer_plus (n1, t);
4693 else
4694 t = fold_build2 (PLUS_EXPR, type, t, n1);
4695 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4696 true, GSI_SAME_STMT);
4697
4698 /* Remove the GIMPLE_OMP_FOR. */
4699 gsi_remove (&gsi, true);
4700
4701 gimple_stmt_iterator gsif = gsi;
4702
4703 /* Iteration space partitioning goes in ITER_PART_BB. */
4704 gsi = gsi_last_bb (iter_part_bb);
4705
4706 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4707 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4708 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4709 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4710 false, GSI_CONTINUE_LINKING);
4711
4712 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4713 t = fold_build2 (MIN_EXPR, itype, t, n);
4714 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4715 false, GSI_CONTINUE_LINKING);
4716
4717 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4718 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4719
4720 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4721 gsi = gsi_start_bb (seq_start_bb);
4722
4723 tree startvar = fd->loop.v;
4724 tree endvar = NULL_TREE;
4725
4726 if (gimple_omp_for_combined_p (fd->for_stmt))
4727 {
4728 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4729 ? gimple_omp_parallel_clauses (inner_stmt)
4730 : gimple_omp_for_clauses (inner_stmt);
4731 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4732 gcc_assert (innerc);
4733 startvar = OMP_CLAUSE_DECL (innerc);
4734 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4735 OMP_CLAUSE__LOOPTEMP_);
4736 gcc_assert (innerc);
4737 endvar = OMP_CLAUSE_DECL (innerc);
4738 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4739 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4740 {
4741 int i;
4742 for (i = 1; i < fd->collapse; i++)
4743 {
4744 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4745 OMP_CLAUSE__LOOPTEMP_);
4746 gcc_assert (innerc);
4747 }
4748 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4749 OMP_CLAUSE__LOOPTEMP_);
4750 if (innerc)
4751 {
4752 /* If needed (distribute parallel for with lastprivate),
4753 propagate down the total number of iterations. */
4754 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4755 fd->loop.n2);
4756 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4757 GSI_CONTINUE_LINKING);
4758 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4759 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4760 }
4761 }
4762 }
4763
4764 t = fold_convert (itype, s0);
4765 t = fold_build2 (MULT_EXPR, itype, t, step);
4766 if (POINTER_TYPE_P (type))
4767 {
4768 t = fold_build_pointer_plus (n1, t);
4769 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4770 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4771 t = fold_convert (signed_type_for (type), t);
4772 }
4773 else
4774 t = fold_build2 (PLUS_EXPR, type, t, n1);
4775 t = fold_convert (TREE_TYPE (startvar), t);
4776 t = force_gimple_operand_gsi (&gsi, t,
4777 DECL_P (startvar)
4778 && TREE_ADDRESSABLE (startvar),
4779 NULL_TREE, false, GSI_CONTINUE_LINKING);
4780 assign_stmt = gimple_build_assign (startvar, t);
4781 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4782 if (cond_var)
4783 {
4784 tree itype = TREE_TYPE (cond_var);
4785 /* For lastprivate(conditional:) itervar, we need some iteration
4786 counter that starts at unsigned non-zero and increases.
4787 Prefer as few IVs as possible, so if we can use startvar
4788 itself, use that, or startvar + constant (those would be
4789 incremented with step), and as last resort use the s0 + 1
4790 incremented by 1. */
4791 if (POINTER_TYPE_P (type)
4792 || TREE_CODE (n1) != INTEGER_CST
4793 || fd->loop.cond_code != LT_EXPR)
4794 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4795 build_int_cst (itype, 1));
4796 else if (tree_int_cst_sgn (n1) == 1)
4797 t = fold_convert (itype, t);
4798 else
4799 {
4800 tree c = fold_convert (itype, n1);
4801 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4802 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4803 }
4804 t = force_gimple_operand_gsi (&gsi, t, false,
4805 NULL_TREE, false, GSI_CONTINUE_LINKING);
4806 assign_stmt = gimple_build_assign (cond_var, t);
4807 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4808 }
4809
4810 t = fold_convert (itype, e0);
4811 t = fold_build2 (MULT_EXPR, itype, t, step);
4812 if (POINTER_TYPE_P (type))
4813 {
4814 t = fold_build_pointer_plus (n1, t);
4815 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4816 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4817 t = fold_convert (signed_type_for (type), t);
4818 }
4819 else
4820 t = fold_build2 (PLUS_EXPR, type, t, n1);
4821 t = fold_convert (TREE_TYPE (startvar), t);
4822 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4823 false, GSI_CONTINUE_LINKING);
4824 if (endvar)
4825 {
4826 assign_stmt = gimple_build_assign (endvar, e);
4827 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4828 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4829 assign_stmt = gimple_build_assign (fd->loop.v, e);
4830 else
4831 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4832 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4833 }
4834 /* Handle linear clause adjustments. */
4835 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4836 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4837 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4838 c; c = OMP_CLAUSE_CHAIN (c))
4839 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4840 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4841 {
4842 tree d = OMP_CLAUSE_DECL (c);
4843 bool is_ref = omp_is_reference (d);
4844 tree t = d, a, dest;
4845 if (is_ref)
4846 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4847 tree type = TREE_TYPE (t);
4848 if (POINTER_TYPE_P (type))
4849 type = sizetype;
4850 dest = unshare_expr (t);
4851 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4852 expand_omp_build_assign (&gsif, v, t);
4853 if (itercnt == NULL_TREE)
4854 {
4855 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4856 {
4857 itercntbias
4858 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4859 fold_convert (itype, fd->loop.n1));
4860 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4861 itercntbias, step);
4862 itercntbias
4863 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4864 NULL_TREE, true,
4865 GSI_SAME_STMT);
4866 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4867 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4868 NULL_TREE, false,
4869 GSI_CONTINUE_LINKING);
4870 }
4871 else
4872 itercnt = s0;
4873 }
4874 a = fold_build2 (MULT_EXPR, type,
4875 fold_convert (type, itercnt),
4876 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4877 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4878 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4879 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4880 false, GSI_CONTINUE_LINKING);
4881 assign_stmt = gimple_build_assign (dest, t);
4882 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4883 }
4884 if (fd->collapse > 1)
4885 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4886
4887 if (!broken_loop)
4888 {
4889 /* The code controlling the sequential loop goes in CONT_BB,
4890 replacing the GIMPLE_OMP_CONTINUE. */
4891 gsi = gsi_last_nondebug_bb (cont_bb);
4892 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4893 vmain = gimple_omp_continue_control_use (cont_stmt);
4894 vback = gimple_omp_continue_control_def (cont_stmt);
4895
4896 if (cond_var)
4897 {
4898 tree itype = TREE_TYPE (cond_var);
4899 tree t2;
4900 if (POINTER_TYPE_P (type)
4901 || TREE_CODE (n1) != INTEGER_CST
4902 || fd->loop.cond_code != LT_EXPR)
4903 t2 = build_int_cst (itype, 1);
4904 else
4905 t2 = fold_convert (itype, step);
4906 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4907 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4908 NULL_TREE, true, GSI_SAME_STMT);
4909 assign_stmt = gimple_build_assign (cond_var, t2);
4910 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4911 }
4912
4913 if (!gimple_omp_for_combined_p (fd->for_stmt))
4914 {
4915 if (POINTER_TYPE_P (type))
4916 t = fold_build_pointer_plus (vmain, step);
4917 else
4918 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4919 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4920 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4921 true, GSI_SAME_STMT);
4922 assign_stmt = gimple_build_assign (vback, t);
4923 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4924
4925 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4926 t = build2 (EQ_EXPR, boolean_type_node,
4927 build_int_cst (itype, 0),
4928 build_int_cst (itype, 1));
4929 else
4930 t = build2 (fd->loop.cond_code, boolean_type_node,
4931 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4932 ? t : vback, e);
4933 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4934 }
4935
4936 /* Remove GIMPLE_OMP_CONTINUE. */
4937 gsi_remove (&gsi, true);
4938
4939 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4940 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4941
4942 /* Trip update code goes into TRIP_UPDATE_BB. */
4943 gsi = gsi_start_bb (trip_update_bb);
4944
4945 t = build_int_cst (itype, 1);
4946 t = build2 (PLUS_EXPR, itype, trip_main, t);
4947 assign_stmt = gimple_build_assign (trip_back, t);
4948 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4949 }
4950
4951 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4952 gsi = gsi_last_nondebug_bb (exit_bb);
4953 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4954 {
4955 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4956 if (fd->have_reductemp || fd->have_pointer_condtemp)
4957 {
4958 tree fn;
4959 if (t)
4960 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4961 else
4962 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4963 gcall *g = gimple_build_call (fn, 0);
4964 if (t)
4965 {
4966 gimple_call_set_lhs (g, t);
4967 if (fd->have_reductemp)
4968 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4969 NOP_EXPR, t),
4970 GSI_SAME_STMT);
4971 }
4972 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4973 }
4974 else
4975 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4976 }
4977 else if (fd->have_pointer_condtemp)
4978 {
4979 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4980 gcall *g = gimple_build_call (fn, 0);
4981 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4982 }
4983 gsi_remove (&gsi, true);
4984
4985 /* Connect the new blocks. */
4986 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4987 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4988
4989 if (!broken_loop)
4990 {
4991 se = find_edge (cont_bb, body_bb);
4992 if (se == NULL)
4993 {
4994 se = BRANCH_EDGE (cont_bb);
4995 gcc_assert (single_succ (se->dest) == body_bb);
4996 }
4997 if (gimple_omp_for_combined_p (fd->for_stmt))
4998 {
4999 remove_edge (se);
5000 se = NULL;
5001 }
5002 else if (fd->collapse > 1)
5003 {
5004 remove_edge (se);
5005 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5006 }
5007 else
5008 se->flags = EDGE_TRUE_VALUE;
5009 find_edge (cont_bb, trip_update_bb)->flags
5010 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5011
5012 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5013 iter_part_bb);
5014 }
5015
5016 if (gimple_in_ssa_p (cfun))
5017 {
5018 gphi_iterator psi;
5019 gphi *phi;
5020 edge re, ene;
5021 edge_var_map *vm;
5022 size_t i;
5023
5024 gcc_assert (fd->collapse == 1 && !broken_loop);
5025
5026 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5027 remove arguments of the phi nodes in fin_bb. We need to create
5028 appropriate phi nodes in iter_part_bb instead. */
5029 se = find_edge (iter_part_bb, fin_bb);
5030 re = single_succ_edge (trip_update_bb);
5031 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5032 ene = single_succ_edge (entry_bb);
5033
5034 psi = gsi_start_phis (fin_bb);
5035 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5036 gsi_next (&psi), ++i)
5037 {
5038 gphi *nphi;
5039 location_t locus;
5040
5041 phi = psi.phi ();
5042 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5043 redirect_edge_var_map_def (vm), 0))
5044 continue;
5045
5046 t = gimple_phi_result (phi);
5047 gcc_assert (t == redirect_edge_var_map_result (vm));
5048
5049 if (!single_pred_p (fin_bb))
5050 t = copy_ssa_name (t, phi);
5051
5052 nphi = create_phi_node (t, iter_part_bb);
5053
5054 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5055 locus = gimple_phi_arg_location_from_edge (phi, se);
5056
5057 /* A special case -- fd->loop.v is not yet computed in
5058 iter_part_bb, we need to use vextra instead. */
5059 if (t == fd->loop.v)
5060 t = vextra;
5061 add_phi_arg (nphi, t, ene, locus);
5062 locus = redirect_edge_var_map_location (vm);
5063 tree back_arg = redirect_edge_var_map_def (vm);
5064 add_phi_arg (nphi, back_arg, re, locus);
5065 edge ce = find_edge (cont_bb, body_bb);
5066 if (ce == NULL)
5067 {
5068 ce = BRANCH_EDGE (cont_bb);
5069 gcc_assert (single_succ (ce->dest) == body_bb);
5070 ce = single_succ_edge (ce->dest);
5071 }
5072 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5073 gcc_assert (inner_loop_phi != NULL);
5074 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5075 find_edge (seq_start_bb, body_bb), locus);
5076
5077 if (!single_pred_p (fin_bb))
5078 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5079 }
5080 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5081 redirect_edge_var_map_clear (re);
5082 if (single_pred_p (fin_bb))
5083 while (1)
5084 {
5085 psi = gsi_start_phis (fin_bb);
5086 if (gsi_end_p (psi))
5087 break;
5088 remove_phi_node (&psi, false);
5089 }
5090
5091 /* Make phi node for trip. */
5092 phi = create_phi_node (trip_main, iter_part_bb);
5093 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5094 UNKNOWN_LOCATION);
5095 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5096 UNKNOWN_LOCATION);
5097 }
5098
5099 if (!broken_loop)
5100 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5101 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5102 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5103 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5104 recompute_dominator (CDI_DOMINATORS, fin_bb));
5105 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5106 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5107 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5108 recompute_dominator (CDI_DOMINATORS, body_bb));
5109
5110 if (!broken_loop)
5111 {
5112 class loop *loop = body_bb->loop_father;
5113 class loop *trip_loop = alloc_loop ();
5114 trip_loop->header = iter_part_bb;
5115 trip_loop->latch = trip_update_bb;
5116 add_loop (trip_loop, iter_part_bb->loop_father);
5117
5118 if (loop != entry_bb->loop_father)
5119 {
5120 gcc_assert (loop->header == body_bb);
5121 gcc_assert (loop->latch == region->cont
5122 || single_pred (loop->latch) == region->cont);
5123 trip_loop->inner = loop;
5124 return;
5125 }
5126
5127 if (!gimple_omp_for_combined_p (fd->for_stmt))
5128 {
5129 loop = alloc_loop ();
5130 loop->header = body_bb;
5131 if (collapse_bb == NULL)
5132 loop->latch = cont_bb;
5133 add_loop (loop, trip_loop);
5134 }
5135 }
5136 }
5137
5138 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
5139 loop. Given parameters:
5140
5141 for (V = N1; V cond N2; V += STEP) BODY;
5142
5143 where COND is "<" or ">", we generate pseudocode
5144
5145 V = N1;
5146 goto L1;
5147 L0:
5148 BODY;
5149 V += STEP;
5150 L1:
5151 if (V cond N2) goto L0; else goto L2;
5152 L2:
5153
5154 For collapsed loops, given parameters:
5155 collapse(3)
5156 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5157 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5158 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5159 BODY;
5160
5161 we generate pseudocode
5162
5163 if (cond3 is <)
5164 adj = STEP3 - 1;
5165 else
5166 adj = STEP3 + 1;
5167 count3 = (adj + N32 - N31) / STEP3;
5168 if (cond2 is <)
5169 adj = STEP2 - 1;
5170 else
5171 adj = STEP2 + 1;
5172 count2 = (adj + N22 - N21) / STEP2;
5173 if (cond1 is <)
5174 adj = STEP1 - 1;
5175 else
5176 adj = STEP1 + 1;
5177 count1 = (adj + N12 - N11) / STEP1;
5178 count = count1 * count2 * count3;
5179 V = 0;
5180 V1 = N11;
5181 V2 = N21;
5182 V3 = N31;
5183 goto L1;
5184 L0:
5185 BODY;
5186 V += 1;
5187 V3 += STEP3;
5188 V2 += (V3 cond3 N32) ? 0 : STEP2;
5189 V3 = (V3 cond3 N32) ? V3 : N31;
5190 V1 += (V2 cond2 N22) ? 0 : STEP1;
5191 V2 = (V2 cond2 N22) ? V2 : N21;
5192 L1:
5193 if (V < count) goto L0; else goto L2;
5194 L2:
5195
5196 */
5197
static void
expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
{
  tree type, t;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  gcond *cond_stmt;
  /* A "broken" loop region has no GIMPLE_OMP_CONTINUE (no loop back edge),
     e.g. because the body never falls through to the continue point.  */
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  /* Effective safelen as a host int; INT_MAX means "unbounded".  */
  int safelen_int = INT_MAX;
  bool dont_vectorize = false;
  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE_SAFELEN);
  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMDUID_);
  tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
			      OMP_CLAUSE_IF);
  tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE_SIMDLEN);
  tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				   OMP_CLAUSE__CONDTEMP_);
  tree n1, n2;
  /* Iteration counter used for lastprivate(conditional:), if any.  */
  tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;

  if (safelen)
    {
      poly_uint64 val;
      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
      if (!poly_int_tree_p (safelen, &val))
	safelen_int = 0;
      else
	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
      /* safelen (1) is handled the same as no safelen at all (0).  */
      if (safelen_int == 1)
	safelen_int = 0;
    }
  /* if (simd: 0) or simdlen (1) means the loop must not be vectorized.  */
  if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
      || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
    {
      safelen_int = 0;
      dont_vectorize = true;
    }
  type = TREE_TYPE (fd->loop.v);
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  /* Identify the L0/L1/L2 blocks of the pseudocode in the function
     comment above; for a broken loop, fabricate L1 by splitting.  */
  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
      l2_bb = BRANCH_EDGE (entry_bb)->dest;
    }
  else
    {
      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
      l2_bb = single_succ (l1_bb);
    }
  exit_bb = region->exit;
  l2_dom_bb = NULL;

  gsi = gsi_last_nondebug_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* Not needed in SSA form right now.  */
  gcc_assert (!gimple_in_ssa_p (cfun));
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;

      /* Compute the per-loop iteration counts for the collapsed nest.  */
      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
    }
  if (l2_dom_bb == NULL)
    l2_dom_bb = l1_bb;

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      /* In a combined construct the bounds come from _looptemp_ clauses
	 computed by the enclosing construct.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  tree step = fd->loop.step;

  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMT_);
  if (is_simt)
    {
      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
      /* Only transform for SIMT when more than one lane can be used.  */
      is_simt = safelen_int > 1;
    }
  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
  if (is_simt)
    {
      /* For SIMT, each lane starts at N1 + LANE * STEP and advances by
	 STEP * VF, distributing iterations across the lanes.  */
      simt_lane = create_tmp_var (unsigned_type_node);
      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
      gimple_call_set_lhs (g, simt_lane);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
				 fold_convert (TREE_TYPE (step), simt_lane));
      n1 = fold_convert (type, n1);
      if (POINTER_TYPE_P (type))
	n1 = fold_build_pointer_plus (n1, offset);
      else
	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));

      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
      if (fd->collapse > 1)
	simt_maxlane = build_one_cst (unsigned_type_node);
      else if (safelen_int < omp_max_simt_vf ())
	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
      tree vf
	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
					unsigned_type_node, 0);
      if (simt_maxlane)
	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
      vf = fold_convert (TREE_TYPE (step), vf);
      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
    }

  /* Emit "V = N1" (and the initializers of the collapsed IVs).  */
  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
  if (fd->collapse > 1)
    {
      if (gimple_omp_for_combined_into_p (fd->for_stmt))
	{
	  gsi_prev (&gsi);
	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
	  gsi_next (&gsi);
	}
      else
	for (i = 0; i < fd->collapse; i++)
	  {
	    tree itype = TREE_TYPE (fd->loops[i].v);
	    if (POINTER_TYPE_P (itype))
	      itype = signed_type_for (itype);
	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  }
    }
  if (cond_var)
    {
      /* Initialize the conditional-lastprivate counter: use N1 directly
	 when it is a positive integer constant in a "<" loop (so it can
	 then be stepped together with V); otherwise start it at 1.  */
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR
	  || tree_int_cst_sgn (n1) != 1)
	expand_omp_build_assign (&gsi, cond_var,
				 build_one_cst (TREE_TYPE (cond_var)));
      else
	expand_omp_build_assign (&gsi, cond_var,
				 fold_convert (TREE_TYPE (cond_var), n1));
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (!broken_loop)
    {
      /* Code to control the increment goes in the CONT_BB.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);

      /* V += STEP.  */
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);

      if (fd->collapse > 1)
	{
	  /* Step the innermost IV, then ripple the "wrap to N1 and bump
	     the next outer IV" logic outwards, as in the pseudocode in
	     the function comment.  */
	  i = fd->collapse - 1;
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      t = fold_convert (sizetype, fd->loops[i].step);
	      t = fold_build_pointer_plus (fd->loops[i].v, t);
	    }
	  else
	    {
	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
				fd->loops[i].step);
	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, t);
	    }
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);

	  for (i = fd->collapse - 1; i > 0; i--)
	    {
	      tree itype = TREE_TYPE (fd->loops[i].v);
	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
	      if (POINTER_TYPE_P (itype2))
		itype2 = signed_type_for (itype2);
	      /* V[i-1] += (V[i] cond N2[i]) ? 0 : STEP[i-1];  */
	      t = fold_convert (itype2, fd->loops[i - 1].step);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      t = build3 (COND_EXPR, itype2,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  build_int_cst (itype2, 0), t);
	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
	      else
		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);

	      /* V[i] = (V[i] cond N2[i]) ? V[i] : N1[i];  */
	      t = fold_convert (itype, fd->loops[i].n1);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      t = build3 (COND_EXPR, itype,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  fd->loops[i].v, t);
	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	    }
	}
      if (cond_var)
	{
	  /* Advance the conditional-lastprivate counter: by 1 in the
	     generic case, by STEP when it tracks V directly (mirrors the
	     initialization choice above).  */
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR
	      || tree_int_cst_sgn (n1) != 1)
	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
			     build_one_cst (TREE_TYPE (cond_var)));
	  else
	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
			     fold_convert (TREE_TYPE (cond_var), step));
	  expand_omp_build_assign (&gsi, cond_var, t);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);
    }

  /* Emit the condition in L1_BB.  */
  gsi = gsi_start_bb (l1_bb);

  t = fold_convert (type, n2);
  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  tree v = fd->loop.v;
  if (DECL_P (v) && TREE_ADDRESSABLE (v))
    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
  cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
  /* Re-gimplify the condition operands if they contain addressable
     variables that need to be loaded first.  */
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }

  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
  if (is_simt)
    {
      gsi = gsi_start_bb (l2_bb);
      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);
    }

  /* Remove GIMPLE_OMP_RETURN.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  remove_edge (FALLTHRU_EDGE (entry_bb));

  if (!broken_loop)
    {
      remove_edge (BRANCH_EDGE (entry_bb));
      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);

      e = BRANCH_EDGE (l1_bb);
      ne = FALLTHRU_EDGE (l1_bb);
      e->flags = EDGE_TRUE_VALUE;
    }
  else
    {
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      ne = single_succ_edge (l1_bb);
      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);

    }
  ne->flags = EDGE_FALSE_VALUE;
  /* Guess the loop back edge is taken 7 out of 8 times.  */
  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
  ne->probability = e->probability.invert ();

  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);

  if (simt_maxlane)
    {
      /* For SIMT, lanes beyond SIMT_MAXLANE skip the loop entirely.  */
      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
				     NULL_TREE, NULL_TREE);
      gsi = gsi_last_bb (entry_bb);
      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
      FALLTHRU_EDGE (entry_bb)->probability
	= profile_probability::guessed_always ().apply_scale (7, 8);
      BRANCH_EDGE (entry_bb)->probability
	= FALLTHRU_EDGE (entry_bb)->probability.invert ();
      l2_dom_bb = entry_bb;
    }
  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);

  if (!broken_loop)
    {
      /* Register the new natural loop and record the simd hints the
	 vectorizer consumes (safelen, simduid, force_vectorize).  */
      class loop *loop = alloc_loop ();
      loop->header = l1_bb;
      loop->latch = cont_bb;
      add_loop (loop, l1_bb->loop_father);
      loop->safelen = safelen_int;
      if (simduid)
	{
	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
	  cfun->has_simduid_loops = true;
	}
      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	 the loop.  */
      if ((flag_tree_loop_vectorize
	   || !global_options_set.x_flag_tree_loop_vectorize)
	  && flag_tree_loop_optimize
	  && loop->safelen > 1)
	{
	  loop->force_vectorize = true;
	  if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
	    {
	      unsigned HOST_WIDE_INT v
		= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
	      if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
		loop->simdlen = v;
	    }
	  cfun->has_force_vectorize_loops = true;
	}
      else if (dont_vectorize)
	loop->dont_vectorize = true;
    }
  else if (simduid)
    cfun->has_simduid_loops = true;
}
5564
5565 /* Taskloop construct is represented after gimplification with
5566 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5567 in between them. This routine expands the outer GIMPLE_OMP_FOR,
5568 which should just compute all the needed loop temporaries
5569 for GIMPLE_OMP_TASK. */
5570
static void
expand_omp_taskloop_for_outer (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  /* The outer GIMPLE_OMP_FOR of a taskloop always wraps a taskloop
     GIMPLE_OMP_TASK and always has a continuation block.  */
  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
	      && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      /* Bias unless both bounds are integer constants with the same
	 sign; only then is the signed range known to be representable
	 in the unsigned iteration type without shifting.  */
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  exit_bb = region->exit;

  gsi = gsi_last_nondebug_bb (entry_bb);
  gimple *for_stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;

      /* Compute the iteration counts of the collapsed loops; this may
	 split off a ZERO_ITER_BB reached when some count is zero.  */
      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);

      if (zero_iter_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter; i < fd->collapse; i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  /* Merge the zero-iterations path back in: split ENTRY_BB
	     just before GSI and route ZERO_ITER_BB to the new block.  */
	  gsi_prev (&gsi);
	  edge e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter_bb));
	}
    }

  /* Compute the start/end values the GOMP_taskloop runtime call will
     see, converting to the runtime iteration type and applying the
     LLONG_MIN bias if one was needed.  */
  tree t0, t1;
  t1 = fd->loop.n2;
  t0 = fd->loop.n1;
  if (POINTER_TYPE_P (TREE_TYPE (t0))
      && TYPE_PRECISION (TREE_TYPE (t0))
	 != TYPE_PRECISION (fd->iter_type))
    {
      /* Avoid casting pointers to integer of a different size.  */
      tree itype = signed_type_for (type);
      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
    }
  else
    {
      t1 = fold_convert (fd->iter_type, t1);
      t0 = fold_convert (fd->iter_type, t0);
    }
  if (bias)
    {
      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
    }

  /* The first two _looptemp_ clauses on the inner GIMPLE_OMP_TASK are
     the start and end temporaries this routine must initialize.  */
  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree startvar = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree endvar = OMP_CLAUSE_DECL (innerc);
  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (innerc);
      /* Skip over the per-loop count temporaries of the collapsed
	 loops to reach a possible extra _looptemp_ clause.  */
      for (i = 1; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	}
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      if (innerc)
	{
	  /* If needed (inner taskloop has lastprivate clause), propagate
	     down the total number of iterations.  */
	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
					     NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
    }

  /* Emit startvar = t0 and endvar = t1 assignments for the task.  */
  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t0);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (endvar, t1);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (for_stmt);
  gsi_remove (&gsi, true);

  /* Likewise the GIMPLE_OMP_CONTINUE in CONT_BB ...  */
  gsi = gsi_last_nondebug_bb (cont_bb);
  gsi_remove (&gsi, true);

  /* ... and the GIMPLE_OMP_RETURN in EXIT_BB.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Straighten out the CFG: only the fallthrough paths remain, since
     the outer construct just computes the loop temporaries once.  */
  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (entry_bb));
  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (cont_bb));
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, region->entry,
			   recompute_dominator (CDI_DOMINATORS, region->entry));
}
5733
5734 /* Taskloop construct is represented after gimplification with
5735 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5736 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5737 GOMP_taskloop{,_ull} function arranges for each task to be given just
5738 a single range of iterations. */
5739
static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  /* No continuation block means the loop body never branches back.  */
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      /* Bias unless both bounds are integer constants with the same
	 sign; only then is the signed range known to be representable
	 in the unsigned iteration type without shifting.  */
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  /* Work out the block layout of the region:
     ENTRY_BB ends in the GIMPLE_OMP_FOR, its fallthrough edge leads to
     BODY_BB and its branch edge to FIN_BB; CONT_BB (if the loop isn't
     broken) branches back to BODY_BB and falls through to FIN_BB.  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  /* N1/N2 are the per-task iteration bounds: GOMP_taskloop{,_ull} has
     stored each task's range into the first two _looptemp_ clauses of
     the GIMPLE_OMP_FOR.  Undo the LLONG_MIN bias if one was applied.  */
  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  if (bias)
    {
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* When combined with an inner construct, initialize that
	 construct's _looptemp_ start/end variables instead of the
	 loop iterator directly.  */
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      /* Also copy the end value into the loop iterator itself, with a
	 conversion if the types merely differ in representation.  */
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* Emit V += STEP and the back-edge test V COND E.  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      /* The body never loops back; the region past the branch edge is
	 dead, and the outer region loses its continuation.  */
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* The inner construct handles the looping; no back edge.  */
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  /* Loop back through the block updating the collapsed
	     iteration variables.  */
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* Register the natural loop we just materialized with the loop
	 tree; leave the latch NULL when a collapse block exists, as
	 the latch is then not unique.  */
      class loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}
5959
5960 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5961 partitioned loop. The lowering here is abstracted, in that the
5962 loop parameters are passed through internal functions, which are
5963 further lowered by oacc_device_lower, once we get to the target
5964 compiler. The loop is of the form:
5965
5966 for (V = B; V LTGT E; V += S) {BODY}
5967
5968 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5969 (constant 0 for no chunking) and we will have a GWV partitioning
5970 mask, specifying dimensions over which the loop is to be
5971 partitioned (see note below). We generate code that looks like
5972 (this ignores tiling):
5973
5974 <entry_bb> [incoming FALL->body, BRANCH->exit]
5975 typedef signedintify (typeof (V)) T; // underlying signed integral type
5976 T range = E - B;
5977 T chunk_no = 0;
5978 T DIR = LTGT == '<' ? +1 : -1;
5979 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5980 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5981
5982 <head_bb> [created by splitting end of entry_bb]
5983 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5984 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5985 if (!(offset LTGT bound)) goto bottom_bb;
5986
5987 <body_bb> [incoming]
5988 V = B + offset;
5989 {BODY}
5990
5991 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5992 offset += step;
5993 if (offset LTGT bound) goto body_bb; [*]
5994
5995 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5996 chunk_no++;
 if (chunk_no < chunk_max) goto head_bb;
5998
5999 <exit_bb> [incoming]
6000 V = B + ((range -/+ 1) / S +/- 1) * S [*]
6001
6002 [*] Needed if V live at end of loop. */
6003
6004 static void
expand_oacc_for(struct omp_region * region,struct omp_for_data * fd)6005 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
6006 {
6007 bool is_oacc_kernels_parallelized
6008 = (lookup_attribute ("oacc kernels parallelized",
6009 DECL_ATTRIBUTES (current_function_decl)) != NULL);
6010 {
6011 bool is_oacc_kernels
6012 = (lookup_attribute ("oacc kernels",
6013 DECL_ATTRIBUTES (current_function_decl)) != NULL);
6014 if (is_oacc_kernels_parallelized)
6015 gcc_checking_assert (is_oacc_kernels);
6016 }
6017 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
6018 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
6019 for SSA specifics, and some are for 'parloops' OpenACC
6020 'kernels'-parallelized specifics. */
6021
6022 tree v = fd->loop.v;
6023 enum tree_code cond_code = fd->loop.cond_code;
6024 enum tree_code plus_code = PLUS_EXPR;
6025
6026 tree chunk_size = integer_minus_one_node;
6027 tree gwv = integer_zero_node;
6028 tree iter_type = TREE_TYPE (v);
6029 tree diff_type = iter_type;
6030 tree plus_type = iter_type;
6031 struct oacc_collapse *counts = NULL;
6032
6033 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6034 == GF_OMP_FOR_KIND_OACC_LOOP);
6035 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6036 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6037
6038 if (POINTER_TYPE_P (iter_type))
6039 {
6040 plus_code = POINTER_PLUS_EXPR;
6041 plus_type = sizetype;
6042 }
6043 for (int ix = fd->collapse; ix--;)
6044 {
6045 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
6046 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
6047 diff_type = diff_type2;
6048 }
6049 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6050 diff_type = signed_type_for (diff_type);
6051 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6052 diff_type = integer_type_node;
6053
6054 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6055 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6056 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
6057 basic_block bottom_bb = NULL;
6058
6059 /* entry_bb has two successors; the branch edge is to the exit
6060 block, fallthrough edge to body. */
6061 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6062 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6063
6064 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
6065 body_bb, or to a block whose only successor is the body_bb. Its
6066 fallthrough successor is the final block (same as the branch
6067 successor of the entry_bb). */
6068 if (cont_bb)
6069 {
6070 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6071 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6072
6073 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6074 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6075 }
6076 else
6077 gcc_assert (!gimple_in_ssa_p (cfun));
6078
6079 /* The exit block only has entry_bb and cont_bb as predecessors. */
6080 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6081
6082 tree chunk_no;
6083 tree chunk_max = NULL_TREE;
6084 tree bound, offset;
6085 tree step = create_tmp_var (diff_type, ".step");
6086 bool up = cond_code == LT_EXPR;
6087 tree dir = build_int_cst (diff_type, up ? +1 : -1);
6088 bool chunking = !gimple_in_ssa_p (cfun);
6089 bool negating;
6090
6091 /* Tiling vars. */
6092 tree tile_size = NULL_TREE;
6093 tree element_s = NULL_TREE;
6094 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6095 basic_block elem_body_bb = NULL;
6096 basic_block elem_cont_bb = NULL;
6097
6098 /* SSA instances. */
6099 tree offset_incr = NULL_TREE;
6100 tree offset_init = NULL_TREE;
6101
6102 gimple_stmt_iterator gsi;
6103 gassign *ass;
6104 gcall *call;
6105 gimple *stmt;
6106 tree expr;
6107 location_t loc;
6108 edge split, be, fte;
6109
6110 /* Split the end of entry_bb to create head_bb. */
6111 split = split_block (entry_bb, last_stmt (entry_bb));
6112 basic_block head_bb = split->dest;
6113 entry_bb = split->src;
6114
6115 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
6116 gsi = gsi_last_nondebug_bb (entry_bb);
6117 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6118 loc = gimple_location (for_stmt);
6119
6120 if (gimple_in_ssa_p (cfun))
6121 {
6122 offset_init = gimple_omp_for_index (for_stmt, 0);
6123 gcc_assert (integer_zerop (fd->loop.n1));
6124 /* The SSA parallelizer does gang parallelism. */
6125 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6126 }
6127
6128 if (fd->collapse > 1 || fd->tiling)
6129 {
6130 gcc_assert (!gimple_in_ssa_p (cfun) && up);
6131 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6132 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
6133 TREE_TYPE (fd->loop.n2), loc);
6134
6135 if (SSA_VAR_P (fd->loop.n2))
6136 {
6137 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6138 true, GSI_SAME_STMT);
6139 ass = gimple_build_assign (fd->loop.n2, total);
6140 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6141 }
6142 }
6143
6144 tree b = fd->loop.n1;
6145 tree e = fd->loop.n2;
6146 tree s = fd->loop.step;
6147
6148 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6149 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6150
6151 /* Convert the step, avoiding possible unsigned->signed overflow. */
6152 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6153 if (negating)
6154 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6155 s = fold_convert (diff_type, s);
6156 if (negating)
6157 s = fold_build1 (NEGATE_EXPR, diff_type, s);
6158 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6159
6160 if (!chunking)
6161 chunk_size = integer_zero_node;
6162 expr = fold_convert (diff_type, chunk_size);
6163 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6164 NULL_TREE, true, GSI_SAME_STMT);
6165
6166 if (fd->tiling)
6167 {
6168 /* Determine the tile size and element step,
6169 modify the outer loop step size. */
6170 tile_size = create_tmp_var (diff_type, ".tile_size");
6171 expr = build_int_cst (diff_type, 1);
6172 for (int ix = 0; ix < fd->collapse; ix++)
6173 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6174 expr = force_gimple_operand_gsi (&gsi, expr, true,
6175 NULL_TREE, true, GSI_SAME_STMT);
6176 ass = gimple_build_assign (tile_size, expr);
6177 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6178
6179 element_s = create_tmp_var (diff_type, ".element_s");
6180 ass = gimple_build_assign (element_s, s);
6181 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6182
6183 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6184 s = force_gimple_operand_gsi (&gsi, expr, true,
6185 NULL_TREE, true, GSI_SAME_STMT);
6186 }
6187
6188 /* Determine the range, avoiding possible unsigned->signed overflow. */
6189 negating = !up && TYPE_UNSIGNED (iter_type);
6190 expr = fold_build2 (MINUS_EXPR, plus_type,
6191 fold_convert (plus_type, negating ? b : e),
6192 fold_convert (plus_type, negating ? e : b));
6193 expr = fold_convert (diff_type, expr);
6194 if (negating)
6195 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6196 tree range = force_gimple_operand_gsi (&gsi, expr, true,
6197 NULL_TREE, true, GSI_SAME_STMT);
6198
6199 chunk_no = build_int_cst (diff_type, 0);
6200 if (chunking)
6201 {
6202 gcc_assert (!gimple_in_ssa_p (cfun));
6203
6204 expr = chunk_no;
6205 chunk_max = create_tmp_var (diff_type, ".chunk_max");
6206 chunk_no = create_tmp_var (diff_type, ".chunk_no");
6207
6208 ass = gimple_build_assign (chunk_no, expr);
6209 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6210
6211 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6212 build_int_cst (integer_type_node,
6213 IFN_GOACC_LOOP_CHUNKS),
6214 dir, range, s, chunk_size, gwv);
6215 gimple_call_set_lhs (call, chunk_max);
6216 gimple_set_location (call, loc);
6217 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6218 }
6219 else
6220 chunk_size = chunk_no;
6221
6222 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6223 build_int_cst (integer_type_node,
6224 IFN_GOACC_LOOP_STEP),
6225 dir, range, s, chunk_size, gwv);
6226 gimple_call_set_lhs (call, step);
6227 gimple_set_location (call, loc);
6228 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6229
6230 /* Remove the GIMPLE_OMP_FOR. */
6231 gsi_remove (&gsi, true);
6232
6233 /* Fixup edges from head_bb. */
6234 be = BRANCH_EDGE (head_bb);
6235 fte = FALLTHRU_EDGE (head_bb);
6236 be->flags |= EDGE_FALSE_VALUE;
6237 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6238
6239 basic_block body_bb = fte->dest;
6240
6241 if (gimple_in_ssa_p (cfun))
6242 {
6243 gsi = gsi_last_nondebug_bb (cont_bb);
6244 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6245
6246 offset = gimple_omp_continue_control_use (cont_stmt);
6247 offset_incr = gimple_omp_continue_control_def (cont_stmt);
6248 }
6249 else
6250 {
6251 offset = create_tmp_var (diff_type, ".offset");
6252 offset_init = offset_incr = offset;
6253 }
6254 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
6255
6256 /* Loop offset & bound go into head_bb. */
6257 gsi = gsi_start_bb (head_bb);
6258
6259 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6260 build_int_cst (integer_type_node,
6261 IFN_GOACC_LOOP_OFFSET),
6262 dir, range, s,
6263 chunk_size, gwv, chunk_no);
6264 gimple_call_set_lhs (call, offset_init);
6265 gimple_set_location (call, loc);
6266 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6267
6268 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6269 build_int_cst (integer_type_node,
6270 IFN_GOACC_LOOP_BOUND),
6271 dir, range, s,
6272 chunk_size, gwv, offset_init);
6273 gimple_call_set_lhs (call, bound);
6274 gimple_set_location (call, loc);
6275 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6276
6277 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
6278 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6279 GSI_CONTINUE_LINKING);
6280
6281 /* V assignment goes into body_bb. */
6282 if (!gimple_in_ssa_p (cfun))
6283 {
6284 gsi = gsi_start_bb (body_bb);
6285
6286 expr = build2 (plus_code, iter_type, b,
6287 fold_convert (plus_type, offset));
6288 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6289 true, GSI_SAME_STMT);
6290 ass = gimple_build_assign (v, expr);
6291 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6292
6293 if (fd->collapse > 1 || fd->tiling)
6294 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
6295
6296 if (fd->tiling)
6297 {
6298 /* Determine the range of the element loop -- usually simply
6299 the tile_size, but could be smaller if the final
6300 iteration of the outer loop is a partial tile. */
6301 tree e_range = create_tmp_var (diff_type, ".e_range");
6302
6303 expr = build2 (MIN_EXPR, diff_type,
6304 build2 (MINUS_EXPR, diff_type, bound, offset),
6305 build2 (MULT_EXPR, diff_type, tile_size,
6306 element_s));
6307 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6308 true, GSI_SAME_STMT);
6309 ass = gimple_build_assign (e_range, expr);
6310 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6311
6312 /* Determine bound, offset & step of inner loop. */
6313 e_bound = create_tmp_var (diff_type, ".e_bound");
6314 e_offset = create_tmp_var (diff_type, ".e_offset");
6315 e_step = create_tmp_var (diff_type, ".e_step");
6316
6317 /* Mark these as element loops. */
6318 tree t, e_gwv = integer_minus_one_node;
6319 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
6320
6321 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6322 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6323 element_s, chunk, e_gwv, chunk);
6324 gimple_call_set_lhs (call, e_offset);
6325 gimple_set_location (call, loc);
6326 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6327
6328 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6329 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6330 element_s, chunk, e_gwv, e_offset);
6331 gimple_call_set_lhs (call, e_bound);
6332 gimple_set_location (call, loc);
6333 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6334
6335 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6336 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6337 element_s, chunk, e_gwv);
6338 gimple_call_set_lhs (call, e_step);
6339 gimple_set_location (call, loc);
6340 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6341
6342 /* Add test and split block. */
6343 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6344 stmt = gimple_build_cond_empty (expr);
6345 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6346 split = split_block (body_bb, stmt);
6347 elem_body_bb = split->dest;
6348 if (cont_bb == body_bb)
6349 cont_bb = elem_body_bb;
6350 body_bb = split->src;
6351
6352 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6353
6354 /* Add a dummy exit for the tiled block when cont_bb is missing. */
6355 if (cont_bb == NULL)
6356 {
6357 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6358 e->probability = profile_probability::even ();
6359 split->probability = profile_probability::even ();
6360 }
6361
6362 /* Initialize the user's loop vars. */
6363 gsi = gsi_start_bb (elem_body_bb);
6364 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
6365 diff_type);
6366 }
6367 }
6368
6369 /* Loop increment goes into cont_bb. If this is not a loop, we
6370 will have spawned threads as if it was, and each one will
6371 execute one iteration. The specification is not explicit about
6372 whether such constructs are ill-formed or not, and they can
6373 occur, especially when noreturn routines are involved. */
6374 if (cont_bb)
6375 {
6376 gsi = gsi_last_nondebug_bb (cont_bb);
6377 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6378 loc = gimple_location (cont_stmt);
6379
6380 if (fd->tiling)
6381 {
6382 /* Insert element loop increment and test. */
6383 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6384 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6385 true, GSI_SAME_STMT);
6386 ass = gimple_build_assign (e_offset, expr);
6387 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6388 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6389
6390 stmt = gimple_build_cond_empty (expr);
6391 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6392 split = split_block (cont_bb, stmt);
6393 elem_cont_bb = split->src;
6394 cont_bb = split->dest;
6395
6396 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6397 split->probability = profile_probability::unlikely ().guessed ();
6398 edge latch_edge
6399 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6400 latch_edge->probability = profile_probability::likely ().guessed ();
6401
6402 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6403 skip_edge->probability = profile_probability::unlikely ().guessed ();
6404 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6405 loop_entry_edge->probability
6406 = profile_probability::likely ().guessed ();
6407
6408 gsi = gsi_for_stmt (cont_stmt);
6409 }
6410
6411 /* Increment offset. */
6412 if (gimple_in_ssa_p (cfun))
6413 expr = build2 (plus_code, iter_type, offset,
6414 fold_convert (plus_type, step));
6415 else
6416 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6417 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6418 true, GSI_SAME_STMT);
6419 ass = gimple_build_assign (offset_incr, expr);
6420 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6421 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6422 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6423
6424 /* Remove the GIMPLE_OMP_CONTINUE. */
6425 gsi_remove (&gsi, true);
6426
6427 /* Fixup edges from cont_bb. */
6428 be = BRANCH_EDGE (cont_bb);
6429 fte = FALLTHRU_EDGE (cont_bb);
6430 be->flags |= EDGE_TRUE_VALUE;
6431 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6432
6433 if (chunking)
6434 {
6435 /* Split the beginning of exit_bb to make bottom_bb. We
6436 need to insert a nop at the start, because splitting is
6437 after a stmt, not before. */
6438 gsi = gsi_start_bb (exit_bb);
6439 stmt = gimple_build_nop ();
6440 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6441 split = split_block (exit_bb, stmt);
6442 bottom_bb = split->src;
6443 exit_bb = split->dest;
6444 gsi = gsi_last_bb (bottom_bb);
6445
6446 /* Chunk increment and test goes into bottom_bb. */
6447 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6448 build_int_cst (diff_type, 1));
6449 ass = gimple_build_assign (chunk_no, expr);
6450 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6451
6452 /* Chunk test at end of bottom_bb. */
6453 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6454 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6455 GSI_CONTINUE_LINKING);
6456
6457 /* Fixup edges from bottom_bb. */
6458 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6459 split->probability = profile_probability::unlikely ().guessed ();
6460 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6461 latch_edge->probability = profile_probability::likely ().guessed ();
6462 }
6463 }
6464
6465 gsi = gsi_last_nondebug_bb (exit_bb);
6466 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6467 loc = gimple_location (gsi_stmt (gsi));
6468
6469 if (!gimple_in_ssa_p (cfun))
6470 {
6471 /* Insert the final value of V, in case it is live. This is the
6472 value for the only thread that survives past the join. */
6473 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6474 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6475 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6476 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6477 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6478 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6479 true, GSI_SAME_STMT);
6480 ass = gimple_build_assign (v, expr);
6481 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6482 }
6483
6484 /* Remove the OMP_RETURN. */
6485 gsi_remove (&gsi, true);
6486
6487 if (cont_bb)
6488 {
6489 /* We now have one, two or three nested loops. Update the loop
6490 structures. */
6491 class loop *parent = entry_bb->loop_father;
6492 class loop *body = body_bb->loop_father;
6493
6494 if (chunking)
6495 {
6496 class loop *chunk_loop = alloc_loop ();
6497 chunk_loop->header = head_bb;
6498 chunk_loop->latch = bottom_bb;
6499 add_loop (chunk_loop, parent);
6500 parent = chunk_loop;
6501 }
6502 else if (parent != body)
6503 {
6504 gcc_assert (body->header == body_bb);
6505 gcc_assert (body->latch == cont_bb
6506 || single_pred (body->latch) == cont_bb);
6507 parent = NULL;
6508 }
6509
6510 if (parent)
6511 {
6512 class loop *body_loop = alloc_loop ();
6513 body_loop->header = body_bb;
6514 body_loop->latch = cont_bb;
6515 add_loop (body_loop, parent);
6516
6517 if (fd->tiling)
6518 {
6519 /* Insert tiling's element loop. */
6520 class loop *inner_loop = alloc_loop ();
6521 inner_loop->header = elem_body_bb;
6522 inner_loop->latch = elem_cont_bb;
6523 add_loop (inner_loop, body_loop);
6524 }
6525 }
6526 }
6527 }
6528
/* Expand the OMP loop defined by REGION.  Dispatches to the
   specialized expanders depending on the loop kind (simd, OpenACC,
   taskloop, static schedule) and otherwise falls through to the
   generic libgomp-driven expansion, computing which GOMP_loop_*
   runtime entry points to call.  INNER_STMT is the statement of an
   inner construct this loop was combined with, if any.  */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  /* One omp_for_data_loop per collapsed loop dimension.  */
  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
	      * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;
  region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;

  /* Lowering marked the region's edges abnormal to keep it intact;
     clear that on the entry (and continue) conditional edges now.  */
  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      /* Static schedules without ordered can be expanded inline,
	 with no runtime scheduling calls.  */
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      /* Generic expansion: pick the GOMP_loop_*_start/_next builtin
	 pair via FN_INDEX, an offset from the *_STATIC_* builtins.
	 SCHED is the numeric schedule encoding passed to the combined
	 GOMP_loop_start entry points when reduction or conditional
	 lastprivate temporaries are present; NOTE(review): the values
	 (0-4, plus what looks like a monotonic flag in bit 31) must
	 stay in sync with libgomp's schedule encoding — confirm
	 against libgomp before changing.  */
      int fn_index, start_ix, next_ix;
      unsigned HOST_WIDE_INT sched = 0;
      tree sched_arg = NULL_TREE;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
	      && fd.lastprivate_conditional == 0)
	    {
	      gcc_assert (!fd.have_ordered);
	      fn_index = 6;
	      sched = 4;
	    }
	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		   && !fd.have_ordered
		   && fd.lastprivate_conditional == 0)
	    fn_index = 7;
	  else
	    {
	      fn_index = 3;
	      sched = (HOST_WIDE_INT_1U << 31);
	    }
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
	      && !fd.have_ordered
	      && fd.lastprivate_conditional == 0)
	    {
	      /* Nonmonotonic dynamic/guided variants.  */
	      fn_index = 3 + fd.sched_kind;
	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
	      break;
	    }
	  fn_index = fd.sched_kind;
	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
	  sched += (HOST_WIDE_INT_1U << 31);
	  break;
	case OMP_CLAUSE_SCHEDULE_STATIC:
	  /* Only reached for static with ordered; chunkless static
	     was handled inline above.  */
	  gcc_assert (fd.have_ordered);
	  fn_index = 0;
	  sched = (HOST_WIDE_INT_1U << 31) + 1;
	  break;
	default:
	  gcc_unreachable ();
	}
      /* The *_ordered_* builtin variants live 8 slots further on.  */
      if (!fd.ordered)
	fn_index += fd.have_ordered * 8;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.have_reductemp || fd.have_pointer_condtemp)
	{
	  /* Reductions or conditional lastprivate need the combined
	     GOMP_loop_start entry points that take SCHED_ARG.  */
	  if (fd.ordered)
	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
	  else if (fd.have_ordered)
	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
	  else
	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
	  sched_arg = build_int_cstu (long_integer_type_node, sched);
	  if (!fd.chunk_size)
	    fd.chunk_size = integer_zero_node;
	}
      /* Switch to the unsigned long long builtin family if needed.  */
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, sched_arg,
			      inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}
6671
/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

	v = GOMP_sections_start (n);
    L0:
	switch (v)
	  {
	  case 0:
	    goto L2;
	  case 1:
	    section 1;
	    goto L1;
	  case 2:
	    ...
	  case n:
	    ...
	  default:
	    abort ();
	  }
    L1:
	v = GOMP_sections_next ();
	goto L0;
    L2:
	reduction;

    If this is a combined parallel sections, replace the call to
    GOMP_sections_start with call to GOMP_sections_next.  */

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  /* Find the label L2 that the '0' switch case (no more work) jumps to.  */
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  Pick the first
	 successor of L0 whose last statement is not a section as the
	 '0' case target.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_nondebug_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	l2 = gimple_block_label (e->dest);
      else
	FOR_EACH_EDGE (e, ei, l0_bb->succs)
	  {
	    si = gsi_last_nondebug_bb (e->dest);
	    if (gsi_end_p (si)
		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	      {
		l2 = gimple_block_label (e->dest);
		break;
	      }
	  }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_nondebug_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  tree clauses = gimple_omp_sections_clauses (sections_stmt);
  tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
  tree cond_var = NULL_TREE;
  if (reductmp || condtmp)
    {
      /* Reduction or conditional-lastprivate temporaries require the
	 GOMP_sections2_start entry point, which additionally takes
	 the reduction array and scratch memory.  */
      tree reductions = null_pointer_node, mem = null_pointer_node;
      tree memv = NULL_TREE, condtemp = NULL_TREE;
      gimple_stmt_iterator gsi = gsi_none ();
      gimple *g = NULL;
      if (reductmp)
	{
	  reductions = OMP_CLAUSE_DECL (reductmp);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (reductmp) = reductions;
	  /* Insert at the defining statement so the value is available.  */
	  gsi = gsi_for_stmt (g);
	}
      else
	gsi = si;
      if (condtmp)
	{
	  condtemp = OMP_CLAUSE_DECL (condtmp);
	  tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
				    OMP_CLAUSE__CONDTEMP_);
	  cond_var = OMP_CLAUSE_DECL (c);
	  tree type = TREE_TYPE (condtemp);
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  /* Size the scratch area by the number of conditional
	     lastprivate clauses.  */
	  unsigned cnt = 0;
	  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
		&& OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
	      ++cnt;
	  unsigned HOST_WIDE_INT sz
	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
				   false);
	  mem = build_fold_addr_expr (memv);
	}
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
      stmt = gimple_build_call (u, 3, t, reductions, mem);
      gimple_call_set_lhs (stmt, vin);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      if (condtmp)
	{
	  expand_omp_build_assign (&gsi, condtemp, memv, false);
	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
			   vin, build_one_cst (TREE_TYPE (cond_var)));
	  expand_omp_build_assign (&gsi, cond_var, t, false);
	}
      if (reductmp)
	{
	  /* The old defining statement of the reduction temp is dead now.  */
	  gsi_remove (&gsi, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  else if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
	 call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  if (!reductmp && !condtmp)
    {
      gimple_call_set_lhs (stmt, vin);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
    }
  /* Remove the GIMPLE_OMP_SECTIONS statement itself.  */
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_nondebug_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      /* VMAIN is switched on; VNEXT receives GOMP_sections_next's result.  */
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
	{
	  --i;
	  --casei;
	  continue;
	}

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      /* Remove the GIMPLE_OMP_SECTION marker of this section.  */
      si = gsi_last_nondebug_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
	continue;

      /* Remove the section's GIMPLE_OMP_RETURN as well.  */
      si = gsi_last_nondebug_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_nondebug_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      if (cond_var)
	{
	  /* Keep the conditional-lastprivate iteration counter in step.  */
	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
			   vnext, build_one_cst (TREE_TYPE (cond_var)));
	  expand_omp_build_assign (&si, cond_var, t, false);
	}
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_nondebug_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}
6958
6959 /* Expand code for an OpenMP single directive. We've already expanded
6960 much of the code, here we simply place the GOMP_barrier call. */
6961
6962 static void
expand_omp_single(struct omp_region * region)6963 expand_omp_single (struct omp_region *region)
6964 {
6965 basic_block entry_bb, exit_bb;
6966 gimple_stmt_iterator si;
6967
6968 entry_bb = region->entry;
6969 exit_bb = region->exit;
6970
6971 si = gsi_last_nondebug_bb (entry_bb);
6972 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6973 gsi_remove (&si, true);
6974 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6975
6976 si = gsi_last_nondebug_bb (exit_bb);
6977 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6978 {
6979 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6980 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6981 }
6982 gsi_remove (&si, true);
6983 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6984 }
6985
6986 /* Generic expansion for OpenMP synchronization directives: master,
6987 ordered and critical. All we need to do here is remove the entry
6988 and exit markers for REGION. */
6989
6990 static void
expand_omp_synch(struct omp_region * region)6991 expand_omp_synch (struct omp_region *region)
6992 {
6993 basic_block entry_bb, exit_bb;
6994 gimple_stmt_iterator si;
6995
6996 entry_bb = region->entry;
6997 exit_bb = region->exit;
6998
6999 si = gsi_last_nondebug_bb (entry_bb);
7000 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
7001 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
7002 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
7003 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
7004 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
7005 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
7006 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
7007 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
7008 {
7009 expand_omp_taskreg (region);
7010 return;
7011 }
7012 gsi_remove (&si, true);
7013 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7014
7015 if (exit_bb)
7016 {
7017 si = gsi_last_nondebug_bb (exit_bb);
7018 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
7019 gsi_remove (&si, true);
7020 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
7021 }
7022 }
7023
7024 /* Translate enum omp_memory_order to enum memmodel. The two enums
7025 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
7026 is 0. */
7027
7028 static enum memmodel
omp_memory_order_to_memmodel(enum omp_memory_order mo)7029 omp_memory_order_to_memmodel (enum omp_memory_order mo)
7030 {
7031 switch (mo)
7032 {
7033 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
7034 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
7035 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
7036 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
7037 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
7038 default: gcc_unreachable ();
7039 }
7040 }
7041
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  LOAD_BB holds the
   GIMPLE_OMP_ATOMIC_LOAD, ADDR is the location read, LOADED_VAL the
   destination and INDEX is log2 of the size of the data type (used to
   pick the __atomic_load_N builtin).  Returns false if no suitable
   builtin exists for the target.  */

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  /* Select __atomic_load_{1,2,4,8,16} from INDEX.  */
  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  /* Build loaded_val = __atomic_load_N (addr, mo), honoring the
     directive's memory order.  */
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  call = build_call_expr_loc (loc, decl, 2, addr, mo);
  /* The builtin returns an integral type; view-convert when the
     destination has a different representation (e.g. floats).  */
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  /* The call replaces the GIMPLE_OMP_ATOMIC_LOAD.  */
  gsi_remove (&gsi, true);

  /* Also drop the paired GIMPLE_OMP_ATOMIC_STORE in the successor block.  */
  store_bb = single_succ (load_bb);
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
7093
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.  LOAD_BB holds the
   GIMPLE_OMP_ATOMIC_LOAD, its single successor the
   GIMPLE_OMP_ATOMIC_STORE; STORED_VAL is the value written to ADDR
   and INDEX is log2 of the size of the data type.  If the loaded
   value is still needed this becomes an atomic exchange instead.
   Returns false if no suitable builtin exists.  */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  /* Select __atomic_{exchange,store}_{1,2,4,8,16} from INDEX.  */
  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  /* View-convert a non-integral stored value to the builtin's
     argument type.  */
  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  /* Honor the memory order from the directive.  */
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
  if (exchange)
    {
      /* For exchange, the previous value flows into LOADED_VAL.  */
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  /* The call replaces the GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_nondebug_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
7168
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  /* Locate the single assignment in STORE_BB, skipping debug stmts.  */
  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
	return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  /* NEED_NEW: the updated value is used afterwards (x = expr form);
     NEED_OLD: the pre-update value is used (v = x; x OP= expr form).
     At most one of them can be set.  */
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  enum omp_memory_order omo
    = gimple_omp_atomic_memory_order (last_stmt (load_bb));
  enum memmodel mo = omp_memory_order_to_memmodel (omo);
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  The loaded value
     must be one operand; for non-commutative codes it must be the
     first one.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  /* Pick OP_FETCH when the new value is needed, FETCH_OP otherwise.  */
  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implements compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL, mo));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  /* The builtin call replaces the GIMPLE_OMP_ATOMIC_LOAD.  */
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_STORE and the fetch-op assignment
     that fed it; the builtin subsumes both.  */
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}
7318
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	 // with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   LOAD_BB ends with the GIMPLE_OMP_ATOMIC_LOAD, STORE_BB with the matching
   GIMPLE_OMP_ATOMIC_STORE; ADDR, LOADED_VAL and STORED_VAL are their
   operands.  INDEX is log2 of the size of the data type, and thus usable to
   find the index of the builtin decl.  Returns false (leaving the IL
   untouched) if the target has no usable compare-and-swap builtin for this
   size, so the caller can fall back to a mutex.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr, atype;
  gimple_stmt_iterator si;
  /* LOAD_BB's single successor becomes the header of the CAS retry loop.  */
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
     order to use the RELAXED memory model effectively.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  atype = type;
  /* ITYPE is the integral type the CAS builtin operates on.  */
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      atype = itype;
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  /* Prefer the __atomic_load_N builtin for the initial load; fall back to
     a plain MEM_REF when the target doesn't provide it.  */
  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (atype,
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    {
      tree off
	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
						      true), 0);
      initial = build2 (MEM_REF, atype, iaddr, off);
    }

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  In SSA form this becomes the
     loop-header PHI argument on the entry edge; the back-edge argument is
     filled in once the CAS result exists.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      /* The original type was not integral; expose LOADED_VAL in the loop
	 header as a VIEW_CONVERT_EXPR of the integral LOADEDI.  */
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  /* Drop the GIMPLE_OMP_ATOMIC_LOAD statement itself.  */
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  /* View-convert the value to store to the integral type when needed.  */
  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      /* Outside of SSA, save the pre-CAS value and copy the CAS result
	 into LOADEDI for the next iteration of the retry loop.  */
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  The existing fall-through edge becomes the loop-exit
     (CAS succeeded) edge; a new back edge retries on failure.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  /* Register the new retry loop with the loop tree.  */
  class loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
7515
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      GOMP_atomic_start ();
      *addr = rhs;
      GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val) with
   *addr = stored_val;

   Always returns true: this is the fallback that cannot fail.  */

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  /* Replace the GIMPLE_OMP_ATOMIC_LOAD ending LOAD_BB with a call to
     GOMP_atomic_start () followed by a plain load.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Build *ADDR with LOADED_VAL's type, rewriting the MEM_REF's offset
     operand so its pointer type matches that type.  */
  tree mem = build_simple_mem_ref (addr);
  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
  TREE_OPERAND (mem, 1)
    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
						 true),
		    TREE_OPERAND (mem, 1));
  stmt = gimple_build_assign (loaded_val, mem);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* Replace the GIMPLE_OMP_ATOMIC_STORE ending STORE_BB with a plain store
     followed by a call to GOMP_atomic_end ().  */
  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
  return true;
}
7580
/* Expand an GIMPLE_OMP_ATOMIC statement.  We try to expand
   using expand_omp_atomic_fetch_op.  If it failed, we try to
   call expand_omp_atomic_pipeline, and if it fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1 ().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  INDEX is the log2
     of the byte size; only power-of-two sizes from 1 to 16 bytes qualify.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  If the stored value is the very tree that was
	     loaded, the location is not modified — this is a plain read.  */
	  scalar_mode smode;
	  if (loaded_val == stored_val
	      && (is_int_mode (TYPE_MODE (type), &smode)
		  || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.  Requires the store block to immediately follow
	     the load block and to start with the store statement.  */
	  if ((is_int_mode (TYPE_MODE (type), &smode)
	       || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}
7647
7648 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7649 at REGION_EXIT. */
7650
7651 static void
mark_loops_in_oacc_kernels_region(basic_block region_entry,basic_block region_exit)7652 mark_loops_in_oacc_kernels_region (basic_block region_entry,
7653 basic_block region_exit)
7654 {
7655 class loop *outer = region_entry->loop_father;
7656 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7657
7658 /* Don't parallelize the kernels region if it contains more than one outer
7659 loop. */
7660 unsigned int nr_outer_loops = 0;
7661 class loop *single_outer = NULL;
7662 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
7663 {
7664 gcc_assert (loop_outer (loop) == outer);
7665
7666 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7667 continue;
7668
7669 if (region_exit != NULL
7670 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7671 continue;
7672
7673 nr_outer_loops++;
7674 single_outer = loop;
7675 }
7676 if (nr_outer_loops != 1)
7677 return;
7678
7679 for (class loop *loop = single_outer->inner;
7680 loop != NULL;
7681 loop = loop->inner)
7682 if (loop->next)
7683 return;
7684
7685 /* Mark the loops in the region. */
7686 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
7687 loop->in_oacc_kernels_region = true;
7688 }
7689
/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  /* Array type uint32_t[3] used for the grid and work-group size fields.  */
  tree kernel_dim_array_type;
  /* FIELD_DECL "ndim": number of dimensions actually used.  */
  tree kernel_lattrs_dimnum_decl;
  /* FIELD_DECL "grid_size": per-dimension grid sizes.  */
  tree kernel_lattrs_grid_decl;
  /* FIELD_DECL "group_size": per-dimension work-group sizes.  */
  tree kernel_lattrs_group_decl;
  /* RECORD_TYPE "__gomp_kernel_launch_attributes" made of the above.  */
  tree kernel_launch_attributes_type;
};

/* Lazily created singleton holding the trees above; built by
   grid_create_kernel_launch_attr_types.  */

static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7702
/* Create types used to pass kernel launch attributes to target.
   Idempotent: all trees are built once and cached in GRID_ATTR_TREES.  */

static void
grid_create_kernel_launch_attr_types (void)
{
  if (grid_attr_trees)
    return;
  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();

  /* uint32_t array with index range 0..2, i.e. three dimensions.  */
  tree dim_arr_index_type
    = build_index_type (build_int_cst (integer_type_node, 2));
  grid_attr_trees->kernel_dim_array_type
    = build_array_type (uint32_type_node, dim_arr_index_type);

  /* Build struct __gomp_kernel_launch_attributes
       { uint32_t ndim; uint32_t grid_size[3]; uint32_t group_size[3]; }.
     The FIELD_DECLs are chained group -> grid -> ndim before the struct is
     finished.  */
  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
  grid_attr_trees->kernel_lattrs_dimnum_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
		  uint32_type_node);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;

  grid_attr_trees->kernel_lattrs_grid_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
    = grid_attr_trees->kernel_lattrs_dimnum_decl;
  grid_attr_trees->kernel_lattrs_group_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
    = grid_attr_trees->kernel_lattrs_grid_decl;
  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
			 "__gomp_kernel_launch_attributes",
			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
}
7737
7738 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7739 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7740 of type uint32_type_node. */
7741
7742 static void
grid_insert_store_range_dim(gimple_stmt_iterator * gsi,tree range_var,tree fld_decl,int index,tree value)7743 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7744 tree fld_decl, int index, tree value)
7745 {
7746 tree ref = build4 (ARRAY_REF, uint32_type_node,
7747 build3 (COMPONENT_REF,
7748 grid_attr_trees->kernel_dim_array_type,
7749 range_var, fld_decl, NULL_TREE),
7750 build_int_cst (integer_type_node, index),
7751 NULL_TREE, NULL_TREE);
7752 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7753 }
7754
/* Return a tree representation of a pointer to a structure with grid and
   work-group size information.  Statements filling that information will be
   inserted before GSI, TGT_STMT is the target statement which has the
   necessary information in it.  */

static tree
grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
				   gomp_target *tgt_stmt)
{
  grid_create_kernel_launch_attr_types ();
  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
				"__kernel_launch_attrs");

  /* Store each _griddim_ clause's grid and group sizes into the respective
     array slots, remembering the highest dimension index seen.  */
  unsigned max_dim = 0;
  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
       clause;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
	continue;

      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
      max_dim = MAX (dim, max_dim);

      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_grid_decl,
				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_group_decl,
				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
    }

  /* Dimensions are numbered from zero, so the count is MAX_DIM + 1.  */
  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
  gcc_checking_assert (max_dim <= 2);
  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
		     GSI_SAME_STMT);
  /* The structure's address is taken below, so it must live in memory.  */
  TREE_ADDRESSABLE (lattrs) = 1;
  return build_fold_addr_expr (lattrs);
}
7796
7797 /* Build target argument identifier from the DEVICE identifier, value
7798 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7799
7800 static tree
get_target_argument_identifier_1(int device,bool subseqent_param,int id)7801 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7802 {
7803 tree t = build_int_cst (integer_type_node, device);
7804 if (subseqent_param)
7805 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7806 build_int_cst (integer_type_node,
7807 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7808 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7809 build_int_cst (integer_type_node, id));
7810 return t;
7811 }
7812
7813 /* Like above but return it in type that can be directly stored as an element
7814 of the argument array. */
7815
7816 static tree
get_target_argument_identifier(int device,bool subseqent_param,int id)7817 get_target_argument_identifier (int device, bool subseqent_param, int id)
7818 {
7819 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7820 return fold_convert (ptr_type_node, t);
7821 }
7822
7823 /* Return a target argument consisting of DEVICE identifier, value identifier
7824 ID, and the actual VALUE. */
7825
7826 static tree
get_target_argument_value(gimple_stmt_iterator * gsi,int device,int id,tree value)7827 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7828 tree value)
7829 {
7830 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7831 fold_convert (integer_type_node, value),
7832 build_int_cst (unsigned_type_node,
7833 GOMP_TARGET_ARG_VALUE_SHIFT));
7834 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7835 get_target_argument_identifier_1 (device, false, id));
7836 t = fold_convert (ptr_type_node, t);
7837 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7838 }
7839
7840 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7841 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
7842 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7843 arguments. */
7844
7845 static void
push_target_argument_according_to_value(gimple_stmt_iterator * gsi,int device,int id,tree value,vec<tree> * args)7846 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7847 int id, tree value, vec <tree> *args)
7848 {
7849 if (tree_fits_shwi_p (value)
7850 && tree_to_shwi (value) > -(1 << 15)
7851 && tree_to_shwi (value) < (1 << 15))
7852 args->quick_push (get_target_argument_value (gsi, device, id, value));
7853 else
7854 {
7855 args->quick_push (get_target_argument_identifier (device, true, id));
7856 value = fold_convert (ptr_type_node, value);
7857 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7858 GSI_SAME_STMT);
7859 args->quick_push (value);
7860 }
7861 }
7862
7863 /* Create an array of arguments that is then passed to GOMP_target. */
7864
7865 static tree
get_target_arguments(gimple_stmt_iterator * gsi,gomp_target * tgt_stmt)7866 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7867 {
7868 auto_vec <tree, 6> args;
7869 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7870 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7871 if (c)
7872 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7873 else
7874 t = integer_minus_one_node;
7875 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7876 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7877
7878 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7879 if (c)
7880 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7881 else
7882 t = integer_minus_one_node;
7883 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7884 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7885 &args);
7886
7887 /* Add HSA-specific grid sizes, if available. */
7888 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7889 OMP_CLAUSE__GRIDDIM_))
7890 {
7891 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7892 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7893 args.quick_push (t);
7894 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7895 }
7896
7897 /* Produce more, perhaps device specific, arguments here. */
7898
7899 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7900 args.length () + 1),
7901 ".omp_target_args");
7902 for (unsigned i = 0; i < args.length (); i++)
7903 {
7904 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7905 build_int_cst (integer_type_node, i),
7906 NULL_TREE, NULL_TREE);
7907 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7908 GSI_SAME_STMT);
7909 }
7910 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7911 build_int_cst (integer_type_node, args.length ()),
7912 NULL_TREE, NULL_TREE);
7913 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7914 GSI_SAME_STMT);
7915 TREE_ADDRESSABLE (argarray) = 1;
7916 return build_fold_addr_expr (argarray);
7917 }
7918
7919 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7920
7921 static void
expand_omp_target(struct omp_region * region)7922 expand_omp_target (struct omp_region *region)
7923 {
7924 basic_block entry_bb, exit_bb, new_bb;
7925 struct function *child_cfun;
7926 tree child_fn, block, t;
7927 gimple_stmt_iterator gsi;
7928 gomp_target *entry_stmt;
7929 gimple *stmt;
7930 edge e;
7931 bool offloaded;
7932 int target_kind;
7933
7934 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7935 target_kind = gimple_omp_target_kind (entry_stmt);
7936 new_bb = region->entry;
7937
7938 offloaded = is_gimple_omp_offloaded (entry_stmt);
7939 switch (target_kind)
7940 {
7941 case GF_OMP_TARGET_KIND_REGION:
7942 case GF_OMP_TARGET_KIND_UPDATE:
7943 case GF_OMP_TARGET_KIND_ENTER_DATA:
7944 case GF_OMP_TARGET_KIND_EXIT_DATA:
7945 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7946 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7947 case GF_OMP_TARGET_KIND_OACC_SERIAL:
7948 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7949 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7950 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7951 case GF_OMP_TARGET_KIND_DATA:
7952 case GF_OMP_TARGET_KIND_OACC_DATA:
7953 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7954 break;
7955 default:
7956 gcc_unreachable ();
7957 }
7958
7959 child_fn = NULL_TREE;
7960 child_cfun = NULL;
7961 if (offloaded)
7962 {
7963 child_fn = gimple_omp_target_child_fn (entry_stmt);
7964 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7965 }
7966
7967 /* Supported by expand_omp_taskreg, but not here. */
7968 if (child_cfun != NULL)
7969 gcc_checking_assert (!child_cfun->cfg);
7970 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7971
7972 entry_bb = region->entry;
7973 exit_bb = region->exit;
7974
7975 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
7976 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7977
7978 /* Going on, all OpenACC compute constructs are mapped to
7979 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
7980 To distinguish between them, we attach attributes. */
7981 switch (target_kind)
7982 {
7983 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7984 DECL_ATTRIBUTES (child_fn)
7985 = tree_cons (get_identifier ("oacc parallel"),
7986 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7987 break;
7988 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7989 DECL_ATTRIBUTES (child_fn)
7990 = tree_cons (get_identifier ("oacc kernels"),
7991 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7992 break;
7993 case GF_OMP_TARGET_KIND_OACC_SERIAL:
7994 DECL_ATTRIBUTES (child_fn)
7995 = tree_cons (get_identifier ("oacc serial"),
7996 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7997 break;
7998 default:
7999 /* Make sure we don't miss any. */
8000 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
8001 && is_gimple_omp_offloaded (entry_stmt)));
8002 break;
8003 }
8004
8005 if (offloaded)
8006 {
8007 unsigned srcidx, dstidx, num;
8008
8009 /* If the offloading region needs data sent from the parent
8010 function, then the very first statement (except possible
8011 tree profile counter updates) of the offloading body
8012 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
8013 &.OMP_DATA_O is passed as an argument to the child function,
8014 we need to replace it with the argument as seen by the child
8015 function.
8016
8017 In most cases, this will end up being the identity assignment
8018 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
8019 a function call that has been inlined, the original PARM_DECL
8020 .OMP_DATA_I may have been converted into a different local
8021 variable. In which case, we need to keep the assignment. */
8022 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
8023 if (data_arg)
8024 {
8025 basic_block entry_succ_bb = single_succ (entry_bb);
8026 gimple_stmt_iterator gsi;
8027 tree arg;
8028 gimple *tgtcopy_stmt = NULL;
8029 tree sender = TREE_VEC_ELT (data_arg, 0);
8030
8031 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
8032 {
8033 gcc_assert (!gsi_end_p (gsi));
8034 stmt = gsi_stmt (gsi);
8035 if (gimple_code (stmt) != GIMPLE_ASSIGN)
8036 continue;
8037
8038 if (gimple_num_ops (stmt) == 2)
8039 {
8040 tree arg = gimple_assign_rhs1 (stmt);
8041
8042 /* We're ignoring the subcode because we're
8043 effectively doing a STRIP_NOPS. */
8044
8045 if (TREE_CODE (arg) == ADDR_EXPR
8046 && TREE_OPERAND (arg, 0) == sender)
8047 {
8048 tgtcopy_stmt = stmt;
8049 break;
8050 }
8051 }
8052 }
8053
8054 gcc_assert (tgtcopy_stmt != NULL);
8055 arg = DECL_ARGUMENTS (child_fn);
8056
8057 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
8058 gsi_remove (&gsi, true);
8059 }
8060
8061 /* Declare local variables needed in CHILD_CFUN. */
8062 block = DECL_INITIAL (child_fn);
8063 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
8064 /* The gimplifier could record temporaries in the offloading block
8065 rather than in containing function's local_decls chain,
8066 which would mean cgraph missed finalizing them. Do it now. */
8067 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
8068 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
8069 varpool_node::finalize_decl (t);
8070 DECL_SAVED_TREE (child_fn) = NULL;
8071 /* We'll create a CFG for child_fn, so no gimple body is needed. */
8072 gimple_set_body (child_fn, NULL);
8073 TREE_USED (block) = 1;
8074
8075 /* Reset DECL_CONTEXT on function arguments. */
8076 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
8077 DECL_CONTEXT (t) = child_fn;
8078
8079 /* Split ENTRY_BB at GIMPLE_*,
8080 so that it can be moved to the child function. */
8081 gsi = gsi_last_nondebug_bb (entry_bb);
8082 stmt = gsi_stmt (gsi);
8083 gcc_assert (stmt
8084 && gimple_code (stmt) == gimple_code (entry_stmt));
8085 e = split_block (entry_bb, stmt);
8086 gsi_remove (&gsi, true);
8087 entry_bb = e->dest;
8088 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8089
8090 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
8091 if (exit_bb)
8092 {
8093 gsi = gsi_last_nondebug_bb (exit_bb);
8094 gcc_assert (!gsi_end_p (gsi)
8095 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8096 stmt = gimple_build_return (NULL);
8097 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
8098 gsi_remove (&gsi, true);
8099 }
8100
8101 /* Move the offloading region into CHILD_CFUN. */
8102
8103 block = gimple_block (entry_stmt);
8104
8105 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
8106 if (exit_bb)
8107 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
8108 /* When the OMP expansion process cannot guarantee an up-to-date
8109 loop tree arrange for the child function to fixup loops. */
8110 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8111 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
8112
8113 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
8114 num = vec_safe_length (child_cfun->local_decls);
8115 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
8116 {
8117 t = (*child_cfun->local_decls)[srcidx];
8118 if (DECL_CONTEXT (t) == cfun->decl)
8119 continue;
8120 if (srcidx != dstidx)
8121 (*child_cfun->local_decls)[dstidx] = t;
8122 dstidx++;
8123 }
8124 if (dstidx != num)
8125 vec_safe_truncate (child_cfun->local_decls, dstidx);
8126
8127 /* Inform the callgraph about the new function. */
8128 child_cfun->curr_properties = cfun->curr_properties;
8129 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
8130 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
8131 cgraph_node *node = cgraph_node::get_create (child_fn);
8132 node->parallelized_function = 1;
8133 cgraph_node::add_new_function (child_fn, true);
8134
8135 /* Add the new function to the offload table. */
8136 if (ENABLE_OFFLOADING)
8137 {
8138 if (in_lto_p)
8139 DECL_PRESERVE_P (child_fn) = 1;
8140 vec_safe_push (offload_funcs, child_fn);
8141 }
8142
8143 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
8144 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
8145
8146 /* Fix the callgraph edges for child_cfun. Those for cfun will be
8147 fixed in a following pass. */
8148 push_cfun (child_cfun);
8149 if (need_asm)
8150 assign_assembler_name_if_needed (child_fn);
8151 cgraph_edge::rebuild_edges ();
8152
8153 /* Some EH regions might become dead, see PR34608. If
8154 pass_cleanup_cfg isn't the first pass to happen with the
8155 new child, these dead EH edges might cause problems.
8156 Clean them up now. */
8157 if (flag_exceptions)
8158 {
8159 basic_block bb;
8160 bool changed = false;
8161
8162 FOR_EACH_BB_FN (bb, cfun)
8163 changed |= gimple_purge_dead_eh_edges (bb);
8164 if (changed)
8165 cleanup_tree_cfg ();
8166 }
8167 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8168 verify_loop_structure ();
8169 pop_cfun ();
8170
8171 if (dump_file && !gimple_in_ssa_p (cfun))
8172 {
8173 omp_any_child_fn_dumped = true;
8174 dump_function_header (dump_file, child_fn, dump_flags);
8175 dump_function_to_file (child_fn, dump_file, dump_flags);
8176 }
8177
8178 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
8179 }
8180
8181 /* Emit a library call to launch the offloading region, or do data
8182 transfers. */
8183 tree t1, t2, t3, t4, depend, c, clauses;
8184 enum built_in_function start_ix;
8185 unsigned int flags_i = 0;
8186
8187 switch (gimple_omp_target_kind (entry_stmt))
8188 {
8189 case GF_OMP_TARGET_KIND_REGION:
8190 start_ix = BUILT_IN_GOMP_TARGET;
8191 break;
8192 case GF_OMP_TARGET_KIND_DATA:
8193 start_ix = BUILT_IN_GOMP_TARGET_DATA;
8194 break;
8195 case GF_OMP_TARGET_KIND_UPDATE:
8196 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
8197 break;
8198 case GF_OMP_TARGET_KIND_ENTER_DATA:
8199 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8200 break;
8201 case GF_OMP_TARGET_KIND_EXIT_DATA:
8202 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8203 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
8204 break;
8205 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8206 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8207 case GF_OMP_TARGET_KIND_OACC_SERIAL:
8208 start_ix = BUILT_IN_GOACC_PARALLEL;
8209 break;
8210 case GF_OMP_TARGET_KIND_OACC_DATA:
8211 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8212 start_ix = BUILT_IN_GOACC_DATA_START;
8213 break;
8214 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8215 start_ix = BUILT_IN_GOACC_UPDATE;
8216 break;
8217 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8218 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
8219 break;
8220 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8221 start_ix = BUILT_IN_GOACC_DECLARE;
8222 break;
8223 default:
8224 gcc_unreachable ();
8225 }
8226
8227 clauses = gimple_omp_target_clauses (entry_stmt);
8228
8229 tree device = NULL_TREE;
8230 location_t device_loc = UNKNOWN_LOCATION;
8231 tree goacc_flags = NULL_TREE;
8232 if (is_gimple_omp_oacc (entry_stmt))
8233 {
8234 /* By default, no GOACC_FLAGs are set. */
8235 goacc_flags = integer_zero_node;
8236 }
8237 else
8238 {
8239 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
8240 if (c)
8241 {
8242 device = OMP_CLAUSE_DEVICE_ID (c);
8243 device_loc = OMP_CLAUSE_LOCATION (c);
8244 }
8245 else
8246 {
8247 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
8248 library choose). */
8249 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
8250 device_loc = gimple_location (entry_stmt);
8251 }
8252
8253 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
8254 if (c)
8255 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
8256 }
8257
8258 /* By default, there is no conditional. */
8259 tree cond = NULL_TREE;
8260 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
8261 if (c)
8262 cond = OMP_CLAUSE_IF_EXPR (c);
8263 /* If we found the clause 'if (cond)', build:
8264 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
8265 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
8266 if (cond)
8267 {
8268 tree *tp;
8269 if (is_gimple_omp_oacc (entry_stmt))
8270 tp = &goacc_flags;
8271 else
8272 {
8273 /* Ensure 'device' is of the correct type. */
8274 device = fold_convert_loc (device_loc, integer_type_node, device);
8275
8276 tp = &device;
8277 }
8278
8279 cond = gimple_boolify (cond);
8280
8281 basic_block cond_bb, then_bb, else_bb;
8282 edge e;
8283 tree tmp_var;
8284
8285 tmp_var = create_tmp_var (TREE_TYPE (*tp));
8286 if (offloaded)
8287 e = split_block_after_labels (new_bb);
8288 else
8289 {
8290 gsi = gsi_last_nondebug_bb (new_bb);
8291 gsi_prev (&gsi);
8292 e = split_block (new_bb, gsi_stmt (gsi));
8293 }
8294 cond_bb = e->src;
8295 new_bb = e->dest;
8296 remove_edge (e);
8297
8298 then_bb = create_empty_bb (cond_bb);
8299 else_bb = create_empty_bb (then_bb);
8300 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
8301 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
8302
8303 stmt = gimple_build_cond_empty (cond);
8304 gsi = gsi_last_bb (cond_bb);
8305 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8306
8307 gsi = gsi_start_bb (then_bb);
8308 stmt = gimple_build_assign (tmp_var, *tp);
8309 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8310
8311 gsi = gsi_start_bb (else_bb);
8312 if (is_gimple_omp_oacc (entry_stmt))
8313 stmt = gimple_build_assign (tmp_var,
8314 BIT_IOR_EXPR,
8315 *tp,
8316 build_int_cst (integer_type_node,
8317 GOACC_FLAG_HOST_FALLBACK));
8318 else
8319 stmt = gimple_build_assign (tmp_var,
8320 build_int_cst (integer_type_node,
8321 GOMP_DEVICE_HOST_FALLBACK));
8322 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8323
8324 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
8325 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
8326 add_bb_to_loop (then_bb, cond_bb->loop_father);
8327 add_bb_to_loop (else_bb, cond_bb->loop_father);
8328 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
8329 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
8330
8331 *tp = tmp_var;
8332
8333 gsi = gsi_last_nondebug_bb (new_bb);
8334 }
8335 else
8336 {
8337 gsi = gsi_last_nondebug_bb (new_bb);
8338
8339 if (device != NULL_TREE)
8340 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
8341 true, GSI_SAME_STMT);
8342 }
8343
8344 t = gimple_omp_target_data_arg (entry_stmt);
8345 if (t == NULL)
8346 {
8347 t1 = size_zero_node;
8348 t2 = build_zero_cst (ptr_type_node);
8349 t3 = t2;
8350 t4 = t2;
8351 }
8352 else
8353 {
8354 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
8355 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
8356 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
8357 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
8358 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
8359 }
8360
8361 gimple *g;
8362 bool tagging = false;
8363 /* The maximum number used by any start_ix, without varargs. */
8364 auto_vec<tree, 11> args;
8365 if (is_gimple_omp_oacc (entry_stmt))
8366 {
8367 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
8368 TREE_TYPE (goacc_flags), goacc_flags);
8369 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
8370 NULL_TREE, true,
8371 GSI_SAME_STMT);
8372 args.quick_push (goacc_flags_m);
8373 }
8374 else
8375 args.quick_push (device);
8376 if (offloaded)
8377 args.quick_push (build_fold_addr_expr (child_fn));
8378 args.quick_push (t1);
8379 args.quick_push (t2);
8380 args.quick_push (t3);
8381 args.quick_push (t4);
8382 switch (start_ix)
8383 {
8384 case BUILT_IN_GOACC_DATA_START:
8385 case BUILT_IN_GOACC_DECLARE:
8386 case BUILT_IN_GOMP_TARGET_DATA:
8387 break;
8388 case BUILT_IN_GOMP_TARGET:
8389 case BUILT_IN_GOMP_TARGET_UPDATE:
8390 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8391 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8392 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8393 if (c)
8394 depend = OMP_CLAUSE_DECL (c);
8395 else
8396 depend = build_int_cst (ptr_type_node, 0);
8397 args.quick_push (depend);
8398 if (start_ix == BUILT_IN_GOMP_TARGET)
8399 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8400 break;
8401 case BUILT_IN_GOACC_PARALLEL:
8402 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
8403 {
8404 tree dims = NULL_TREE;
8405 unsigned int ix;
8406
8407 /* For serial constructs we set all dimensions to 1. */
8408 for (ix = GOMP_DIM_MAX; ix--;)
8409 dims = tree_cons (NULL_TREE, integer_one_node, dims);
8410 oacc_replace_fn_attrib (child_fn, dims);
8411 }
8412 else
8413 oacc_set_fn_attrib (child_fn, clauses, &args);
8414 tagging = true;
8415 /* FALLTHRU */
8416 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8417 case BUILT_IN_GOACC_UPDATE:
8418 {
8419 tree t_async = NULL_TREE;
8420
8421 /* If present, use the value specified by the respective
8422 clause, making sure that is of the correct type. */
8423 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8424 if (c)
8425 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8426 integer_type_node,
8427 OMP_CLAUSE_ASYNC_EXPR (c));
8428 else if (!tagging)
8429 /* Default values for t_async. */
8430 t_async = fold_convert_loc (gimple_location (entry_stmt),
8431 integer_type_node,
8432 build_int_cst (integer_type_node,
8433 GOMP_ASYNC_SYNC));
8434 if (tagging && t_async)
8435 {
8436 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8437
8438 if (TREE_CODE (t_async) == INTEGER_CST)
8439 {
8440 /* See if we can pack the async arg in to the tag's
8441 operand. */
8442 i_async = TREE_INT_CST_LOW (t_async);
8443 if (i_async < GOMP_LAUNCH_OP_MAX)
8444 t_async = NULL_TREE;
8445 else
8446 i_async = GOMP_LAUNCH_OP_MAX;
8447 }
8448 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8449 i_async));
8450 }
8451 if (t_async)
8452 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
8453 NULL_TREE, true,
8454 GSI_SAME_STMT));
8455
8456 /* Save the argument index, and ... */
8457 unsigned t_wait_idx = args.length ();
8458 unsigned num_waits = 0;
8459 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8460 if (!tagging || c)
8461 /* ... push a placeholder. */
8462 args.safe_push (integer_zero_node);
8463
8464 for (; c; c = OMP_CLAUSE_CHAIN (c))
8465 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8466 {
8467 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8468 integer_type_node,
8469 OMP_CLAUSE_WAIT_EXPR (c));
8470 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
8471 GSI_SAME_STMT);
8472 args.safe_push (arg);
8473 num_waits++;
8474 }
8475
8476 if (!tagging || num_waits)
8477 {
8478 tree len;
8479
8480 /* Now that we know the number, update the placeholder. */
8481 if (tagging)
8482 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8483 else
8484 len = build_int_cst (integer_type_node, num_waits);
8485 len = fold_convert_loc (gimple_location (entry_stmt),
8486 unsigned_type_node, len);
8487 args[t_wait_idx] = len;
8488 }
8489 }
8490 break;
8491 default:
8492 gcc_unreachable ();
8493 }
8494 if (tagging)
8495 /* Push terminal marker - zero. */
8496 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8497
8498 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8499 gimple_set_location (g, gimple_location (entry_stmt));
8500 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8501 if (!offloaded)
8502 {
8503 g = gsi_stmt (gsi);
8504 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8505 gsi_remove (&gsi, true);
8506 }
8507 }
8508
/* Expand KFOR loop as a HSA gridified kernel, i.e. as a body only with
   iteration variable derived from the thread number.  INTRA_GROUP means this
   is an expansion of a loop iterating over work-items within a separate
   iteration over groups.  */
8513
static void
grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
{
  gimple_stmt_iterator gsi;
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
		       == GF_OMP_FOR_KIND_GRID_LOOP);
  size_t collapse = gimple_omp_for_collapse (for_stmt);
  struct omp_for_data_loop *loops
    = XALLOCAVEC (struct omp_for_data_loop,
		  gimple_omp_for_collapse (for_stmt));
  struct omp_for_data fd;

  /* Drop the back/exit edge out of the loop header; the body will be
     executed straight-line, once per thread.  */
  remove_edge (BRANCH_EDGE (kfor->entry));
  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;

  gcc_assert (kfor->cont);
  omp_extract_for_data (for_stmt, &fd, loops);

  gsi = gsi_start_bb (body_bb);

  /* For each collapsed dimension, initialize the iteration variable to
     N1 + thread-id * STEP at the top of the (former) loop body, so each
     work-item/work-group handles exactly one iteration.  */
  for (size_t dim = 0; dim < collapse; dim++)
    {
      tree type, itype;
      itype = type = TREE_TYPE (fd.loops[dim].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);

      tree n1 = fd.loops[dim].n1;
      tree step = fd.loops[dim].step;
      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				     true, NULL_TREE, true, GSI_SAME_STMT);
      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				       true, NULL_TREE, true, GSI_SAME_STMT);
      /* Pick the HSA builtin matching what this loop iterates over:
	 work-groups, work-items within one group, or work-items of the
	 whole grid.  */
      tree threadid;
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  gcc_checking_assert (!intra_group);
	  threadid = build_call_expr (builtin_decl_explicit
				      (BUILT_IN_HSA_WORKGROUPID), 1,
				      build_int_cstu (unsigned_type_node, dim));
	}
      else if (intra_group)
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      else
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMABSID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      threadid = fold_convert (itype, threadid);
      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
					   true, GSI_SAME_STMT);

      tree startvar = fd.loops[dim].v;
      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (n1, t);
      else
	t = fold_build2 (PLUS_EXPR, type, t, n1);
      t = fold_convert (type, t);
      /* Force into a separate SSA name if STARTVAR is addressable, so the
	 assignment below stays a simple store.  */
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (startvar)
				    && TREE_ADDRESSABLE (startvar),
				    NULL_TREE, true, GSI_SAME_STMT);
      gassign *assign_stmt = gimple_build_assign (startvar, t);
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }
  /* Remove the omp for statement.  */
  gsi = gsi_last_nondebug_bb (kfor->entry);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_nondebug_bb (kfor->cont);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
  gsi_remove (&gsi, true);

  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
  gsi = gsi_last_nondebug_bb (kfor->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  if (intra_group)
    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Fixup the much simpler CFG.  */
  remove_edge (find_edge (kfor->cont, body_bb));

  if (kfor->cont != body_bb)
    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
}
8607
8608 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8609 argument_decls. */
8610
struct grid_arg_decl_map
{
  tree old_arg;	/* PARM_DECL of the original target child function.  */
  tree new_arg;	/* Replacement PARM_DECL belonging to the kernel clone.  */
};
8616
8617 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
8618 pertaining to kernel function. */
8619
8620 static tree
grid_remap_kernel_arg_accesses(tree * tp,int * walk_subtrees,void * data)8621 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8622 {
8623 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8624 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8625 tree t = *tp;
8626
8627 if (t == adm->old_arg)
8628 *tp = adm->new_arg;
8629 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8630 return NULL_TREE;
8631 }
8632
8633 /* If TARGET region contains a kernel body for loop, remove its region from the
8634 TARGET and expand it in HSA gridified kernel fashion. */
8635
static void
grid_expand_target_grid_body (struct omp_region *target)
{
  /* Gridification only matters when HSA code generation was requested.  */
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  /* Look for a GIMPLE_OMP_GRID_BODY region directly inside TARGET.  */
  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* No gridified body: just register the plain child function as a
	 kernel, where applicable.  HSA cannot handle OACC stuff.  */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
	return;
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
				    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  /* Unlink GPUKERNEL from TARGET's children, then find the gridified
     for-loop inside it.  */
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  /* Expand the inner intra-group grid loops first and unlink
	     them from the region tree, so expand_omp below only sees
	     what is left.  */
	  struct omp_region **next_pp;
	  for (pp = &kfor->inner; *pp; pp = next_pp)
	    {
	      next_pp = &(*pp)->next;
	      if ((*pp)->type != GIMPLE_OMP_FOR)
		continue;
	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
	      gcc_assert (gimple_omp_for_kind (inner)
			  == GF_OMP_FOR_KIND_GRID_LOOP);
	      grid_expand_omp_for_loop (*pp, true);
	      *pp = (*pp)->next;
	      next_pp = pp;
	    }
	}
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

  /* Create the kernel function decl as a numbered "kernel" clone of the
     original target child function, with its own scope block.  */
  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
							  "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  /* The child function takes a single data-block argument; give the
     clone its own PARM_DECL and DECL_RESULT copies.  */
  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the omp for statement.  */
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.  */
  gsi = gsi_last_nondebug_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
	break;
      /* Skip the store of &SENDER (the OMP_DATA assignment).  */
      if (sender
	  && is_gimple_assign (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
	continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  /* Move the kernel region body into the new function.  */
  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
			  gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  struct walk_stmt_info wi;
	  memset (&wi, 0, sizeof (wi));
	  wi.info = &adm;
	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
	}
    }
  pop_cfun ();

  return;
}
8804
8805 /* Expand the parallel region tree rooted at REGION. Expansion
8806 proceeds in depth-first order. Innermost regions are expanded
8807 first. This way, parallel regions that require a new function to
8808 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8809 internal dependencies in their body. */
8810
static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  /* Iterate over the sibling list; children of each region are expanded
     before the region itself (depth-first, innermost first).  */
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
	grid_expand_target_grid_body (region);

      /* For a combined worksharing loop, the inner statement must be
	 captured before the children are expanded.  */
      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      /* Point diagnostics at the directive being expanded.  */
      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding corresponding
		   worksharing region with ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* An ordered region without a depend clause is expanded like
	     the other synchronization constructs below.  */
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  /* If any outlined child function was dumped, re-dump the containing
     function's header so the dump file reads coherently.  */
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}
8907
8908 /* Helper for build_omp_regions. Scan the dominator tree starting at
8909 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8910 true, the function ends once a single tree is built (otherwise, whole
8911 forest of OMP constructs may be built). */
8912
static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  /* Only the last statement of a block can open or close a region.  */
  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  /* Record the continue point of the current region.  */
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_SERIAL:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...
		     To be precise, target data isn't stand-alone, but
		     gimplifier put the end API call into try finally block
		     for it, so omp expansion can treat it as such.  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASK
		   && gimple_omp_task_taskwait_p (stmt))
	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASKGROUP)
	    /* #pragma omp taskgroup isn't a stand-alone directive, but
	       gimplifier put the end API call into try finally block
	       for it, so omp expansion can treat it as such.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  /* When building a single tree, stop once the root region closes.  */
  if (single_tree && !parent)
    return;

  /* Recurse into the dominated blocks.  */
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}
9022
9023 /* Builds the tree of OMP regions rooted at ROOT, storing it to
9024 root_omp_region. */
9025
9026 static void
build_omp_regions_root(basic_block root)9027 build_omp_regions_root (basic_block root)
9028 {
9029 gcc_assert (root_omp_region == NULL);
9030 build_omp_regions_1 (root, NULL, true);
9031 gcc_assert (root_omp_region != NULL);
9032 }
9033
9034 /* Expands omp construct (and its subconstructs) starting in HEAD. */
9035
9036 void
omp_expand_local(basic_block head)9037 omp_expand_local (basic_block head)
9038 {
9039 build_omp_regions_root (head);
9040 if (dump_file && (dump_flags & TDF_DETAILS))
9041 {
9042 fprintf (dump_file, "\nOMP region tree\n\n");
9043 dump_omp_region (dump_file, root_omp_region, 0);
9044 fprintf (dump_file, "\n");
9045 }
9046
9047 remove_exit_barriers (root_omp_region);
9048 expand_omp (root_omp_region);
9049
9050 omp_free_regions ();
9051 }
9052
/* Scan the CFG and build a forest of OMP regions rooted in the global
   root_omp_region.  */
9055
9056 static void
build_omp_regions(void)9057 build_omp_regions (void)
9058 {
9059 gcc_assert (root_omp_region == NULL);
9060 calculate_dominance_info (CDI_DOMINATORS);
9061 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
9062 }
9063
9064 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
9065
static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  /* Nothing to do if the function contains no OMP directives.  */
  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  /* Exit barriers are redundant once regions are outlined.  */
  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  /* Expansion rewrote the CFG; with checking enabled, verify the loop
     structure unless it is already queued for fixup, then clean up.  */
  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}
9093
9094 /* OMP expansion -- the default pass, run before creation of SSA form. */
9095
9096 namespace {
9097
/* Pass descriptor for the default "ompexp" OMP-expansion pass.  */
const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
9110
9111 class pass_expand_omp : public gimple_opt_pass
9112 {
9113 public:
pass_expand_omp(gcc::context * ctxt)9114 pass_expand_omp (gcc::context *ctxt)
9115 : gimple_opt_pass (pass_data_expand_omp, ctxt)
9116 {}
9117
9118 /* opt_pass methods: */
execute(function *)9119 virtual unsigned int execute (function *)
9120 {
9121 bool gate = ((flag_openacc != 0 || flag_openmp != 0
9122 || flag_openmp_simd != 0)
9123 && !seen_error ());
9124
9125 /* This pass always runs, to provide PROP_gimple_eomp.
9126 But often, there is nothing to do. */
9127 if (!gate)
9128 return 0;
9129
9130 return execute_expand_omp ();
9131 }
9132
9133 }; // class pass_expand_omp
9134
9135 } // anon namespace
9136
/* Factory used by the pass manager to instantiate the "ompexp" pass.  */
gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}
9142
9143 namespace {
9144
/* Pass descriptor for "ompexpssa", the late OMP-expansion pass that runs
   on SSA form when "ompexp" did not already run.  */
const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};
9157
class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only if OMP expansion has not happened yet; the earlier "ompexp"
     pass provides PROP_gimple_eomp when it runs.  */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  /* This pass is inserted at several points in the pass list, so it must
     be clonable.  */
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa
9174
9175 } // anon namespace
9176
/* Factory used by the pass manager to instantiate the "ompexpssa" pass.  */
gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}
9182
9183 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
9184 GIMPLE_* codes. */
9185
bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  /* Only the last statement of BB can be an OMP directive that needs
     special edge treatment; tree-cfg.c calls us exactly for such blocks.  */
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  /* *REGION is the innermost open OMP region at this point of the CFG
     walk; it is threaded through successive calls via REGION/REGION_IDX.  */
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    /* Directives that open a region which stays open until the matching
       GIMPLE_OMP_RETURN is seen: push a new region and fall through
       into the directive's body.  */
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASKGROUP:
      /* A region is recorded for the taskgroup, but it is popped again
	 immediately: the current region reverts to the outer one before
	 the body is walked.  */
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* A taskwait-style task has no outlined body, so its region is
	 popped right away; a normal task stays open until its
	 GIMPLE_OMP_RETURN.  */
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* An ordered construct with a depend clause is a stand-alone
	 directive (no body/GIMPLE_OMP_RETURN follows), so pop its
	 region immediately.  */
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	/* Target kinds with an outlined body: the region stays open
	   until the matching GIMPLE_OMP_RETURN.  */
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_SERIAL:
	  break;
	/* Stand-alone data/update directives: no body follows, so the
	   region is popped immediately.  */
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      /* The sections region is wired up later, when its
	 GIMPLE_OMP_CONTINUE is reached (see below).  */
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      /* Edges out of the switch block to the individual sections are
	 created by the GIMPLE_OMP_CONTINUE handling of the enclosing
	 GIMPLE_OMP_SECTIONS region, not here.  */
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      /* The region ends here; pop back to the enclosing one.  */
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    /* Each inner region of the sections region must be an
	       individual section: dispatch into it from the switch
	       block, and route its exit to the continue block.  */
	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Publish the (possibly changed) current region back to the caller,
     together with the index of its entry block so the caller can cheaply
     detect staleness across CFG modifications.  */
  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  /* Tell the caller whether a normal fallthru edge to the next block
     should still be created for BB.  */
  return fallthru;
}
9352
9353 #include "gt-omp-expand.h"
9354