1 /*
2 * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /** \file
19 * \brief SMP expander routines
20 */
21
22 #include "expsmp.h"
23 #include "exputil.h"
24 #include "exp_rte.h"
25 #include "dtypeutl.h"
26 #include "expatomics.h"
27 #include "regutil.h"
28 #include "machreg.h"
29 #include "ilm.h"
30 #include "ilmtp.h"
31 #include "ili.h"
32 #define EXPANDER_DECLARE_INTERNAL
33 #include "expand.h"
34 #include "machar.h"
35 #include "ccffinfo.h"
36 #include "kmpcutil.h"
37 #include "outliner.h"
38 #include "mp.h"
39 #include "x86.h"
40 #include "assem.h"
41 #include "llutil.h"
42 #include "llassem.h"
43 #include "ll_ftn.h"
44 #include "llmputil.h"
45 #if defined(OMP_OFFLOAD_LLVM) || defined(OMP_OFFLOAD_PGI)
46 #include "ompaccel.h"
47 #include "tgtutil.h"
48 #endif
49 #include "symfun.h"
50
#ifdef __cplusplus
/* C++ shim: the PARUPLEVELG field-access macro yields a plain int, so wrap it
 * in a typed accessor that returns an SPTR, then rebind the macro name to it
 * for the remainder of this file. */
inline SPTR GetPARUPLEVEL(SPTR sptr) {
  return static_cast<SPTR>(PARUPLEVELG(sptr));
}
#undef PARUPLEVELG
#define PARUPLEVELG GetPARUPLEVEL
#endif
58
/* Forward declarations for helpers defined later in this file. */
static int incrOutlinedCnt(void);
static int decrOutlinedCnt(void);
static int getOutlinedTemp(char *, int);
static int isUnnamedCs(int);
static int addMpUnp(void);
static int addMpUnv(void);
static int addMpBcsNest(void);
static int addMpEcsNest(void);
static int allocThreadprivate(SPTR sym, int *tmpthr);

/* Route all prototype creation in this file through the LLVM variant. */
#define mk_prototype mk_prototype_llvm

static int availIreg; /* next available integer register for jsr */
static int availFreg; /* next available floating point register for jsr */
static int maxIreg; /* max # of integer registers used by jsr */
static int maxFreg; /* max # of floating point registers used by jsr */

static int outlinedCnt; /* counter to record of outlined function */
static int parCnt;      /* counter to record parallel regions */
static int parsectCnt;  /* counter to record parallel sections */
static int critCnt;     /* counter for critical sections */
static int taskbih;     /* mark where task allocation should be */
static int taskCnt;     /* counter for task regions */
static int taskLoopCnt; /* counter for taskloop regions */
static int taskLab;     /* label after ETASK */
static int taskBv;      /* bit values for flag for BTASK & TASKREG:
                         * 0x01 -- untied
                         * 0x02 -- if clause present
                         * 0x04 -- orphaned (dynamic, not lexically,
                                  parallel)
                         * 0x08 -- nested task
                         * 0x10 -- forced defer (CUDA)
                         * 0x20 -- final task
                         * 0x40 -- execute immediately
                         */
static int taskdup;     /* taskdup state for BTASKDUP/ETASKDUP --
                         * presumably the taskdup ILM block; verify against
                         * the IM_BTASKDUP handling */
static int taskIfv;     /* value of if clause for BTASK & TASKREG */
static SPTR taskFlags;  ///< value of final clause for BTASK & TASKREG
static SPTR taskFnsptr; ///< store task func sptr
static SPTR taskAllocSptr; ///< store the return value from kmpc_alloc
static int maxOutlinedCnt; /* maximum parCnt for a function */
static int sumOutlinedCnt; /* sum of parCnts of functions already
                            * processed. 'sumOutlinedCnt+parCnt' can be
                            * the suffix of the name of a temp created for
                            * a outlined region within a function so that
                            * the temp is:
                            * 1) unique across functions,
                            * 2) reused across parallel regions within
                            *    a function.
                            */
static SPTR scopeSptr;  /* scope symbol from the enclosing BMPSCOPE */
static int *mppgbih;    /* recorded MP-region BIHs (see resetMppBih) */
static int mppgcnt;     /* number of entries in mppgbih */
static int mppgBihSiz;  /* allocated size of mppgbih */
113
114 static struct {
115 int lb_ili;
116 int ub_ili;
117 int st_ili;
118 int lastitr;
119 int flags;
120 INT offset;
121 int tasklpargs[10]; /* ili in order as enum tasklooparg below */
122 } taskLpInfo;
123
124 enum taskloooparg {
125 TASKLPARG_TASK = 0,
126 TASKLPARG_IF_VAL,
127 TASKLPARG_LB,
128 TASKLPARG_UB,
129 TASKLPARG_ST,
130 TASKLPARG_NOGROUP,
131 TASKLPARG_SCHED,
132 TASKLPARG_GRAINSIZE,
133 TASKLPARG_TASKDUP,
134 TASKLPARG_MAX // must be last
135 };
136
137 #define TASK_LB taskLpInfo.lb_ili
138 #define TASK_LPVAR_OFFSET taskLpInfo.offset
139 #define TASK_UB taskLpInfo.ub_ili
140 #define TASK_ST taskLpInfo.st_ili
141 #define TASK_LASTITR taskLpInfo.lastitr
142
143 /* arguments to __kmpc_taskloop excepts ident and gtid */
144 #define TASKLPARGS taskLpInfo.tasklpargs
145 #define TASKLP_TASK taskLpInfo.tasklpargs[TASKLPARG_TASK]
146 #define TASKLP_IF taskLpInfo.tasklpargs[TASKLPARG_IF_VAL]
147 #define TASKLP_LB taskLpInfo.tasklpargs[TASKLPARG_LB]
148 #define TASKLP_UB taskLpInfo.tasklpargs[TASKLPARG_UB]
149 #define TASKLP_ST taskLpInfo.tasklpargs[TASKLPARG_ST]
150 #define TASKLP_NOGROUP taskLpInfo.tasklpargs[TASKLPARG_NOGROUP]
151 #define TASKLP_SCHED taskLpInfo.tasklpargs[TASKLPARG_SCHED]
152 #define TASKLP_GRAINSIZE taskLpInfo.tasklpargs[TASKLPARG_GRAINSIZE]
153 #define TASKLP_TASKDUP taskLpInfo.tasklpargs[TASKLPARG_TASKDUP]
154
155 typedef struct SectionsWrk_t {
156 SPTR lb; /* start at 0 */
157 SPTR ub; /* number of sections */
158 SPTR st; /* stride 1 */
159 SPTR last; /* flag for last section */
160 int cnt; /* running count */
161 int bbih; /* start block for sections */
162 } SectionsWrk_t;
163
164 static SectionsWrk_t sectionsWrk;
165
166 #define SECT_UB sectionsWrk.lb
167 #define SECT_LB sectionsWrk.ub
168 #define SECT_ST sectionsWrk.st
169 #define SECT_LAST sectionsWrk.last
170 #define SECT_CNT sectionsWrk.cnt
171 #define SECT_BBIH sectionsWrk.bbih
172
173 #define MP_NOT_IMPLEMENTED(_str) error(375, ERR_Fatal, 0, _str, NULL)
174
175 /**
176 \brief For use with generating an array filed with copyprivate addresses.
177 */
178 typedef struct sptrListT {
179 SPTR o_sptr;
180 SPTR sptr; ///< either base sptr or TPpxxx thread private common block vector
181 int size_ili;
182 int vec_size_ili;
183 bool is_common_block;
184 struct sptrListT *next;
185 int cplus_assign_rou;
186 } sptrListT;
187
188 /* called once per function */
189 void
exp_smp_init(void)190 exp_smp_init(void)
191 {
192 parCnt = 0;
193 parsectCnt = 0;
194 outlinedCnt = 0;
195 critCnt = 0;
196 expb.lcpu2 = 0;
197 expb.lcpu3 = 0;
198 expb.ncpus2 = 0;
199 maxOutlinedCnt = 0;
200 mppgBihSiz = 16;
201 NEW(mppgbih, int, mppgBihSiz);
202 mppgcnt = 0;
203 taskCnt = 0;
204 taskLoopCnt = 0;
205 }
206
207 void
exp_smp_fini(void)208 exp_smp_fini(void)
209 {
210 sumOutlinedCnt = +maxOutlinedCnt;
211 FREE(mppgbih);
212 }
213
214 static SPTR
getPrivateTemp(DTYPE dtype)215 getPrivateTemp(DTYPE dtype)
216 {
217 static int count;
218 SPTR sptr = getnewccsym('s', count++, ST_VAR);
219 SCP(sptr, SC_PRIVATE);
220 DTYPEP(sptr, dtype);
221 ENCLFUNCP(sptr, GBL_CURRFUNC);
222 return sptr;
223 }
224
225 static void
expSmpSectionInit(void)226 expSmpSectionInit(void)
227 {
228 SECT_LB = getPrivateTemp(DT_UINT);
229 SECT_UB = getPrivateTemp(DT_UINT);
230 SECT_LAST = getPrivateTemp(DT_UINT);
231 SECT_ST = getPrivateTemp(DT_UINT);
232 SECT_CNT = 0;
233 if (!gbl.outlined) {
234 SCP(SECT_LB, SC_AUTO);
235 SCP(SECT_UB, SC_AUTO);
236 SCP(SECT_LAST, SC_AUTO);
237 SCP(SECT_ST, SC_AUTO);
238 }
239 SECT_BBIH = expb.curbih;
240 }
241
242 static void
expSmpSectionEnd(void)243 expSmpSectionEnd(void)
244 {
245 SECT_LB = getPrivateTemp(DT_UINT);
246 SECT_UB = getPrivateTemp(DT_UINT);
247 SECT_LAST = getPrivateTemp(DT_UINT);
248 SECT_ST = getPrivateTemp(DT_UINT);
249 SECT_CNT = 0;
250 SECT_BBIH = 0;
251 }
252
253 static int
sectionCreateBlock(int nextLabel,SPTR lb,SPTR ub,int myVal)254 sectionCreateBlock(int nextLabel, SPTR lb, SPTR ub, int myVal)
255 {
256 int ili, ubVal, lbVal;
257
258 myVal = ad_icon(myVal);
259 lbVal = ad3ili(IL_LD, ad_acon(lb, 0), addnme(NT_VAR, lb, 0, 0), MSZ_WORD);
260 ubVal = ad3ili(IL_LD, ad_acon(ub, 0), addnme(NT_VAR, ub, 0, 0), MSZ_WORD);
261
262 ili = ad4ili(IL_UICJMP, myVal, ubVal, CC_GT, nextLabel);
263 RFCNTI(nextLabel);
264 chk_block(ili);
265
266 ili = ad4ili(IL_UICJMP, myVal, lbVal, CC_LT, nextLabel);
267 RFCNTI(nextLabel);
268
269 return ili;
270 }
271
272 static int
sectionCreateLastblock(int nextLabel,SPTR lastValSym,int myVal)273 sectionCreateLastblock(int nextLabel, SPTR lastValSym, int myVal)
274 {
275 int ili, lastVal;
276
277 myVal = ad_icon(myVal);
278 lastVal = ad3ili(IL_LD, ad_acon(lastValSym, 0),
279 addnme(NT_VAR, lastValSym, 0, 0), MSZ_WORD);
280
281 ili = ad4ili(IL_UICJMP, myVal, lastVal, CC_EQ, nextLabel);
282 RFCNTI(nextLabel);
283 return ili;
284 }
285
286 void
section_create_endblock(SPTR endLabel)287 section_create_endblock(SPTR endLabel)
288 {
289 /* call kmpc_for_static_fini */
290 int ili;
291
292 wr_block();
293 cr_block();
294 ili = ll_make_kmpc_for_static_fini();
295 exp_label(endLabel);
296 iltb.callfg = 1;
297 chk_block(ili);
298 ili = ll_make_kmpc_barrier();
299 iltb.callfg = 1;
300 chk_block(ili);
301 BIH_LABEL(expb.curbih) = endLabel;
302 ILIBLKP(endLabel, expb.curbih);
303 }
304
/* set: 1 to set, 0 to restore
 * eampp: if it is eampp, then subtract its value(1) from mppgcnt
 */
#define SET_MPPBIH 1
#define RESTORE_MPPBIH 0
#define IS_PREVMPPG 1
#define IS_NOTPREVMPPG 0
#define USE_NEXTBIH 1
#define NOTUSE_NEXTBIH 0

/* Temporarily redirect expansion into a previously recorded MP-region block
 * (mppgbih[]), or restore the expansion point saved by the matching SET
 * call.  State lives in function-local statics, so SET/RESTORE calls must be
 * strictly paired and non-nested. */
static void
resetMppBih(int set, int eampp)
{
  static int savebih; /* expansion BIH saved by the SET call */
  static int savex14; /* saved flg.x[14] value */
  int bih;

  if (mppgcnt == 0)
    return;
  if (set) {
    savebih = expb.curbih;
    bih = mppgbih[mppgcnt - eampp];
    if (savebih == bih) {
      /* already expanding into the target block: make RESTORE a no-op */
      savebih = 0;
      return;
    }
    savex14 = flg.x[14];
    flg.x[14] |= 0x1000; /* don't split at calls */
    wr_block();
    expb.curbih = bih;
    rdilts(expb.curbih);
    expb.curilt = ILT_PREV(0); /* append at the end of the reopened block */
  } else if (savebih) { /* only reset if we save it */
    wrilts(mppgbih[mppgcnt - eampp]);
    expb.curbih = savebih;
    rdilts(expb.curbih);
    expb.curilt = ILT_PREV(0);
    flg.x[14] = savex14;
  }
}
345
346 static void
resetTaskBih(int set)347 resetTaskBih(int set)
348 {
349 int bih;
350 static int savebih;
351 static int savex14;
352
353 if (taskbih == 0)
354 return;
355 if (set) {
356 savebih = expb.curbih;
357 savex14 = flg.x[14];
358 flg.x[14] |= 0x1000;
359 wr_block();
360 expb.curbih = taskbih;
361 rdilts(expb.curbih);
362 expb.curilt = ILT_PREV(0);
363 } else {
364 wrilts(taskbih);
365 expb.curbih = savebih;
366 rdilts(expb.curbih);
367 expb.curilt = ILT_PREV(0);
368 flg.x[14] = savex14;
369 }
370 }
371
372 static void
sptrListAdd(sptrListT ** list,SPTR sptr,int size_ili,bool is_cmblk,int cplus_assign_rou,int vec_size_ili,SPTR o_sptr)373 sptrListAdd(sptrListT **list, SPTR sptr, int size_ili, bool is_cmblk,
374 int cplus_assign_rou, int vec_size_ili, SPTR o_sptr)
375 {
376 sptrListT *node = (sptrListT *)malloc(sizeof(sptrListT));
377
378 node->o_sptr = o_sptr;
379 node->sptr = sptr;
380 node->next = *list;
381 node->is_common_block = is_cmblk;
382 node->size_ili = size_ili;
383 node->vec_size_ili = vec_size_ili; /* used for COPYIN_CL of arrays */
384 node->cplus_assign_rou = cplus_assign_rou;
385 *list = node;
386 }
387
388 static void
sptrListFree(sptrListT ** list)389 sptrListFree(sptrListT **list)
390 {
391 sptrListT *n = *list;
392 while (n) {
393 sptrListT *next = n->next;
394 free(n);
395 n = next;
396 }
397 *list = NULL;
398 }
399
400 static int
sptrListLength(const sptrListT * list)401 sptrListLength(const sptrListT *list)
402 {
403 int count = 0;
404 const sptrListT *n;
405
406 for (n = list; n; n = n->next)
407 ++count;
408
409 return count;
410 }
411
412 /* Returns an ili of a temporary variable that conatins size information
413 * The runtime for instance, _mp_copypriv_kmpc, expects size_t* for size.
414 *
415 * 'bytes' is the actual byte size and not an sptr or ili.
416 */
417 static int
genSizeAcon(int size_ili)418 genSizeAcon(int size_ili)
419 {
420 int ili;
421 SPTR tmp;
422 int nme;
423 const DTYPE dtype = (TARGET_PTRSIZE == 8) ? DT_INT8 : DT_INT;
424
425 tmp = getPrivateTemp(dtype);
426 SCP(tmp, SC_AUTO);
427
428 ili = ad_acon(tmp, 0);
429 nme = addnme(NT_VAR, tmp, 0, 0);
430 ADDRTKNP(tmp, 1);
431
432 if (TARGET_PTRSIZE == 8) {
433 ili = ad4ili(IL_STKR, size_ili, ili, nme, MSZ_I8);
434 } else {
435 size_ili = ad1ili(IL_KIMV, size_ili);
436 ili = ad4ili(IL_ST, size_ili, ili, nme, MSZ_WORD);
437 }
438 chk_block(ili);
439
440 return ad_acon(tmp, 0);
441 }
442
443 /* Given a sptr list, create an array of pairs:
444 * (size, address) where:
445 * 'size' - Pointer to a temporary variable containing the byte size of
446 * sptr. (size_t *)
447 * 'address' - Address of sptr. (void *).
448 *
449 * These pairs are represented in an array where
450 * the even indices are the size pointers and the odd indices the
451 * addresses. The sentinel/terminator is the all-zero pair.
452 * [(sz0,addr0), (sz1,addr1), ... (0x0, 0x0)].
453 *
454 * We represent these as an array, which is more convenient to manage
455 * internally. The runtime routine _mp_copypriv_kmpc expects this format.
456 *
457 * Returns: The sptr of this majestic array that we so masterfully create here.
458 */
459 static SPTR
makeCopyprivArray(const sptrListT * list,bool pass_size_addresses)460 makeCopyprivArray(const sptrListT *list, bool pass_size_addresses)
461 {
462 int i, ili, nme, n_elts;
463 SPTR array;
464 DTYPE dtype;
465 int basenme, adsc;
466 static int id;
467 const sptrListT *node;
468
469 /* Count the number of items in the list */
470 n_elts = 0;
471 for (node = list; node; node = node->next)
472 ++n_elts;
473
474 /* We represent each entry as a pair for each private variable (each node in
475 * sptr_list): (size, sptr)
476 *
477 * +2 for the last node, the sentinel (null node), which tells the
478 * runtime it has reached the end of the array. Each node is 2 array elts.
479 */
480 n_elts = (n_elts * 2) + 2;
481
482 /* Create the array dtype: each element is word size */
483 array = getnewccsym('a', ++id, ST_ARRAY);
484 {
485 ADSC *adsc;
486 INT con[2] = {0, n_elts};
487
488 dtype = get_array_dtype(1, DT_CPTR);
489 adsc = AD_DPTR(dtype);
490 AD_LWBD(adsc, 0) = stb.i1;
491 AD_UPBD(adsc, 0) = getcon(con, DT_INT);
492 AD_NUMELM(adsc) = AD_UPBD(adsc, 0);
493 }
494
495 DTYPEP(array, dtype);
496 SCP(array, SC_AUTO);
497
498 /* Build the list: (size, sptr) pairs. */
499 basenme = addnme(NT_VAR, array, 0, 0);
500 for (node = list, i = 0; node; node = node->next, ++i) {
501 int sptr_nme, sptr_ili;
502
503 if (node->is_common_block || THREADG(node->sptr)) {
504 /* This is thread private so obtain address from the TP vector */
505 if (node->is_common_block)
506 ref_threadprivate(node->sptr, &sptr_ili, &sptr_nme);
507 else
508 ref_threadprivate_var(node->sptr, &sptr_ili, &sptr_nme, 1);
509 } else {
510 /* Else, this is not thread private */
511 sptr_nme = addnme(NT_VAR, node->sptr, 0, 0);
512 sptr_ili = mk_address(node->sptr);
513 }
514
515 /* array[i] = size */
516 nme = add_arrnme(NT_ARR, array, basenme, 0, ad_icon(i), false);
517 if (pass_size_addresses) { /* why do I need to pass address? */
518 ili = genSizeAcon(node->size_ili);
519 ili = ad3ili(IL_STA, ili, ad_acon(array, i * TARGET_PTRSIZE), nme);
520 } else {
521 ili = ad4ili(IL_ST, node->size_ili, ad_acon(array, i * TARGET_PTRSIZE),
522 nme, TARGET_PTRSIZE == 8 ? MSZ_I8 : MSZ_WORD);
523 }
524 chk_block(ili);
525
526 /* array[i+1] = local (stack based) sptr */
527 ++i;
528 nme = add_arrnme(NT_ARR, array, basenme, 0, ad_icon(i), false);
529 ili = ad3ili(IL_STA, sptr_ili, ad_acon(array, i * TARGET_PTRSIZE), nme);
530 chk_block(ili);
531 }
532
533 /* Terminate the array with a sentinel that the runtime will recognize */
534 nme = add_arrnme(NT_ARR, array, basenme, 0, ad_icon(i), false);
535 ili = ad3ili(IL_STA, ad_aconi(0), ad_acon(array, i * TARGET_PTRSIZE), nme);
536 chk_block(ili);
537
538 ++i;
539 nme = add_arrnme(NT_ARR, array, basenme, 0, ad_icon(i), false);
540 ili = ad3ili(IL_STA, ad_aconi(0), ad_acon(array, i * TARGET_PTRSIZE), nme);
541 chk_block(ili);
542
543 return array;
544 }
545
546 static int
mkMemcpy(void)547 mkMemcpy(void)
548 {
549 int func;
550 func = mk_prototype("memcpy", NULL, DT_CPTR, 3, DT_CPTR, DT_CPTR, DT_UINT8);
551 SCP(func, SC_EXTERN);
552 func = mkfunc("memcpy");
553 return func;
554 }
555
556 /**
557 \brief ...
558 \param arglist
559 \param opc IL_DAIR/IL_DAAR/IL_DADP/IL_DASP/IL_ARGxx (x86)
560 \param argili
561
562 Add argument expression \p argili to existing argument list \p arglist using
563 opcode \p opc. If \p arglist = 0, begin a new list.
564 */
565 static int
jsrAddArg(int arglist,ILI_OP opc,int argili)566 jsrAddArg(int arglist, ILI_OP opc, int argili)
567 {
568 int rg;
569 int ili;
570
571 if (arglist == 0) {
572 arglist = ad1ili(IL_NULL, 0);
573 availIreg = 0;
574 availFreg = 0;
575 }
576 /*
577 * WARNING: For the x86, this implies that the standard call mechanism is
578 * being used. If there are multiple arguments, they need to be pushed
579 * on the stack in reverse order (first jsrAddArg() call is for the last
580 * argument, ...).
581 */
582 switch (opc) {
583 case IL_ARGAR:
584 ili = ad3ili(IL_ARGAR, argili, arglist, 0);
585 return ili;
586 case IL_ARGIR:
587 case IL_ARGKR:
588 case IL_ARGSP:
589 case IL_ARGDP:
590 ili = ad2ili(opc, argili, arglist);
591 return ili;
592 default:
593 /* allow arguments to be passed in registers and on the stack */
594 break;
595 }
596 assert(is_daili_opcode(opc), "jsrAddArg: invalid opcode", opc, ERR_Fatal);
597 if (opc == IL_DAIR || opc == IL_DAAR || opc == IL_DAKR) {
598 rg = IR(availIreg++);
599 } else {
600 if (opc == IL_DADP && (availFreg & 1))
601 availFreg++;
602 rg = SP(availFreg);
603 availFreg++;
604 if (opc == IL_DADP)
605 availFreg++;
606 }
607
608 ili = ad3ili(opc, argili, rg, arglist);
609 return ili;
610 }
611
612 /** \brief Return the ili of a call to a function with name fname, and argument
613 * list argili. If argili = 0, argument list is empty.
614 *
615 * \param fname function name
616 * \param opc IL_QJSR/IL_JSR
617 * \param argili argument list
618 */
619 static int
makeCall(char * fname,ILI_OP opc,int argili)620 makeCall(char *fname, ILI_OP opc, int argili)
621 {
622 int ili;
623 bool old_share_proc, old_share_qjsr;
624
625 if (argili == 0) {
626 argili = ad1ili(IL_NULL, 0);
627 availIreg = 0;
628 availFreg = 0;
629 }
630
631 old_share_proc = share_proc_ili;
632 old_share_qjsr = share_qjsr_ili;
633 share_proc_ili = false;
634 share_qjsr_ili = false;
635 ili = ad2ili(opc, mkfunc(fname), argili);
636 share_proc_ili = old_share_proc;
637 share_qjsr_ili = old_share_qjsr;
638
639 if (availFreg > 0 && availFreg < 4)
640 availFreg = 4;
641 if (availIreg > maxIreg)
642 maxIreg = availIreg;
643 if (availFreg > maxFreg)
644 maxFreg = availFreg;
645
646 return ili;
647 }
648
649 static void
addCopyinInplace(const sptrListT * list)650 addCopyinInplace(const sptrListT *list)
651 {
652 int i, ili, nme, n_elts, dest_nme, argili, call;
653 int master_ili;
654 SPTR lab;
655 int altili, func;
656 SPTR sptr;
657 int indirect_load;
658 const sptrListT *node;
659
660 n_elts = 0;
661 lab = getlab();
662 for (node = list, i = 0; node; node = node->next, ++i) {
663 int sptr_nme, sptr_ili;
664
665 sptr = node->o_sptr;
666 indirect_load = 0;
667 if (STYPEG(sptr) == ST_CMBLK) {
668 sptr = CMEMFG(sptr);
669 if (!sptr)
670 continue;
671 } else if (SCG(sptr) == SC_BASED && POINTERG(sptr)) {
672 if (ALLOCATTRG(sptr)) {
673 indirect_load = 1;
674 }
675 sptr = MIDNUMG(sptr);
676 }
677 /* what we have here it TPxx, need to find the symbol it points to */
678 /* master copy - should be passed from previous region */
679 master_ili = mk_address(sptr);
680
681 /* current threadprivate copy */
682 sptr_ili = llGetThreadprivateAddr(node->sptr);
683 if (indirect_load == 1) {
684 sptr_nme = addnme(NT_VAR, sptr, 0, 0);
685 sptr_ili = ad2ili(IL_LDA, sptr_ili, sptr_nme);
686 master_ili = ad2ili(IL_LDA, master_ili, sptr_nme);
687 }
688 dest_nme = ILI_OPND(sptr_ili, 2);
689
690 if (n_elts == 0) {
691 ili = ad4ili(IL_ACJMP, sptr_ili, master_ili, CC_EQ, lab);
692 RFCNTI(lab);
693 chk_block(ili);
694 n_elts = 1;
695 }
696
697 /* now do a copy */
698 altili = 0;
699 {
700 func = mkMemcpy();
701 argili = jsrAddArg(0, IL_ARGKR, sel_iconv(node->size_ili, 1));
702 argili = jsrAddArg(argili, IL_ARGAR, master_ili);
703 argili = jsrAddArg(argili, IL_ARGAR, sptr_ili);
704 call = makeCall("memcpy", IL_JSR, argili);
705 argili = ad1ili(IL_NULL, 0);
706 argili =
707 ad4ili(IL_GARG, sel_iconv(node->size_ili, 1), argili, DT_INT8, 0);
708 argili = ad4ili(IL_GARG, master_ili, argili, DT_CPTR, 0);
709 argili = ad4ili(IL_GARG, sptr_ili, argili, DT_CPTR, 0);
710 altili = ad3ili(IL_GJSR, func, argili, 0);
711 }
712 ILI_ALT(call) = altili;
713 iltb.callfg = 1;
714 chk_block(call);
715 }
716 if (n_elts) {
717 wr_block();
718 cr_block();
719
720 /* create a block */
721 BIH_LABEL(expb.curbih) = lab;
722 ILIBLKP(lab, expb.curbih);
723 ili = ll_make_kmpc_barrier();
724 iltb.callfg = 1;
725 chk_block(ili);
726
727 wr_block();
728 cr_block();
729 }
730 }
731
732 static void
makeCopyprivArray_tls(const sptrListT * list)733 makeCopyprivArray_tls(const sptrListT *list)
734 {
735 int i, ili, nme, n_elts, array, dtype, basenme, adsc, argili, call;
736 int master_ili, thread_addr;
737 SPTR lab;
738 int altili, master_nme, func;
739 SPTR sptr;
740 const sptrListT *node;
741
742 n_elts = 0;
743 lab = getlab();
744 for (node = list, i = 0; node; node = node->next, ++i) {
745 int sptr_nme, sptr_ili;
746
747 sptr = MIDNUMG(node->sptr);
748 if (STYPEG(sptr) == ST_CMBLK) {
749 sptr = CMEMFG(node->o_sptr);
750 if (!sptr)
751 continue;
752 } else if (SCG(sptr) == SC_BASED && POINTERG(sptr)) {
753 sptr = MIDNUMG(sptr);
754 }
755 master_nme = addnme(NT_VAR, sptr, 0, (INT)0);
756 master_ili = mk_address(sptr);
757
758 basenme = addnme(NT_VAR, node->sptr, 0, (INT)0);
759 sptr_ili = ad2ili(IL_LDA, ad_acon(node->sptr, (INT)0), basenme);
760 if (n_elts == 0) {
761 ili = ad4ili(IL_ACJMP, sptr_ili, master_ili, CC_EQ, lab);
762 RFCNTI(lab);
763 chk_block(ili);
764 n_elts = 1;
765 }
766
767 /* now do a copy */
768 altili = 0;
769 {
770 func = mkMemcpy();
771 argili = jsrAddArg(0, IL_ARGIR, sel_iconv(node->size_ili, 0));
772 argili = jsrAddArg(argili, IL_ARGAR, master_ili);
773 argili = jsrAddArg(argili, IL_ARGAR, sptr_ili);
774 call = makeCall("memcpy", IL_JSR, argili);
775 argili = ad1ili(IL_NULL, 0);
776 argili =
777 ad4ili(IL_GARG, sel_iconv(node->size_ili, 1), argili, DT_INT8, 0);
778 argili = ad4ili(IL_GARG, master_ili, argili, DT_CPTR, 0);
779 argili = ad4ili(IL_GARG, sptr_ili, argili, DT_CPTR, 0);
780 altili = ad3ili(IL_GJSR, func, argili, 0);
781 }
782 ILI_ALT(call) = altili;
783 iltb.callfg = 1;
784 chk_block(call);
785 }
786 if (n_elts) {
787 wr_block();
788 cr_block();
789
790 /* create a block */
791 BIH_LABEL(expb.curbih) = lab;
792 ILIBLKP(lab, expb.curbih);
793 ili = ll_make_kmpc_barrier();
794 iltb.callfg = 1;
795 chk_block(ili);
796
797 wr_block();
798 cr_block();
799 }
800 }
801
/* Return the block immediately following func's entry BIH. */
static int
findEnlabBih(int func)
{
  return BIH_NEXT(BIHNUMG(func));
}
809
810 static void
setTaskloopVars(SPTR lb,SPTR ub,SPTR stride,SPTR lastitr)811 setTaskloopVars(SPTR lb, SPTR ub, SPTR stride, SPTR lastitr)
812 {
813 int nme, basenm, baseili, ili, bih;
814 SPTR arg;
815 int asym;
816 int oldbih;
817 ILI_OP ld, st;
818 MSZ msz;
819
820 oldbih = expb.curbih;
821 /* This code is in an outlined taskloop routine.
822 * Load taskloop vars from arg1 to local/private vars.
823 */
824 arg = ll_get_hostprog_arg(GBL_CURRFUNC, 2);
825 basenm = addnme(NT_VAR, arg, 0, 0);
826 baseili = ad_acon(arg, 0);
827 baseili = mk_address(arg);
828 arg = mk_argasym(arg);
829 basenm = addnme(NT_VAR, arg, 0, (INT)0);
830 baseili = ad2ili(IL_LDA, baseili, basenm);
831 nme = addnme(NT_IND, lb, basenm, 0);
832 ili = ad3ili(IL_AADD, baseili, ad_aconi(TASK_LPVAR_OFFSET), 0);
833 ldst_msz(DT_INT8, &ld, &st, &msz);
834 ili = ad3ili(ld, ili, nme, msz);
835 ldst_msz(DTYPEG(lb), &ld, &st, &msz);
836 if (msz != MSZ_I8)
837 ili = kimove(ili);
838 ili = ad4ili(st, ili, mk_address(lb), addnme(NT_VAR, lb, 0, 0), msz);
839 chk_block(ili);
840
841 nme = addnme(NT_IND, ub, basenm, 0);
842 ili = ad3ili(IL_AADD, baseili,
843 ad_aconi(TASK_LPVAR_OFFSET + zsize_of(DT_INT8)), 0);
844 ldst_msz(DT_INT8, &ld, &st, &msz);
845 ili = ad3ili(ld, ili, nme, msz);
846 ldst_msz(DTYPEG(ub), &ld, &st, &msz);
847 if (msz != MSZ_I8)
848 ili = kimove(ili);
849 ili = ad4ili(st, ili, mk_address(ub), addnme(NT_VAR, ub, 0, 0), msz);
850 chk_block(ili);
851
852 if (STYPEG(stride) != ST_CONST) {
853 nme = addnme(NT_IND, stride, basenm, 0);
854 ili = ad3ili(IL_AADD, baseili,
855 ad_aconi(TASK_LPVAR_OFFSET + (zsize_of(DT_INT8) * 2)), 0);
856 ldst_msz(DT_INT8, &ld, &st, &msz);
857 ili = ad3ili(ld, ili, nme, msz);
858 ldst_msz(DTYPEG(stride), &ld, &st, &msz);
859 if (msz != MSZ_I8)
860 ili = kimove(ili);
861 ili =
862 ad4ili(st, ili, mk_address(stride), addnme(NT_VAR, stride, 0, 0), msz);
863 chk_block(ili);
864 }
865
866 if (lastitr && STYPEG(lastitr) != ST_CONST) {
867 nme = addnme(NT_IND, lastitr, basenm, 0);
868 ldst_msz(DT_INT, &ld, &st, &msz);
869 ili = ad3ili(IL_AADD, baseili,
870 ad_aconi(TASK_LPVAR_OFFSET + (zsize_of(DT_INT8) * 3)), 0);
871 ili = ad3ili(ld, ili, nme, msz);
872 ldst_msz(DTYPEG(lastitr), &ld, &st, &msz);
873 ili = ad4ili(st, ili, ad_acon(lastitr, 0), addnme(NT_VAR, lastitr, 0, 0),
874 msz);
875 chk_block(ili);
876 }
877 if (oldbih == expb.curbih) {
878 wr_block();
879 cr_block();
880 }
881 }
882
883 /* isn't there some standard routine I can subsititue for this? */
884 static int
getElemSize(DTYPE dtype)885 getElemSize(DTYPE dtype)
886 {
887
888 DTYPE dd = dtype;
889
890 while (dd && (DTY(dd) == TY_ARRAY)) {
891 dd = DTySeqTyElement(dd);
892 }
893 if (DTY(dd) == TY_STRUCT)
894 return DTyAlgTySize(dd);
895 return 0;
896 }
897
898 static void
clearTaskloopInfo(void)899 clearTaskloopInfo(void)
900 {
901 INT offset = TASK_LPVAR_OFFSET;
902 BZERO(&taskLpInfo, char, sizeof(taskLpInfo));
903 TASK_LPVAR_OFFSET = offset;
904 }
905
906 static int
genIntStore(SPTR sym,int rhs)907 genIntStore(SPTR sym, int rhs)
908 {
909 int ili;
910 int nme;
911
912 ili = ad_acon(sym, 0);
913 nme = addnme(NT_VAR, sym, 0, 0);
914 ili = ad4ili(IL_ST, rhs, ili, nme, MSZ_WORD);
915 return ili;
916 }
917
918 static int
genIntLoad(SPTR sym)919 genIntLoad(SPTR sym)
920 {
921 int ili;
922 int nme;
923
924 ili = ad_acon(sym, 0);
925 nme = addnme(NT_VAR, sym, 0, 0);
926 ili = ad3ili(IL_LD, ili, nme, MSZ_WORD);
927 return ili;
928 }
929
930 void
exp_smp(ILM_OP opc,ILM * ilmp,int curilm)931 exp_smp(ILM_OP opc, ILM *ilmp, int curilm)
932 {
933 #ifdef IM_BPAR
934 int argili = 0;
935 int ili, tili, ili_arg;
936 int lastilt;
937 SPTR sym;
938 SPTR sptr;
939 int offset, savebih;
940 SPTR end_label, beg_label;
941 int off;
942 int addr, nmex, stili;
943 int prev_scope;
944 char name[10];
945 int argilm;
946 SPTR tpv;
947 int pv;
948 int savex14;
949 char *doschedule;
950 int semaphore, dotarget;
951 static int assign_rou = 0; /* C++ only, lets avoid more ifdefs */
952 ILM_T rou_op;
953 int num_elem, element_size;
954 loop_args_t loop_args;
955 LLTask *task;
956 bool is_cmblk;
957 static sptrListT *copysptr_list = NULL;
958 static SPTR uplevel_sptr;
959 static SPTR single_thread;
960 static SPTR in_single;
961 static SPTR targetfunc_sptr = SPTR_NULL, targetdevice_func_sptr = SPTR_NULL;
962 int target_mode = 0;
963 SPTR nlower, nupper, nstride;
964 #if defined(OMP_OFFLOAD_LLVM) || defined(OMP_OFFLOAD_PGI)
965 static int target_ili_num_threads = 0;
966 static int target_ili_num_teams= 0;
967 static int target_ili_thread_limit= 0;
968 #endif
969 int sz;
970 ISZ_T size, num_elements;
971 static int isTargetDevice = 0;
972 switch (opc) {
973 case IM_BPAR:
974 case IM_BPARN:
975 case IM_BPARD:
976 case IM_BPARA:
977 case IM_EPAR:
978 case IM_EPARD:
979 case IM_BTASK:
980 case IM_BTASKLOOP:
981 case IM_ETASK:
982 case IM_ETASKLOOP:
983 case IM_BTARGET:
984 case IM_ETARGET:
985 case IM_BTEAMS:
986 case IM_BTEAMSN:
987 case IM_ETEAMS:
988 case IM_TASKLOOPVARS:
989 case IM_TASKLOOPREG:
990 #ifdef IM_TASKPRIVATE
991 case IM_TASKPRIVATE:
992 #endif
993 case IM_TASKFIRSTPRIV:
994 case IM_ETASKLOOPREG:
995 case IM_BTASKDUP:
996 case IM_ETASKDUP:
997 break;
998 default:
999 ll_rewrite_ilms(-1, curilm, 0);
1000 break;
1001 }
1002 switch (opc) {
1003 case IM_BMPSCOPE:
1004 if (ll_ilm_is_rewriting())
1005 break;
1006 scopeSptr = ILM_SymOPND(ilmp, 1);
1007 #ifdef PARUPLEVELG
1008 uplevel_sptr = PARUPLEVELG(scopeSptr);
1009 #else
1010 uplevel_sptr = 0;
1011 #endif
1012 break;
1013 case IM_EMPSCOPE:
1014 break;
1015 case IM_BPAR:
1016 case IM_BPARN:
1017 case IM_BPARA:
1018 #ifdef OMP_OFFLOAD_LLVM
1019 if (flg.omptarget && gbl.ompaccel_intarget) {
1020 exp_ompaccel_bpar(ilmp, curilm, uplevel_sptr, scopeSptr, incrOutlinedCnt);
1021 break;
1022 }
1023 #endif
1024 if (flg.opt != 0) {
1025 wr_block();
1026 cr_block();
1027 }
1028 incrOutlinedCnt();
1029 BIH_FT(expb.curbih) = true;
1030 BIH_QJSR(expb.curbih) = true;
1031 BIH_NOMERGE(expb.curbih) = true;
1032 if (gbl.outlined)
1033 expb.sc = SC_PRIVATE;
1034 if (outlinedCnt == 1) {
1035 int isPar = ILI_OF(ILM_OPND(ilmp, 1));
1036 SPTR par_label, end_label;
1037 int iliarg, nthreads, proc_bind;
1038 {
1039 sptr = ll_make_outlined_func_wopc((SPTR)uplevel_sptr, scopeSptr, opc);
1040 if (!PARENCLFUNCG(scopeSptr))
1041 PARENCLFUNCP(scopeSptr, sptr);
1042 }
1043 ll_write_ilm_header(sptr, curilm);
1044 iliarg = ll_load_outlined_args(scopeSptr, sptr, gbl.outlined);
1045
1046 /* if (isPar == 0)
1047 call omp_do_func(.....)
1048 goto do_end;
1049 par_label:
1050 call kmpc_fork_call (....., omp_do_func,.... )
1051 do_label:
1052 */
1053
1054 par_label = getlab();
1055 end_label = getlab();
1056
1057 isPar = ad3ili(IL_ICJMPZ, isPar, CC_EQ, par_label);
1058 RFCNTI(par_label);
1059 chk_block(isPar);
1060
1061 ili = ll_make_kmpc_serialized_parallel();
1062 iltb.callfg = 1;
1063 chk_block(ili);
1064
1065 ili = ll_make_outlined_call2(sptr, iliarg);
1066 iltb.callfg = 1;
1067 chk_block(ili);
1068
1069 ili = ll_make_kmpc_end_serialized_parallel();
1070 iltb.callfg = 1;
1071 chk_block(ili);
1072
1073 ili = ad1ili(IL_JMP, end_label);
1074 RFCNTI(end_label);
1075 chk_block(ili);
1076
1077 wr_block();
1078 cr_block();
1079 exp_label(par_label);
1080 proc_bind = 0;
1081 if (opc == IM_BPARA) {
1082 int flag = ILM_OPND(ilmp, 3);
1083 if (flag & 0x2) {
1084 nthreads = ILI_OF(ILM_OPND(ilmp, 2));
1085 ili = ll_make_kmpc_push_num_threads(nthreads);
1086 iltb.callfg = 1;
1087 chk_block(ili);
1088 }
1089 if (flag & 0x1) {
1090 proc_bind = ILM_OPND(ilmp, 4);
1091 }
1092 }
1093 if (opc == IM_BPARN) {
1094 nthreads = ILI_OF(ILM_OPND(ilmp, 2));
1095 ili = ll_make_kmpc_push_num_threads(nthreads);
1096 iltb.callfg = 1;
1097 chk_block(ili);
1098 }
1099 if (proc_bind) {
1100 proc_bind = ad_icon(proc_bind);
1101 ili = ll_make_kmpc_push_proc_bind(proc_bind);
1102 iltb.callfg = 1;
1103 chk_block(ili);
1104 }
1105 ili = ll_make_kmpc_fork_call(sptr, 1, &iliarg, OPENMP, -1);
1106 iltb.callfg = 1;
1107 chk_block(ili);
1108
1109 exp_label(end_label);
1110
1111 ccff_info(MSGOPENMP, "OMP001", gbl.findex, gbl.lineno,
1112 "Parallel region activated", NULL);
1113
1114 } else if (outlinedCnt > 1) {
1115 ll_rewrite_ilms(-1, curilm, 0);
1116 }
1117
1118 break;
1119 case IM_BPARD:
1120 bpard:
1121 /* lexically nested begin parallel */
1122 incrOutlinedCnt();
1123 if (outlinedCnt > 1) {
1124 ll_rewrite_ilms(-1, curilm, 0);
1125 break;
1126 }
1127 if (flg.opt != 0) {
1128 wr_block();
1129 cr_block();
1130 }
1131 BIH_FT(expb.curbih) = true;
1132 BIH_QJSR(expb.curbih) = true;
1133 BIH_NOMERGE(expb.curbih) = true;
1134 if (outlinedCnt == 1) {
1135 int isPar = ILI_OF(ILM_OPND(ilmp, 1));
1136 SPTR par_label, end_label;
1137 int iliarg, proc_bind;
1138 sptr = ll_make_outlined_func((SPTR)uplevel_sptr, scopeSptr);
1139 if (!PARENCLFUNCG(scopeSptr))
1140 PARENCLFUNCP(scopeSptr, sptr);
1141 ll_write_ilm_header(sptr, curilm);
1142 iliarg = ll_load_outlined_args(scopeSptr, sptr, gbl.outlined);
1143
1144 /* if (isPar == 0)
1145 call omp_do_func(.....)
1146 goto do_end;
1147 par_label:
1148 call kmpc_fork_call (....., omp_do_func,.... )
1149 do_label:
1150 */
1151
1152 par_label = getlab();
1153 end_label = getlab();
1154
1155 isPar = ad3ili(IL_ICJMPZ, isPar, CC_EQ, par_label);
1156 RFCNTI(par_label);
1157 chk_block(isPar);
1158
1159 ili = ll_make_kmpc_serialized_parallel();
1160 iltb.callfg = 1;
1161 chk_block(ili);
1162
1163 ili = ll_make_outlined_call2(sptr, iliarg);
1164 iltb.callfg = 1;
1165 chk_block(ili);
1166
1167 ili = ll_make_kmpc_end_serialized_parallel();
1168 iltb.callfg = 1;
1169 chk_block(ili);
1170
1171 ili = ad1ili(IL_JMP, end_label);
1172 RFCNTI(end_label);
1173 chk_block(ili);
1174
1175 proc_bind = ILM_OPND(ilmp, 2);
1176 if (proc_bind) {
1177 proc_bind = ad_icon(proc_bind);
1178 ili = ll_make_kmpc_push_proc_bind(proc_bind);
1179 iltb.callfg = 1;
1180 chk_block(ili);
1181 }
1182
1183 wr_block();
1184 cr_block();
1185 exp_label(par_label);
1186 ili = ll_make_kmpc_fork_call(sptr, 1, &iliarg, OPENMP, -1);
1187 iltb.callfg = 1;
1188 chk_block(ili);
1189
1190 wr_block();
1191 cr_block();
1192 exp_label(end_label);
1193 }
1194 ccff_info(MSGOPENMP, "OMP001", gbl.findex, gbl.lineno,
1195 "Parallel region activated", NULL);
1196 break;
1197 case IM_BTEAMS:
1198 case IM_BTEAMSN:
1199 #ifdef OMP_OFFLOAD_LLVM
1200 if(flg.omptarget && gbl.ompaccel_intarget) {
1201 exp_ompaccel_bteams(ilmp, curilm, outlinedCnt, uplevel_sptr, scopeSptr, incrOutlinedCnt);
1202 break;
1203 }
1204 #endif
1205 if (flg.opt != 0) {
1206 wr_block();
1207 cr_block();
1208 }
1209 incrOutlinedCnt();
1210 BIH_FT(expb.curbih) = true;
1211 BIH_QJSR(expb.curbih) = true;
1212 BIH_NOMERGE(expb.curbih) = true;
1213 if (gbl.outlined)
1214 expb.sc = SC_PRIVATE;
1215 if (outlinedCnt == 1) {
1216 SPTR par_label;
1217 int iliarg, nteams, n_limit;
1218 {
1219 sptr = ll_make_outlined_func_wopc((SPTR)uplevel_sptr, scopeSptr, opc);
1220 }
1221 if (!PARENCLFUNCG(scopeSptr))
1222 PARENCLFUNCP(scopeSptr, sptr);
1223 ll_write_ilm_header(sptr, curilm);
1224 iliarg = ll_load_outlined_args(scopeSptr, sptr, gbl.outlined);
1225
1226 par_label = getlab();
1227
1228 wr_block();
1229 cr_block();
1230 exp_label(par_label);
1231 if (opc == IM_BTEAMSN) {
1232 nteams = ILI_OF(ILM_OPND(ilmp, 1));
1233 n_limit = ILI_OF(ILM_OPND(ilmp, 2));
1234 ili = ll_make_kmpc_push_num_teams(nteams, n_limit);
1235 iltb.callfg = 1;
1236 chk_block(ili);
1237 }
1238 ili = ll_make_kmpc_fork_teams(sptr, 1, &iliarg);
1239 iltb.callfg = 1;
1240 chk_block(ili);
1241
1242 ccff_info(MSGOPENMP, "OMP022", gbl.findex, gbl.lineno,
1243 "Teams region activated", NULL);
1244
1245 } else if (outlinedCnt > 1) {
1246 ll_rewrite_ilms(-1, curilm, 0);
1247 }
1248
1249 break;
1250
1251 case IM_BTARGET:
1252 #ifdef OMP_OFFLOAD_LLVM
1253 if (flg.omptarget) {
1254 exp_ompaccel_btarget(ilmp, curilm, uplevel_sptr, scopeSptr, incrOutlinedCnt, &targetfunc_sptr, &isTargetDevice);
1255 break;
1256 }
1257 #endif
1258 /* lexically nested begin parallel */
1259 incrOutlinedCnt();
1260 if (outlinedCnt > 1) {
1261 ll_rewrite_ilms(-1, curilm, 0);
1262 break;
1263 }
1264 if (flg.opt != 0) {
1265 wr_block();
1266 cr_block();
1267 }
1268 BIH_FT(expb.curbih) = true;
1269 BIH_QJSR(expb.curbih) = true;
1270 BIH_NOMERGE(expb.curbih) = true;
1271 if (outlinedCnt == 1) {
1272 isTargetDevice = ILI_OF(ILM_OPND(ilmp, 1));
1273 targetfunc_sptr = ll_make_outlined_func_wopc((SPTR)uplevel_sptr, scopeSptr, opc);
1274 if (!PARENCLFUNCG(scopeSptr))
1275 PARENCLFUNCP(scopeSptr, targetfunc_sptr);
1276 ll_write_ilm_header(targetfunc_sptr, curilm);
1277 }
1278 ccff_info(MSGOPENMP, "OMP020", gbl.findex, gbl.lineno,
1279 "Target region activated", NULL);
1280 break;
1281 case IM_ETARGET:
1282 if (outlinedCnt == 1) {
1283 ilm_outlined_pad_ilm(curilm);
1284 }
1285 decrOutlinedCnt();
1286 if (outlinedCnt >= 1) {
1287 ll_rewrite_ilms(-1, curilm, 0);
1288 break;
1289 }
1290 if (gbl.outlined)
1291 expb.sc = SC_AUTO;
1292
1293 SPTR par_label, end_label;
1294 int iliarg;
1295
1296 #ifdef OMP_OFFLOAD_LLVM
1297 if(flg.omptarget) {
1298 assert(targetfunc_sptr != SPTR_NULL,
1299 "Outlined function of target region is not found.", GBL_CURRFUNC, ERR_Fatal);
1300 // In Flang, We outline the region once, and offload it in the device
1301 // We don't generate outlined function for the host. so we don't have host fallback.
1302 exp_ompaccel_etarget(ilmp, curilm, targetfunc_sptr, outlinedCnt, (SPTR) uplevel_sptr, decrOutlinedCnt);
1303 break;
1304 }
1305 #endif
1306
1307 {
1308 assert(targetfunc_sptr != SPTR_NULL,
1309 "Outlined function of target region is not found.", GBL_CURRFUNC, ERR_Fatal);
1310 // When OpenMP Offload is not enabled, we simply call host outlined function.
1311 iliarg = ll_load_outlined_args(scopeSptr, targetfunc_sptr, gbl.outlined);
1312 ili = ll_make_outlined_call2(targetfunc_sptr, iliarg);
1313 iltb.callfg = 1;
1314 chk_block(ili);
1315 wr_block();
1316 cr_block();
1317 }
1318 targetfunc_sptr = SPTR_NULL;
1319 ccff_info(MSGOPENMP, "OMP021", gbl.findex, gbl.lineno,
1320 "Target region terminated", NULL);
1321 break;
1322 case IM_EPAR:
1323 #ifdef OMP_OFFLOAD_LLVM
1324 if (flg.omptarget && gbl.ompaccel_intarget) {
1325 exp_ompaccel_epar(ilmp, curilm, outlinedCnt, decrOutlinedCnt);
1326 break;
1327 }
1328 #endif
1329 if (outlinedCnt == 1) {
1330 ilm_outlined_pad_ilm(curilm);
1331 }
1332 decrOutlinedCnt();
1333 if (outlinedCnt >= 1)
1334 ll_rewrite_ilms(-1, curilm, 0);
1335
1336 if (gbl.outlined)
1337 expb.sc = SC_AUTO;
1338 ccff_info(MSGOPENMP, "OMP002", gbl.findex, gbl.lineno,
1339 "Parallel region terminated", NULL);
1340 break;
1341 case IM_EPARD:
1342 epard:
1343 if (outlinedCnt == 1) {
1344 ilm_outlined_pad_ilm(curilm);
1345 }
1346 decrOutlinedCnt();
1347 if (outlinedCnt >= 1)
1348 ll_rewrite_ilms(-1, curilm, 0);
1349 ccff_info(MSGOPENMP, "OMP002", gbl.findex, gbl.lineno,
1350 "Parallel region terminated", NULL);
1351 break;
1352 case IM_ETEAMS:
1353 #ifdef OMP_OFFLOAD_LLVM
1354 if (flg.omptarget) {
1355 exp_ompaccel_eteams(ilmp, curilm, outlinedCnt, decrOutlinedCnt);
1356 break;
1357 }
1358 #endif
1359 if (outlinedCnt == 1) {
1360 ilm_outlined_pad_ilm(curilm);
1361 }
1362 decrOutlinedCnt();
1363 if (outlinedCnt >= 1)
1364 ll_rewrite_ilms(-1, curilm, 0);
1365
1366 if (gbl.outlined)
1367 expb.sc = SC_AUTO;
1368 ccff_info(MSGOPENMP, "OMP023", gbl.findex, gbl.lineno,
1369 "Teams region terminated", NULL);
1370 break;
1371 case IM_BCS:
1372 /*
1373 * It's required that the front-end does not generate nested
1374 * critical sections (static only). Keeping the semaphore variable
1375 * around for the IM_ECS depends on this. If nested critical sections
1376 * need to be supported, then need to add a field to the IM_BCS and
1377 * IM_ECS ilms which will be the semaphore variable created by the
1378 * front-ends.
1379 */
1380 if (ll_ilm_is_rewriting())
1381 break;
1382 critCnt++;
1383 if (flg.opt != 0) {
1384 wr_block();
1385 cr_block();
1386 }
1387 BIH_FT(expb.curbih) = true;
1388 BIH_QJSR(expb.curbih) = true;
1389 BIH_NOMERGE(expb.curbih) = true;
1390 bihb.csfg = BIH_CS(expb.curbih) = true;
1391 ili = addMpBcsNest();
1392 iltb.callfg = 1;
1393 chk_block(ili);
1394 ccff_info(MSGOPENMP, "OMP003", gbl.findex, gbl.lineno,
1395 "Begin critical section", NULL);
1396 break;
1397 case IM_ECS:
1398 if (ll_ilm_is_rewriting())
1399 break;
1400 critCnt--;
1401 BIH_FT(expb.curbih) = true;
1402 BIH_QJSR(expb.curbih) = true;
1403 BIH_NOMERGE(expb.curbih) = true;
1404 BIH_CS(expb.curbih) = true;
1405 ili = addMpEcsNest();
1406 iltb.callfg = 1;
1407 chk_block(ili);
1408 wr_block();
1409 cr_block();
1410 if (critCnt <= 0)
1411 bihb.csfg = 0;
1412 ccff_info(MSGOPENMP, "OMP004", gbl.findex, gbl.lineno,
1413 "End critical section", NULL);
1414 break;
1415 case IM_P:
1416 if (ll_ilm_is_rewriting())
1417 break;
1418 critCnt++;
1419 if (flg.opt != 0) {
1420 wr_block();
1421 cr_block();
1422 }
1423 BIH_FT(expb.curbih) = true;
1424 BIH_QJSR(expb.curbih) = true;
1425 BIH_NOMERGE(expb.curbih) = true;
1426 bihb.csfg = BIH_CS(expb.curbih) = true;
1427 sym = ILM_SymOPND(ilmp, 1);
1428 if (!XBIT(69, 0x40) || !isUnnamedCs(sym)) {
1429 ili = add_mp_p(sym);
1430 } else {
1431 ili = addMpUnp();
1432 }
1433 iltb.callfg = 1;
1434 chk_block(ili);
1435 semaphore = MIDNUMG(sym);
1436 ccff_info(MSGOPENMP, "OMP012", gbl.findex, gbl.lineno,
1437 "Begin critical section (%semaphore)", "semaphore=%s",
1438 SYMNAME(semaphore), NULL);
1439 break;
1440 case IM_V:
1441 if (ll_ilm_is_rewriting())
1442 break;
1443 critCnt--;
1444 BIH_FT(expb.curbih) = true;
1445 BIH_QJSR(expb.curbih) = true;
1446 BIH_NOMERGE(expb.curbih) = true;
1447 BIH_CS(expb.curbih) = true;
1448 sym = ILM_SymOPND(ilmp, 1);
1449 if (!XBIT(69, 0x40) || !isUnnamedCs(sym)) {
1450 ili = add_mp_v(sym);
1451 } else {
1452 ili = addMpUnv();
1453 }
1454 iltb.callfg = 1;
1455 chk_block(ili);
1456 wr_block();
1457 cr_block();
1458 if (critCnt <= 0)
1459 bihb.csfg = 0;
1460 semaphore = MIDNUMG(sym);
1461 ccff_info(MSGOPENMP, "OMP013", gbl.findex, gbl.lineno,
1462 "End critical section (%semaphore)", "semaphore=%s",
1463 SYMNAME(semaphore), NULL);
1464 break;
1465 case IM_MPSCHED: {
1466 if (!ll_ilm_is_rewriting()) {
1467 const SPTR lower = ILM_SymOPND(ilmp, 1);
1468 const SPTR upper = ILM_SymOPND(ilmp, 2);
1469 const SPTR stride = ILM_SymOPND(ilmp, 3);
1470 const SPTR last = ILM_SymOPND(ilmp, 4);
1471 const DTYPE dtype = ILM_DTyOPND(ilmp, 5);
1472 ili = ll_make_kmpc_dispatch_next(lower, upper, stride, last, dtype);
1473 iltb.callfg = 1;
1474 chk_block(ili);
1475 ILM_RESULT(curilm) = ili;
1476 }
1477 break;
1478 }
1479 case IM_MPBORDERED: {
1480 if (!ll_ilm_is_rewriting()) {
1481 BIH_NOMERGE(expb.curbih) = true;
1482 critCnt++;
1483 bihb.csfg = BIH_CS(expb.curbih) = true;
1484 ili = ll_make_kmpc_ordered();
1485 iltb.callfg = 1;
1486 chk_block(ili);
1487 }
1488 break;
1489 }
1490 case IM_MPEORDERED: {
1491 if (!ll_ilm_is_rewriting()) {
1492 ili = ll_make_kmpc_end_ordered();
1493 iltb.callfg = 1;
1494 BIH_CS(expb.curbih) = true;
1495 chk_block(ili);
1496 wr_block();
1497 critCnt--;
1498 if (critCnt <= 0)
1499 bihb.csfg = 0;
1500 }
1501 break;
1502 }
1503 case IM_MPTASKLOOP:
1504 if (ll_ilm_is_rewriting())
1505 break;
1506
1507 {
1508 SPTR lb = ILM_SymOPND(ilmp, 1);
1509 SPTR ub = ILM_SymOPND(ilmp, 2);
1510 SPTR st = ILM_SymOPND(ilmp, 3);
1511 SPTR lastitr = ILM_SymOPND(ilmp, 4);
1512
1513 ENCLFUNCP(lb, taskFnsptr);
1514 ENCLFUNCP(ub, taskFnsptr);
1515 ENCLFUNCP(st, taskFnsptr);
1516 TASK_LASTITR = lastitr;
1517 if (lastitr) {
1518 ENCLFUNCP(lastitr, taskFnsptr);
1519 }
1520 setTaskloopVars(lb, ub, st, lastitr);
1521 }
1522
1523 break;
1524 case IM_MPLOOP: {
1525 int sched;
1526 if (outlinedCnt >= 1)
1527 break;
1528 #ifdef OMP_OFFLOAD_LLVM
1529 if (flg.omptarget && gbl.ompaccel_intarget) {
1530 exp_ompaccel_mploop(ilmp, curilm);
1531 break;
1532 }
1533 #endif
1534 loop_args.sched = (kmpc_sched_e)ILM_OPND(ilmp, 7);
1535 sched = mp_sched_to_kmpc_sched(loop_args.sched);
1536 nlower = ILM_SymOPND(ilmp, 1);
1537 nupper = ILM_SymOPND(ilmp, 2);
1538 nstride = ILM_SymOPND(ilmp, 3);
1539 if (!XBIT(183, 0x100000)) {
1540 nlower = getccsym_copy(nlower);
1541 nupper = getccsym_copy(nupper);
1542 nstride = getccsym_copy(nstride);
1543 ENCLFUNCP(nlower, GBL_CURRFUNC);
1544 ENCLFUNCP(nupper, GBL_CURRFUNC);
1545 ENCLFUNCP(nstride, GBL_CURRFUNC);
1546 exp_add_copy(nlower, ILM_SymOPND(ilmp, 1));
1547 exp_add_copy(nupper, ILM_SymOPND(ilmp, 2));
1548 exp_add_copy(nstride, ILM_SymOPND(ilmp, 3));
1549 }
1550 loop_args.lower = nlower;
1551 loop_args.upper = nupper;
1552 loop_args.stride = nstride;
1553 loop_args.chunk = ILM_SymOPND(ilmp, 4);
1554 loop_args.last = ILM_SymOPND(ilmp, 5);
1555 loop_args.dtype = ILM_DTyOPND(ilmp, 6);
1556 switch (sched) {
1557 case KMP_SCH_STATIC:
1558 case KMP_SCH_STATIC_CHUNKED:
1559 if ((ILM_OPND(ilmp, 7) & 0xff00) == MP_SCH_CHUNK_1) {
1560 doschedule = "static cyclic";
1561 ccff_info(MSGOPENMP, "OMP014", gbl.findex, gbl.lineno,
1562 "Parallel loop activated with %schedule schedule",
1563 "schedule=%s", doschedule, NULL);
1564 }
1565
1566 case KMP_DISTRIBUTE_STATIC_CHUNKED:
1567 case KMP_DISTRIBUTE_STATIC:
1568 case KMP_DISTRIBUTE_STATIC_CHUNKED_CHUNKONE:
1569 ili = ll_make_kmpc_for_static_init(&loop_args);
1570 break;
1571 default:
1572 ili = ll_make_kmpc_dispatch_init(&loop_args);
1573 }
1574 iltb.callfg = 1;
1575 chk_block(ili);
1576 BIH_NOMERGE(expb.curbih) = true;
1577 if (!XBIT(183, 0x100000)) {
1578 exp_add_copy(ILM_SymOPND(ilmp, 1), nlower);
1579 exp_add_copy(ILM_SymOPND(ilmp, 2), nupper);
1580 exp_add_copy(ILM_SymOPND(ilmp, 3), nstride);
1581 }
1582
1583 /* constant propagation stop when it sees function call. We may have some
1584 * stride that needs to propagate for computation of tripcount. */
1585 if (flg.opt != 0) {
1586 wr_block();
1587 cr_block();
1588 }
1589
1590 break;
1591 }
1592 case IM_MPDISTLOOP: {
1593 int sched;
1594 if (outlinedCnt >= 1)
1595 break;
1596 loop_args.lower = ILM_SymOPND(ilmp, 1);
1597 loop_args.upper = ILM_SymOPND(ilmp, 2);
1598 loop_args.stride = ILM_SymOPND(ilmp, 3);
1599 loop_args.chunk = ILM_SymOPND(ilmp, 4);
1600 loop_args.last = ILM_SymOPND(ilmp, 5);
1601 loop_args.upperd = ILM_SymOPND(ilmp, 6);
1602 loop_args.dtype = ILM_DTyOPND(ilmp, 7);
1603 loop_args.sched = (kmpc_sched_e)ILM_OPND(ilmp, 8);
1604 sched = mp_sched_to_kmpc_sched(loop_args.sched);
1605 switch (sched) {
1606 case KMP_SCH_STATIC:
1607 case KMP_SCH_STATIC_CHUNKED:
1608 case KMP_DISTRIBUTE_STATIC_CHUNKED:
1609 case KMP_DISTRIBUTE_STATIC:
1610 ili = ll_make_kmpc_dist_for_static_init(&loop_args);
1611 break;
1612 default:
1613 ili = ll_make_kmpc_dist_dispatch_init(&loop_args);
1614 }
1615 iltb.callfg = 1;
1616 chk_block(ili);
1617 BIH_NOMERGE(expb.curbih) = true;
1618
1619 /* constant propagation stop when it sees function call. We may have some
1620 * stride that needs to propagate for computation of tripcount. */
1621 if (flg.opt != 0) {
1622 wr_block();
1623 cr_block();
1624 }
1625
1626 break;
1627 }
1628 case IM_MPLOOPFINI: {
1629 #ifdef OMP_OFFLOAD_LLVM
1630 if (flg.omptarget) {
1631 exp_ompaccel_mploopfini(ilmp, curilm, outlinedCnt);
1632 break;
1633 }
1634 #endif
1635 if (outlinedCnt >= 1)
1636 break;
1637 const int sched = mp_sched_to_kmpc_sched(ILM_OPND(ilmp, 2));
1638 if (sched == KMP_ORD_STATIC || sched == KMP_ORD_DYNAMIC_CHUNKED) {
1639 ili = ll_make_kmpc_dispatch_fini(ILM_DTyOPND(ilmp, 1));
1640 iltb.callfg = 1;
1641 chk_block(ili);
1642 } else if (sched == KMP_SCH_STATIC || sched == KMP_SCH_STATIC_CHUNKED ||
1643 sched == KMP_DISTRIBUTE_STATIC ||
1644 sched == KMP_DISTRIBUTE_STATIC_CHUNKED) {
1645 ili = ll_make_kmpc_for_static_fini();
1646 iltb.callfg = 1;
1647 chk_block(ili);
1648 }
1649 break;
1650 }
1651 case IM_BPDO:
1652 case IM_EPDO:
1653 break;
1654
1655 case IM_PDO:
1656 if (outlinedCnt >= 1)
1657 break;
1658 sym = ILM_SymOPND(ilmp, 1);
1659 if (ILIBLKG(sym))
1660 BIH_PARLOOP(ILIBLKG(sym)) = 1;
1661 switch (ILM_OPND(ilmp, 2) & 0xff) {
1662 case 6: /* distribute static schedule */
1663 case 0: /* static schedule */
1664 switch (ILM_OPND(ilmp, 2) & 0xff00) {
1665 case 0:
1666 doschedule = "static block";
1667 break;
1668 case MP_SCH_CHUNK_1:
1669 doschedule = "static cyclic";
1670 break;
1671 case MP_SCH_BLK_CYC:
1672 doschedule = "static block-cyclic";
1673 break;
1674 case MP_SCH_BLK_ALN:
1675 /* also PARALN */
1676 doschedule = "static block";
1677 break;
1678 default:
1679 doschedule = "";
1680 break;
1681 }
1682 break;
1683 case 1:
1684 doschedule = " dynamic";
1685 break;
1686 case 2:
1687 doschedule = " guided";
1688 break;
1689 case 3:
1690 doschedule = " interleaved"; /* not used */
1691 break;
1692 case 4:
1693 doschedule = " runtime schedule";
1694 break;
1695 case 5:
1696 doschedule = " auto schedule";
1697 break;
1698 default:
1699 #if DEBUG
1700 interr("exp_smp: IM_PDO unknown schedule", ILM_OPND(ilmp, 2) & 0xff,
1701 ERR_Severe);
1702 #endif
1703 doschedule = " static";
1704 }
1705 if ((ILM_OPND(ilmp, 2) & 0xff) == 6) {
1706 ccff_info(MSGOPENMP, "OMP024", gbl.findex, gbl.lineno,
1707 "Distribute loop activated with %schedule schedule",
1708 "schedule=%s", doschedule, NULL);
1709 break;
1710 }
1711 ccff_info(MSGOPENMP, "OMP014", gbl.findex, gbl.lineno,
1712 "Parallel loop activated with %schedule schedule", "schedule=%s",
1713 doschedule, NULL);
1714 break;
1715 case IM_BARRIER:
1716 if (outlinedCnt >= 1)
1717 break;
1718 else if (!XBIT(183, 0x2000)) { /* If kmpc enabled */
1719 ili = ll_make_kmpc_barrier();
1720 iltb.callfg = 1;
1721 }
1722 chk_block(ili);
1723 ccff_info(MSGOPENMP, "OMP015", gbl.findex, gbl.lineno, "Barrier", NULL);
1724 break;
1725 case IM_BSECTIONS:
1726 if (!ll_ilm_is_rewriting()) {
1727 ccff_info(MSGOPENMP, "OMP005", gbl.findex, gbl.lineno, "Begin sections",
1728 NULL);
1729
1730 if (flg.opt != 0) {
1731 wr_block();
1732 cr_block();
1733 }
1734 parsectCnt++;
1735 BIH_PARSECT(expb.curbih) = bihb.parsectfg = true;
1736 expSmpSectionInit();
1737 wr_block();
1738 cr_block();
1739 }
1740 break;
1741 case IM_MASTER:
1742 if (outlinedCnt >= 1)
1743 break;
1744 ccff_info(MSGOPENMP, "OMP008", gbl.findex, gbl.lineno,
1745 "Begin master region", NULL);
1746
1747 parsectCnt++;
1748 if (flg.opt != 0) {
1749 wr_block();
1750 cr_block();
1751 }
1752 sym = ILM_SymOPND(ilmp, 1);
1753 ili = ll_make_kmpc_master();
1754 ili = ad3ili(IL_ICJMPZ, ili, CC_EQ, sym);
1755 iltb.callfg = 1;
1756 BIH_PARSECT(expb.curbih) = bihb.parsectfg = true;
1757 chk_block(ili);
1758 break;
1759 case IM_SECTION:
1760 if (!ll_ilm_is_rewriting()) {
1761 ccff_info(MSGOPENMP, "OMP006", gbl.findex, gbl.lineno, "New section",
1762 NULL);
1763
1764 if (SECT_CNT == 0) { /* first section make call */
1765 /* we should know lower bound but don't know upper bound */
1766 /* make a call to static_for_init here - we will fill upper bound later
1767 */
1768 int *args, lb, ub, st, last;
1769 wr_block();
1770 cr_block();
1771 ili = ad4ili(IL_ST, ad_icon(0), ad_acon(SECT_LB, 0),
1772 addnme(NT_VAR, SECT_LB, 0, 0), MSZ_WORD);
1773 chk_block(ili);
1774 ili = ad4ili(IL_ST, ad_icon(1), ad_acon(SECT_ST, 0),
1775 addnme(NT_VAR, SECT_ST, 0, 0), MSZ_WORD);
1776 chk_block(ili);
1777 args = ll_make_sections_args(SECT_LB, SECT_UB, SECT_ST, SECT_LAST);
1778 ili = ll_make_kmpc_for_static_init_args(DT_UINT, args);
1779 iltb.callfg = 1;
1780 chk_block(ili);
1781 }
1782
1783 /*
1784 * if (lb != cnt)
1785 * jmp to next label
1786 */
1787 wr_block();
1788 cr_block();
1789 exp_label(ILM_SymOPND(ilmp, 3));
1790 BIH_LABEL(expb.curbih) = ILM_SymOPND(ilmp, 3);
1791 ILIBLKP(BIH_LABEL(expb.curbih), expb.curbih);
1792
1793 ili = sectionCreateBlock(ILM_OPND(ilmp, 2), SECT_LB, SECT_UB, SECT_CNT);
1794 chk_block(ili);
1795 RFCNTI(ILM_OPND(ilmp, 2));
1796
1797 ++SECT_CNT;
1798 }
1799 break;
1800 case IM_LSECTION:
1801 if (!ll_ilm_is_rewriting()) {
1802 wr_block();
1803 cr_block();
1804 exp_label(ILM_SymOPND(ilmp, 3));
1805 BIH_LABEL(expb.curbih) = ILM_SymOPND(ilmp, 3);
1806 ILIBLKP(BIH_LABEL(expb.curbih), expb.curbih);
1807 wr_block();
1808 cr_block();
1809
1810 /* now assign the upper bound to SECT_UB */
1811 savebih = expb.curbih;
1812 savex14 = flg.x[14];
1813 flg.x[14] |= 0x1000;
1814 wr_block();
1815 expb.curbih = SECT_BBIH;
1816 rdilts(expb.curbih);
1817 expb.curilt = ILT_PREV(0);
1818 ili = ad4ili(IL_ST, ad_icon(SECT_CNT - 1), ad_acon(SECT_UB, 0),
1819 addnme(NT_VAR, SECT_UB, 0, 0), MSZ_WORD);
1820 expb.curilt = addilt(expb.curilt, ili);
1821 wrilts(SECT_BBIH);
1822 expb.curbih = savebih;
1823 rdilts(expb.curbih);
1824 expb.curilt = ILT_PREV(0);
1825 flg.x[14] = savex14;
1826 }
1827 break;
1828 case IM_ESECTIONS:
1829 if (!ll_ilm_is_rewriting()) {
1830 expSmpSectionEnd();
1831 ccff_info(MSGOPENMP, "OMP007", gbl.findex, gbl.lineno, "End sections",
1832 NULL);
1833 goto esect_shared;
1834 }
1835 break;
1836 case IM_CANCEL:
1837 if (!ll_ilm_is_rewriting()) {
1838 int ifcancel = ILI_OF(ILM_OPND(ilmp, 3));
1839 int cancel_kind = ILM_OPND(ilmp, 2);
1840 int label = ILM_OPND(ilmp, 1);
1841
1842 SPTR cancel_label = getlab();
1843 ifcancel = ad3ili(IL_ICJMPZ, ifcancel, CC_EQ, cancel_label);
1844 RFCNTI(cancel_label);
1845 chk_block(ifcancel);
1846
1847 ili = ll_make_kmpc_cancel(ad_icon(cancel_kind));
1848 ifcancel = ad3ili(IL_ICJMPZ, ili, CC_NE, label);
1849 iltb.callfg = 1;
1850 chk_block(ifcancel);
1851
1852 wr_block();
1853 cr_block();
1854 exp_label(cancel_label);
1855 ccff_info(MSGOPENMP, "OMP026", gbl.findex, gbl.lineno, "Cancel", NULL);
1856 }
1857 break;
1858 case IM_CANCELPOINT:
1859 if (!ll_ilm_is_rewriting()) {
1860 int cancel_kind = ILM_OPND(ilmp, 2);
1861 int label = ILM_OPND(ilmp, 1);
1862 ili = ll_make_kmpc_cancellationpoint(ad_icon(cancel_kind));
1863 ili = ad3ili(IL_ICJMPZ, ili, CC_NE, label);
1864 iltb.callfg = 1;
1865 chk_block(ili);
1866 ccff_info(MSGOPENMP, "OMP027", gbl.findex, gbl.lineno,
1867 "Cancellation point", NULL);
1868 }
1869 break;
1870 case IM_SINGLE:
1871 if (flg.opt != 0) {
1872 wr_block();
1873 cr_block();
1874 }
1875 if (!ll_ilm_is_rewriting()) {
1876 parsectCnt++;
1877 ccff_info(MSGOPENMP, "OMP010", gbl.findex, gbl.lineno,
1878 "Begin single region", NULL);
1879 single_thread = getPrivateTemp(DT_INT);
1880 in_single = getPrivateTemp(DT_INT);
1881 ili = genIntStore(single_thread, ad_icon(-1));
1882 chk_block(ili);
1883 ili = genIntStore(in_single, ad_icon(0));
1884 chk_block(ili);
1885 if (!gbl.outlined) {
1886 SCP(single_thread, SC_AUTO);
1887 SCP(in_single, SC_AUTO);
1888 }
1889 ili = ll_make_kmpc_single();
1890 sym = ILM_SymOPND(ilmp, 2);
1891 ili = ad3ili(IL_ICJMPZ, ili, CC_EQ, sym), iltb.callfg = 1;
1892 BIH_PARSECT(expb.curbih) = bihb.parsectfg = true;
1893 chk_block(ili);
1894 }
1895 break;
1896 case IM_EMASTER:
1897 if (outlinedCnt >= 1)
1898 break;
1899 ili = ll_make_kmpc_end_master();
1900 iltb.callfg = 1;
1901 chk_block(ili);
1902 ccff_info(MSGOPENMP, "OMP009", gbl.findex, gbl.lineno, "End master region",
1903 NULL);
1904 goto esect_shared;
1905 case IM_ESINGLE:
1906 if (flg.opt != 0) {
1907 wr_block();
1908 cr_block();
1909 }
1910 if (!ll_ilm_is_rewriting()) {
1911 int threadili;
1912 threadili = ll_get_gtid_val_ili();
1913 ili = genIntStore(single_thread, threadili);
1914 chk_block(ili);
1915 ili = genIntStore(in_single, ad_icon(1));
1916 chk_block(ili);
1917 ili = ll_make_kmpc_end_single();
1918 iltb.callfg = 1;
1919 chk_block(ili);
1920 ccff_info(MSGOPENMP, "OMP011", gbl.findex, gbl.lineno,
1921 "End single region", NULL);
1922 } else {
1923 break;
1924 }
1925
1926 esect_shared:
1927 BIH_PARSECT(expb.curbih) = true;
1928 exp_label(ILM_SymOPND(ilmp, 1));
1929 parsectCnt--;
1930 if (parsectCnt <= 0)
1931 bihb.parsectfg = false;
1932 break;
1933
1934 /* C, FORTRAN */
1935 case IM_BCOPYIN:
1936 case IM_ECOPYIN:
1937 if (!ll_ilm_is_rewriting()) {
1938 if (opc == IM_ECOPYIN) {
1939 const int n = sptrListLength(copysptr_list);
1940
1941 if (XBIT(69, 0x80)) {
1942 makeCopyprivArray_tls(copysptr_list);
1943 sptrListFree(&copysptr_list);
1944 break;
1945 }
1946
1947 addCopyinInplace(copysptr_list);
1948 sptrListFree(&copysptr_list);
1949 break;
1950 }
1951 }
1952 break;
1953
1954 case IM_COPYIN:
1955 if (ll_ilm_is_rewriting()) {
1956 break;
1957 }
1958 /* variable/common block to be copied */
1959 sym = ILM_SymOPND(ilmp, 1);
1960 tpv = MIDNUMG(sym);
1961
1962 if (STYPEG(sym) == ST_CMBLK) {
1963 /* entire common block is being copied */
1964 size = SIZEG(sym);
1965 off = ad1ili(IL_ICON, stb.i0);
1966 } else if (SCG(sym) == SC_CMBLK) {
1967 /* a variable of the common block is being copied */
1968 size = size_of(DTYPEG(sym));
1969 /* locate common block */
1970 sym = MIDNUMG(sym);
1971 tpv = MIDNUMG(sym);
1972 }
1973 else if (SCG(sym) == SC_BASED && POINTERG(sym)) {
1974 pv = MIDNUMG(sym);
1975 if (SCG(pv) == SC_CMBLK) {
1976 /* f90 pointer or allocatable common block member:
1977 *
1978 * MIDNUM locates the user/compiler-created pointer;
1979 * its MIDNUM locates the common block.
1980 */
1981 int sdsptr;
1982 size = size_of(DTYPEG(pv));
1983 sdsptr = SDSCG(sym); /* $sd */
1984 if (sdsptr) {
1985 size += size_of(DT_ADDR); /* $o */
1986 size += size_of(DTYPEG(sdsptr)); /* $sd */
1987 }
1988 ADDRTKNP(sym, 1);
1989 tpv = MIDNUMG(MIDNUMG(pv));
1990 } else {
1991 /* f90 pointer or allocatable:
1992 *
1993 * MIDNUM locates the user/compiler-created pointer;
1994 * its MIDNUM locates the variable's thread pointer vector
1995 * Could compute the size of that variable or just of the
1996 * pointer dtype ...
1997 */
1998 size = size_of(DT_ADDR);
1999 #if DEBUG
2000 assert(size == size_of(DTYPEG(pv)),
2001 "COPYIN size incorrect for SC_BASED sym", sym, ERR_Fatal);
2002 #endif
2003 ADDRTKNP(sym, 1);
2004 tpv = MIDNUMG(pv);
2005 }
2006 }
2007 else if (SCG(sym) == SC_BASED) {
2008 /* Cray pointee:
2009 * MIDNUM locates the variable's thread pointer vector and
2010 * its MIDNUM locates the user/compiler-created pointer that's
2011 * actually copied. Could compute the size of that variable
2012 * or just of the pointer dtype ...
2013 */
2014 size = size_of(DT_ADDR);
2015 #if DEBUG
2016 assert(size == size_of(DTYPEG(MIDNUMG(MIDNUMG(sym)))),
2017 "COPYIN size incorrect for SC_BASED sym", sym, ERR_Fatal);
2018 #endif
2019 ADDRTKNP(sym, 1);
2020 } else {
2021 /* regular user var being copied */
2022 size = size_of(DTYPEG(sym));
2023 ADDRTKNP(sym, 1);
2024 }
2025 /* false: Because we want to always use the vector/cache (tpv)
2026 * and not the data item from the cache.
2027 */
2028 sz = ad_kconi(size);
2029 sptrListAdd(&copysptr_list, tpv, sz, false, 0, 0, sym);
2030 break;
2031
2032 #ifdef IM_COPYIN_A
2033 case IM_COPYIN_A:
2034 if (ll_ilm_is_rewriting()) {
2035 break;
2036 }
2037 /* allocatable to be copied */
2038 sym = ILM_SymOPND(ilmp, 1);
2039 /* MIDNUM locates the user/compiler-created pointer; its MIDNUM locates the
2040 * variable's thread pointer vector */
2041 pv = MIDNUMG(sym);
2042 if (SCG(sym) == SC_BASED && POINTERG(sym)) {
2043 if (SCG(pv) == SC_CMBLK) {
2044 /* f90 pointer or allocatable common block member:
2045 *
2046 * MIDNUM(sym)locates the user/compiler-created pointer (pv)
2047 * which is a member of the common block. Its MIDNUM locates
2048 * the common block.
2049 */
2050 pv = MIDNUMG(pv); /* locate common block */
2051 }
2052 }
2053 sz = ILI_OF(ILM_OPND(ilmp, 2));
2054 ADDRTKNP(sym, 1);
2055 tpv = MIDNUMG(pv);
2056
2057 sptrListAdd(&copysptr_list, tpv, sz, false, 0, 0, sym);
2058 break;
2059 #endif
2060
2061 case IM_BCOPYPRIVATE:
2062 case IM_ECOPYPRIVATE:
2063 if (!ll_ilm_is_rewriting()) {
2064 if (opc == IM_ECOPYPRIVATE) {
2065 SPTR sptr_addr = makeCopyprivArray(copysptr_list, true);
2066 addr = sptr_addr;
2067 stili = genIntLoad(in_single);
2068
2069 /* c++ will set up assign_rou from IM_COPYPRIVATE_CL (_P) */
2070 if (!assign_rou) {
2071 assign_rou = ad_acon(mkfunc("_mp_copypriv_kmpc"), 0);
2072 }
2073
2074 ili = ll_make_kmpc_copyprivate(sptr_addr, stili, assign_rou);
2075
2076 assign_rou = 0;
2077 iltb.callfg = 1;
2078 chk_block(ili);
2079 sptrListFree(&copysptr_list);
2080 }
2081 }
2082 break;
2083 case IM_COPYPRIVATE_CL_P:
2084 if (ll_ilm_is_rewriting()) {
2085 break;
2086 }
2087 /* C++ ONLY class copyprivate */
2088 argilm = ILM_OPND(ilmp, 2);
2089 sym = ILM_SymOPND((ILM *)(ilmb.ilm_base + argilm), 1);
2090 assign_rou = ad_acon(ILM_SymOPND(ilmp, 3), 0);
2091 if (DTY(DTYPEG(sym)) == TY_ARRAY) {
2092 element_size = getElemSize(DTYPEG(sym));
2093 num_elements = extent_of(DTYPEG(sym));
2094 size = num_elements * element_size; /* Total size required for
2095 llvm memcpy */
2096 } else {
2097 size = size_of(DTYPEG(sym)); /* used for POD */
2098 }
2099 sz = ad_kconi(size);
2100 sptrListAdd(&copysptr_list, sym, sz, false, assign_rou, 0, sym);
2101 ADDRTKNP(sym, 1);
2102 break;
2103
2104 #ifdef IM_COPYPRIVATE_PA
2105 case IM_COPYPRIVATE_PA:
2106 if (ll_ilm_is_rewriting()) {
2107 break;
2108 }
2109 argilm = ILM_OPND(ilmp, 2);
2110 sym = ILM_SymOPND((ILM *)(ilmb.ilm_base + argilm), 1);
2111 ili = ILI_OF(ILM_OPND(ilmp, 3));
2112 ili = sel_iconv(ili, 1);
2113
2114 sptrListAdd(&copysptr_list, sym, ili, false, 0, 0, sym);
2115 break;
2116 #endif
2117
2118 case IM_COPYPRIVATE_P:
2119 if (ll_ilm_is_rewriting()) {
2120 break;
2121 }
2122 argilm = ILM_OPND(ilmp, 2);
2123 sym = ILM_SymOPND((ILM *)(ilmb.ilm_base + argilm), 1);
2124 sz = 0;
2125 if (SCG(sym) == SC_DUMMY && DTY(DTYPEG(sym)) != TY_PTR &&
2126 (DDTG(DTYPEG(sym)) == DT_ASSCHAR)) {
2127 sz = charlen(sym);
2128 }
2129 if (sz == 0) {
2130 size = size_of(DTYPEG(sym));
2131 sz = ad_kconi(size);
2132 }
2133 sptrListAdd(&copysptr_list, sym, sz, false, 0, 0, sym);
2134 break;
2135
2136 case IM_COPYPRIVATE_CL:
2137 if (ll_ilm_is_rewriting()) {
2138 break;
2139 }
2140 /* C++ ONLY class copyprivate */
2141 /* variable/class to be copied out */
2142 sym = ILM_SymOPND(ilmp, 2);
2143 assign_rou = ad_acon(ILM_SymOPND(ilmp, 3), 0);
2144 if (DTY(DTYPEG(sym)) == TY_ARRAY) {
2145 element_size = getElemSize(DTYPEG(sym));
2146 num_elements = extent_of(DTYPEG(sym));
2147 size = num_elements * element_size; /* Total size required for
2148 llvm memcpy */
2149 } else {
2150 size = size_of(DTYPEG(sym)); /* used for POD */
2151 }
2152 sz = ad_kconi(size);
2153 sptrListAdd(&copysptr_list, sym, sz, false, assign_rou, 0, sym);
2154 ADDRTKNP(sym, 1);
2155 break;
2156
2157 case IM_COPYPRIVATE:
2158 if (ll_ilm_is_rewriting()) {
2159 break;
2160 }
2161
2162 /* variable/common block to be copied out */
2163 sym = ILM_SymOPND(ilmp, 2);
2164 is_cmblk = false;
2165
2166 if (STYPEG(sym) == ST_CMBLK) {
2167 /* Entire common block */
2168 size = SIZEG(sym);
2169 sym = MIDNUMG(sym);
2170 is_cmblk = true;
2171 } else if (SCG(sym) == SC_CMBLK) {
2172 /* Var in common block */
2173 size = size_of(DTYPEG(sym));
2174 sym = MIDNUMG(sym);
2175 sym = MIDNUMG(sym);
2176 is_cmblk = true;
2177 } else {
2178 size = size_of(DTYPEG(sym));
2179 }
2180 sz = ad_kconi(size);
2181 sptrListAdd(&copysptr_list, sym, sz, is_cmblk, 0, 0, sym);
2182 ADDRTKNP(sym, 1);
2183 break;
2184
2185 #ifdef IM_FLUSH
2186 case IM_FLUSH:
2187 if (ll_ilm_is_rewriting()) {
2188 break;
2189 }
2190 ili = ll_make_kmpc_flush();
2191 iltb.callfg = 1;
2192 chk_block(ili);
2193 break;
2194 #endif
2195 case IM_TASKGROUP:
2196 if (ll_ilm_is_rewriting()) {
2197 break;
2198 }
2199 ili = ll_make_kmpc_taskgroup();
2200 iltb.callfg = 1;
2201 chk_block(ili);
2202 break;
2203 case IM_ETASKGROUP:
2204 if (ll_ilm_is_rewriting()) {
2205 break;
2206 }
2207 ili = ll_make_kmpc_end_taskgroup();
2208 iltb.callfg = 1;
2209 chk_block(ili);
2210 break;
2211
2212 case IM_BTASK:
2213 case IM_BTASKLOOP:
2214 incrOutlinedCnt();
2215 if (outlinedCnt > 1) {
2216 ll_rewrite_ilms(-1, curilm, 0);
2217 break;
2218 }
2219 taskCnt++;
2220 wr_block();
2221 cr_block();
2222
2223 if (gbl.outlined)
2224 expb.sc = SC_PRIVATE;
2225 else
2226 expb.sc = SC_AUTO;
2227 taskBv = ILM_OPND(ilmp, 2);
2228 taskIfv = ILI_OF(ILM_OPND(ilmp, 3));
2229 taskAllocSptr = getnewccsym('z', GBL_CURRFUNC, ST_VAR);
2230 SCP(taskAllocSptr, expb.sc);
2231 DTYPEP(taskAllocSptr, DT_CPTR);
2232 taskFlags = getPrivateTemp(DT_INT);
2233 SCP(taskFlags, expb.sc);
2234 /* Note: kmpc(5.0) does not use mergeable and priority flags */
2235 if (taskBv & MP_TASK_FINAL) {
2236 const int kmpc_flags = mp_to_kmpc_tasking_flags(taskBv);
2237
2238 /* Expand the 'final' expression */
2239 const SPTR lab = getlab();
2240 RFCNTI(lab);
2241 ili = ad3ili(IL_ICJMPZ, ILI_OF(ILM_OPND(ilmp, 4)), CC_EQ, lab);
2242 chk_block(ili);
2243
2244 /* In the branch: update the flags variable */
2245 ili = genIntStore(taskFlags, ad_icon(kmpc_flags));
2246 chk_block(ili);
2247 wr_block();
2248 exp_label(lab);
2249 } else {
2250 if (taskBv & MP_TASK_UNTIED) {
2251 ili = ad_icon(0);
2252 } else {
2253 ili = ad_icon(1);
2254 }
2255 ili = genIntStore(taskFlags, ili);
2256 chk_block(ili);
2257 }
2258 wr_block();
2259
2260 /* mark for __kmpc_task_alloc */
2261 taskbih = expb.curbih;
2262
2263 cr_block();
2264 /* mark block to place taskloop vars */
2265 NEED(mppgcnt + 1, mppgbih, int, mppgBihSiz, mppgBihSiz + 16);
2266 mppgbih[mppgcnt] = expb.curbih;
2267 mppgcnt++;
2268
2269 /* create task here because we want to set ENCLFUNC for all private
2270 * variables, including loop variables(for taskloop)*/
2271 task = llGetTask(scopeSptr);
2272 taskFnsptr = ll_make_outlined_task(uplevel_sptr, scopeSptr);
2273 llmp_task_set_fnsptr(task, taskFnsptr);
2274 if (!PARENCLFUNCG(scopeSptr))
2275 PARENCLFUNCP(scopeSptr, taskFnsptr);
2276 if (opc == IM_BTASKLOOP) {
2277 /* Reserve space for taskloop vars & lastiter on task_alloc ptr. */
2278 TASK_LPVAR_OFFSET = llmp_task_add_loopvar(task, 4, DT_INT8);
2279 taskdup = 0;
2280 taskLoopCnt++;
2281
2282 if (taskBv & MP_TASK_IF) {
2283 int tmp0, tmp1;
2284 tmp0 = ad_icon(0);
2285 taskIfv = sel_iconv(taskIfv, 0);
2286 ili = ad3ili(IL_ICMP, taskIfv, tmp0, CC_EQ);
2287 tmp1 = ad_icon(1);
2288 TASKLP_IF = ad3ili(IL_ISELECT, ili, tmp1, tmp0);
2289 } else {
2290 TASKLP_IF = ad_icon(1);
2291 }
2292 if (taskBv & MP_TASK_NOGROUP) {
2293 TASKLP_NOGROUP = ad_icon(1);
2294 } else {
2295 TASKLP_NOGROUP = ad_icon(0);
2296 }
2297 if (taskBv & MP_TASK_GRAINSIZE) {
2298 TASKLP_SCHED = ad_icon(1);
2299 } else if (taskBv & MP_TASK_NUM_TASKS) {
2300 TASKLP_SCHED = ad_icon(2);
2301 } else {
2302 TASKLP_SCHED = ad_icon(0);
2303 }
2304 TASKLP_GRAINSIZE = ILI_OF(ILM_OPND(ilmp, 6));
2305
2306 ccff_info(MSGOPENMP, "OMP028", gbl.findex, gbl.lineno, "Begin taskloop",
2307 NULL);
2308 ll_write_ilm_header(taskFnsptr, curilm);
2309 } else {
2310 ccff_info(MSGOPENMP, "OMP016", gbl.findex, gbl.lineno, "Begin task",
2311 NULL);
2312 ll_write_ilm_header(taskFnsptr, curilm);
2313 }
2314
2315 expb.sc = SC_PRIVATE;
2316 break;
2317 #ifdef IM_BTASKDUP
2318 case IM_BTASKDUP:
2319 if (taskCnt != 1 || outlinedCnt > 1) {
2320 ll_rewrite_ilms(-1, curilm, 0);
2321 break;
2322 }
    /* for normal task: Stop writing to temp parfile and evaluate
2324 * ILMs.
2325 * for taskloop: write ilms between IM_BTASKDUP and IM_ETASKDUP
2326 * to taskdup routine. Also don't write to
2327 * temp parfile because we want to evaluate
2328 * ILMS in between in host routine too.
2329 */
2330 outlinedCnt = 0;
2331 if (taskdup == 0) { /* allow nested taskdup */
2332 /* make sure we write after taskAlloc is allocated */
2333 wr_block();
2334 cr_block();
2335
2336 if (taskLoopCnt) {
2337 ilm_outlined_pad_ilm(curilm);
2338 unsetRewritingILM();
2339 start_taskdup(taskFnsptr, curilm);
2340 } else {
2341 ilm_outlined_pad_ilm(curilm);
2342 unsetRewritingILM();
2343 }
2344 if (gbl.outlined)
2345 expb.sc = SC_PRIVATE;
2346 else
2347 expb.sc = SC_AUTO;
2348 } else {
2349 llWriteNopILM(-1, curilm, 0);
2350 }
2351 taskdup++;
2352 break;
2353 case IM_ETASKDUP:
2354 if (taskCnt != 1 || outlinedCnt > 0) {
2355 ll_rewrite_ilms(-1, curilm, 0);
2356 break;
2357 }
2358 --taskdup;
2359 if (taskdup == 0) {
2360 if (taskLoopCnt) {
2361 stop_taskdup(taskFnsptr, curilm);
2362 }
2363 restartRewritingILM(curilm);
2364 outlinedCnt = 1;
2365 if (gbl.outlined)
2366 expb.sc = SC_PRIVATE;
2367 } else {
2368 llWriteNopILM(-1, curilm, 0);
2369 }
2370 break;
2371 #endif
2372
2373 #ifdef IM_TASKFIRSTPRIV
2374 case IM_TASKFIRSTPRIV:
2375 if (taskCnt != 1 || outlinedCnt > 1) {
2376 ll_rewrite_ilms(-1, curilm, 0);
2377 break;
2378 }
2379 {
2380 /* Must set ADDRESSG field in caller.
2381 * The reason to do it in caller is that
       * currently some allocation is done before
2383 * we emit IM_TASKFIRSTPRIV.
2384 * We then can get its address in callee
2385 * for the allocation.
2386 */
2387 task = llGetTask(scopeSptr);
2388 sym = ILM_SymOPND(ilmp, 1);
2389 sptr = ILM_SymOPND(ilmp, 2);
2390 offset = llmp_task_add_private(task, sym, sptr);
2391 ADDRESSP(sptr, offset);
2392 ENCLFUNCP(sptr, taskFnsptr);
2393 llWriteNopILM(-1, curilm, 0);
2394 }
2395 break;
2396 #endif
2397 #ifdef IM_TASKPRIVATE
2398 case IM_TASKPRIVATE:
2399 if (taskCnt != 1 || outlinedCnt > 1) {
2400 ll_rewrite_ilms(-1, curilm, 0);
2401 break;
2402 }
2403 {
2404 task = llGetTask(scopeSptr);
2405 sym = ILM_OPND(ilmp, 1);
2406 sptr = ILM_OPND(ilmp, 2);
2407 offset = llmp_task_add_private(task, sym, sptr);
2408 ADDRESSP(sptr, offset);
2409 ENCLFUNCP(sptr, taskFnsptr);
2410 llWriteNopILM(-1, curilm, 0);
2411 }
2412 break;
2413 #endif
2414
2415 #ifdef IM_TASKLOOPVARS
2416 case IM_TASKLOOPVARS:
2417 if (taskLoopCnt != 1) {
2418 ll_rewrite_ilms(-1, curilm, 0);
2419 break;
2420 }
2421 /* store the ilm rewrite so that we can work on taskloop bounds/stride */
2422 if (outlinedCnt > 1) {
2423 ll_rewrite_ilms(-1, curilm, 0);
2424 } else if (outlinedCnt == 1) {
2425 outlinedCnt = 0;
2426 ilm_outlined_pad_ilm(curilm);
2427 unsetRewritingILM();
2428 if (gbl.outlined)
2429 expb.sc = SC_PRIVATE;
2430 else
2431 expb.sc = SC_AUTO;
2432 resetMppBih(SET_MPPBIH, IS_PREVMPPG);
2433 }
2434 break;
2435 #endif
2436 case IM_TASKLOOPREG:
2437 if (taskLoopCnt != 1) {
2438 ll_rewrite_ilms(-1, curilm, 0);
2439 break;
2440 }
2441 if (outlinedCnt > 0) {
2442 ll_rewrite_ilms(-1, curilm, 0);
2443 } else {
2444 restartRewritingILM(curilm);
2445 outlinedCnt = 1;
2446 TASK_LB = ILI_OF(ILM_OPND(ilmp, 1));
2447 TASK_UB = ILI_OF(ILM_OPND(ilmp, 2));
2448 TASK_ST = ILI_OF(ILM_OPND(ilmp, 3));
2449 if (gbl.outlined)
2450 expb.sc = SC_PRIVATE;
2451 resetMppBih(RESTORE_MPPBIH, IS_PREVMPPG);
2452 }
2453 break;
2454
2455 case IM_ETASKLOOPREG:
2456 if (taskCnt != 1 || outlinedCnt > 1) {
2457 ll_rewrite_ilms(-1, curilm, 0);
2458 break;
2459 }
2460 if (outlinedCnt == 1) {
2461 llWriteNopILM(-1, curilm, 0);
2462 }
2463 break;
2464
2465 case IM_ETASKLOOP:
2466 if (outlinedCnt == 1) {
2467 /* do following so that we get the ILM right in case ILM is
2468 * written in the same ILM block.
2469 */
2470 ilm_outlined_pad_ilm(curilm);
2471 ccff_info(MSGOPENMP, "OMP029", gbl.findex, gbl.lineno, "End taskloop",
2472 NULL);
2473 taskLoopCnt--;
2474 }
2475 decrOutlinedCnt();
2476 if (outlinedCnt >= 1) {
2477 ll_rewrite_ilms(-1, curilm, 0);
2478 break;
2479 }
2480 goto shared_etask;
2481
2482 case IM_ETASK:
2483 if (outlinedCnt == 1) {
2484 ilm_outlined_pad_ilm(curilm);
2485 ccff_info(MSGOPENMP, "OMP017", gbl.findex, gbl.lineno, "End task", NULL);
2486 }
2487 decrOutlinedCnt();
2488 if (outlinedCnt >= 1) {
2489 ll_rewrite_ilms(-1, curilm, 0);
2490 break;
2491 }
2492 shared_etask:
2493 /* Insert kmpc_task_alloc here because default firstprivate assignment can
2494 * be done after IM_ETASKREG/ETASKLOOPREG and we need to collect
2495 * the size of all firstprivate vars and pass to kmpc.
2496 */
2497 if (gbl.outlined)
2498 expb.sc = SC_PRIVATE;
2499 else
2500 expb.sc = SC_AUTO;
2501 {
2502 SPTR lab;
2503 int end_lab;
2504 SPTR s_scope;
2505
2506 if (opc == IM_ETASKLOOP) {
2507 /* must be called after decrOutlinedCnt so that outlined
2508 * function ILMs are done emitted before taskdup is emitted.
2509 */
2510 finish_taskdup_routine(curilm, taskFnsptr,
2511 TASK_LPVAR_OFFSET + (zsize_of(DT_INT8) * 3));
2512 }
2513
2514 resetTaskBih(SET_MPPBIH);
2515 /* Load args first */
2516 s_scope = scopeSptr;
2517 scopeSptr = (SPTR)OUTLINEDG(taskFnsptr); // ???
2518 taskAllocSptr = ll_make_kmpc_task_arg(
2519 taskAllocSptr, taskFnsptr, scopeSptr, taskFlags, ili_arg);
2520 ili_arg = ll_load_outlined_args(scopeSptr, taskFnsptr, false);
2521 /* Load taskloop vars and store onto task_alloc ptr
2522 * Also get its address on task_alloc ptr to pass
2523 * to __kmpc_taskloop.
2524 */
2525 if (opc == IM_ETASKLOOP) {
2526 int nme, ldnme, task_ili, addr, ilix;
2527 ILI_OP ld, st;
2528 MSZ msz;
2529 INT offset = 0;
2530
2531 ili = ad_acon(taskAllocSptr, offset);
2532 nme = addnme(NT_VAR, taskAllocSptr, (INT)0, 0);
2533 task_ili = ad2ili(IL_LDA, ili, nme);
2534 ldst_msz(DT_INT8, &ld, &st, &msz);
2535
2536 offset = ad_aconi(TASK_LPVAR_OFFSET);
2537 ili = ad3ili(IL_AADD, task_ili, offset, 0);
2538 TASKLP_LB = ili;
2539 ili = ad4ili(st, TASK_LB, ili, nme, msz);
2540 chk_block(ili);
2541
2542 offset = ad_aconi(TASK_LPVAR_OFFSET + zsize_of(DT_INT8));
2543 ili = ad3ili(IL_AADD, task_ili, offset, 0);
2544 TASKLP_UB = ili;
2545 ili = ad4ili(st, TASK_UB, ili, nme, msz);
2546 chk_block(ili);
2547
2548 offset = ad_aconi(TASK_LPVAR_OFFSET + (zsize_of(DT_INT8) * 2));
2549 ili = ad3ili(IL_AADD, task_ili, offset, 0);
2550 ili = ad4ili(st, TASK_ST, ili, nme, msz);
2551 TASKLP_ST = TASK_ST;
2552
2553 iltb.callfg = 1; /* Call task */
2554 chk_block(ili);
2555 }
2556
2557 resetTaskBih(RESTORE_MPPBIH);
2558 scopeSptr = s_scope;
2559
2560 /* If 'if' clause is used, this is the false branch, if (0) then... */
2561 end_lab = ILM_OPND(ilmp, 1);
2562 if (opc == IM_ETASK) {
2563 if (taskBv & MP_TASK_IF) {
2564 lab = getlab();
2565 RFCNTI(lab);
2566 ili = ad3ili(IL_ICJMPZ, taskIfv, CC_NE, lab);
2567 chk_block(ili);
2568
2569 iltb.callfg = 1; /* Begin */
2570 ili = ll_make_kmpc_task_begin_if0(taskAllocSptr);
2571 chk_block(ili);
2572
2573 iltb.callfg = 1; /* Call task */
2574 ili = ll_make_outlined_task_call(taskFnsptr, taskAllocSptr);
2575 chk_block(ili);
2576
2577 iltb.callfg = 1; /* End */
2578 ili = ll_make_kmpc_task_complete_if0(taskAllocSptr);
2579 chk_block(ili);
2580
2581 /* Create and jump to an end label at the end of the task */
2582 RFCNTI(end_lab);
2583 ili = ad1ili(IL_JMP, end_lab);
2584 chk_block(ili);
2585
2586 exp_label(lab);
2587 }
2588 }
2589 if (opc == IM_ETASK) {
2590 /* Make api call */
2591 ili = ll_make_kmpc_task(taskAllocSptr);
2592 } else {
2593 TASKLP_TASK = ad2ili(IL_LDA, ad_acon(taskAllocSptr, 0),
2594 addnme(NT_VAR, taskAllocSptr, 0, 0));
2595 /* FIXME: if there is no firstprivate and lastprivate
2596 don't pass taskdup - performance issue maybe?
2597 */
2598 if (TASKDUPG(taskFnsptr)) {
2599 ll_process_routine_parameters(TASKDUPG(taskFnsptr));
2600 TASKLP_TASKDUP = ad_acon(TASKDUPG(taskFnsptr), 0);
2601 } else {
2602 TASKLP_TASKDUP = 0;
2603 }
2604 ili = ll_make_kmpc_taskloop(TASKLPARGS);
2605 clearTaskloopInfo();
2606 }
2607 iltb.callfg = 1;
2608 chk_block(ili);
2609 }
2610 wr_block();
2611 cr_block();
2612
2613 /* reset once done processing a task - need this for mk_address to work */
2614 taskAllocSptr = SPTR_NULL;
2615
2616 taskCnt--;
2617 mppgcnt--;
2618 taskbih = 0;
2619
2620 exp_label(ILM_SymOPND(ilmp, 1));
2621 break;
2622
2623 case IM_TASKWAIT:
2624 if (ll_ilm_is_rewriting())
2625 break;
2626 ccff_info(MSGOPENMP, "OMP018", gbl.findex, gbl.lineno, "Taskwait", NULL);
2627 ili = ll_make_kmpc_task_wait();
2628 iltb.callfg = 1;
2629 chk_block(ili);
2630 break;
2631
2632 case IM_TASKYIELD:
2633 if (ll_ilm_is_rewriting())
2634 break;
2635 ccff_info(MSGOPENMP, "OMP019", gbl.findex, gbl.lineno, "Taskyield", NULL);
2636 ili = ll_make_kmpc_task_yield();
2637 iltb.callfg = 1;
2638 chk_block(ili);
2639 break;
2640
2641 case IM_BMPPG:
2642 if (ll_ilm_is_rewriting())
2643 break;
2644
2645 /* create a block for kmpc_task_alloc */
2646
2647 NEED(mppgcnt + 1, mppgbih, int, mppgBihSiz, mppgBihSiz + 16);
2648 mppgbih[mppgcnt] = expb.curbih;
2649 mppgcnt++;
2650
2651 /* for task call */
2652 wr_block();
2653 cr_block();
2654 break;
2655
2656 case IM_EMPPG:
2657 if (ll_ilm_is_rewriting())
2658 break;
2659 mppgcnt--;
2660 break;
2661
2662 case IM_BAMPPG:
2663 if (ll_ilm_is_rewriting())
2664 break;
2665
2666 resetMppBih(SET_MPPBIH, IS_PREVMPPG);
2667
2668 break;
2669
2670 case IM_EAMPPG:
2671 if (ll_ilm_is_rewriting())
2672 break;
2673
2674 resetMppBih(RESTORE_MPPBIH, IS_PREVMPPG);
2675 break;
2676 case IM_BTARGETUPDATE:
2677 case IM_BTARGETDATA:
2678 case IM_TARGETENTERDATA:
2679 case IM_TARGETEXITDATA:
2680 #if defined(OMP_OFFLOAD_LLVM) || defined(OMP_OFFLOAD_PGI)
2681 if(!flg.omptarget)
2682 break;
2683 dotarget = ILI_OF(ILM_OPND(ilmp, 1));
2684 beg_label = getlab();
2685 end_label = getlab();
2686
2687 dotarget = ad3ili(IL_ICJMPZ, dotarget, CC_EQ, end_label);
2688 RFCNTI(end_label);
2689 chk_block(dotarget);
2690
2691 wr_block();
2692 cr_block();
2693 exp_label(beg_label);
2694
2695 if(!IS_OMP_DEVICE_CG)
2696 exp_ompaccel_targetdata(ilmp, curilm, opc);
2697
2698 exp_label(end_label);
2699 #endif
2700 break;
2701 case IM_ETARGETUPDATE:
2702 case IM_ETARGETDATA:
2703 #if defined(OMP_OFFLOAD_LLVM) || defined(OMP_OFFLOAD_PGI)
2704 if(!flg.omptarget || IS_OMP_DEVICE_CG) break;
2705 OMPACCEL_TINFO *targetinfo;
2706 int ili;
2707 wr_block();
2708 cr_block();
2709 if(opc == IM_ETARGETDATA) {
2710 targetinfo = ompaccel_tinfo_current_get_targetdata();
2711 ili = ll_make_tgt_target_data_end(OMPACCEL_DEFAULT_DEVICEID, targetinfo);
2712 }
2713 iltb.callfg = 1;
2714 chk_block(ili);
2715 wr_block();
2716 cr_block();
2717 break;
2718 #endif
2719 break;
2720 case IM_BDISTRIBUTE:
2721 if (ll_ilm_is_rewriting())
2722 ccff_info(MSGOPENMP, "OMP024", gbl.findex, gbl.lineno,
2723 "Distribute loop activated", NULL);
2724 break;
2725 case IM_EDISTRIBUTE:
2726 if (ll_ilm_is_rewriting())
2727 ccff_info(MSGOPENMP, "OMP025", gbl.findex, gbl.lineno,
2728 "Distribute loop terminated", NULL);
2729 break;
2730 break;
2731
2732 case IM_MP_ATOMIC:
2733 if (ll_ilm_is_rewriting())
2734 break;
2735 wr_block();
2736 cr_block();
2737 bihb.csfg = BIH_CS(expb.curbih) = true;
2738 break;
2739 case IM_MP_ENDATOMIC:
2740 if (ll_ilm_is_rewriting())
2741 break;
2742 wr_block();
2743 cr_block();
2744 bihb.csfg = BIH_CS(expb.curbih) = false;
2745 break;
2746
2747 case IM_MP_ATOMICREAD:
2748 if (ll_ilm_is_rewriting())
2749 break;
2750 ILM_RESULT(curilm) = exp_mp_atomic_read(ilmp);
2751 break;
2752
2753 case IM_MP_ATOMICWRITE:
2754 if (ll_ilm_is_rewriting())
2755 break;
2756 exp_mp_atomic_write(ilmp);
2757 break;
2758 case IM_MP_ATOMICUPDATE:
2759 if (ll_ilm_is_rewriting())
2760 break;
2761 exp_mp_atomic_update(ilmp);
2762 break;
2763 case IM_MP_ATOMICCAPTURE:
2764 if (ll_ilm_is_rewriting())
2765 break;
2766 exp_mp_atomic_capture(ilmp);
2767 break;
2768 case IM_TASKLASTPRIV:
2769 if (ISTASKDUPG(GBL_CURRFUNC)) {
2770 INT offset;
2771 int offset_sptr, ioffset, acon, load, nme;
2772 SPTR secarg = ll_get_hostprog_arg(GBL_CURRFUNC, 1);
2773 SPTR lastitr = ll_get_hostprog_arg(GBL_CURRFUNC, 3);
2774 offset_sptr = ILM_OPND(ilmp, 1);
2775 offset = get_isz_cval(offset_sptr);
2776 /* load from 3rd argument(int litr) into 1st argument at offset */
2777 acon = ad_acon(lastitr, 0);
2778 load = ad3ili(IL_LD, acon, addnme(NT_VAR, lastitr, 0, 0), MSZ_WORD);
2779
2780 nme = addnme(NT_VAR, secarg, 0, 0);
2781 acon = mk_address(secarg);
2782 ioffset = ad_aconi(offset);
2783 acon = ad3ili(IL_AADD, acon, ioffset, 0);
2784 ili =
2785 ad4ili(IL_ST, load, acon, addnme(NT_IND, lastitr, nme, 0), MSZ_WORD);
2786
2787 chk_block(ili);
2788 }
2789 break;
2790
2791 #ifdef IM_TASKREG
2792 case IM_TASKREG:
2793 break;
2794 #endif
2795 /* unused: to be removed in future release */
2796 #ifdef IM_ETASKREG
2797 case IM_ETASKREG:
2798 break;
2799 #endif
2800 #ifdef IM_ETASKFIRSTPRIV
2801 case IM_ETASKFIRSTPRIV:
2802 break;
2803 #endif
2804 #ifdef OMP_OFFLOAD_LLVM
2805 case IM_MP_REDUCTIONITEM:
2806 if (flg.omptarget && gbl.ompaccel_intarget)
2807 exp_ompaccel_reductionitem(ilmp, curilm);
2808 break;
2809 case IM_MP_BREDUCTION:
2810 case IM_MP_EREDUCTION:
2811 break;
2812 case IM_MP_TARGETLOOPTRIPCOUNT:
2813 if(flg.omptarget)
2814 exp_ompaccel_looptripcount(ilmp, curilm);
2815 break;
2816 case IM_MP_MAP:
2817 if(flg.omptarget && !(IS_OMP_DEVICE_CG || gbl.ompaccel_intarget))
2818 exp_ompaccel_map(ilmp, curilm, outlinedCnt);
2819 break;
2820 case IM_MP_EMAP:
2821 if(flg.omptarget && !(IS_OMP_DEVICE_CG || gbl.ompaccel_intarget)) {
2822 exp_ompaccel_emap(ilmp, curilm);
2823 }
2824 break;
2825 case IM_MP_TARGETMODE:
2826 if(flg.omptarget) {
2827 ompaccel_tinfo_set_mode_next_target((OMP_TARGET_MODE)ILM_OPND(ilmp, 1));
2828 target_ili_num_teams = ILI_OF(ILM_OPND(ilmp, 2));
2829 target_ili_thread_limit = ILI_OF(ILM_OPND(ilmp, 3));
2830 target_ili_num_threads = ILI_OF(ILM_OPND(ilmp, 4));
2831 }
2832 break;
2833 #endif /* end #ifdef OMP_OFFLOAD_LLVM */
2834 default:
2835 interr("exp_smp: unsupported opc", opc, ERR_Severe);
2836 break;
2837 }
2838
2839 #endif /* end #ifdef IM_BPAR */
2840 }
2841
2842 /* opc: IL_DFRDP / IL_DFRSP, depending on result type of call */
2843 /* Return the ili of a call to a function that returns a result, using ili
2844 * callili, followed by freeing of the appropriate argument registers with
2845 * opcode opc. */
2846 static int
makeCallResult(ILI_OP opc,int callili)2847 makeCallResult(ILI_OP opc, int callili)
2848 {
2849 int rg;
2850 int ili;
2851
2852 assert(IL_DFRIR <= opc && opc <= IL_DFRAR, "makeCallResult: invalid opcode",
2853 opc, ERR_Fatal);
2854
2855 switch (opc) {
2856 case IL_DFRIR:
2857 rg = IR_RETVAL;
2858 break;
2859 case IL_DFRSP:
2860 rg = SP_RETVAL;
2861 break;
2862 case IL_DFRDP:
2863 rg = DP_RETVAL;
2864 break;
2865 case IL_DFRAR:
2866 rg = AR_RETVAL;
2867 break;
2868 default:
2869 interr("makeCallResult: invalid register free opcode", opc, ERR_Fatal);
2870 }
2871 ili = ad2ili(opc, callili, rg);
2872
2873 return ili;
2874 }
2875
2876 SPTR
lcpu_temp(SC_KIND sc)2877 lcpu_temp(SC_KIND sc)
2878 {
2879 SPTR sym;
2880 char name[10];
2881 static int lcpu_cnt = 0; /* counter for lcpu temporaries */
2882
2883 strcpy(name, ".lcp");
2884 sprintf(&name[4], "%05d", lcpu_cnt);
2885 lcpu_cnt++;
2886 sym = getcctemp_sc(name, ST_VAR,
2887 sc); /* lcpu variable, 1 per critical section */
2888 DTYPEP(sym, DT_INT);
2889 return sym;
2890 }
2891
2892 SPTR
ncpus_temp(SC_KIND sc)2893 ncpus_temp(SC_KIND sc)
2894 {
2895 SPTR sym;
2896 char name[10];
2897 static int ncpus_cnt = 0; /* counter for ncpus temporaries */
2898
2899 strcpy(name, ".ncp");
2900 sprintf(&name[4], "%05d", ncpus_cnt);
2901 ncpus_cnt++;
2902 sym = getcctemp_sc(name, ST_VAR,
2903 sc); /* ncpus variable, 1 per critical section */
2904 DTYPEP(sym, DT_INT);
2905 return sym;
2906 }
2907
2908 static int
addMpBcsNest(void)2909 addMpBcsNest(void)
2910 {
2911 int ili;
2912 ili = makeCall("_mp_bcs_nest_red", IL_JSR, 0);
2913 return ili;
2914 }
2915
2916 static int
addMpEcsNest(void)2917 addMpEcsNest(void)
2918 {
2919 int ili;
2920 ili = makeCall("_mp_ecs_nest_red", IL_JSR, 0);
2921 return ili;
2922 }
2923
2924 int
add_mp_p(SPTR semaphore)2925 add_mp_p(SPTR semaphore)
2926 {
2927 int ili;
2928 ili = ll_make_kmpc_critical(semaphore);
2929 return ili;
2930 }
2931
2932 int
add_mp_v(SPTR semaphore)2933 add_mp_v(SPTR semaphore)
2934 {
2935 int ili;
2936 ili = ll_make_kmpc_end_critical(semaphore);
2937 return ili;
2938 }
2939
/** \brief Stub kept for interface compatibility.
 *
 *  The old body declared unused locals and returned an uninitialized
 *  value (undefined behavior).  Like its sibling stubs (add_mp_pexit,
 *  add_mp_ncpus, ...), it now simply returns 0.
 *  \param ispar nonzero for a parallel region (unused)
 *  \return always 0
 */
int
add_mp_penter(int ispar)
{
  (void)ispar; /* unused; signature preserved for callers */
  return 0;
}
2948
/** \brief Stub kept for interface compatibility; presumably superseded by
 *  the kmpc runtime calls — confirm before removing.
 *  \return always 0 */
int
add_mp_pexit(void)
{
  return 0;
}
2954
/** \brief Stub kept for interface compatibility; presumably superseded by
 *  the kmpc runtime calls — confirm before removing.
 *  \return always 0 */
int
add_mp_ncpus(void)
{
  return 0;
}
2960
/** \brief Stub kept for interface compatibility; presumably superseded by
 *  the kmpc runtime calls — confirm before removing.
 *  \return always 0 */
int
add_mp_ncpus3(void)
{
  return 0;
}
2966
/** \brief Stub kept for interface compatibility; presumably superseded by
 *  the kmpc runtime calls — confirm before removing.
 *  \return always 0 */
int
add_mp_lcpu(void)
{
  return 0;
}
2972
/** \brief Stub kept for interface compatibility; presumably superseded by
 *  the kmpc runtime calls — confirm before removing.
 *  \return always 0 */
int
add_mp_barrier2(void)
{
  return 0;
}
2978
2979 /* for compiler generated routines that have referenced the threadprivate
2980 variables, but do not need the kmpc_threadprivate_cached set up
2981 */
2982 void
clear_tplnk(void)2983 clear_tplnk(void)
2984 {
2985 int sym;
2986 for (sym = gbl.threadprivate; sym > NOSYM; sym = TPLNKG(sym)) {
2987 TPLNKP(sym, 0);
2988 THPRVTOPTP(sym, 0); /* so much trouble clear this too */
2989 }
2990 gbl.threadprivate = NOSYM;
2991 }
2992
/** \brief Generate any mp-specific prologue for a function.
 *
 * When \p process_tp is set, walks the threadprivate list and, for every
 * entry point of the current function, inserts the runtime call that sets
 * up the threads' copies (__kmpc_threadprivate_cached or
 * _mp_get_threadprivate; see allocThreadprivate) into the block right
 * after the entry label.  Finally calls ll_save_gtid_val on the last such
 * block (or block 0 when no threadprivate setup was emitted).
 *
 * \param process_tp when true, emit the threadprivate setup calls
 */
void
exp_mp_func_prologue(bool process_tp)
{
  SPTR sym;
  int ili, tmpthread;
  int func;
  int next_tp;
  int cond_ili = 0;
  int class_register = 0;
  int bih = 0;

#ifdef CUDAG
  /* no host-side prologue for device/global CUDA routines */
  if (CUDAG(GBL_CURRFUNC) == CUDA_GLOBAL || CUDAG(GBL_CURRFUNC) == CUDA_DEVICE)
    return;
#endif
  if (process_tp) {
    for (sym = gbl.threadprivate; sym > NOSYM; sym = TPLNKG(sym)) {
      /* For each threadprivate common, must 'declare' the threads'
       * copies by calling:
       * _kmpc_threadprivate_cached(&cmn_block, &cmn_vector, size(cmn_block))
       */
      int call;

      /* build the store of the per-thread address (IL_STA) */
      tmpthread = allocThreadprivate(sym, &cond_ili);
      if (gbl.outlined)
        func = gbl.currsub;
      else
        func =
            gbl.entries; /* this does not really work for entry because for llvm
                          * entry are done very late and in separate function.
                          */
      /* NOTE(review): the assignment above is immediately overwritten by
       * the loop below, which always starts at gbl.currsub. */
      for (func = gbl.currsub; func != NOSYM; func = SYMLKG(func)) {
        if (EXPDBG(8, 256))
          fprintf(gbl.dbgfil, "---_kmpc_threadprivate_cached: in %s ---\n",
                  SYMNAME(func));

        /* splice the setup call into the block after the entry label */
        bih = expb.curbih = findEnlabBih(func);
        rdilts(expb.curbih); /* get block after entry */
        expb.curilt = 0;
        iltb.callfg = 1;
        chk_block(tmpthread);
        wrilts(expb.curbih);
      }
      THPRVTOPTP(sym, 0);
    }
  }

  ll_save_gtid_val(bih);
}
3044
/* Declare (or look up) fname as a function symbol and set its NOPAD flag. */
static void
no_pad_func(char *fname)
{
  int fsptr = mkfunc(fname);

  NOPADP(fsptr, 1);
}
3053
3054 static int
decrOutlinedCnt(void)3055 decrOutlinedCnt(void)
3056 {
3057 outlinedCnt--;
3058 if (outlinedCnt == 0) {
3059 ll_write_ilm_end();
3060 unsetRewritingILM();
3061 }
3062 return outlinedCnt;
3063 }
3064
3065 static int
incrOutlinedCnt(void)3066 incrOutlinedCnt(void)
3067 {
3068 parCnt++;
3069 if (parCnt > maxOutlinedCnt)
3070 maxOutlinedCnt = parCnt;
3071 outlinedCnt++;
3072 return outlinedCnt;
3073 }
3074
3075 static int
getOutlinedTemp(char * pfx,int dtype)3076 getOutlinedTemp(char *pfx, int dtype)
3077 {
3078 char name[32];
3079 int sym;
3080
3081 sprintf(name, "%s%05d", pfx, maxOutlinedCnt + outlinedCnt);
3082 sym = getcctemp_sc(name, ST_VAR, expb.sc);
3083 DTYPEP(sym, DT_INT);
3084 return sym;
3085 }
3086
/* Return 1 iff the semaphore's MIDNUM symbol is the shared semaphore of
 * the unnamed critical section ("__cs_unspc"), else 0. */
static int
isUnnamedCs(int sem)
{
  return strcmp(SYMNAME(MIDNUMG(sem)), "__cs_unspc") == 0;
}
3095
3096 static int
addMpUnp(void)3097 addMpUnp(void)
3098 {
3099 int ili;
3100 ili = ll_make_kmpc_critical(SPTR_NULL);
3101 return ili;
3102 }
3103
3104 static int
addMpUnv(void)3105 addMpUnv(void)
3106 {
3107 int ili;
3108 ili = ll_make_kmpc_end_critical(SPTR_NULL);
3109 return ili;
3110 }
3111
/** \brief Build a call to the runtime routine _mp_get_threadprivate.
 *
 * The prototype registered below is
 *   void *_mp_get_threadprivate(void *, int, void *, int64, void *)
 * and the arguments supplied are: a null pointer constant, the gtid value,
 * \p data_ili, \p size_ili (widened via sel_iconv), and \p cache_ili.
 * NOTE(review): jsrAddArg is seeded with cache_ili and the null pointer is
 * added last — the argument list appears to be built in reverse; keep the
 * order exactly as is.
 *
 * \param data_ili  ILI of the master copy's address
 * \param size_ili  ILI of the data size
 * \param cache_ili ILI of the cache vector's address
 * \return ILI of the call's address-register result
 */
int
_make_mp_get_threadprivate(int data_ili, int size_ili, int cache_ili)
{
  int argili, ili, con;
  int null_arg;
  INT tmp[2];
  tmp[0] = 0;
  tmp[1] = 0;
  con = getcon(tmp, DT_INT);
  null_arg = ad1ili(IL_ACON, con); /* null pointer-constant argument */

  argili = jsrAddArg(0, IL_ARGAR, cache_ili);
  mk_prototype("_mp_get_threadprivate", NULL, DT_CPTR, 5, DT_CPTR, DT_INT,
               DT_CPTR, DT_INT8, DT_CPTR);
  size_ili = sel_iconv(size_ili, 1); /* widen the size to 64-bit */
  argili = jsrAddArg(argili, IL_ARGKR, size_ili);
  argili = jsrAddArg(argili, IL_ARGAR, data_ili);
  argili = jsrAddArg(argili, IL_ARGIR, ll_get_gtid_val_ili());
  argili = jsrAddArg(argili, IL_ARGAR, null_arg);
  ili = makeCall("_mp_get_threadprivate", IL_QJSR, argili);
  ili = genretvalue(ili, IL_DFRAR);
  return ili;
}
3135
3136 /** \brief C and Fortran threadprivate : for simple POD */
3137 static int
allocThreadprivate(SPTR sym,int * tmpthr)3138 allocThreadprivate(SPTR sym, int *tmpthr)
3139 {
3140 SPTR cm;
3141 int size;
3142 int adr_vector;
3143 int adr_cm;
3144 int call;
3145
3146 cm = MIDNUMG(sym); /* corresponding common block or threadprivate var */
3147 if (STYPEG(cm) == ST_CMBLK) {
3148 adr_cm = ad_acon(CMEMFG(cm), 0); /* &cmn_block */
3149 size = ad_icon((INT)SIZEG(cm));
3150 }
3151 else if (SCG(cm) == SC_BASED && POINTERG(cm)) {
3152 /*
3153 * Cannot rely on the SYMLK chain appearing as
3154 * $p -> $o -> $sd
3155 * Apparently, these links only occur for the
3156 * pointer's internal variables if the pointer
3157 * does not have the SAVE attribute. Without
3158 * these fields, the correct size of the threads'
3159 * copies cannot be computed.
3160 * Just explicitly look for the internal pointer
3161 * and descriptor. If the descriptor is present,
3162 * can assume that there is an offest variable which
3163 * only needs to be accounted for in the size
3164 * computation of the threads' copies.
3165 * Setup up the MIDNUM fields as follows where
3166 * foo is the symtab entry which has the POINTER
3167 * flag set:
3168 * foo -> foo$p
3169 * TPpfoo -> foo
3170 * foo$p -> TPpfoo
3171 * foo$sd -> TPpfoo
3172 * Note that foo's SDSC -> foo$sd.
3173 * Before we had:
3174 * foo -> TPpfoo
3175 * TPpfoo -> foo$p
3176 * which is a problem for computing the size
3177 * when starting with TPpfoo.
3178 */
3179 SPTR tptr;
3180 int sdsptr;
3181 tptr = MIDNUMG(cm);
3182 adr_cm = ad_acon(tptr, 0); /* &tp_var */
3183 size = size_of(DTYPEG(tptr));
3184 sdsptr = SDSCG(cm); /* $sd */
3185 if (sdsptr) {
3186 size += size_of(DT_ADDR); /* $o */
3187 size += size_of(DTYPEG(sdsptr)); /* $sd */
3188 }
3189 size = ad_icon(size);
3190 }
3191 else if (DTY(DTYPEG(cm)) == TY_PTR) {
3192 /*
3193 * Given the above code for POINTER, this code is
3194 * probably dead, but leave it just in case.
3195 */
3196 adr_cm = ad_acon(cm, 0); /* &tp_var */
3197 size = size_of(DTYPEG(cm));
3198 if (SYMLKG(cm) != NOSYM) {
3199 size += size_of(DTYPEG(SYMLKG(cm))); /* $o */
3200 if (SYMLKG(SYMLKG(cm)) != NOSYM) {
3201 size += size_of(DTYPEG(SYMLKG(SYMLKG(cm)))); /* $sd */
3202 }
3203 }
3204 size = ad_icon(size);
3205 } else if (SCG(sym) == SC_BASED) {
3206 adr_cm = ad_acon(cm, 0); /* &tp_var */
3207 size = ad_icon(size_of(DTYPEG(cm)));
3208 } else {
3209 adr_cm = ad_acon(cm, 0); /* &tp_var */
3210 size = ad_icon(size_of(DTYPEG(cm)));
3211 }
3212 adr_vector = ad_acon(sym, 0); /* &cmn_vector/tp_vector */
3213
3214 if (tmpthr) {
3215 int tili;
3216 SPTR tsym;
3217 ll_set_new_threadprivate(sym);
3218 tsym = THPRVTOPTG(sym);
3219 if (XBIT(69, 0x80)) { /* experiment flag */
3220 tili = _make_mp_get_threadprivate(adr_cm, size, adr_vector);
3221 *tmpthr = ad3ili(IL_STA, tili, adr_vector, addnme(NT_VAR, sym, 0, 0));
3222 } else {
3223 tili = ll_make_kmpc_threadprivate_cached(adr_cm, size, adr_vector);
3224 *tmpthr =
3225 ad3ili(IL_STA, tili, ad_acon(tsym, 0), addnme(NT_VAR, tsym, 0, 0));
3226 }
3227 }
3228 return *tmpthr;
3229 }
3230
/** \brief Return an ad_icon ILI holding the size of the master copy of a
 *  threadprivate variable or common block.
 *
 *  The branch structure deliberately mirrors the size computation in
 *  allocThreadprivate; keep the two in agreement.
 *  \param sym symbol referencing the threadprivate data (a common-block
 *             member is first mapped to its common block)
 *  \return ILI of the integer size constant
 */
int
get_threadprivate_origsize(int sym)
{
  int cm;
  int size;

  if (SCG(sym) == SC_CMBLK)
    sym = MIDNUMG(sym); /* get the original common block */

  /* NOTE(review): the double MIDNUM dereference appears to walk from the
   * given symbol to the threadprivate vector and then to the original
   * data symbol (see the MIDNUM diagrams in allocThreadprivate) —
   * confirm against the symbol-table layout. */
  sym = MIDNUMG(sym);
  cm = MIDNUMG(sym);
  if (STYPEG(cm) == ST_CMBLK) {
    size = ad_icon((INT)SIZEG(cm));
  }
  else if (SCG(cm) == SC_BASED && POINTERG(cm)) {
    /* POINTER: the pointer itself plus, when a descriptor exists, the
     * offset variable ($o) and the descriptor ($sd). */
    int tptr;
    int sdsptr;
    tptr = MIDNUMG(cm);
    size = size_of(DTYPEG(tptr));
    sdsptr = SDSCG(cm); /* $sd */
    if (sdsptr) {
      size += size_of(DT_ADDR); /* $o */
      size += size_of(DTYPEG(sdsptr)); /* $sd */
    }
    size = ad_icon(size);
  }
  else if (DTY(DTYPEG(cm)) == TY_PTR) {
    /* probably dead given the POINTER case above; kept just in case */
    size = size_of(DTYPEG(cm));
    if (SYMLKG(cm) != NOSYM) {
      size += size_of(DTYPEG(SYMLKG(cm))); /* $o */
      if (SYMLKG(SYMLKG(cm)) != NOSYM) {
        size += size_of(DTYPEG(SYMLKG(SYMLKG(cm)))); /* $sd */
      }
    }
    size = ad_icon(size);
  } else if (SCG(sym) == SC_BASED) {
    size = ad_icon(size_of(DTYPEG(cm)));
  } else {
    size = ad_icon(size_of(DTYPEG(cm)));
  }

  return size;
}
3274
/* Count the entries of a -1-terminated section table.
 * A null table is treated as empty. */
static int
getNumSect(int *tab)
{
  int count = 0;

  if (tab == 0)
    return 0;
  while (tab[count] != -1)
    ++count;
  return count;
}
3285
/** \brief Accessor for the current task-allocation symbol (the temp that
 *  holds the pointer produced by ll_make_kmpc_task_arg); reset to
 *  SPTR_NULL once task processing completes. */
SPTR
llTaskAllocSptr(void)
{
  return taskAllocSptr;
}
3291
3292 LLTask *
llGetTask(int scope)3293 llGetTask(int scope)
3294 {
3295 int sptr = scope;
3296 if (!scope)
3297 sptr = scopeSptr;
3298 assert(sptr, "No scope for task found ", sptr, ERR_Fatal);
3299 LLTask *task = llmp_get_task(sptr);
3300 if (!task)
3301 task = llmp_create_task(sptr);
3302 assert(task, "No task associated to this scope sptr", sptr, ERR_Fatal);
3303 return task;
3304 }
3305