1 /*
2 * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /**
19 * \file
20 * \brief ompaccel.c - OpenMP GPU Offload for NVVM Targets. It uses
21 * libomptarget
22 */
23 #ifdef OMP_OFFLOAD_LLVM
24
25 #include "kmpcutil.h"
26 #include "error.h"
27 #include "semant.h"
28 #include "ilmtp.h"
29 #include "ilm.h"
30 #include "ili.h"
31 #include "expand.h"
32 #include "exputil.h"
33 #include "outliner.h"
34 #include "machreg.h"
35 #include "mp.h"
36 #include "ll_structure.h"
37 #include "llmputil.h"
38 #include "ccffinfo.h"
39 #include "llutil.h"
40 #include "ompaccel.h"
41 #include "tgtutil.h"
42 #include "dinit.h"
43 #include "assem.h"
44 #include "dinitutl.h"
45 #include "cgllvm.h"
46 #include "cgmain.h"
47
48 #include "regutil.h"
49 #include "dtypeutl.h"
50 #include "llassem.h"
51 #include "ll_ftn.h"
52 #include "symfun.h"
53 #include "../../flang1/flang1exe/global.h"
54
/* Fatal diagnostics for OpenMP offload constructs that are not supported.
 * Each macro forwards to error() with ERR_Fatal severity, 0 as the third
 * argument, and one or two pragma-name strings. The numeric codes 1200-1202
 * select the message text, which is defined outside this file. */
#define NOT_IMPLEMENTED(_pragma) \
  error((error_code_t)1200, ERR_Fatal, 0, _pragma, NULL)
#define NOT_IMPLEMENTED_CANTCOMBINED(_pragma, _pragma2) \
  error((error_code_t)1201, ERR_Fatal, 0, _pragma, _pragma2)
#define NOT_IMPLEMENTED_NEEDCOMBINED(_pragma, _pragma2) \
  error((error_code_t)1202, ERR_Fatal, 0, _pragma, _pragma2)
61
/* Bookkeeping for the pool of target-region descriptors (OMPACCEL_TINFO). */

/* Multiplier applied to a table's capacity whenever it has to grow. */
#define INC_EXP 2
/* Capacity, in entries, of the tinfos array (initial value; updated on
 * growth). */
int tinfo_size = 50;
/* Number of reduction entries allocated for every target info. */
int tinfo_size_reductions = 10;

/* Number of entries of tinfos currently in use. */
int num_tinfos = 0;
/* Every target info created so far, in creation order. */
OMPACCEL_TINFO **tinfos;
/* Target info created most recently; new infos are linked to it as their
 * parent. */
OMPACCEL_TINFO *current_tinfo = nullptr;
/* Mode handed to the next target info that gets created; reset to
 * mode_none_target once consumed. */
OMP_TARGET_MODE NextTargetMode = mode_none_target;
71
/* Target triple string used for device code generation. Only the pointer is
 * stored; the string itself is owned by whoever called the setter. */
const char *nvvm_target_triple;

/** \brief Remember \p tp as the device target triple (pointer is not copied).
 */
void
ompaccel_set_targetriple(const char *tp)
{
  nvvm_target_triple = tp;
}

/** \brief Return the pointer last passed to ompaccel_set_targetriple(), or a
 * null pointer when it was never called.
 */
const char *
ompaccel_get_targetriple()
{
  const char *triple = nvvm_target_triple;
  return triple;
}
83 static int reductionFunctionCounter = 0;
84
85 static void
_long_unsigned(int lilix,int * dt,bool * punsigned,DTYPE dtype)86 _long_unsigned(int lilix, int *dt, bool *punsigned, DTYPE dtype)
87 {
88 ILI_OP opc;
89 opc = ILI_OPC(lilix);
90 int dty = DTY(dtype);
91 if (dtype == DT_INT || dtype == DT_UINT) {
92 if (size_of(dtype) > 4)
93 *dt = 2;
94 else
95 *dt = 1;
96 } else if (dtype == DT_FLOAT || dtype == DT_UINT8) {
97 *dt = 3;
98 } else if (dty == TY_INT8) {
99 *dt = 2;
100 } else if (dty == TY_DBLE) {
101 *dt = 4;
102 }
103
104 // todo ompaccel I don't know how to handle others
105
106 switch (DTY(dtype)) {
107 case TY_UINT:
108 case TY_USINT:
109 case TY_UINT8:
110 case TY_UINT128:
111 if (opc != IL_ICON)
112 *punsigned = TRUE;
113 break;
114 default:
115 break;
116 }
117
118 } /* _long_unsigned */
119
120 static bool
_pointer_type(DTYPE dtype)121 _pointer_type(DTYPE dtype)
122 {
123 if (dtype && DTY(dtype) == TY_PTR)
124 return TRUE;
125 return FALSE;
126 } /* _pointer_type */
127
128 int
mk_ompaccel_ldsptr(SPTR sptr)129 mk_ompaccel_ldsptr(SPTR sptr)
130 {
131 ISZ_T sz;
132 DTYPE dtype;
133 // it's function
134 if (DTYPEG(sptr) == DT_NONE && STYPEG(sptr) == ST_ENTRY) {
135 int nme = addnme(NT_VAR, sptr, 0, 0);
136 int ili = mk_address(sptr);
137 return ad3ili(IL_LDA, ili, nme, MSZ_PTR);
138 } else {
139 sz = size_of(DTYPEG(sptr));
140 dtype = DTYPEG(sptr);
141
142 if (STYPEG(sptr) == ST_CONST) {
143 if (sz == 8)
144 return ad_kcon(CONVAL1G(sptr), CONVAL2G(sptr));
145 else
146 return ad_icon(CONVAL2G(sptr));
147 } else {
148 int nme = addnme(NT_VAR, sptr, 0, 0);
149 int ili = mk_address(sptr);
150 if (ILI_OPC(ili) == IL_LDA)
151 nme = ILI_OPND(ili, 2);
152 if (_pointer_type(dtype) || DTY(dtype) == TY_ARRAY) {
153 return ad3ili(IL_LDA, ili, nme, MSZ_PTR);
154 } else {
155 if (sz == 8)
156 return ad3ili(IL_LDKR, ili, nme, MSZ_I8);
157 else
158 return ad3ili(IL_LD, ili, nme, mem_size(DTY(DTYPEG(sptr))));
159 }
160 }
161 }
162 }
163
164 int
mk_ompaccel_load(int ili,DTYPE dtype,int nme)165 mk_ompaccel_load(int ili, DTYPE dtype, int nme)
166 {
167 if (_pointer_type(dtype))
168 return ad3ili(IL_LDA, ili, nme, MSZ_PTR);
169 else {
170 switch (dtype) {
171 case DT_INT:
172 if (size_of(dtype) > 4)
173 return ad3ili(IL_LDKR, ili, nme, MSZ_WORD);
174 else
175 return ad3ili(IL_LD, ili, nme, MSZ_WORD);
176 case DT_REAL:
177 if (size_of(dtype) > 4)
178 return ad3ili(IL_LDKR, ili, nme, MSZ_F8);
179 else
180 return ad3ili(IL_LDSP, ili, nme, MSZ_F8);
181 break;
182 case DT_DBLE:
183 return ad3ili(IL_LDDP, ili, nme, MSZ_DBLE);
184 break;
185 case DT_CMPLX:
186 return ad3ili(IL_LDDCMPLX, ili, nme, MSZ_F16);
187 break;
188 case DT_NONE:
189 return ad3ili(IL_LD, ili, nme, MSZ_WORD);
190 break;
191 default:
192 return 0;
193 break;
194 }
195 }
196 }
197
198 static int
mk_ompaccel_ld(int ili,int nme)199 mk_ompaccel_ld(int ili, int nme)
200 {
201 return mk_ompaccel_load(ili, DT_NONE, nme);
202 }
203
204 int
mk_ompaccel_store(int ili_value,DTYPE dtype,int nme,int ili_address)205 mk_ompaccel_store(int ili_value, DTYPE dtype, int nme, int ili_address)
206 {
207 if (_pointer_type(dtype))
208 return ad4ili(IL_STA, ili_value, ili_address, nme, MSZ_PTR);
209 else {
210 switch (dtype) {
211 case DT_LOG:
212 return ad4ili(IL_ST, ili_value, ili_address, nme, MSZ_WORD);
213 break;
214 case DT_INT:
215 return ad4ili(IL_ST, ili_value, ili_address, nme, MSZ_WORD);
216 break;
217 case DT_REAL:
218 return ad4ili(IL_STSP, ili_value, ili_address, nme, MSZ_F4);
219 break;
220 case DT_DBLE:
221 return ad4ili(IL_STDP, ili_value, ili_address, nme, MSZ_DBLE);
222 break;
223 case DT_INT8:
224 return ad4ili(IL_STKR, ili_value, ili_address, nme, MSZ_I8);
225 break;
226 case DT_NONE:
227 return ad4ili(IL_ST, ili_value, ili_address, nme, MSZ_WORD);
228 break;
229 default:
230 return 0;
231 break;
232 }
233 }
234 }
235
236 static int
mk_ompaccel_stsptr(int ili_value,SPTR sptr)237 mk_ompaccel_stsptr(int ili_value, SPTR sptr)
238 {
239 ISZ_T sz = size_of(DTYPEG(sptr));
240 DTYPE dtype = DTYPEG(sptr);
241 int ili;
242 int nme = addnme(NT_VAR, sptr, 0, 0);
243 if (STYPEG(sptr) == ST_CONST) {
244 if (sz == 8)
245 ili = ad_kcon(CONVAL1G(sptr), CONVAL2G(sptr));
246 else
247 ili = ad_icon(CONVAL2G(sptr));
248 } else {
249 ili = mk_address(sptr);
250 if (ILI_OPC(ili) == IL_LDA)
251 nme = ILI_OPND(ili, 2);
252 }
253 return mk_ompaccel_store(ili_value, dtype, nme, ili);
254 }
255
256 static int
mk_ompaccel_and(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2)257 mk_ompaccel_and(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2)
258 {
259 ILI_OP opc;
260 int dt = 0;
261 bool uu = FALSE;
262 if (!ili1)
263 return ili2;
264 if (!ili2)
265 return ili1;
266 if (_pointer_type(dtype1) || _pointer_type(dtype2)) {
267 return ad3ili(IL_AADD, ili1, ili2, 0);
268 } else {
269 _long_unsigned(ili1, &dt, &uu, dtype1);
270 _long_unsigned(ili2, &dt, &uu, dtype2);
271 /* signed */
272 if (!uu) {
273 opc = IL_AND;
274 } else {
275 opc = IL_KAND;
276 }
277 }
278 return ad2ili(opc, ili1, ili2);
279 }
280
281 static int
mk_ompaccel_iand(int ili1,int ili2)282 mk_ompaccel_iand(int ili1, int ili2)
283 {
284 return mk_ompaccel_and(ili1, DT_INT, ili2, DT_INT);
285 }
286
287 static int
mk_ompaccel_shift(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2)288 mk_ompaccel_shift(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2)
289 {
290 ILI_OP opc = IL_NONE;
291 int dt = 0;
292 bool uu = FALSE;
293 if (!ili1)
294 return ili2;
295 if (!ili2)
296 return ili1;
297
298 _long_unsigned(ili1, &dt, &uu, dtype1);
299 _long_unsigned(ili2, &dt, &uu, dtype2);
300 /* signed */
301 if (!uu) {
302 if (dt == 1)
303 opc = IL_RSHIFT;
304 else if (dt == 2)
305 opc = IL_KARSHIFT;
306 } else {
307 if (dt == 1)
308 opc = IL_URSHIFT;
309 else if (dt == 2)
310 opc = IL_KURSHIFT;
311 }
312 assert(opc != IL_NONE, "Correct IL is not found.", 0, ERR_Fatal);
313 return ad2ili(opc, ili1, ili2);
314 }
315
316 int
mk_ompaccel_compare(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2,int CC)317 mk_ompaccel_compare(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2, int CC)
318 {
319 ILI_OP opc = IL_NONE;
320 int dt = 0;
321 bool uu = FALSE;
322 if (!ili1)
323 return ili2;
324 if (!ili2)
325 return ili1;
326
327 _long_unsigned(ili1, &dt, &uu, dtype1);
328 _long_unsigned(ili2, &dt, &uu, dtype2);
329 /* signed */
330 if (!uu) {
331 if (dt == 1)
332 opc = IL_ICMP;
333 else if (dt == 2)
334 opc = IL_KCMP;
335 } else {
336 if (dt == 1)
337 opc = IL_UICMP;
338 else if (dt == 2)
339 opc = IL_UKCMP;
340 }
341 assert(opc != IL_NONE, "Correct IL is not found.", 0, ERR_Fatal);
342 return ad3ili(opc, ili1, ili2, CC);
343 }
344
345 int
mk_ompaccel_add(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2)346 mk_ompaccel_add(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2)
347 {
348 ILI_OP opc = IL_NONE;
349 int dt = 0;
350 bool uu = FALSE;
351 if (!ili1)
352 return ili2;
353 if (!ili2)
354 return ili1;
355 if (_pointer_type(dtype1) || _pointer_type(dtype2)) {
356 return ad3ili(IL_AADD, ili1, ili2, 0);
357 } else {
358 _long_unsigned(ili1, &dt, &uu, dtype1);
359 _long_unsigned(ili2, &dt, &uu, dtype2);
360 /* signed */
361 if (!uu) {
362 if (dt == 1)
363 opc = IL_IADD;
364 else if (dt == 2)
365 opc = IL_KADD;
366 else if (dt == 3)
367 opc = IL_FADD;
368 else if (dt == 4)
369 opc = IL_DADD;
370 else if (dt == 5)
371 opc = IL_SCMPLXADD;
372 else if (dt == 6)
373 opc = IL_DCMPLXADD;
374 } else {
375 if (dt == 1)
376 opc = IL_UIADD;
377 else if (dt == 2)
378 opc = IL_UKADD;
379 }
380 }
381 assert(opc != IL_NONE, "Correct IL is not found.", 0, ERR_Fatal);
382 return ad2ili(opc, ili1, ili2);
383 } /* mk_ompaccel_add */
384
385 int
mk_ompaccel_mul(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2)386 mk_ompaccel_mul(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2)
387 {
388 ILI_OP opc = IL_NONE;
389 int dt = 0;
390 bool uu = FALSE;
391 if (!ili1)
392 return ili2;
393 if (!ili2)
394 return ili1;
395 if (_pointer_type(dtype1) || _pointer_type(dtype2)) {
396 // todo ompaccel not sure what to do here.
397 return ad3ili(IL_KMUL, ili1, ili2, 0);
398 } else {
399 _long_unsigned(ili1, &dt, &uu, dtype1);
400 _long_unsigned(ili2, &dt, &uu, dtype2);
401 /* signed */
402 if (!uu) {
403 if (dt == 1)
404 opc = IL_IMUL;
405 else if (dt == 2)
406 opc = IL_KMUL;
407 else if (dt == 3)
408 opc = IL_FMUL;
409 else if (dt == 4)
410 opc = IL_DMUL;
411 else if (dt == 5)
412 opc = IL_SCMPLXMUL;
413 else if (dt == 6)
414 opc = IL_DCMPLXMUL;
415 } else {
416 if (dt == 1)
417 opc = IL_UIMUL;
418 else if (dt == 2)
419 opc = IL_UKMUL;
420 }
421 }
422 assert(opc != IL_NONE, "Correct IL is not found.", 0, ERR_Fatal);
423 return ad2ili(opc, ili1, ili2);
424 } /* mk_ompaccel_mul */
425
426 static SPTR
mk_ompaccel_getnewccsym(int letter,int n,DTYPE dtype,SC_KIND SCkind,SYMTYPE symtype)427 mk_ompaccel_getnewccsym(int letter, int n, DTYPE dtype, SC_KIND SCkind,
428 SYMTYPE symtype)
429 {
430 SPTR sptr = getnewccsym(letter, n, symtype);
431 DTYPEP(sptr, dtype);
432 SCP(sptr, SCkind);
433 OMPACCDEVSYMP(sptr, 1);
434 return sptr;
435 }
436
437 SPTR
mk_ompaccel_addsymbol(const char * name,DTYPE dtype,SC_KIND SCkind,SYMTYPE symtype)438 mk_ompaccel_addsymbol(const char *name, DTYPE dtype, SC_KIND SCkind,
439 SYMTYPE symtype)
440 {
441 SPTR sptr = getsymbol(name);
442 DTYPEP(sptr, dtype);
443 STYPEP(sptr, symtype);
444 SCP(sptr, SCkind);
445 OMPACCDEVSYMP(sptr, 1);
446 return sptr;
447 }
448
449 static void
mk_ompaccel_function_end(SPTR func_sptr)450 mk_ompaccel_function_end(SPTR func_sptr)
451 {
452 int bihx, endlab;
453 bihx = expb.curbih;
454 bihx = addbih(bihx);
455 rdilts(bihx);
456 addilt(0, ad1ili(IL_EXIT, func_sptr));
457 wrilts(bihx);
458 BIH_XT(bihx) = 1;
459 BIH_LAST(bihx) = 1;
460 endlab = getlab();
461 STYPEP(endlab, ST_LABEL);
462 RFCNTP(endlab, 1);
463 CCSYMP(endlab, 1);
464 ILIBLKP(endlab, bihx);
465 BIH_LABEL(bihx) = SPTR(endlab);
466 }
467
468 static SPTR
mk_ompaccel_function(char * name,int n_params,const SPTR * param_sptrs,bool isDeviceFunc)469 mk_ompaccel_function(char *name, int n_params, const SPTR *param_sptrs,
470 bool isDeviceFunc)
471 {
472 /* Create a function symbol along with parameters */
473 int dpdscp, bihx;
474 SPTR func_sptr, sym;
475 func_sptr = getsymbol(name);
476 TASKFNP(func_sptr, FALSE);
477 ISTASKDUPP(func_sptr, FALSE);
478 FUNCLINEP(func_sptr, gbl.lineno);
479 STYPEP(func_sptr, ST_ENTRY);
480 CFUNCP(func_sptr, 1);
481 DEFDP(func_sptr, 1);
482 SCP(func_sptr, SC_EXTERN);
483 ADDRTKNP(func_sptr, 1);
484 DCLDP(func_sptr, 1);
485 DTYPEP(func_sptr, DT_NONE);
486
487 if (isDeviceFunc)
488 OMPACCFUNCDEVP(func_sptr, 1);
489 PARAMCTP(func_sptr, n_params);
490 dpdscp = aux.dpdsc_avl;
491 DPDSCP(func_sptr, dpdscp);
492 aux.dpdsc_avl += n_params;
493 NEED(aux.dpdsc_avl, aux.dpdsc_base, int, aux.dpdsc_size,
494 aux.dpdsc_size + n_params + 100);
495
496 for (int i = 0; i < n_params; ++i) {
497 sym = param_sptrs[i];
498 aux.dpdsc_base[dpdscp++] = sym;
499 }
500
501 /* Initialize with an Entry Block */
502 GBL_CURRFUNC = func_sptr;
503 gbl.entries = GBL_CURRFUNC;
504
505 ds_init();
506
507 gbl.lineno = 0;
508 gbl.findex = 0;
509 bihx = addbih(0);
510 gbl.entbih = bihx;
511 BIH_LABEL(bihx) = GBL_CURRFUNC;
512 rdilts(bihx);
513 addilt(0, ad1ili(IL_ENTRY, GBL_CURRFUNC));
514 wrilts(bihx);
515 BIH_FT(bihx) = 1;
516 BIH_EN(bihx) = 1;
517 BIHNUMP(GBL_CURRFUNC, bihx);
518 BIH_LABEL(bihx) = GBL_CURRFUNC;
519
520 expb.curbih = bihx;
521
522 return func_sptr;
523 }
524
525 static int
mk_reduction_op(int redop,int lili,DTYPE dtype1,int rili,DTYPE dtype2)526 mk_reduction_op(int redop, int lili, DTYPE dtype1, int rili, DTYPE dtype2)
527 {
528 switch (redop) {
529 case 1:
530 case 2:
531 return mk_ompaccel_add(lili, dtype1, rili, dtype2);
532 case 3:
533 return mk_ompaccel_mul(lili, dtype1, rili, dtype2);
534 default:
535 static_assert(true, "Rest of reduction operators are not implemented yet.");
536 break;
537 }
538 return 0;
539 }
540
541 DTYPE
mk_ompaccel_array_dtype(DTYPE atype,int size)542 mk_ompaccel_array_dtype(DTYPE atype, int size)
543 {
544 DTYPE dtype;
545 {
546 ADSC *adsc;
547 INT con[2] = {0, size};
548
549 dtype = get_array_dtype(1, atype);
550 adsc = AD_DPTR(dtype);
551 AD_LWBD(adsc, 0) = stb.i1;
552 AD_UPBD(adsc, 0) = getcon(con, DT_INT);
553 AD_NUMELM(adsc) = AD_UPBD(adsc, 0);
554 }
555
556 return dtype;
557 } /* make_array_dtype */
558
559 static void
open_OMP_OFFLOAD_LLVM_file()560 open_OMP_OFFLOAD_LLVM_file()
561 {
562 FILE *F;
563 F = fopen(gbl.ompaccfilename, "w");
564 if (F == nullptr) {
565 #if DEBUG
566 fprintf(stderr, "Trying to open temp file %s\n", gbl.ompaccfilename);
567 #endif
568 }
569 gbl.ompaccfile = F;
570 }
571
572 INLINE static SPTR
create_nvvm_sym(const char * name,DTYPE dtype)573 create_nvvm_sym(const char *name, DTYPE dtype)
574 {
575 SPTR sptr = getsymbol(name);
576 DEFDP(sptr, 1);
577 DTYPEP(sptr, dtype);
578 CFUNCP(sptr, 1);
579 STYPEP(sptr, ST_ENTRY);
580 SCP(sptr, SC_STATIC);
581 ADDRTKNP(sptr, 1);
582 PARAMCTP(sptr, 0);
583 return sptr;
584 }
585
586 INLINE static SPTR
create_sregs(const char * name)587 create_sregs(const char *name)
588 {
589 return create_nvvm_sym(name, DT_INT);
590 }
591
592 void
ompaccel_init()593 ompaccel_init()
594 {
595 /* Create file to write device code */
596 open_OMP_OFFLOAD_LLVM_file();
597 /* Create target pool */
598 tinfos = (OMPACCEL_TINFO **)sccrelal(
599 (char *)tinfos, ((BIGUINT64)((tinfo_size) * sizeof(OMPACCEL_TINFO *))));
600 }
601
602 void
ompaccel_initsyms()603 ompaccel_initsyms()
604 {
605 /* Create thread id sreg symbols */
606 init_nvvm_syms = create_sregs(NVVM_SREG[threadIdX]);
607 create_sregs(NVVM_SREG[threadIdY]);
608 create_sregs(NVVM_SREG[threadIdZ]);
609 /* Create block id sreg symbols */
610 create_sregs(NVVM_SREG[blockIdX]);
611 create_sregs(NVVM_SREG[blockIdY]);
612 create_sregs(NVVM_SREG[blockIdZ]);
613 /* Create block id sreg symbols */
614 create_sregs(NVVM_SREG[blockDimX]);
615 create_sregs(NVVM_SREG[blockDimY]);
616 create_sregs(NVVM_SREG[blockDimZ]);
617 /* Create block id sreg symbols */
618 create_sregs(NVVM_SREG[gridDimX]);
619 create_sregs(NVVM_SREG[gridDimY]);
620 create_sregs(NVVM_SREG[gridDimZ]);
621 // todo create others nvvm things too
622 create_sregs(NVVM_SREG[warpSize]);
623
624 /* Create llvm intrinsics symbols */
625 init_nvvm_intrinsics = create_nvvm_sym(NVVM_INTRINSICS[barrier0], DT_NONE);
626 create_nvvm_sym(NVVM_INTRINSICS[barrier], DT_NONE);
627 }
628
629 int
ompaccel_nvvm_get(nvvm_sregs sreg)630 ompaccel_nvvm_get(nvvm_sregs sreg)
631 {
632 SPTR sptr = SPTR(init_nvvm_syms + sreg);
633 ll_make_ftn_outlined_params(sptr, 0, nullptr);
634 ll_process_routine_parameters(sptr);
635 return ll_ad_outlined_func2(IL_DFRIR, IL_JSR, sptr, 0, nullptr);
636 }
637
638 int
ompaccel_nvvm_mk_barrier(nvvm_barriers btype)639 ompaccel_nvvm_mk_barrier(nvvm_barriers btype)
640 {
641 SPTR sptr;
642 if (btype == CTA_BARRIER) {
643 sptr = (SPTR)(init_nvvm_intrinsics + barrier0);
644 ll_make_ftn_outlined_params(sptr, 0, 0);
645 ll_process_routine_parameters(sptr);
646 return ll_ad_outlined_func2(IL_NONE, IL_JSR, sptr, 0, nullptr);
647 }
648 static_assert(true, "Other nvvm intrinsics are not implemented yet.");
649 }
650
651 int
ompaccel_nvvm_get_gbl_tid()652 ompaccel_nvvm_get_gbl_tid()
653 {
654 int ilix, iliy, iliz;
655 ilix = ad2ili(IL_ISUB, ompaccel_nvvm_get(blockDimX), ad_icon(32));
656 ilix = ad2ili(IL_IMUL, ompaccel_nvvm_get(blockIdX), ilix);
657
658 iliy = ad2ili(IL_ISUB, ompaccel_nvvm_get(warpSize), ad_icon(1));
659 iliy = ad2ili(IL_XOR, iliy, ad_icon(-1));
660 iliz = ad2ili(IL_ISUB, ompaccel_nvvm_get(blockDimX), ad_icon(1));
661 iliz = ad2ili(IL_AND, iliy, iliz);
662 iliz = ad2ili(IL_ISUB, iliz, ad_icon(1));
663 iliz = ad2ili(IL_AND, iliz, ompaccel_nvvm_get(threadIdX));
664
665 iliy = ad2ili(IL_IADD, iliz, ilix);
666 return iliy;
667 }
668
669 void
ompaccel_tinfo_current_set_mode(OMP_TARGET_MODE type)670 ompaccel_tinfo_current_set_mode(OMP_TARGET_MODE type)
671 {
672 current_tinfo->mode = type;
673 }
674
675 void
ompaccel_tinfo_set_mode_next_target(OMP_TARGET_MODE type)676 ompaccel_tinfo_set_mode_next_target(OMP_TARGET_MODE type)
677 {
678 NextTargetMode = type;
679 }
680
681 OMP_TARGET_MODE
ompaccel_tinfo_current_target_mode()682 ompaccel_tinfo_current_target_mode()
683 {
684 return current_tinfo->mode;
685 }
686
687 OMPACCEL_TINFO *
ompaccel_tinfo_create(SPTR func_sptr,int max_nargs)688 ompaccel_tinfo_create(SPTR func_sptr, int max_nargs)
689 {
690 OMPACCEL_TINFO *info;
691 if (DBGBIT(61, 0x10) && gbl.dbgfil != nullptr)
692 fprintf(gbl.dbgfil, "#target add request for sptr:%d [%s]\n", func_sptr,
693 SYMNAME(func_sptr));
694
695 NEW(info, OMPACCEL_TINFO, 1);
696 info->func_sptr = func_sptr;
697 info->n_symbols = 0;
698 if (max_nargs != 0) {
699 NEW(info->symbols, OMPACCEL_SYM, max_nargs);
700 NEW(info->quiet_symbols, OMPACCEL_SYM, max_nargs);
701 } else {
702 info->symbols = nullptr;
703 info->quiet_symbols = nullptr;
704 }
705 info->sz_symbols = info->sz_quiet_symbols = max_nargs;
706 info->mode = NextTargetMode;
707 NextTargetMode = mode_none_target;
708 info->nowait = false;
709 info->n_quiet_symbols = 0;
710 NEW(info->reduction_symbols, OMPACCEL_RED_SYM, tinfo_size_reductions);
711 info->n_reduction_symbols = 0;
712
713 /* add ot to array */
714 NEED(num_tinfos + 1, tinfos, OMPACCEL_TINFO *, tinfo_size,
715 tinfo_size * INC_EXP);
716 tinfos[num_tinfos++] = info;
717
718 /* linking */
719 if (current_tinfo != nullptr)
720 info->parent_tinfo = current_tinfo;
721 else
722 info->parent_tinfo = nullptr;
723 current_tinfo = info;
724 return info;
725 }
726
727 bool
ompaccel_tinfo_has(int func_sptr)728 ompaccel_tinfo_has(int func_sptr)
729 {
730 for (int i = 0; i < num_tinfos; ++i) {
731 if (tinfos[i]->func_sptr == func_sptr) {
732 return true;
733 }
734 }
735 return false;
736 }
737
738 OMPACCEL_TINFO *
ompaccel_tinfo_get(int func_sptr)739 ompaccel_tinfo_get(int func_sptr)
740 {
741 int i;
742 for (i = 0; i < num_tinfos; ++i) {
743 if (tinfos[i]->func_sptr == func_sptr) {
744 return tinfos[i];
745 }
746 }
747 return nullptr;
748 }
749
750 SPTR
ompaccel_create_device_symbol(SPTR sptr,int count)751 ompaccel_create_device_symbol(SPTR sptr, int count)
752 {
753 SPTR sym, sptr_alloc;
754 char name[252];
755 DTYPE dtype = DTYPEG(sptr);
756 bool byval;
757 if (DTYPEG(sptr) == DT_ADDR || DTY(DTYPEG(sptr)) == TY_ARRAY)
758 byval = false;
759 else
760 byval = true;
761 if (byval) {
762 sprintf(name, "Arg_%s_%d", SYMNAME(sptr), count);
763 } else {
764 if (strlen(SYMNAME(sptr)) == 0)
765 sprintf(name, "Arg_%s%d", SYMNAME(sptr), count);
766 else
767 sprintf(name, "Arg_%s", SYMNAME(sptr));
768 }
769 sym = getsymbol(name);
770
771 SCP(sym, SC_DUMMY);
772
773 if (dtype == DT_CPTR) {
774 dtype = DT_INT8;
775 }
776 // assume it's base of allocatable descriptor
777 if (strncmp(SYMNAME(sptr), ".Z", 2) == 0) {
778 for (int j = 0; j < current_tinfo->n_quiet_symbols; ++j)
779 if (MIDNUMG(current_tinfo->quiet_symbols[j].host_sym) == sptr)
780 sptr_alloc = current_tinfo->quiet_symbols[j].host_sym;
781 byval = false;
782 DTYPEP(sym, DTYPE(DTYPEG(sptr_alloc) + 1));
783 sptr_alloc = ((SPTR)0);
784
785 } else {
786 DTYPEP(sym, dtype);
787 }
788 STYPEP(sym, ST_VAR);
789 PASSBYVALP(sym, byval);
790
791 OMPACCDEVSYMP(sym, TRUE);
792 return sym;
793 }
794
795 INLINE static SPTR
add_symbol_to_function(SPTR func,SPTR sym)796 add_symbol_to_function(SPTR func, SPTR sym)
797 {
798 int dpdscp, paramct;
799 paramct = PARAMCTG(func);
800 paramct += 1;
801 aux.dpdsc_base[paramct] = sym;
802 PARAMCTP(func, paramct);
803 aux.dpdsc_avl += 1;
804 }
805
806 INLINE static SPTR
get_devsptr(OMPACCEL_TINFO * tinfo,SPTR host_symbol)807 get_devsptr(OMPACCEL_TINFO *tinfo, SPTR host_symbol)
808 {
809 int i;
810 if (tinfo == nullptr)
811 return host_symbol;
812
813 for (i = 0; i < tinfo->n_symbols; ++i) {
814 if (tinfo->symbols[i].host_sym == host_symbol) {
815 if (tinfo->symbols[i].device_sym == NOSYM) {
816 /* It is second case that we catch the symbols in target region from the
817 * ILM. In case there is a symbol that has no device symbol created, we
818 * should create device symbol for it also we should add it function
819 * parameter. */
820 tinfo->symbols[i].device_sym =
821 ompaccel_create_device_symbol(tinfo->symbols[i].host_sym, 1);
822 add_symbol_to_function(tinfo->func_sptr, tinfo->symbols[i].device_sym);
823 }
824 return tinfo->symbols[i].device_sym;
825 }
826 }
827 return host_symbol;
828 }
829
830 INLINE static SPTR
get_devsptr2(OMPACCEL_TINFO * tinfo,SPTR host_symbol)831 get_devsptr2(OMPACCEL_TINFO *tinfo, SPTR host_symbol)
832 {
833 int i;
834 for (i = 0; i < tinfo->n_symbols; ++i) {
835 if (tinfo->symbols[i].device_sym == host_symbol) {
836 return tinfo->symbols[i].host_sym;
837 }
838 }
839 return host_symbol;
840 }
841
842 OMPACCEL_TINFO *
ompaccel_tinfo_current_get_targetdata()843 ompaccel_tinfo_current_get_targetdata()
844 {
845 OMPACCEL_TINFO *tinfo = current_tinfo;
846 while (tinfo != nullptr) {
847 if (tinfo->mode == mode_target_data_region)
848 return tinfo;
849 if (tinfo->parent_tinfo == nullptr)
850 break;
851 tinfo = tinfo->parent_tinfo;
852 }
853 ompaccel_msg_interr("XXX", "Beginning of 'target data' is not found. ");
854 return nullptr;
855 }
856
857 OMPACCEL_TINFO *
ompaccel_tinfo_current_get()858 ompaccel_tinfo_current_get()
859 {
860 return current_tinfo;
861 }
862
863 DTYPE
ompaccel_tinfo_current_get_dev_dtype(DTYPE org_dtype)864 ompaccel_tinfo_current_get_dev_dtype(DTYPE org_dtype)
865 {
866 int i;
867 DTYPE dev_dtype = org_dtype;
868 if (current_tinfo != nullptr) {
869 for (i = 0; i < current_tinfo->n_quiet_symbols; ++i) {
870 if (DTYPEG(current_tinfo->quiet_symbols[i].host_sym) == org_dtype) {
871 dev_dtype = DTYPEG(current_tinfo->quiet_symbols[i].device_sym);
872 break;
873 }
874 }
875
876 for (i = 0; i < current_tinfo->n_symbols; ++i) {
877 if (DTYPEG(current_tinfo->symbols[i].host_sym) == org_dtype) {
878 dev_dtype = DTYPEG(current_tinfo->symbols[i].device_sym);
879 break;
880 }
881 }
882 }
883 if (DBGBIT(61, 2) && gbl.dbgfil != nullptr) {
884 if (org_dtype != dev_dtype) {
885 fprintf(gbl.dbgfil, "[ompaccel] REPLACED org_dtype:%d --> dev_dtype:%d",
886 org_dtype, dev_dtype);
887 }
888 }
889 return dev_dtype;
890 }
891
892 SPTR
ompaccel_tinfo_parent_get_devsptr(SPTR host_symbol)893 ompaccel_tinfo_parent_get_devsptr(SPTR host_symbol)
894 {
895 int i;
896 if (current_tinfo->parent_tinfo == nullptr)
897 return host_symbol;
898 for (i = 0; i < current_tinfo->parent_tinfo->n_quiet_symbols; ++i) {
899 if (current_tinfo->parent_tinfo->quiet_symbols[i].host_sym == host_symbol) {
900 return current_tinfo->parent_tinfo->quiet_symbols[i].device_sym;
901 }
902 }
903 return host_symbol;
904 }
905
906 bool
ompaccel_tinfo_current_is_registered(SPTR host_symbol)907 ompaccel_tinfo_current_is_registered(SPTR host_symbol)
908 {
909 int i;
910 if (current_tinfo == nullptr || !host_symbol)
911 return false;
912
913 for (i = 0; i < current_tinfo->n_symbols; ++i) {
914 if (current_tinfo->symbols[i].host_sym == host_symbol) {
915 return true;
916 }
917 }
918 return false;
919 }
920
921 SPTR
ompaccel_tinfo_current_get_devsptr(SPTR host_symbol)922 ompaccel_tinfo_current_get_devsptr(SPTR host_symbol)
923 {
924 SPTR device_symbol;
925 if (current_tinfo == nullptr || !host_symbol)
926 return host_symbol;
927
928 device_symbol = get_devsptr(current_tinfo, host_symbol);
929
930 if (device_symbol == host_symbol && current_tinfo->parent_tinfo != nullptr)
931 device_symbol = get_devsptr2(current_tinfo->parent_tinfo, host_symbol);
932
933 if ((DBGBIT(61, 2)) && gbl.dbgfil != nullptr &&
934 device_symbol != host_symbol) {
935 fprintf(gbl.dbgfil,
936 "[ompaccel] REPLACED host_symbol:%d[%s] --> device_symbol:%d[%s]",
937 host_symbol, SYMNAME(host_symbol), device_symbol,
938 SYMNAME(device_symbol));
939 fprintf(gbl.dbgfil, "\n");
940 }
941
942 return device_symbol;
943 }
944
945 static bool
tinfo_update_maptype(OMPACCEL_SYM * tsyms,int nargs,SPTR host_symbol,int map_type)946 tinfo_update_maptype(OMPACCEL_SYM *tsyms, int nargs, SPTR host_symbol,
947 int map_type)
948 {
949 int i;
950 for (i = 0; i < nargs; ++i) {
951 if (tsyms[i].host_sym == host_symbol) {
952 tsyms[i].map_type = map_type;
953 if (STYPEG(tsyms[i].host_sym) != ST_ARRAY) {
954 /* if scalar variables are used in map clause, pass them by reference */
955 if (map_type & OMP_TGT_MAPTYPE_FROM || map_type & OMP_TGT_MAPTYPE_TO)
956 PASSBYREFP(tsyms[i].device_sym, 1);
957 PASSBYVALP(tsyms[i].device_sym, 0);
958 }
959 return true;
960 }
961 }
962 return false;
963 }
964
965 void
ompaccel_tinfo_current_add_reductionitem(SPTR private_sym,SPTR shared_sym,int redop)966 ompaccel_tinfo_current_add_reductionitem(SPTR private_sym, SPTR shared_sym,
967 int redop)
968 {
969 if (current_tinfo == nullptr)
970 ompaccel_msg_interr("XXX", "Current target info is not found.\n");
971
972 current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols]
973 .private_sym = private_sym;
974 current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols]
975 .shared_sym = shared_sym;
976 current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols].redop =
977 redop;
978 current_tinfo->n_reduction_symbols++;
979 // it is initially created pass by value. It should be address, it should be
980 // copied back to the host.
981 PASSBYVALP(private_sym, 0);
982
983 /* Mark reduction variable as tofrom */
984 if (ompaccel_tinfo_current_target_mode() ==
985 mode_target_teams_distribute_parallel_for ||
986 ompaccel_tinfo_current_target_mode() ==
987 mode_target_teams_distribute_parallel_for_simd)
988 ompaccel_tinfo_current_addupdate_mapitem((SPTR)HASHLKG(private_sym),
989 OMP_TGT_MAPTYPE_TARGET_PARAM |
990 OMP_TGT_MAPTYPE_TO |
991 OMP_TGT_MAPTYPE_FROM);
992 else
993 ompaccel_tinfo_current_addupdate_mapitem((SPTR)HASHLKG(shared_sym),
994 OMP_TGT_MAPTYPE_TARGET_PARAM |
995 OMP_TGT_MAPTYPE_TO |
996 OMP_TGT_MAPTYPE_FROM);
997 }
998
999 void
ompaccel_tinfo_current_addupdate_mapitem(SPTR host_symbol,int map_type)1000 ompaccel_tinfo_current_addupdate_mapitem(SPTR host_symbol, int map_type)
1001 {
1002 SPTR midsptr;
1003 if (current_tinfo == nullptr)
1004 ompaccel_msg_interr("XXX", "Current target info is not found\n");
1005
1006 // check whether it is allocatable or not
1007 if (SCG(host_symbol) == SC_BASED) {
1008 /* if it is in data mode, we should keep midnum at active symbols*/
1009 if (current_tinfo->mode == mode_target_data_enter_region ||
1010 current_tinfo->mode == mode_target_data_exit_region ||
1011 current_tinfo->mode == mode_target_data_region) {
1012 midsptr = (SPTR)MIDNUMG(host_symbol);
1013 if (!tinfo_update_maptype(current_tinfo->symbols,
1014 current_tinfo->n_symbols, midsptr, map_type))
1015 ompaccel_tinfo_current_add_sym(midsptr, NOSYM, map_type);
1016 }
1017 /* Main argument will be kept at passive */
1018 if (!tinfo_update_maptype(current_tinfo->quiet_symbols,
1019 current_tinfo->n_quiet_symbols, host_symbol,
1020 map_type))
1021 ompaccel_tinfo_current_add_sym(host_symbol, NOSYM, map_type);
1022 } else {
1023 if (!tinfo_update_maptype(current_tinfo->symbols, current_tinfo->n_symbols,
1024 host_symbol, map_type))
1025 ompaccel_tinfo_current_add_sym(host_symbol, NOSYM, map_type);
1026 }
1027 }
1028
1029 void
ompaccel_tinfo_current_add_sym(SPTR host_symbol,SPTR device_symbol,int map_type)1030 ompaccel_tinfo_current_add_sym(SPTR host_symbol, SPTR device_symbol,
1031 int map_type)
1032 {
1033 if ((MIDNUMG(host_symbol) && SCG(host_symbol) == SC_BASED)) {
1034 NEED((current_tinfo->n_quiet_symbols + 1), current_tinfo->quiet_symbols,
1035 OMPACCEL_SYM, current_tinfo->sz_quiet_symbols,
1036 current_tinfo->sz_quiet_symbols * INC_EXP);
1037 current_tinfo->quiet_symbols[current_tinfo->n_quiet_symbols].host_sym =
1038 host_symbol;
1039 current_tinfo->quiet_symbols[current_tinfo->n_quiet_symbols].device_sym =
1040 device_symbol;
1041 current_tinfo->quiet_symbols[current_tinfo->n_quiet_symbols].map_type =
1042 map_type;
1043 current_tinfo->n_quiet_symbols++;
1044 } else {
1045 NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM,
1046 current_tinfo->sz_symbols, current_tinfo->sz_symbols * INC_EXP);
1047 current_tinfo->symbols[current_tinfo->n_symbols].host_sym = host_symbol;
1048 current_tinfo->symbols[current_tinfo->n_symbols].device_sym = device_symbol;
1049 current_tinfo->symbols[current_tinfo->n_symbols].map_type = map_type;
1050 current_tinfo->n_symbols++;
1051 }
1052 }
1053
1054 INLINE static void
dumptargetsym(OMPACCEL_SYM targetsym)1055 dumptargetsym(OMPACCEL_SYM targetsym)
1056 {
1057 const char *dev_sptr_name, *org_sptr_name;
1058 if (gbl.dbgfil == nullptr)
1059 return;
1060
1061 dev_sptr_name =
1062 targetsym.device_sym == NOSYM ? "NOSYM" : SYMNAME(targetsym.device_sym);
1063 org_sptr_name =
1064 targetsym.host_sym == NOSYM ? "NOSYM" : SYMNAME(targetsym.host_sym);
1065
1066 fprintf(gbl.dbgfil,
1067 "\t(org:%d[%s], dev:%d[%s], map-type: ", targetsym.host_sym,
1068 org_sptr_name, targetsym.device_sym, dev_sptr_name);
1069
1070 if (targetsym.map_type & OMP_TGT_MAPTYPE_ALWAYS)
1071 fprintf(gbl.dbgfil, "always ");
1072 if (targetsym.map_type & OMP_TGT_MAPTYPE_TO)
1073 fprintf(gbl.dbgfil, "to ");
1074 if (targetsym.map_type & OMP_TGT_MAPTYPE_FROM)
1075 fprintf(gbl.dbgfil, "from ");
1076 if (targetsym.map_type & OMP_TGT_MAPTYPE_DELETE)
1077 fprintf(gbl.dbgfil, "delete ");
1078 if (targetsym.map_type & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
1079 fprintf(gbl.dbgfil, "ptr_obj ");
1080 if (targetsym.map_type & OMP_TGT_MAPTYPE_TARGET_PARAM)
1081 fprintf(gbl.dbgfil, "target ");
1082 if (targetsym.map_type & OMP_TGT_MAPTYPE_RETURN_PARAM)
1083 fprintf(gbl.dbgfil, "return ");
1084 if (targetsym.map_type & OMP_TGT_MAPTYPE_PRIVATE)
1085 fprintf(gbl.dbgfil, "private ");
1086 if (targetsym.map_type & OMP_TGT_MAPTYPE_LITERAL)
1087 fprintf(gbl.dbgfil, "literal ");
1088 if (targetsym.map_type & OMP_TGT_MAPTYPE_IMPLICIT)
1089 fprintf(gbl.dbgfil, "implicit ");
1090 if (targetsym.map_type & OMP_TGT_MAPTYPE_MEMBER_OF)
1091 fprintf(gbl.dbgfil, "member ");
1092 if (targetsym.map_type & OMP_TGT_MAPTYPE_NONE)
1093 fprintf(gbl.dbgfil, "none ");
1094 fprintf(gbl.dbgfil, " )\n");
1095 }
1096
1097 INLINE static void
dumptargetreduction(OMPACCEL_RED_SYM targetred)1098 dumptargetreduction(OMPACCEL_RED_SYM targetred)
1099 {
1100 if (gbl.dbgfil == nullptr)
1101 return;
1102 switch (targetred.redop) {
1103 case 1:
1104 fprintf(gbl.dbgfil, "+: ");
1105 break;
1106 case 2:
1107 fprintf(gbl.dbgfil, "-: ");
1108 break;
1109 case 3:
1110 fprintf(gbl.dbgfil, "*: ");
1111 break;
1112 case 346:
1113 fprintf(gbl.dbgfil, "max:");
1114 break;
1115 case 347:
1116 fprintf(gbl.dbgfil, "min:");
1117 break;
1118 case 327:
1119 fprintf(gbl.dbgfil, "iand:");
1120 break;
1121 case 328:
1122 fprintf(gbl.dbgfil, "ior:");
1123 break;
1124 case 329:
1125 fprintf(gbl.dbgfil, "ieor:");
1126 break;
1127 case 14:
1128 fprintf(gbl.dbgfil, "???:");
1129 break;
1130 default:
1131 break;
1132 }
1133
1134 fprintf(gbl.dbgfil, "\t(shared:%d[%s], private:%d[%s] \n",
1135 targetred.shared_sym, SYMNAME(targetred.shared_sym),
1136 targetred.private_sym, SYMNAME(targetred.private_sym));
1137 }
1138
1139 void
dumpomptarget(OMPACCEL_TINFO * tinfo)1140 dumpomptarget(OMPACCEL_TINFO *tinfo)
1141 {
1142 if (tinfo == nullptr)
1143 return;
1144 if (gbl.dbgfil == nullptr)
1145 return;
1146
1147 switch (tinfo->mode) {
1148 case mode_none_target:
1149
1150 fprintf(gbl.dbgfil, " <mode none>");
1151 break;
1152 case mode_target:
1153 fprintf(gbl.dbgfil, " <target>");
1154 break;
1155 case mode_target_teams:
1156 fprintf(gbl.dbgfil, " <target teams>");
1157 break;
1158 case mode_target_parallel_for_simd:
1159 fprintf(gbl.dbgfil, " <target parallel for simd>");
1160 break;
1161 case mode_target_parallel_for:
1162 fprintf(gbl.dbgfil, " <target parallel for>");
1163 break;
1164 case mode_target_teams_distribute_parallel_for:
1165 fprintf(gbl.dbgfil, " <target teams distribute parallel for>");
1166 break;
1167 case mode_target_teams_distribute_parallel_for_simd:
1168 fprintf(gbl.dbgfil, " <target teams distribute parallel for simd>");
1169 break;
1170 case mode_target_teams_distribute:
1171 fprintf(gbl.dbgfil, " <target teams distribute >");
1172 break;
1173 case mode_target_data_region:
1174 fprintf(gbl.dbgfil, " <target data>");
1175 break;
1176 case mode_target_data_enter_region:
1177 fprintf(gbl.dbgfil, " <target data enter>");
1178 break;
1179 case mode_target_data_exit_region:
1180 fprintf(gbl.dbgfil, " <target data exit>");
1181 break;
1182 }
1183 fprintf(gbl.dbgfil, " \n");
1184 //}
1185
1186 if ((tinfo->mode != mode_target_data_region) &&
1187 (tinfo->mode != mode_target_data_enter_region) &&
1188 (tinfo->mode != mode_target_data_exit_region)) {
1189 if (OMPACCFUNCKERNELG(tinfo->func_sptr))
1190 fprintf(gbl.dbgfil, " (__global__) ");
1191 else if (OMPACCFUNCDEVG(tinfo->func_sptr))
1192 fprintf(gbl.dbgfil, " (__device__) ");
1193 else
1194 fprintf(gbl.dbgfil, " ??? ");
1195 fprintf(gbl.dbgfil, "%s\t sptr: %d \n", SYMNAME(tinfo->func_sptr),
1196 tinfo->func_sptr);
1197 }
1198
1199 fprintf(gbl.dbgfil, " ** Active Symbols ** \n");
1200 for (int j = 0; j < tinfo->n_symbols; ++j) {
1201 dumptargetsym(tinfo->symbols[j]);
1202 }
1203 fprintf(gbl.dbgfil, " ** Passive Symbols ** \n");
1204 for (int j = 0; j < tinfo->n_quiet_symbols; ++j) {
1205 dumptargetsym(tinfo->quiet_symbols[j]);
1206 }
1207 fprintf(gbl.dbgfil, " ** Reductions ** \n");
1208 for (int j = 0; j < tinfo->n_reduction_symbols; ++j) {
1209 dumptargetreduction(tinfo->reduction_symbols[j]);
1210 }
1211 fprintf(gbl.dbgfil, "\n");
1212 }
1213
1214 void
dumpomptargets()1215 dumpomptargets()
1216 {
1217 int i, j;
1218 if (gbl.dbgfil == NULL)
1219 return;
1220 fprintf(gbl.dbgfil, "------------OpenMP Target Regions ---------------\n");
1221 for (i = 0; i < num_tinfos; ++i) {
1222 dumpomptarget(tinfos[i]);
1223 }
1224 }
1225
1226 void
dumpomptargetsymbols()1227 dumpomptargetsymbols()
1228 {
1229 int i, l, u;
1230 l = stb.firstusym;
1231 u = stb.stg_avail - 1;
1232 if (u >= stb.stg_avail)
1233 u = stb.stg_avail - 1;
1234 for (i = l; i <= u; ++i) {
1235 if (OMPACCDEVSYMG(i))
1236 fprintf(gbl.dbgfil, "(sym) sptr:%d [%s]\n", i, SYMNAME(i));
1237 if (OMPACCFUNCDEVG(i))
1238 fprintf(gbl.dbgfil, "(func) sptr:%d [%s]\n", i, SYMNAME(i));
1239 if (OMPACCFUNCKERNELG(i))
1240 fprintf(gbl.dbgfil, "(kernel) sptr:%d [%s]\n", i, SYMNAME(i));
1241 }
1242 }
1243
1244 void
dumptargetsymbols(OMPACCEL_SYM * targetsyms,int n)1245 dumptargetsymbols(OMPACCEL_SYM *targetsyms, int n)
1246 {
1247 for (int i = 0; i < n; ++i) {
1248 dumptargetsym(targetsyms[i]);
1249 }
1250 }
1251
1252 void
ompaccel_msg_interr(char * id,const char * message)1253 ompaccel_msg_interr(char *id, const char *message)
1254 {
1255 interr(message, MSGOMPACCEL, ERR_Fatal);
1256 }
1257
1258 void
ompaccel_msg_info(char * id,const char * message)1259 ompaccel_msg_info(char *id, const char *message)
1260 {
1261 ccff_info(MSGOMPACCEL, id, gbl.findex, gbl.lineno, message, NULL);
1262 }
1263
1264 bool
ompaccel_is_tgt_registered()1265 ompaccel_is_tgt_registered()
1266 {
1267 return isOmpaccelRegistered;
1268 }
1269
1270 void
ompaccel_register_tgt()1271 ompaccel_register_tgt()
1272 {
1273 isOmpaccelRegistered = true;
1274 }
1275
1276 void
ompaccel_emit_tgt_register()1277 ompaccel_emit_tgt_register()
1278 {
1279 int ilix;
1280 SPTR sptrFn;
1281 char *name = "ompaccel.register";
1282 sptrFn = mk_ompaccel_function(name, 0, NULL, false);
1283 CONSTRUCTORP(sptrFn, 1);
1284 TEXTSTARTUPP(sptrFn, 1);
1285 PRIORITYP(sptrFn, 65535 /* LLVM_DEFAULT_PRIORITY */);
1286 cr_block();
1287 ilix = ll_make_tgt_register_lib();
1288 iltb.callfg = 1;
1289 chk_block(ilix);
1290 wr_block();
1291 mk_ompaccel_function_end(sptrFn);
1292 }
1293
1294 SPTR
ompaccel_nvvm_emit_reduce(OMPACCEL_RED_SYM * ReductionItems,int NumReductions)1295 ompaccel_nvvm_emit_reduce(OMPACCEL_RED_SYM *ReductionItems, int NumReductions)
1296 {
1297 int ili, bili, rili;
1298 SPTR sptrFn, sptrRhs, sptrReduceData, func_params[2];
1299 DTYPE dtypeReductionItem, dtypeReduceData;
1300 int nmeReduceData, nmeRhs;
1301 int params_dtypes[2] = {DT_ADDR, DT_ADDR};
1302 char name[30];
1303
1304 /* Generate function parameters */
1305 dtypeReduceData = get_type(2, TY_PTR, DT_ANY);
1306 sptrReduceData = func_params[0] =
1307 mk_ompaccel_addsymbol(".reducedata", dtypeReduceData, SC_DUMMY, ST_ARRAY);
1308 sptrRhs = func_params[1] =
1309 mk_ompaccel_addsymbol(".rhs", dtypeReduceData, SC_DUMMY, ST_VAR);
1310
1311 /* Generate function symbol */
1312 sprintf(name, "%s%d", "ompaccel_reduction", reductionFunctionCounter++);
1313 sptrFn = mk_ompaccel_function(name, 2, func_params, true);
1314 cr_block();
1315
1316 for (int i = 0; i < NumReductions; ++i) {
1317 dtypeReductionItem = DTYPEG(ReductionItems[i].shared_sym);
1318
1319 bili = mk_ompaccel_ldsptr(sptrReduceData);
1320 rili = mk_ompaccel_ldsptr(sptrRhs);
1321
1322 if (i != 0) {
1323 bili = mk_ompaccel_add(bili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1324 DT_ADDR);
1325 rili = mk_ompaccel_add(rili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1326 DT_ADDR);
1327 }
1328
1329 nmeReduceData =
1330 add_arrnme(NT_IND, SPTR_NULL, addnme(NT_VAR, sptrReduceData, 0, 0), 0,
1331 ad_icon(i), FALSE);
1332 nmeRhs =
1333 add_arrnme(NT_IND, SPTR_NULL,
1334 addnme(NT_IND, SPTR_NULL, addnme(NT_VAR, sptrRhs, 0, 0), 0),
1335 i, ad_icon(i), FALSE);
1336
1337 bili =
1338 mk_ompaccel_load(bili, DT_ADDR, addnme(NT_VAR, sptrReduceData, 0, 0));
1339 rili = mk_ompaccel_load(rili, DT_ADDR, addnme(NT_VAR, sptrRhs, 0, 0));
1340
1341 rili = mk_ompaccel_load(rili, dtypeReductionItem, nmeRhs);
1342 ili = mk_ompaccel_load(bili, dtypeReductionItem, nmeReduceData);
1343
1344 ili = mk_reduction_op(ReductionItems[i].redop, ili, dtypeReductionItem,
1345 rili, dtypeReductionItem);
1346
1347 ili = mk_ompaccel_store(ili, dtypeReductionItem, nmeReduceData, bili);
1348 chk_block(ili);
1349 }
1350 wr_block();
1351 mk_ompaccel_function_end(sptrFn);
1352 return sptrFn;
1353 }
1354
1355 SPTR
ompaccel_nvvm_emit_shuffle_reduce(OMPACCEL_RED_SYM * ReductionItems,int NumReductions,SPTR sptrFnReduce)1356 ompaccel_nvvm_emit_shuffle_reduce(OMPACCEL_RED_SYM *ReductionItems,
1357 int NumReductions, SPTR sptrFnReduce)
1358 {
1359 int ili, rili, bili;
1360 SPTR sptrFn, sptrRhs, sptrReduceData, sptrShuffleReturn, sptrLaneOffset,
1361 func_params[4];
1362 DTYPE dtypeReductionItem, dtypeReduceData, dtypeRHS;
1363 int nmeReduceData, nmeRhs, params[2];
1364 char name[30];
1365 DTYPE params_dtypes[2] = {DT_ADDR, DT_ADDR};
1366
1367 /* Generate function parameters */
1368 dtypeReduceData = get_type(2, TY_PTR, DT_ANY);
1369 sptrReduceData = func_params[0] = mk_ompaccel_addsymbol(
1370 ".reducedata2", dtypeReduceData, SC_DUMMY, ST_ARRAY);
1371 func_params[1] = mk_ompaccel_addsymbol(".laneid", DT_SINT, SC_DUMMY, ST_VAR);
1372 sptrLaneOffset = func_params[2] =
1373 mk_ompaccel_addsymbol(".laneoffset", DT_SINT, SC_DUMMY, ST_VAR);
1374 func_params[3] =
1375 mk_ompaccel_addsymbol(".shortcircuit", DT_SINT, SC_DUMMY, ST_VAR);
1376 PASSBYVALP(func_params[1], 1);
1377 PASSBYVALP(func_params[2], 1);
1378 PASSBYVALP(func_params[3], 1);
1379
1380 /* Generate function symbol */
1381 sprintf(name, "%s%d", "ompaccel_shufflereduce", reductionFunctionCounter++);
1382 sptrFn = mk_ompaccel_function(name, 4, func_params, true);
1383 cr_block();
1384
1385 dtypeRHS = mk_ompaccel_array_dtype(dtypeReduceData, NumReductions);
1386 sptrRhs = mk_ompaccel_addsymbol(".rhs", dtypeRHS, SC_LOCAL, ST_ARRAY);
1387
1388 for (int i = 0; i < NumReductions; ++i) {
1389
1390 dtypeReductionItem = DTYPEG(ReductionItems[i].shared_sym);
1391 sptrShuffleReturn =
1392 mk_ompaccel_getnewccsym('r', i, dtypeReductionItem, SC_LOCAL, ST_VAR);
1393
1394 bili = mk_ompaccel_ldsptr(sptrReduceData);
1395 rili = mk_address(sptrRhs);
1396
1397 nmeReduceData =
1398 add_arrnme(NT_IND, SPTR_NULL, addnme(NT_VAR, sptrReduceData, 0, 0), i,
1399 ad_icon(i), FALSE);
1400
1401 if (i != 0) {
1402 rili = mk_ompaccel_add(rili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1403 DT_ADDR);
1404 bili = mk_ompaccel_add(bili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1405 DT_ADDR);
1406 }
1407
1408 ili = mk_ompaccel_load(bili, DT_ADDR, nmeReduceData);
1409 ili = mk_ompaccel_load(ili, dtypeReductionItem, nmeReduceData);
1410
1411 if (dtypeReductionItem == DT_DBLE)
1412 ili =
1413 ll_make_kmpc_shuffle(ili, mk_ompaccel_ldsptr(func_params[2]),
1414 ad_icon(size_of(dtypeReductionItem) * 8), true);
1415 else
1416 ili =
1417 ll_make_kmpc_shuffle(ili, mk_ompaccel_ldsptr(func_params[2]),
1418 ad_icon(size_of(dtypeReductionItem) * 8), false);
1419
1420 ili = mk_ompaccel_store(ili, dtypeReductionItem,
1421 addnme(NT_VAR, sptrShuffleReturn, 0, 0),
1422 ad_acon(sptrShuffleReturn, 0));
1423 chk_block(ili);
1424
1425 nmeRhs = add_arrnme(NT_ARR, NME_NULL, addnme(NT_VAR, sptrRhs, 0, 0), i,
1426 ad_icon(i), FALSE);
1427 ili =
1428 mk_ompaccel_store(ad_acon(sptrShuffleReturn, 0), DT_ADDR, nmeRhs, rili);
1429
1430 chk_block(ili);
1431 }
1432
1433 params[0] = mk_address(sptrRhs);
1434 params[1] = mk_address(sptrReduceData);
1435
1436 /* Call reduce function */
1437 ili = mk_function_call(DT_NONE, 2, params_dtypes, params, sptrFnReduce);
1438
1439 /* Write to block */
1440 iltb.callfg = 1;
1441 chk_block(ili);
1442
1443 wr_block();
1444 mk_ompaccel_function_end(sptrFn);
1445
1446 return sptrFn;
1447 }
1448
1449 /**
1450 \brief This function emits code that gathers reduce_data from the first lane
1451 of every active warp to lanes in the first warp.
1452 */
1453 SPTR
ompaccel_nvvm_emit_inter_warp_copy(OMPACCEL_RED_SYM * ReductionItems,int NumReductions)1454 ompaccel_nvvm_emit_inter_warp_copy(OMPACCEL_RED_SYM *ReductionItems,
1455 int NumReductions)
1456 {
1457 int ili, rili;
1458 SPTR sptrFn, sptrReduceData, sptrWarpNum, sptrShmem, sptrWarpId,
1459 sptrMasterWarp, sptrRedItem, sptrRedItemAddress, func_params[2];
1460 SPTR lFirstLane, lBarrier, lFirstWarp, lFinalBarrier;
1461 int nmeShmem;
1462 DTYPE dtypeReductionItem;
1463 char name[30];
1464
1465 sprintf(name, "%s%d", "ompaccel_InterWarpCopy", reductionFunctionCounter++);
1466 sptrReduceData = func_params[0] = mk_ompaccel_addsymbol(
1467 ".reduceData", mk_ompaccel_array_dtype(DT_INT8, NumReductions), SC_DUMMY,
1468 ST_ARRAY);
1469 sptrWarpNum = func_params[1] =
1470 mk_ompaccel_addsymbol(".warpNum", DT_INT, SC_DUMMY, ST_VAR);
1471 PASSBYVALP(sptrWarpNum, 1);
1472 /* Generate function symbol, Create a block */
1473 sptrFn = mk_ompaccel_function(name, 2, func_params, true);
1474 cr_block();
1475
1476 sptrShmem = mk_ompaccel_addsymbol(
1477 "ompaccelshmem", mk_ompaccel_array_dtype(DT_INT8, NVVM_WARPSIZE),
1478 SC_EXTERN, ST_ARRAY);
1479 OMPACCSHMEMP(sptrShmem, true);
1480 SYMLKP(sptrShmem, gbl.externs);
1481 gbl.externs = sptrShmem;
1482
1483 /* MasterWarp */
1484 sptrMasterWarp =
1485 mk_ompaccel_addsymbol(".masterwarp", DT_INT, SC_LOCAL, ST_VAR);
1486 ili = ompaccel_nvvm_get(threadIdX);
1487 ili = mk_ompaccel_iand(ili, ad_icon(31));
1488 ili = mk_ompaccel_stsptr(ili, sptrMasterWarp);
1489
1490 chk_block(ili);
1491
1492 /* MasterWarp */
1493 sptrWarpId = mk_ompaccel_addsymbol(".warpid", DT_INT, SC_LOCAL, ST_VAR);
1494 ili = ompaccel_nvvm_get(threadIdX);
1495 ili = mk_ompaccel_shift(ili, DT_UINT, ad_icon(5), DT_UINT);
1496 ili = mk_ompaccel_stsptr(ili, sptrWarpId);
1497
1498 chk_block(ili);
1499 iltb.callfg = 1;
1500 wr_block();
1501
1502 sptrRedItem =
1503 mk_ompaccel_addsymbol(".reductionitem", DT_ADDR, SC_LOCAL, ST_VAR);
1504 sptrRedItemAddress =
1505 mk_ompaccel_addsymbol(".reductionitemaddr", DT_ADDR, SC_LOCAL, ST_VAR);
1506
1507 for (int i = 0; i < NumReductions; ++i) {
1508 cr_block();
1509 dtypeReductionItem = DTYPEG(ReductionItems[i].shared_sym);
1510 rili = mk_ompaccel_ldsptr(sptrReduceData);
1511 if (i != 0) {
1512 rili = mk_ompaccel_add(rili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1513 DT_ADDR);
1514 }
1515 rili =
1516 mk_ompaccel_load(rili, DT_ADDR, addnme(NT_VAR, sptrReduceData, 0, 0));
1517
1518 ili =
1519 mk_ompaccel_store(rili, DT_ADDR, addnme(NT_VAR, sptrRedItem, 0, (INT)0),
1520 mk_address(sptrRedItem));
1521 chk_block(ili);
1522 ili = mk_ompaccel_store(rili, DT_ADDR,
1523 addnme(NT_VAR, sptrRedItemAddress, 0, (INT)0),
1524 mk_address(sptrRedItemAddress));
1525 chk_block(ili);
1526
1527 ili = mk_ompaccel_ldsptr(sptrMasterWarp);
1528 lFirstLane = getlab();
1529 lBarrier = getlab();
1530 ili = ad3ili(IL_ICJMPZ, ili, CC_NE, lBarrier);
1531 RFCNTI(lFirstLane);
1532 chk_block(ili);
1533 iltb.callfg = 1;
1534 wr_block();
1535
1536 cr_block();
1537 exp_label(lFirstLane);
1538
1539 rili = ad1ili(IL_IKMV, mk_ompaccel_ldsptr(sptrWarpId));
1540 ili = mk_ompaccel_mul(rili, DT_INT8, ad_kconi(8), DT_INT8);
1541 ili = ad1ili(IL_KAMV, ili);
1542 ili = mk_ompaccel_add(mk_address(sptrShmem), DT_ADDR, ili, DT_ADDR);
1543 nmeShmem = add_arrnme(NT_ARR, NME_NULL, addnme(NT_VAR, sptrShmem, 0, 0), 0,
1544 rili, FALSE);
1545
1546 rili = mk_ompaccel_ldsptr(sptrRedItem);
1547 // todo ompaccel more
1548 if (dtypeReductionItem == DT_DBLE) {
1549 rili = mk_ompaccel_load(rili, DT_DBLE, addnme(NT_VAR, sptrRedItem, 0, 0));
1550 } else if (dtypeReductionItem == DT_INT) {
1551 rili = mk_ompaccel_ld(rili, addnme(NT_IND, SPTR_NULL,
1552 addnme(NT_VAR, sptrRedItem, 0, 0), 0));
1553 rili = ad1ili(IL_FLOAT, rili);
1554 } else if (dtypeReductionItem == DT_FLOAT) {
1555 rili =
1556 mk_ompaccel_load(rili, DT_FLOAT, addnme(NT_VAR, sptrRedItem, 0, 0));
1557 rili = ad1ili(IL_DBLE, rili);
1558 }
1559 ili = mk_ompaccel_store(rili, DT_DBLE, nmeShmem, ili);
1560 chk_block(ili);
1561 iltb.callfg = 1;
1562 wr_block();
1563
1564 /* Sync */
1565 cr_block();
1566 RFCNTI(lBarrier);
1567 exp_label(lBarrier);
1568 ili = ompaccel_nvvm_mk_barrier(CTA_BARRIER);
1569 iltb.callfg = 1;
1570 chk_block(ili);
1571 wr_block();
1572
1573 cr_block();
1574 ili = mk_ompaccel_ldsptr(sptrWarpNum);
1575 rili = ompaccel_nvvm_get(threadIdX);
1576 ili = mk_ompaccel_compare(rili, DT_INT, ili, DT_INT, CC_GE);
1577 lFirstWarp = getlab();
1578 lFinalBarrier = getlab();
1579 ili = ad3ili(IL_ICJMPZ, ili, CC_NE, lFinalBarrier);
1580 chk_block(ili);
1581 iltb.callfg = 1;
1582 wr_block();
1583
1584 cr_block();
1585 RFCNTI(lFirstWarp);
1586 exp_label(lFirstWarp);
1587 rili = ad1ili(IL_IKMV, ompaccel_nvvm_get(threadIdX));
1588 ili = mk_ompaccel_mul(rili, DT_INT8, ad_kconi(8), DT_INT8);
1589 ili = ad1ili(IL_KAMV, ili);
1590 ili = mk_ompaccel_add(mk_address(sptrShmem), DT_ADDR, ili, DT_ADDR);
1591 nmeShmem = add_arrnme(NT_ARR, NME_NULL, addnme(NT_VAR, sptrShmem, 0, 0), 0,
1592 rili, FALSE);
1593
1594 ili = mk_ompaccel_load(ili, DT_DBLE, nmeShmem);
1595 rili = mk_ompaccel_ldsptr(sptrRedItemAddress);
1596
1597 if (dtypeReductionItem == DT_DBLE) {
1598 ili = mk_ompaccel_store(
1599 ili, DT_DBLE,
1600 addnme(NT_IND, NME_NULL, addnme(NT_VAR, sptrRedItemAddress, 0, 0), 0),
1601 rili);
1602 } else if (dtypeReductionItem == DT_INT) {
1603 ili = ad1ili(IL_DFIX, ili);
1604 ili = mk_ompaccel_store(
1605 ili, DT_NONE,
1606 addnme(NT_IND, NME_NULL, addnme(NT_VAR, sptrRedItemAddress, 0, 0), 0),
1607 rili);
1608 } else if (dtypeReductionItem == DT_FLOAT) {
1609 ili = ad1ili(IL_SNGL, ili);
1610 ili = mk_ompaccel_store(
1611 ili, DT_FLOAT,
1612 addnme(NT_IND, NME_NULL, addnme(NT_VAR, sptrRedItemAddress, 0, 0), 0),
1613 rili);
1614 }
1615 chk_block(ili);
1616 iltb.callfg = 1;
1617 wr_block();
1618
1619 /* Sync */
1620 cr_block();
1621 RFCNTI(lFinalBarrier);
1622 exp_label(lFinalBarrier);
1623 ili = ompaccel_nvvm_mk_barrier(CTA_BARRIER);
1624 iltb.callfg = 1;
1625 chk_block(ili);
1626 wr_block();
1627 }
1628 /* Finalize the function */
1629 mk_ompaccel_function_end(sptrFn);
1630 return sptrFn;
1631 }
1632
1633 /* Expander - OpenMP Accelerator Model */
1634 void
exp_ompaccel_bpar(ILM * ilmp,int curilm,SPTR uplevel_sptr,SPTR scopeSptr,int (incrOutlinedCnt ()))1635 exp_ompaccel_bpar(ILM *ilmp, int curilm, SPTR uplevel_sptr, SPTR scopeSptr,
1636 int(incrOutlinedCnt()))
1637 {
1638 int ili, outlinedCnt;
1639 SPTR sptr;
1640 if (flg.opt != 0) {
1641 wr_block();
1642 cr_block();
1643 }
1644 if (!XBIT(232, 0x1)) {
1645 ll_rewrite_ilms(-1, curilm, 0);
1646 return;
1647 }
1648 outlinedCnt = incrOutlinedCnt();
1649 BIH_FT(expb.curbih) = TRUE;
1650 BIH_QJSR(expb.curbih) = TRUE;
1651 BIH_NOMERGE(expb.curbih) = TRUE;
1652 if (gbl.outlined)
1653 expb.sc = SC_PRIVATE;
1654 if (outlinedCnt == 1) {
1655 sptr = ll_make_outlined_ompaccel_func(uplevel_sptr, scopeSptr, FALSE);
1656
1657 if (!PARENCLFUNCG(scopeSptr))
1658 PARENCLFUNCP(scopeSptr, sptr);
1659 ll_write_ilm_header(sptr, curilm);
1660
1661 ili = ompaccel_nvvm_get(threadIdX);
1662 ili = ll_make_kmpc_spmd_kernel_init(ili);
1663 iltb.callfg = 1;
1664 chk_block(ili);
1665
1666 ili = ll_make_outlined_ompaccel_call(gbl.ompoutlinedfunc, sptr);
1667 iltb.callfg = 1;
1668 chk_block(ili);
1669 gbl.ompoutlinedfunc = sptr;
1670
1671 } else if (outlinedCnt > 1) {
1672 ll_rewrite_ilms(-1, curilm, 0);
1673 }
1674 }
1675
1676 void
exp_ompaccel_epar(ILM * ilmp,int curilm,int outlinedCnt,int (decrOutlinedCnt ()))1677 exp_ompaccel_epar(ILM *ilmp, int curilm, int outlinedCnt,
1678 int(decrOutlinedCnt()))
1679 {
1680 if (XBIT(232, 0x1)) {
1681 if (flg.opt != 0) {
1682 wr_block();
1683 cr_block();
1684 }
1685
1686 if (outlinedCnt == 1) {
1687 ilm_outlined_pad_ilm(curilm);
1688 }
1689 outlinedCnt = decrOutlinedCnt();
1690 }
1691 if (outlinedCnt >= 1)
1692 ll_rewrite_ilms(-1, curilm, 0);
1693
1694 if (gbl.outlined)
1695 expb.sc = SC_AUTO;
1696 ccff_info(MSGOPENMP, "OMP002", gbl.findex, gbl.lineno,
1697 "Parallel region terminated", NULL);
1698 }
1699
1700 void
exp_ompaccel_eteams(ILM * ilmp,int curilm,int outlinedCnt,int (decrOutlinedCnt ()))1701 exp_ompaccel_eteams(ILM *ilmp, int curilm, int outlinedCnt,
1702 int(decrOutlinedCnt()))
1703 {
1704 if (XBIT(232, 0x1)) {
1705 if (outlinedCnt == 1) {
1706 ilm_outlined_pad_ilm(curilm);
1707 }
1708 outlinedCnt = decrOutlinedCnt();
1709 }
1710 if (outlinedCnt >= 1)
1711 ll_rewrite_ilms(-1, curilm, 0);
1712
1713 if (gbl.outlined)
1714 expb.sc = SC_AUTO;
1715 ccff_info(MSGOPENMP, "OMP023", gbl.findex, gbl.lineno,
1716 "Teams region terminated", NULL);
1717 }
1718
1719 void
exp_ompaccel_mploopfini(ILM * ilmp,int curilm,int outlinedCnt)1720 exp_ompaccel_mploopfini(ILM *ilmp, int curilm, int outlinedCnt)
1721 {
1722 int ili;
1723 if (outlinedCnt >= 1)
1724 return;
1725 const int sched = mp_sched_to_kmpc_sched(ILM_OPND(ilmp, 2));
1726 if (sched == KMP_ORD_STATIC || sched == KMP_ORD_DYNAMIC_CHUNKED) {
1727 ili = ll_make_kmpc_dispatch_fini((DTYPE)ILM_OPND(ilmp, 1));
1728 iltb.callfg = 1;
1729 chk_block(ili);
1730 } else if (sched == KMP_SCH_STATIC || sched == KMP_SCH_STATIC_CHUNKED) {
1731 ili = ll_make_kmpc_for_static_fini();
1732 iltb.callfg = 1;
1733 chk_block(ili);
1734 }
1735 }
1736
1737 void
exp_ompaccel_mploop(ILM * ilmp,int curilm)1738 exp_ompaccel_mploop(ILM *ilmp, int curilm)
1739 {
1740 SPTR nlower, nupper, nstride;
1741 int sched, ili;
1742 char *doschedule;
1743 loop_args_t loop_args;
1744 #if LLVM_YKT
1745 /* frontend generates two MPLOOP ILM, one for distribute, other for parallel
1746 * If it is combined construct like ttdpf, I don't need to do something
1747 * special for distribute I need to pass different scheduling type to device
1748 * runtime.
1749 */
1750 if (mp_sched_to_kmpc_sched(ILM_OPND(ilmp, 7)) == KMP_DISTRIBUTE_STATIC) {
1751 if ((ompaccel_tinfo_current_target_mode() ==
1752 mode_target_teams_distribute_parallel_for ||
1753 ompaccel_tinfo_current_target_mode() ==
1754 mode_target_teams_distribute_parallel_for_simd))
1755 return;
1756 }
1757 #endif
1758 nlower = ILM_SymOPND(ilmp, 1);
1759 nupper = ILM_SymOPND(ilmp, 2);
1760 nstride = ILM_SymOPND(ilmp, 3);
1761 if (!XBIT(183, 0x100000)) {
1762 nlower = (SPTR)getccsym_copy(nlower); // ???
1763 nupper = (SPTR)getccsym_copy(nupper); // ???
1764 nstride = (SPTR)getccsym_copy(nstride); // ???
1765 SCP(nlower, SC_PRIVATE);
1766 ENCLFUNCP(nlower, GBL_CURRFUNC);
1767 ENCLFUNCP(nupper, GBL_CURRFUNC);
1768 ENCLFUNCP(nstride, GBL_CURRFUNC);
1769 exp_add_copy(nlower, ILM_SymOPND(ilmp, 1));
1770 exp_add_copy(nupper, ILM_SymOPND(ilmp, 2));
1771 exp_add_copy(nstride, ILM_SymOPND(ilmp, 3));
1772 }
1773 loop_args.lower = nlower;
1774 loop_args.upper = nupper;
1775 loop_args.stride = nstride;
1776 loop_args.chunk = ILM_SymOPND(ilmp, 4);
1777 loop_args.last = ILM_SymOPND(ilmp, 5);
1778 loop_args.dtype = (DTYPE)ILM_OPND(ilmp, 6); // ???
1779 loop_args.sched = (kmpc_sched_e)ILM_OPND(ilmp, 7);
1780 sched = mp_sched_to_kmpc_sched(loop_args.sched);
1781 switch (sched) {
1782 case KMP_SCH_STATIC:
1783 case KMP_SCH_STATIC_CHUNKED:
1784 if ((ILM_OPND(ilmp, 7) & 0xff00) == MP_SCH_CHUNK_1) {
1785 doschedule = "static cyclic";
1786 ccff_info(MSGOPENMP, "OMP014", gbl.findex, gbl.lineno,
1787 "Parallel loop activated with %schedule schedule",
1788 "schedule=%s", doschedule, NULL);
1789 }
1790 case KMP_DISTRIBUTE_STATIC_CHUNKED:
1791 case KMP_DISTRIBUTE_STATIC:
1792 ili = ll_make_kmpc_for_static_init_simple_spmd(&loop_args, sched);
1793 break;
1794 default:
1795 ili = ll_make_kmpc_dispatch_init(&loop_args);
1796 }
1797
1798 iltb.callfg = 1;
1799 chk_block(ili);
1800 BIH_NOMERGE(expb.curbih) = TRUE;
1801 if (!XBIT(183, 0x100000)) {
1802 exp_add_copy(ILM_SymOPND(ilmp, 1), nlower);
1803 exp_add_copy(ILM_SymOPND(ilmp, 2), nupper);
1804 exp_add_copy(ILM_SymOPND(ilmp, 3), nstride);
1805 }
1806
1807 /* constant propagation stop when it sees function call. We may have some
1808 * stride that needs to propagate for computation of tripcount. */
1809 if (flg.opt != 0) {
1810 wr_block();
1811 cr_block();
1812 }
1813 }
1814
1815 void
exp_ompaccel_btarget(ILM * ilmp,int curilm,SPTR uplevel_sptr,SPTR scopeSptr,int (incrOutlinedCnt ()),SPTR * targetfunc_sptr,int * isTargetDevice)1816 exp_ompaccel_btarget(ILM *ilmp, int curilm, SPTR uplevel_sptr, SPTR scopeSptr,
1817 int(incrOutlinedCnt()), SPTR *targetfunc_sptr,
1818 int *isTargetDevice)
1819 {
1820 int ili, outlinedCnt;
1821 SPTR sptr;
1822 /* lexically nested begin parallel */
1823 outlinedCnt = incrOutlinedCnt();
1824 if (outlinedCnt > 1) {
1825 ll_rewrite_ilms(-1, curilm, 0);
1826 return;
1827 }
1828 ompaccel_symreplacer(false);
1829 if (flg.opt != 0) {
1830 wr_block();
1831 cr_block();
1832 }
1833
1834 BIH_FT(expb.curbih) = TRUE;
1835 BIH_QJSR(expb.curbih) = TRUE;
1836 BIH_NOMERGE(expb.curbih) = TRUE;
1837 if (outlinedCnt == 1) {
1838 /* inomptarget used to figure out whether other directives, statements are
1839 * in target region or not */
1840 gbl.ompaccel_intarget = true;
1841 /* Outline function, create sptr as ptx kernel, duplicate all the sptrs*/
1842 sptr = ll_make_outlined_ompaccel_func(uplevel_sptr, scopeSptr, TRUE);
1843 /* set global outlined function with the latest */
1844 gbl.ompoutlinedfunc = sptr;
1845
1846 if (!PARENCLFUNCG(scopeSptr))
1847 PARENCLFUNCP(scopeSptr, sptr);
1848 ll_write_ilm_header(sptr, curilm);
1849 }
1850 ccff_info(MSGOPENMP, "OMP020", gbl.findex, gbl.lineno,
1851 "Target region activated for offload", NULL);
1852 *targetfunc_sptr = sptr;
1853 *isTargetDevice = ILI_OF(ILM_OPND(ilmp, 1));
1854 return;
1855 }
1856
1857 static void
exp_ompaccel_ereduction(ILM * ilmp,int curilm)1858 exp_ompaccel_ereduction(ILM *ilmp, int curilm)
1859 {
1860 int ili;
1861 cr_block();
1862 ili = ll_make_kmpc_nvptx_end_reduce_nowait();
1863
1864 iltb.callfg = 1;
1865 chk_block(ili);
1866 wr_block();
1867 }
1868
1869 void
exp_ompaccel_etarget(ILM * ilmp,int curilm,SPTR targetfunc_sptr,int outlinedCnt,SPTR uplevel_sptr,int (decrOutlinedCnt ()))1870 exp_ompaccel_etarget(ILM *ilmp, int curilm, SPTR targetfunc_sptr,
1871 int outlinedCnt, SPTR uplevel_sptr, int(decrOutlinedCnt()))
1872 {
1873 int ili;
1874 if (outlinedCnt == 1) {
1875 ilm_outlined_pad_ilm(curilm);
1876 }
1877 outlinedCnt = decrOutlinedCnt();
1878 if (outlinedCnt >= 1) {
1879 ll_rewrite_ilms(-1, curilm, 0);
1880 return;
1881 }
1882 if (gbl.outlined)
1883 expb.sc = SC_AUTO;
1884
1885 if (ompaccel_tinfo_current_target_mode() == mode_target) {
1886 ili = ll_make_tgt_target(gbl.ompoutlinedfunc, OMPACCEL_DEFAULT_DEVICEID,
1887 uplevel_sptr);
1888 } else if (ompaccel_tinfo_current_target_mode() == mode_target_parallel_for ||
1889 ompaccel_tinfo_current_target_mode() ==
1890 mode_target_parallel_for_simd) {
1891 // Create kernel with single team.
1892 ili = ll_make_tgt_target_teams(
1893 gbl.ompoutlinedfunc, OMPACCEL_DEFAULT_DEVICEID, uplevel_sptr, 1, 0);
1894 } else {
1895 ili = ll_make_tgt_target_teams(
1896 gbl.ompoutlinedfunc, OMPACCEL_DEFAULT_DEVICEID, uplevel_sptr, 0, 0);
1897 }
1898
1899 iltb.callfg = 1;
1900 chk_block(ili);
1901
1902 gbl.ompaccel_intarget = false;
1903
1904 ccff_info(MSGOPENMP, "OMP021", gbl.findex, gbl.lineno,
1905 "Target region terminated", NULL);
1906 }
1907
/**
   \brief Expand a reduction inside a target region.

   Builds a local array ".reduceData" holding the address of every shared
   reduction symbol of the current target info, emits the
   parallel-reduce-nowait runtime call with the shuffle and inter-warp-copy
   functions recorded in the target info, emits the end-reduce call, and then
   emits code - skipped by a branch to lAssignReduction when threadIdX is
   non-zero - that folds the reduced values into the private symbols.
 */
void
exp_ompaccel_reduction(ILM *ilmp, int curilm)
{
  int ili, bili, nmeReduceData, sizeRed = 0;
  SPTR lAssignReduction, sptrReduceData, sptrReductionItem;
  DTYPE dtypeReduceData, dtypeReductionItem;
  /* Array of pointers, one slot per reduction symbol. */
  dtypeReduceData = mk_ompaccel_array_dtype(
      get_type(2, TY_PTR, DT_ANY),
      ompaccel_tinfo_current_get()->n_reduction_symbols);
  sptrReduceData =
      mk_ompaccel_addsymbol(".reduceData", dtypeReduceData, SC_LOCAL, ST_ARRAY);

  /* Fill slot i with the address of the i-th shared reduction symbol. */
  cr_block();
  for (int i = 0; i < ompaccel_tinfo_current_get()->n_reduction_symbols; ++i) {
    sptrReductionItem =
        ompaccel_tinfo_current_get()->reduction_symbols[i].shared_sym;
    dtypeReductionItem = DTYPEG(sptrReductionItem);

    ili = mk_address(sptrReductionItem);
    nmeReduceData =
        add_arrnme(NT_ARR, NME_NULL, addnme(NT_VAR, sptrReduceData, 0, 0), i,
                   ad_icon(i), FALSE);

    bili = mk_address(sptrReduceData);
    if (i != 0)
      bili = mk_ompaccel_add(bili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
                             DT_ADDR);

    ili = mk_ompaccel_store(ili, DT_ADDR, nmeReduceData, bili);
    chk_block(ili);
  }
  wr_block();

  /* Runtime reduction call; sizeRed is always passed as 0 here. */
  cr_block();
  ili = ll_make_kmpc_nvptx_parallel_reduce_nowait_simple_spmd(
      ad_icon(ompaccel_tinfo_current_get()->n_reduction_symbols),
      ad_icon(sizeRed), mk_address(sptrReduceData),
      ompaccel_tinfo_current_get()->reduction_funcs.shuffleFn,
      ompaccel_tinfo_current_get()->reduction_funcs.interWarpCopy);
  iltb.callfg = 1;
  chk_block(ili);
  wr_block();

  exp_ompaccel_ereduction(ilmp, curilm);

  lAssignReduction = getlab();
  RFCNTI(lAssignReduction);

  /* Branch to lAssignReduction when (threadIdX != 0) holds, so the combine
   * code below is skipped by those threads. */
  ili = ompaccel_nvvm_get(threadIdX);
  ili = mk_ompaccel_compare(ili, DT_INT, ad_icon(0), DT_INT, CC_NE);
  ili = ad3ili(IL_ICJMPZ, ili, CC_NE, lAssignReduction);
  chk_block(ili);

  // Load reduced items to the original address
  for (int i = 0; i < ompaccel_tinfo_current_get()->n_reduction_symbols; ++i) {
    bili = mk_address(sptrReduceData);
    sptrReductionItem =
        ompaccel_tinfo_current_get()->reduction_symbols[i].private_sym;
    dtypeReductionItem = DTYPEG(sptrReductionItem);

    if (i != 0) {
      bili = mk_ompaccel_add(bili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
                             DT_ADDR);
    }

    /* NOTE(review): nmeReduceData still holds the value computed in the last
     * iteration of the first loop, for every i - confirm this is intended. */
    bili = mk_ompaccel_load(bili, DT_ADDR, nmeReduceData);
    bili = mk_ompaccel_load(bili, dtypeReductionItem, nmeReduceData);

    ili = mk_ompaccel_ldsptr(sptrReductionItem);

    /* Only operator codes 1 and 2 are combined (both by addition).
     * NOTE(review): for any other code, ili is just the load of the private
     * symbol when it reaches chk_block - confirm other operators are handled
     * elsewhere. */
    switch (ompaccel_tinfo_current_get()->reduction_symbols[i].redop) {
    case 1:
    case 2:
      ili = mk_ompaccel_add(ili, dtypeReductionItem, bili, dtypeReductionItem);
      ili = mk_ompaccel_store(ili, dtypeReductionItem,
                              addnme(NT_VAR, sptrReductionItem, 0, 0),
                              mk_address(sptrReductionItem));
      break;
    }

    chk_block(ili);
  }
  wr_block();
  cr_block();
  exp_label(lAssignReduction);
}
1994
1995 void
exp_ompaccel_bteams(ILM * ilmp,int curilm,int outlinedCnt,SPTR uplevel_sptr,SPTR scopeSptr,int (incrOutlinedCnt ()))1996 exp_ompaccel_bteams(ILM *ilmp, int curilm, int outlinedCnt, SPTR uplevel_sptr,
1997 SPTR scopeSptr, int(incrOutlinedCnt()))
1998 {
1999 int ili, opc;
2000 SPTR sptr;
2001 if (flg.opt != 0) {
2002 wr_block();
2003 cr_block();
2004 }
2005
2006 if (flg.omptarget) {
2007 ll_rewrite_ilms(-1, curilm, 0);
2008 return;
2009 }
2010
2011 if (XBIT(232, 0x1)) {
2012 outlinedCnt = incrOutlinedCnt();
2013 }
2014 BIH_FT(expb.curbih) = TRUE;
2015 BIH_QJSR(expb.curbih) = TRUE;
2016 BIH_NOMERGE(expb.curbih) = TRUE;
2017 if (gbl.outlined)
2018 expb.sc = SC_PRIVATE;
2019 if (outlinedCnt == 1) {
2020 if (flg.omptarget)
2021 sptr = ll_make_outlined_ompaccel_func(uplevel_sptr, scopeSptr, FALSE);
2022 else
2023 sptr = ll_make_outlined_func(uplevel_sptr, scopeSptr);
2024 if (!PARENCLFUNCG(scopeSptr))
2025 PARENCLFUNCP(scopeSptr, sptr);
2026 ll_write_ilm_header(sptr, curilm);
2027 if (flg.omptarget) {
2028 ili = ompaccel_nvvm_get(threadIdX);
2029 ili = ll_make_kmpc_spmd_kernel_init(ili);
2030 iltb.callfg = 1;
2031 chk_block(ili);
2032 ili = ll_make_outlined_ompaccel_call(gbl.ompoutlinedfunc, sptr);
2033 iltb.callfg = 1;
2034 chk_block(ili);
2035 gbl.ompoutlinedfunc = sptr;
2036 return;
2037 }
2038 ccff_info(MSGOPENMP, "OMP022", gbl.findex, gbl.lineno,
2039 "Teams region activated", NULL);
2040
2041 } else if (outlinedCnt > 1) {
2042 ll_rewrite_ilms(-1, curilm, 0);
2043 }
2044 }
2045 void
exp_ompaccel_map(ILM * ilmp,int curilm,int outlinedCnt)2046 exp_ompaccel_map(ILM *ilmp, int curilm, int outlinedCnt)
2047 {
2048 int label, argilm;
2049 SPTR sptr;
2050 if (outlinedCnt >= 2)
2051 return;
2052 argilm = ILM_OPND(ilmp, 1);
2053 ILM *mapop = (ILM *)(ilmb.ilm_base + argilm);
2054 if (ILM_OPC(mapop) == IM_BASE) {
2055 sptr = ILM_SymOPND(mapop, 1); // make 2
2056 label = ILM_OPND(ilmp, 2); /* map type */
2057 } else if (ILM_OPC(mapop) == IM_PLD) {
2058 sptr = ILM_SymOPND(mapop, 2); // make 2
2059 label = ILM_OPND(ilmp, 2); /* map type */
2060 }
2061 ompaccel_tinfo_current_addupdate_mapitem(sptr, label);
2062 }
2063
2064 void
exp_ompaccel_emap(ILM * ilmp,int curilm)2065 exp_ompaccel_emap(ILM *ilmp, int curilm)
2066 {
2067 int ili;
2068 OMPACCEL_TINFO *targetinfo;
2069 if (ompaccel_tinfo_has(gbl.currsub))
2070 return;
2071 ompaccel_symreplacer(true);
2072 targetinfo = ompaccel_tinfo_current_get();
2073 if (targetinfo != NULL) {
2074 if (ompaccel_tinfo_current_target_mode() == mode_target_data_enter_region ||
2075 ompaccel_tinfo_current_target_mode() == mode_target_data_region) {
2076 wr_block();
2077 cr_block();
2078 ili =
2079 ll_make_tgt_target_data_begin(OMPACCEL_DEFAULT_DEVICEID, targetinfo);
2080 iltb.callfg = 1;
2081 chk_block(ili);
2082 } else if (ompaccel_tinfo_current_target_mode() ==
2083 mode_target_data_exit_region) {
2084 wr_block();
2085 cr_block();
2086 ili = ll_make_tgt_target_data_end(OMPACCEL_DEFAULT_DEVICEID, targetinfo);
2087 iltb.callfg = 1;
2088 chk_block(ili);
2089 }
2090 }
2091 }
2092
2093 void
exp_ompaccel_looptripcount(ILM * ilmp,int curilm)2094 exp_ompaccel_looptripcount(ILM *ilmp, int curilm)
2095 {
2096 /* push loop trip count is disabled because of performance issue */
2097 if (XBIT(232, 0x20)) {
2098 SPTR sptr;
2099 int ili;
2100 wr_block();
2101 cr_block();
2102 sptr = ILM_SymOPND(ilmp, 1);
2103 ili = ll_make_kmpc_push_target_tripcount(OMPACCEL_DEFAULT_DEVICEID, sptr);
2104 iltb.callfg = 1;
2105 chk_block(ili);
2106 }
2107 }
2108
2109 void
exp_ompaccel_reductionitem(ILM * ilmp,int curilm)2110 exp_ompaccel_reductionitem(ILM *ilmp, int curilm)
2111 {
2112 ompaccel_tinfo_current_add_reductionitem(
2113 ILM_SymOPND(ilmp, 1), ILM_SymOPND(ilmp, 2), ILM_SymOPND(ilmp, 3));
2114 }
2115
2116 void
exp_ompaccel_targetdata(ILM * ilmp,int curilm,ILM_OP opc)2117 exp_ompaccel_targetdata(ILM *ilmp, int curilm, ILM_OP opc)
2118 {
2119 int dotarget;
2120 SPTR beg_label, end_label;
2121 ompaccel_symreplacer(false);
2122 ompaccel_tinfo_create(OMPACCEL_DATA_FUNCTION, OMPACCEL_DATA_MAX_SYM);
2123 if (opc == IM_TARGETEXITDATA)
2124 ompaccel_tinfo_current_set_mode(mode_target_data_exit_region);
2125 else if (opc == IM_TARGETENTERDATA)
2126 ompaccel_tinfo_current_set_mode(mode_target_data_enter_region);
2127 else if (opc == IM_BTARGETDATA)
2128 ompaccel_tinfo_current_set_mode(mode_target_data_region);
2129 dotarget = ILI_OF(ILM_OPND(ilmp, 1));
2130 beg_label = getlab();
2131 end_label = getlab();
2132
2133 dotarget = ad3ili(IL_ICJMPZ, dotarget, CC_EQ, end_label);
2134 RFCNTI(end_label);
2135 chk_block(dotarget);
2136
2137 wr_block();
2138 cr_block();
2139 exp_label(beg_label);
2140
2141 /* .... TODO: call to runtime target data here */
2142
2143 exp_label(end_label);
2144 }
2145 void
exp_ompaccel_etargetdata(ILM * ilmp,int curilm)2146 exp_ompaccel_etargetdata(ILM *ilmp, int curilm)
2147 {
2148 OMPACCEL_TINFO *targetinfo;
2149 int ili;
2150 if (gbl.outlined)
2151 return;
2152 ompaccel_symreplacer(true);
2153 targetinfo = ompaccel_tinfo_current_get_targetdata();
2154 wr_block();
2155 cr_block();
2156 ili = ll_make_tgt_target_data_end(OMPACCEL_DEFAULT_DEVICEID, targetinfo);
2157 iltb.callfg = 1;
2158 chk_block(ili);
2159 }
2160
/**
 * \brief Initialization hook that simply forwards to init_tgtutil().
 */
void
init_test()
{
  /* all the work is delegated to the tgtutil initializer */
  init_tgtutil();
}
2166
2167 #endif
2168 /* Expander - OpenMP Accelerator Model */
2169