1 /*
2  * Copyright (c) 2018-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
/**
 *  \file
 *  \brief ompaccel.c - OpenMP GPU offload for NVVM targets, built on
 *  libomptarget.
 */
23 #ifdef OMP_OFFLOAD_LLVM
24 
25 #include "kmpcutil.h"
26 #include "error.h"
27 #include "semant.h"
28 #include "ilmtp.h"
29 #include "ilm.h"
30 #include "ili.h"
31 #include "expand.h"
32 #include "exputil.h"
33 #include "outliner.h"
34 #include "machreg.h"
35 #include "mp.h"
36 #include "ll_structure.h"
37 #include "llmputil.h"
38 #include "ccffinfo.h"
39 #include "llutil.h"
40 #include "ompaccel.h"
41 #include "tgtutil.h"
42 #include "dinit.h"
43 #include "assem.h"
44 #include "dinitutl.h"
45 #include "cgllvm.h"
46 #include "cgmain.h"
47 
48 #include "regutil.h"
49 #include "dtypeutl.h"
50 #include "llassem.h"
51 #include "ll_ftn.h"
52 #include "symfun.h"
53 #include "../../flang1/flang1exe/global.h"
54 
/* Fatal diagnostics for offload pragmas that are not handled. Each macro
 * forwards its pragma name(s) to error() with a fixed error code and
 * ERR_Fatal severity. */
#define NOT_IMPLEMENTED(pragma_name) \
  error((error_code_t)1200, ERR_Fatal, 0, pragma_name, NULL)
#define NOT_IMPLEMENTED_CANTCOMBINED(pragma_name, other_pragma) \
  error((error_code_t)1201, ERR_Fatal, 0, pragma_name, other_pragma)
#define NOT_IMPLEMENTED_NEEDCOMBINED(pragma_name, other_pragma) \
  error((error_code_t)1202, ERR_Fatal, 0, pragma_name, other_pragma)
61 
62 /* Initial Max target region */
63 #define INC_EXP 2
64 int tinfo_size = 50;
65 int tinfo_size_reductions = 10;
66 
67 int num_tinfos = 0;
68 OMPACCEL_TINFO **tinfos;
69 OMPACCEL_TINFO *current_tinfo = nullptr;
70 OMP_TARGET_MODE NextTargetMode = mode_none_target;
71 
72 const char *nvvm_target_triple;
73 void
ompaccel_set_targetriple(const char * tp)74 ompaccel_set_targetriple(const char *tp)
75 {
76   nvvm_target_triple = tp;
77 }
78 const char *
ompaccel_get_targetriple()79 ompaccel_get_targetriple()
80 {
81   return nvvm_target_triple;
82 }
/* File-local counter, starting at zero. Presumably used to number generated
 * reduction helper functions — confirm against its users later in the file. */
static int reductionFunctionCounter = 0;
84 
85 static void
_long_unsigned(int lilix,int * dt,bool * punsigned,DTYPE dtype)86 _long_unsigned(int lilix, int *dt, bool *punsigned, DTYPE dtype)
87 {
88   ILI_OP opc;
89   opc = ILI_OPC(lilix);
90   int dty = DTY(dtype);
91   if (dtype == DT_INT || dtype == DT_UINT) {
92     if (size_of(dtype) > 4)
93       *dt = 2;
94     else
95       *dt = 1;
96   } else if (dtype == DT_FLOAT || dtype == DT_UINT8) {
97     *dt = 3;
98   } else if (dty == TY_INT8) {
99     *dt = 2;
100   } else if (dty == TY_DBLE) {
101     *dt = 4;
102   }
103 
104   // todo ompaccel I don't know how to handle others
105 
106   switch (DTY(dtype)) {
107   case TY_UINT:
108   case TY_USINT:
109   case TY_UINT8:
110   case TY_UINT128:
111     if (opc != IL_ICON)
112       *punsigned = TRUE;
113     break;
114   default:
115     break;
116   }
117 
118 } /* _long_unsigned */
119 
120 static bool
_pointer_type(DTYPE dtype)121 _pointer_type(DTYPE dtype)
122 {
123   if (dtype && DTY(dtype) == TY_PTR)
124     return TRUE;
125   return FALSE;
126 } /* _pointer_type */
127 
128 int
mk_ompaccel_ldsptr(SPTR sptr)129 mk_ompaccel_ldsptr(SPTR sptr)
130 {
131   ISZ_T sz;
132   DTYPE dtype;
133   // it's function
134   if (DTYPEG(sptr) == DT_NONE && STYPEG(sptr) == ST_ENTRY) {
135     int nme = addnme(NT_VAR, sptr, 0, 0);
136     int ili = mk_address(sptr);
137     return ad3ili(IL_LDA, ili, nme, MSZ_PTR);
138   } else {
139     sz = size_of(DTYPEG(sptr));
140     dtype = DTYPEG(sptr);
141 
142     if (STYPEG(sptr) == ST_CONST) {
143       if (sz == 8)
144         return ad_kcon(CONVAL1G(sptr), CONVAL2G(sptr));
145       else
146         return ad_icon(CONVAL2G(sptr));
147     } else {
148       int nme = addnme(NT_VAR, sptr, 0, 0);
149       int ili = mk_address(sptr);
150       if (ILI_OPC(ili) == IL_LDA)
151         nme = ILI_OPND(ili, 2);
152       if (_pointer_type(dtype) || DTY(dtype) == TY_ARRAY) {
153         return ad3ili(IL_LDA, ili, nme, MSZ_PTR);
154       } else {
155         if (sz == 8)
156           return ad3ili(IL_LDKR, ili, nme, MSZ_I8);
157         else
158           return ad3ili(IL_LD, ili, nme, mem_size(DTY(DTYPEG(sptr))));
159       }
160     }
161   }
162 }
163 
164 int
mk_ompaccel_load(int ili,DTYPE dtype,int nme)165 mk_ompaccel_load(int ili, DTYPE dtype, int nme)
166 {
167   if (_pointer_type(dtype))
168     return ad3ili(IL_LDA, ili, nme, MSZ_PTR);
169   else {
170     switch (dtype) {
171     case DT_INT:
172       if (size_of(dtype) > 4)
173         return ad3ili(IL_LDKR, ili, nme, MSZ_WORD);
174       else
175         return ad3ili(IL_LD, ili, nme, MSZ_WORD);
176     case DT_REAL:
177       if (size_of(dtype) > 4)
178         return ad3ili(IL_LDKR, ili, nme, MSZ_F8);
179       else
180         return ad3ili(IL_LDSP, ili, nme, MSZ_F8);
181       break;
182     case DT_DBLE:
183       return ad3ili(IL_LDDP, ili, nme, MSZ_DBLE);
184       break;
185     case DT_CMPLX:
186       return ad3ili(IL_LDDCMPLX, ili, nme, MSZ_F16);
187       break;
188     case DT_NONE:
189       return ad3ili(IL_LD, ili, nme, MSZ_WORD);
190       break;
191     default:
192       return 0;
193       break;
194     }
195   }
196 }
197 
198 static int
mk_ompaccel_ld(int ili,int nme)199 mk_ompaccel_ld(int ili, int nme)
200 {
201   return mk_ompaccel_load(ili, DT_NONE, nme);
202 }
203 
204 int
mk_ompaccel_store(int ili_value,DTYPE dtype,int nme,int ili_address)205 mk_ompaccel_store(int ili_value, DTYPE dtype, int nme, int ili_address)
206 {
207   if (_pointer_type(dtype))
208     return ad4ili(IL_STA, ili_value, ili_address, nme, MSZ_PTR);
209   else {
210     switch (dtype) {
211     case DT_LOG:
212       return ad4ili(IL_ST, ili_value, ili_address, nme, MSZ_WORD);
213       break;
214     case DT_INT:
215       return ad4ili(IL_ST, ili_value, ili_address, nme, MSZ_WORD);
216       break;
217     case DT_REAL:
218       return ad4ili(IL_STSP, ili_value, ili_address, nme, MSZ_F4);
219       break;
220     case DT_DBLE:
221       return ad4ili(IL_STDP, ili_value, ili_address, nme, MSZ_DBLE);
222       break;
223     case DT_INT8:
224       return ad4ili(IL_STKR, ili_value, ili_address, nme, MSZ_I8);
225       break;
226     case DT_NONE:
227       return ad4ili(IL_ST, ili_value, ili_address, nme, MSZ_WORD);
228       break;
229     default:
230       return 0;
231       break;
232     }
233   }
234 }
235 
236 static int
mk_ompaccel_stsptr(int ili_value,SPTR sptr)237 mk_ompaccel_stsptr(int ili_value, SPTR sptr)
238 {
239   ISZ_T sz = size_of(DTYPEG(sptr));
240   DTYPE dtype = DTYPEG(sptr);
241   int ili;
242   int nme = addnme(NT_VAR, sptr, 0, 0);
243   if (STYPEG(sptr) == ST_CONST) {
244     if (sz == 8)
245       ili = ad_kcon(CONVAL1G(sptr), CONVAL2G(sptr));
246     else
247       ili = ad_icon(CONVAL2G(sptr));
248   } else {
249     ili = mk_address(sptr);
250     if (ILI_OPC(ili) == IL_LDA)
251       nme = ILI_OPND(ili, 2);
252   }
253   return mk_ompaccel_store(ili_value, dtype, nme, ili);
254 }
255 
256 static int
mk_ompaccel_and(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2)257 mk_ompaccel_and(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2)
258 {
259   ILI_OP opc;
260   int dt = 0;
261   bool uu = FALSE;
262   if (!ili1)
263     return ili2;
264   if (!ili2)
265     return ili1;
266   if (_pointer_type(dtype1) || _pointer_type(dtype2)) {
267     return ad3ili(IL_AADD, ili1, ili2, 0);
268   } else {
269     _long_unsigned(ili1, &dt, &uu, dtype1);
270     _long_unsigned(ili2, &dt, &uu, dtype2);
271     /* signed */
272     if (!uu) {
273       opc = IL_AND;
274     } else {
275       opc = IL_KAND;
276     }
277   }
278   return ad2ili(opc, ili1, ili2);
279 }
280 
281 static int
mk_ompaccel_iand(int ili1,int ili2)282 mk_ompaccel_iand(int ili1, int ili2)
283 {
284   return mk_ompaccel_and(ili1, DT_INT, ili2, DT_INT);
285 }
286 
287 static int
mk_ompaccel_shift(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2)288 mk_ompaccel_shift(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2)
289 {
290   ILI_OP opc = IL_NONE;
291   int dt = 0;
292   bool uu = FALSE;
293   if (!ili1)
294     return ili2;
295   if (!ili2)
296     return ili1;
297 
298   _long_unsigned(ili1, &dt, &uu, dtype1);
299   _long_unsigned(ili2, &dt, &uu, dtype2);
300   /* signed */
301   if (!uu) {
302     if (dt == 1)
303       opc = IL_RSHIFT;
304     else if (dt == 2)
305       opc = IL_KARSHIFT;
306   } else {
307     if (dt == 1)
308       opc = IL_URSHIFT;
309     else if (dt == 2)
310       opc = IL_KURSHIFT;
311   }
312   assert(opc != IL_NONE, "Correct IL is not found.", 0, ERR_Fatal);
313   return ad2ili(opc, ili1, ili2);
314 }
315 
316 int
mk_ompaccel_compare(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2,int CC)317 mk_ompaccel_compare(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2, int CC)
318 {
319   ILI_OP opc = IL_NONE;
320   int dt = 0;
321   bool uu = FALSE;
322   if (!ili1)
323     return ili2;
324   if (!ili2)
325     return ili1;
326 
327   _long_unsigned(ili1, &dt, &uu, dtype1);
328   _long_unsigned(ili2, &dt, &uu, dtype2);
329   /* signed */
330   if (!uu) {
331     if (dt == 1)
332       opc = IL_ICMP;
333     else if (dt == 2)
334       opc = IL_KCMP;
335   } else {
336     if (dt == 1)
337       opc = IL_UICMP;
338     else if (dt == 2)
339       opc = IL_UKCMP;
340   }
341   assert(opc != IL_NONE, "Correct IL is not found.", 0, ERR_Fatal);
342   return ad3ili(opc, ili1, ili2, CC);
343 }
344 
345 int
mk_ompaccel_add(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2)346 mk_ompaccel_add(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2)
347 {
348   ILI_OP opc = IL_NONE;
349   int dt = 0;
350   bool uu = FALSE;
351   if (!ili1)
352     return ili2;
353   if (!ili2)
354     return ili1;
355   if (_pointer_type(dtype1) || _pointer_type(dtype2)) {
356     return ad3ili(IL_AADD, ili1, ili2, 0);
357   } else {
358     _long_unsigned(ili1, &dt, &uu, dtype1);
359     _long_unsigned(ili2, &dt, &uu, dtype2);
360     /* signed */
361     if (!uu) {
362       if (dt == 1)
363         opc = IL_IADD;
364       else if (dt == 2)
365         opc = IL_KADD;
366       else if (dt == 3)
367         opc = IL_FADD;
368       else if (dt == 4)
369         opc = IL_DADD;
370       else if (dt == 5)
371         opc = IL_SCMPLXADD;
372       else if (dt == 6)
373         opc = IL_DCMPLXADD;
374     } else {
375       if (dt == 1)
376         opc = IL_UIADD;
377       else if (dt == 2)
378         opc = IL_UKADD;
379     }
380   }
381   assert(opc != IL_NONE, "Correct IL is not found.", 0, ERR_Fatal);
382   return ad2ili(opc, ili1, ili2);
383 } /* mk_ompaccel_add */
384 
385 int
mk_ompaccel_mul(int ili1,DTYPE dtype1,int ili2,DTYPE dtype2)386 mk_ompaccel_mul(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2)
387 {
388   ILI_OP opc = IL_NONE;
389   int dt = 0;
390   bool uu = FALSE;
391   if (!ili1)
392     return ili2;
393   if (!ili2)
394     return ili1;
395   if (_pointer_type(dtype1) || _pointer_type(dtype2)) {
396     // todo ompaccel not sure what to do here.
397     return ad3ili(IL_KMUL, ili1, ili2, 0);
398   } else {
399     _long_unsigned(ili1, &dt, &uu, dtype1);
400     _long_unsigned(ili2, &dt, &uu, dtype2);
401     /* signed */
402     if (!uu) {
403       if (dt == 1)
404         opc = IL_IMUL;
405       else if (dt == 2)
406         opc = IL_KMUL;
407       else if (dt == 3)
408         opc = IL_FMUL;
409       else if (dt == 4)
410         opc = IL_DMUL;
411       else if (dt == 5)
412         opc = IL_SCMPLXMUL;
413       else if (dt == 6)
414         opc = IL_DCMPLXMUL;
415     } else {
416       if (dt == 1)
417         opc = IL_UIMUL;
418       else if (dt == 2)
419         opc = IL_UKMUL;
420     }
421   }
422   assert(opc != IL_NONE, "Correct IL is not found.", 0, ERR_Fatal);
423   return ad2ili(opc, ili1, ili2);
424 } /* mk_ompaccel_mul */
425 
426 static SPTR
mk_ompaccel_getnewccsym(int letter,int n,DTYPE dtype,SC_KIND SCkind,SYMTYPE symtype)427 mk_ompaccel_getnewccsym(int letter, int n, DTYPE dtype, SC_KIND SCkind,
428                         SYMTYPE symtype)
429 {
430   SPTR sptr = getnewccsym(letter, n, symtype);
431   DTYPEP(sptr, dtype);
432   SCP(sptr, SCkind);
433   OMPACCDEVSYMP(sptr, 1);
434   return sptr;
435 }
436 
437 SPTR
mk_ompaccel_addsymbol(const char * name,DTYPE dtype,SC_KIND SCkind,SYMTYPE symtype)438 mk_ompaccel_addsymbol(const char *name, DTYPE dtype, SC_KIND SCkind,
439                       SYMTYPE symtype)
440 {
441   SPTR sptr = getsymbol(name);
442   DTYPEP(sptr, dtype);
443   STYPEP(sptr, symtype);
444   SCP(sptr, SCkind);
445   OMPACCDEVSYMP(sptr, 1);
446   return sptr;
447 }
448 
449 static void
mk_ompaccel_function_end(SPTR func_sptr)450 mk_ompaccel_function_end(SPTR func_sptr)
451 {
452   int bihx, endlab;
453   bihx = expb.curbih;
454   bihx = addbih(bihx);
455   rdilts(bihx);
456   addilt(0, ad1ili(IL_EXIT, func_sptr));
457   wrilts(bihx);
458   BIH_XT(bihx) = 1;
459   BIH_LAST(bihx) = 1;
460   endlab = getlab();
461   STYPEP(endlab, ST_LABEL);
462   RFCNTP(endlab, 1);
463   CCSYMP(endlab, 1);
464   ILIBLKP(endlab, bihx);
465   BIH_LABEL(bihx) = SPTR(endlab);
466 }
467 
468 static SPTR
mk_ompaccel_function(char * name,int n_params,const SPTR * param_sptrs,bool isDeviceFunc)469 mk_ompaccel_function(char *name, int n_params, const SPTR *param_sptrs,
470                      bool isDeviceFunc)
471 {
472   /* Create a function symbol along with parameters */
473   int dpdscp, bihx;
474   SPTR func_sptr, sym;
475   func_sptr = getsymbol(name);
476   TASKFNP(func_sptr, FALSE);
477   ISTASKDUPP(func_sptr, FALSE);
478   FUNCLINEP(func_sptr, gbl.lineno);
479   STYPEP(func_sptr, ST_ENTRY);
480   CFUNCP(func_sptr, 1);
481   DEFDP(func_sptr, 1);
482   SCP(func_sptr, SC_EXTERN);
483   ADDRTKNP(func_sptr, 1);
484   DCLDP(func_sptr, 1);
485   DTYPEP(func_sptr, DT_NONE);
486 
487   if (isDeviceFunc)
488     OMPACCFUNCDEVP(func_sptr, 1);
489   PARAMCTP(func_sptr, n_params);
490   dpdscp = aux.dpdsc_avl;
491   DPDSCP(func_sptr, dpdscp);
492   aux.dpdsc_avl += n_params;
493   NEED(aux.dpdsc_avl, aux.dpdsc_base, int, aux.dpdsc_size,
494        aux.dpdsc_size + n_params + 100);
495 
496   for (int i = 0; i < n_params; ++i) {
497     sym = param_sptrs[i];
498     aux.dpdsc_base[dpdscp++] = sym;
499   }
500 
501   /* Initialize with an Entry Block */
502   GBL_CURRFUNC = func_sptr;
503   gbl.entries = GBL_CURRFUNC;
504 
505   ds_init();
506 
507   gbl.lineno = 0;
508   gbl.findex = 0;
509   bihx = addbih(0);
510   gbl.entbih = bihx;
511   BIH_LABEL(bihx) = GBL_CURRFUNC;
512   rdilts(bihx);
513   addilt(0, ad1ili(IL_ENTRY, GBL_CURRFUNC));
514   wrilts(bihx);
515   BIH_FT(bihx) = 1;
516   BIH_EN(bihx) = 1;
517   BIHNUMP(GBL_CURRFUNC, bihx);
518   BIH_LABEL(bihx) = GBL_CURRFUNC;
519 
520   expb.curbih = bihx;
521 
522   return func_sptr;
523 }
524 
525 static int
mk_reduction_op(int redop,int lili,DTYPE dtype1,int rili,DTYPE dtype2)526 mk_reduction_op(int redop, int lili, DTYPE dtype1, int rili, DTYPE dtype2)
527 {
528   switch (redop) {
529   case 1:
530   case 2:
531     return mk_ompaccel_add(lili, dtype1, rili, dtype2);
532   case 3:
533     return mk_ompaccel_mul(lili, dtype1, rili, dtype2);
534   default:
535     static_assert(true, "Rest of reduction operators are not implemented yet.");
536     break;
537   }
538   return 0;
539 }
540 
541 DTYPE
mk_ompaccel_array_dtype(DTYPE atype,int size)542 mk_ompaccel_array_dtype(DTYPE atype, int size)
543 {
544   DTYPE dtype;
545   {
546     ADSC *adsc;
547     INT con[2] = {0, size};
548 
549     dtype = get_array_dtype(1, atype);
550     adsc = AD_DPTR(dtype);
551     AD_LWBD(adsc, 0) = stb.i1;
552     AD_UPBD(adsc, 0) = getcon(con, DT_INT);
553     AD_NUMELM(adsc) = AD_UPBD(adsc, 0);
554   }
555 
556   return dtype;
557 } /* make_array_dtype */
558 
559 static void
open_OMP_OFFLOAD_LLVM_file()560 open_OMP_OFFLOAD_LLVM_file()
561 {
562   FILE *F;
563   F = fopen(gbl.ompaccfilename, "w");
564   if (F == nullptr) {
565 #if DEBUG
566     fprintf(stderr, "Trying to open temp file %s\n", gbl.ompaccfilename);
567 #endif
568   }
569   gbl.ompaccfile = F;
570 }
571 
572 INLINE static SPTR
create_nvvm_sym(const char * name,DTYPE dtype)573 create_nvvm_sym(const char *name, DTYPE dtype)
574 {
575   SPTR sptr = getsymbol(name);
576   DEFDP(sptr, 1);
577   DTYPEP(sptr, dtype);
578   CFUNCP(sptr, 1);
579   STYPEP(sptr, ST_ENTRY);
580   SCP(sptr, SC_STATIC);
581   ADDRTKNP(sptr, 1);
582   PARAMCTP(sptr, 0);
583   return sptr;
584 }
585 
586 INLINE static SPTR
create_sregs(const char * name)587 create_sregs(const char *name)
588 {
589   return create_nvvm_sym(name, DT_INT);
590 }
591 
592 void
ompaccel_init()593 ompaccel_init()
594 {
595   /* Create file to write device code */
596   open_OMP_OFFLOAD_LLVM_file();
597   /* Create target pool */
598   tinfos = (OMPACCEL_TINFO **)sccrelal(
599       (char *)tinfos, ((BIGUINT64)((tinfo_size) * sizeof(OMPACCEL_TINFO *))));
600 }
601 
602 void
ompaccel_initsyms()603 ompaccel_initsyms()
604 {
605   /* Create thread id sreg symbols */
606   init_nvvm_syms = create_sregs(NVVM_SREG[threadIdX]);
607   create_sregs(NVVM_SREG[threadIdY]);
608   create_sregs(NVVM_SREG[threadIdZ]);
609   /* Create block id sreg symbols */
610   create_sregs(NVVM_SREG[blockIdX]);
611   create_sregs(NVVM_SREG[blockIdY]);
612   create_sregs(NVVM_SREG[blockIdZ]);
613   /* Create block id sreg symbols */
614   create_sregs(NVVM_SREG[blockDimX]);
615   create_sregs(NVVM_SREG[blockDimY]);
616   create_sregs(NVVM_SREG[blockDimZ]);
617   /* Create block id sreg symbols */
618   create_sregs(NVVM_SREG[gridDimX]);
619   create_sregs(NVVM_SREG[gridDimY]);
620   create_sregs(NVVM_SREG[gridDimZ]);
621   // todo create others nvvm things too
622   create_sregs(NVVM_SREG[warpSize]);
623 
624   /* Create llvm intrinsics symbols */
625   init_nvvm_intrinsics = create_nvvm_sym(NVVM_INTRINSICS[barrier0], DT_NONE);
626   create_nvvm_sym(NVVM_INTRINSICS[barrier], DT_NONE);
627 }
628 
629 int
ompaccel_nvvm_get(nvvm_sregs sreg)630 ompaccel_nvvm_get(nvvm_sregs sreg)
631 {
632   SPTR sptr = SPTR(init_nvvm_syms + sreg);
633   ll_make_ftn_outlined_params(sptr, 0, nullptr);
634   ll_process_routine_parameters(sptr);
635   return ll_ad_outlined_func2(IL_DFRIR, IL_JSR, sptr, 0, nullptr);
636 }
637 
638 int
ompaccel_nvvm_mk_barrier(nvvm_barriers btype)639 ompaccel_nvvm_mk_barrier(nvvm_barriers btype)
640 {
641   SPTR sptr;
642   if (btype == CTA_BARRIER) {
643     sptr = (SPTR)(init_nvvm_intrinsics + barrier0);
644     ll_make_ftn_outlined_params(sptr, 0, 0);
645     ll_process_routine_parameters(sptr);
646     return ll_ad_outlined_func2(IL_NONE, IL_JSR, sptr, 0, nullptr);
647   }
648   static_assert(true, "Other nvvm intrinsics are not implemented yet.");
649 }
650 
651 int
ompaccel_nvvm_get_gbl_tid()652 ompaccel_nvvm_get_gbl_tid()
653 {
654   int ilix, iliy, iliz;
655   ilix = ad2ili(IL_ISUB, ompaccel_nvvm_get(blockDimX), ad_icon(32));
656   ilix = ad2ili(IL_IMUL, ompaccel_nvvm_get(blockIdX), ilix);
657 
658   iliy = ad2ili(IL_ISUB, ompaccel_nvvm_get(warpSize), ad_icon(1));
659   iliy = ad2ili(IL_XOR, iliy, ad_icon(-1));
660   iliz = ad2ili(IL_ISUB, ompaccel_nvvm_get(blockDimX), ad_icon(1));
661   iliz = ad2ili(IL_AND, iliy, iliz);
662   iliz = ad2ili(IL_ISUB, iliz, ad_icon(1));
663   iliz = ad2ili(IL_AND, iliz, ompaccel_nvvm_get(threadIdX));
664 
665   iliy = ad2ili(IL_IADD, iliz, ilix);
666   return iliy;
667 }
668 
669 void
ompaccel_tinfo_current_set_mode(OMP_TARGET_MODE type)670 ompaccel_tinfo_current_set_mode(OMP_TARGET_MODE type)
671 {
672   current_tinfo->mode = type;
673 }
674 
675 void
ompaccel_tinfo_set_mode_next_target(OMP_TARGET_MODE type)676 ompaccel_tinfo_set_mode_next_target(OMP_TARGET_MODE type)
677 {
678   NextTargetMode = type;
679 }
680 
681 OMP_TARGET_MODE
ompaccel_tinfo_current_target_mode()682 ompaccel_tinfo_current_target_mode()
683 {
684   return current_tinfo->mode;
685 }
686 
687 OMPACCEL_TINFO *
ompaccel_tinfo_create(SPTR func_sptr,int max_nargs)688 ompaccel_tinfo_create(SPTR func_sptr, int max_nargs)
689 {
690   OMPACCEL_TINFO *info;
691   if (DBGBIT(61, 0x10) && gbl.dbgfil != nullptr)
692     fprintf(gbl.dbgfil, "#target add request for sptr:%d [%s]\n", func_sptr,
693             SYMNAME(func_sptr));
694 
695   NEW(info, OMPACCEL_TINFO, 1);
696   info->func_sptr = func_sptr;
697   info->n_symbols = 0;
698   if (max_nargs != 0) {
699     NEW(info->symbols, OMPACCEL_SYM, max_nargs);
700     NEW(info->quiet_symbols, OMPACCEL_SYM, max_nargs);
701   } else {
702     info->symbols = nullptr;
703     info->quiet_symbols = nullptr;
704   }
705   info->sz_symbols = info->sz_quiet_symbols = max_nargs;
706   info->mode = NextTargetMode;
707   NextTargetMode = mode_none_target;
708   info->nowait = false;
709   info->n_quiet_symbols = 0;
710   NEW(info->reduction_symbols, OMPACCEL_RED_SYM, tinfo_size_reductions);
711   info->n_reduction_symbols = 0;
712 
713   /* add ot to array */
714   NEED(num_tinfos + 1, tinfos, OMPACCEL_TINFO *, tinfo_size,
715        tinfo_size * INC_EXP);
716   tinfos[num_tinfos++] = info;
717 
718   /* linking */
719   if (current_tinfo != nullptr)
720     info->parent_tinfo = current_tinfo;
721   else
722     info->parent_tinfo = nullptr;
723   current_tinfo = info;
724   return info;
725 }
726 
727 bool
ompaccel_tinfo_has(int func_sptr)728 ompaccel_tinfo_has(int func_sptr)
729 {
730   for (int i = 0; i < num_tinfos; ++i) {
731     if (tinfos[i]->func_sptr == func_sptr) {
732       return true;
733     }
734   }
735   return false;
736 }
737 
738 OMPACCEL_TINFO *
ompaccel_tinfo_get(int func_sptr)739 ompaccel_tinfo_get(int func_sptr)
740 {
741   int i;
742   for (i = 0; i < num_tinfos; ++i) {
743     if (tinfos[i]->func_sptr == func_sptr) {
744       return tinfos[i];
745     }
746   }
747   return nullptr;
748 }
749 
750 SPTR
ompaccel_create_device_symbol(SPTR sptr,int count)751 ompaccel_create_device_symbol(SPTR sptr, int count)
752 {
753   SPTR sym, sptr_alloc;
754   char name[252];
755   DTYPE dtype = DTYPEG(sptr);
756   bool byval;
757   if (DTYPEG(sptr) == DT_ADDR || DTY(DTYPEG(sptr)) == TY_ARRAY)
758     byval = false;
759   else
760     byval = true;
761   if (byval) {
762     sprintf(name, "Arg_%s_%d", SYMNAME(sptr), count);
763   } else {
764     if (strlen(SYMNAME(sptr)) == 0)
765       sprintf(name, "Arg_%s%d", SYMNAME(sptr), count);
766     else
767       sprintf(name, "Arg_%s", SYMNAME(sptr));
768   }
769   sym = getsymbol(name);
770 
771   SCP(sym, SC_DUMMY);
772 
773   if (dtype == DT_CPTR) {
774     dtype = DT_INT8;
775   }
776   // assume it's base of allocatable descriptor
777   if (strncmp(SYMNAME(sptr), ".Z", 2) == 0) {
778     for (int j = 0; j < current_tinfo->n_quiet_symbols; ++j)
779       if (MIDNUMG(current_tinfo->quiet_symbols[j].host_sym) == sptr)
780         sptr_alloc = current_tinfo->quiet_symbols[j].host_sym;
781     byval = false;
782     DTYPEP(sym, DTYPE(DTYPEG(sptr_alloc) + 1));
783     sptr_alloc = ((SPTR)0);
784 
785   } else {
786     DTYPEP(sym, dtype);
787   }
788   STYPEP(sym, ST_VAR);
789   PASSBYVALP(sym, byval);
790 
791   OMPACCDEVSYMP(sym, TRUE);
792   return sym;
793 }
794 
795 INLINE static SPTR
add_symbol_to_function(SPTR func,SPTR sym)796 add_symbol_to_function(SPTR func, SPTR sym)
797 {
798   int dpdscp, paramct;
799   paramct = PARAMCTG(func);
800   paramct += 1;
801   aux.dpdsc_base[paramct] = sym;
802   PARAMCTP(func, paramct);
803   aux.dpdsc_avl += 1;
804 }
805 
806 INLINE static SPTR
get_devsptr(OMPACCEL_TINFO * tinfo,SPTR host_symbol)807 get_devsptr(OMPACCEL_TINFO *tinfo, SPTR host_symbol)
808 {
809   int i;
810   if (tinfo == nullptr)
811     return host_symbol;
812 
813   for (i = 0; i < tinfo->n_symbols; ++i) {
814     if (tinfo->symbols[i].host_sym == host_symbol) {
815       if (tinfo->symbols[i].device_sym == NOSYM) {
816         /* It is second case that we catch the symbols in target region from the
817          * ILM. In case there is a symbol that has no device symbol created, we
818          * should create device symbol for it also we should add it function
819          * parameter. */
820         tinfo->symbols[i].device_sym =
821             ompaccel_create_device_symbol(tinfo->symbols[i].host_sym, 1);
822         add_symbol_to_function(tinfo->func_sptr, tinfo->symbols[i].device_sym);
823       }
824       return tinfo->symbols[i].device_sym;
825     }
826   }
827   return host_symbol;
828 }
829 
830 INLINE static SPTR
get_devsptr2(OMPACCEL_TINFO * tinfo,SPTR host_symbol)831 get_devsptr2(OMPACCEL_TINFO *tinfo, SPTR host_symbol)
832 {
833   int i;
834   for (i = 0; i < tinfo->n_symbols; ++i) {
835     if (tinfo->symbols[i].device_sym == host_symbol) {
836       return tinfo->symbols[i].host_sym;
837     }
838   }
839   return host_symbol;
840 }
841 
842 OMPACCEL_TINFO *
ompaccel_tinfo_current_get_targetdata()843 ompaccel_tinfo_current_get_targetdata()
844 {
845   OMPACCEL_TINFO *tinfo = current_tinfo;
846   while (tinfo != nullptr) {
847     if (tinfo->mode == mode_target_data_region)
848       return tinfo;
849     if (tinfo->parent_tinfo == nullptr)
850       break;
851     tinfo = tinfo->parent_tinfo;
852   }
853   ompaccel_msg_interr("XXX", "Beginning of 'target data' is not found. ");
854   return nullptr;
855 }
856 
857 OMPACCEL_TINFO *
ompaccel_tinfo_current_get()858 ompaccel_tinfo_current_get()
859 {
860   return current_tinfo;
861 }
862 
863 DTYPE
ompaccel_tinfo_current_get_dev_dtype(DTYPE org_dtype)864 ompaccel_tinfo_current_get_dev_dtype(DTYPE org_dtype)
865 {
866   int i;
867   DTYPE dev_dtype = org_dtype;
868   if (current_tinfo != nullptr) {
869     for (i = 0; i < current_tinfo->n_quiet_symbols; ++i) {
870       if (DTYPEG(current_tinfo->quiet_symbols[i].host_sym) == org_dtype) {
871         dev_dtype = DTYPEG(current_tinfo->quiet_symbols[i].device_sym);
872         break;
873       }
874     }
875 
876     for (i = 0; i < current_tinfo->n_symbols; ++i) {
877       if (DTYPEG(current_tinfo->symbols[i].host_sym) == org_dtype) {
878         dev_dtype = DTYPEG(current_tinfo->symbols[i].device_sym);
879         break;
880       }
881     }
882   }
883   if (DBGBIT(61, 2) && gbl.dbgfil != nullptr) {
884     if (org_dtype != dev_dtype) {
885       fprintf(gbl.dbgfil, "[ompaccel] REPLACED org_dtype:%d --> dev_dtype:%d",
886               org_dtype, dev_dtype);
887     }
888   }
889   return dev_dtype;
890 }
891 
892 SPTR
ompaccel_tinfo_parent_get_devsptr(SPTR host_symbol)893 ompaccel_tinfo_parent_get_devsptr(SPTR host_symbol)
894 {
895   int i;
896   if (current_tinfo->parent_tinfo == nullptr)
897     return host_symbol;
898   for (i = 0; i < current_tinfo->parent_tinfo->n_quiet_symbols; ++i) {
899     if (current_tinfo->parent_tinfo->quiet_symbols[i].host_sym == host_symbol) {
900       return current_tinfo->parent_tinfo->quiet_symbols[i].device_sym;
901     }
902   }
903   return host_symbol;
904 }
905 
906 bool
ompaccel_tinfo_current_is_registered(SPTR host_symbol)907 ompaccel_tinfo_current_is_registered(SPTR host_symbol)
908 {
909   int i;
910   if (current_tinfo == nullptr || !host_symbol)
911     return false;
912 
913   for (i = 0; i < current_tinfo->n_symbols; ++i) {
914     if (current_tinfo->symbols[i].host_sym == host_symbol) {
915       return true;
916     }
917   }
918   return false;
919 }
920 
921 SPTR
ompaccel_tinfo_current_get_devsptr(SPTR host_symbol)922 ompaccel_tinfo_current_get_devsptr(SPTR host_symbol)
923 {
924   SPTR device_symbol;
925   if (current_tinfo == nullptr || !host_symbol)
926     return host_symbol;
927 
928   device_symbol = get_devsptr(current_tinfo, host_symbol);
929 
930   if (device_symbol == host_symbol && current_tinfo->parent_tinfo != nullptr)
931     device_symbol = get_devsptr2(current_tinfo->parent_tinfo, host_symbol);
932 
933   if ((DBGBIT(61, 2)) && gbl.dbgfil != nullptr &&
934       device_symbol != host_symbol) {
935     fprintf(gbl.dbgfil,
936             "[ompaccel] REPLACED host_symbol:%d[%s] --> device_symbol:%d[%s]",
937             host_symbol, SYMNAME(host_symbol), device_symbol,
938             SYMNAME(device_symbol));
939     fprintf(gbl.dbgfil, "\n");
940   }
941 
942   return device_symbol;
943 }
944 
945 static bool
tinfo_update_maptype(OMPACCEL_SYM * tsyms,int nargs,SPTR host_symbol,int map_type)946 tinfo_update_maptype(OMPACCEL_SYM *tsyms, int nargs, SPTR host_symbol,
947                      int map_type)
948 {
949   int i;
950   for (i = 0; i < nargs; ++i) {
951     if (tsyms[i].host_sym == host_symbol) {
952       tsyms[i].map_type = map_type;
953       if (STYPEG(tsyms[i].host_sym) != ST_ARRAY) {
954         /* if scalar variables are used in map clause, pass them by reference */
955         if (map_type & OMP_TGT_MAPTYPE_FROM || map_type & OMP_TGT_MAPTYPE_TO)
956           PASSBYREFP(tsyms[i].device_sym, 1);
957         PASSBYVALP(tsyms[i].device_sym, 0);
958       }
959       return true;
960     }
961   }
962   return false;
963 }
964 
965 void
ompaccel_tinfo_current_add_reductionitem(SPTR private_sym,SPTR shared_sym,int redop)966 ompaccel_tinfo_current_add_reductionitem(SPTR private_sym, SPTR shared_sym,
967                                          int redop)
968 {
969   if (current_tinfo == nullptr)
970     ompaccel_msg_interr("XXX", "Current target info is not found.\n");
971 
972   current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols]
973       .private_sym = private_sym;
974   current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols]
975       .shared_sym = shared_sym;
976   current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols].redop =
977       redop;
978   current_tinfo->n_reduction_symbols++;
979   // it is initially created pass by value. It should be address, it should be
980   // copied back to the host.
981   PASSBYVALP(private_sym, 0);
982 
983   /* Mark reduction variable as tofrom */
984   if (ompaccel_tinfo_current_target_mode() ==
985       mode_target_teams_distribute_parallel_for ||
986       ompaccel_tinfo_current_target_mode() ==
987           mode_target_teams_distribute_parallel_for_simd)
988     ompaccel_tinfo_current_addupdate_mapitem((SPTR)HASHLKG(private_sym),
989                                              OMP_TGT_MAPTYPE_TARGET_PARAM |
990                                                  OMP_TGT_MAPTYPE_TO |
991                                                  OMP_TGT_MAPTYPE_FROM);
992   else
993     ompaccel_tinfo_current_addupdate_mapitem((SPTR)HASHLKG(shared_sym),
994                                              OMP_TGT_MAPTYPE_TARGET_PARAM |
995                                                  OMP_TGT_MAPTYPE_TO |
996                                                  OMP_TGT_MAPTYPE_FROM);
997 }
998 
999 void
ompaccel_tinfo_current_addupdate_mapitem(SPTR host_symbol,int map_type)1000 ompaccel_tinfo_current_addupdate_mapitem(SPTR host_symbol, int map_type)
1001 {
1002   SPTR midsptr;
1003   if (current_tinfo == nullptr)
1004     ompaccel_msg_interr("XXX", "Current target info is not found\n");
1005 
1006   // check whether it is allocatable or not
1007   if (SCG(host_symbol) == SC_BASED) {
1008     /* if it is in data mode, we should keep midnum at active symbols*/
1009     if (current_tinfo->mode == mode_target_data_enter_region ||
1010         current_tinfo->mode == mode_target_data_exit_region ||
1011         current_tinfo->mode == mode_target_data_region) {
1012       midsptr = (SPTR)MIDNUMG(host_symbol);
1013       if (!tinfo_update_maptype(current_tinfo->symbols,
1014                                 current_tinfo->n_symbols, midsptr, map_type))
1015         ompaccel_tinfo_current_add_sym(midsptr, NOSYM, map_type);
1016     }
1017     /* Main argument will be kept at passive */
1018     if (!tinfo_update_maptype(current_tinfo->quiet_symbols,
1019                               current_tinfo->n_quiet_symbols, host_symbol,
1020                               map_type))
1021       ompaccel_tinfo_current_add_sym(host_symbol, NOSYM, map_type);
1022   } else {
1023     if (!tinfo_update_maptype(current_tinfo->symbols, current_tinfo->n_symbols,
1024                               host_symbol, map_type))
1025       ompaccel_tinfo_current_add_sym(host_symbol, NOSYM, map_type);
1026   }
1027 }
1028 
1029 void
ompaccel_tinfo_current_add_sym(SPTR host_symbol,SPTR device_symbol,int map_type)1030 ompaccel_tinfo_current_add_sym(SPTR host_symbol, SPTR device_symbol,
1031                                int map_type)
1032 {
1033   if ((MIDNUMG(host_symbol) && SCG(host_symbol) == SC_BASED)) {
1034     NEED((current_tinfo->n_quiet_symbols + 1), current_tinfo->quiet_symbols,
1035          OMPACCEL_SYM, current_tinfo->sz_quiet_symbols,
1036          current_tinfo->sz_quiet_symbols * INC_EXP);
1037     current_tinfo->quiet_symbols[current_tinfo->n_quiet_symbols].host_sym =
1038         host_symbol;
1039     current_tinfo->quiet_symbols[current_tinfo->n_quiet_symbols].device_sym =
1040         device_symbol;
1041     current_tinfo->quiet_symbols[current_tinfo->n_quiet_symbols].map_type =
1042         map_type;
1043     current_tinfo->n_quiet_symbols++;
1044   } else {
1045     NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM,
1046          current_tinfo->sz_symbols, current_tinfo->sz_symbols * INC_EXP);
1047     current_tinfo->symbols[current_tinfo->n_symbols].host_sym = host_symbol;
1048     current_tinfo->symbols[current_tinfo->n_symbols].device_sym = device_symbol;
1049     current_tinfo->symbols[current_tinfo->n_symbols].map_type = map_type;
1050     current_tinfo->n_symbols++;
1051   }
1052 }
1053 
1054 INLINE static void
dumptargetsym(OMPACCEL_SYM targetsym)1055 dumptargetsym(OMPACCEL_SYM targetsym)
1056 {
1057   const char *dev_sptr_name, *org_sptr_name;
1058   if (gbl.dbgfil == nullptr)
1059     return;
1060 
1061   dev_sptr_name =
1062       targetsym.device_sym == NOSYM ? "NOSYM" : SYMNAME(targetsym.device_sym);
1063   org_sptr_name =
1064       targetsym.host_sym == NOSYM ? "NOSYM" : SYMNAME(targetsym.host_sym);
1065 
1066   fprintf(gbl.dbgfil,
1067           "\t(org:%d[%s], dev:%d[%s], map-type: ", targetsym.host_sym,
1068           org_sptr_name, targetsym.device_sym, dev_sptr_name);
1069 
1070   if (targetsym.map_type & OMP_TGT_MAPTYPE_ALWAYS)
1071     fprintf(gbl.dbgfil, "always ");
1072   if (targetsym.map_type & OMP_TGT_MAPTYPE_TO)
1073     fprintf(gbl.dbgfil, "to ");
1074   if (targetsym.map_type & OMP_TGT_MAPTYPE_FROM)
1075     fprintf(gbl.dbgfil, "from ");
1076   if (targetsym.map_type & OMP_TGT_MAPTYPE_DELETE)
1077     fprintf(gbl.dbgfil, "delete ");
1078   if (targetsym.map_type & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
1079     fprintf(gbl.dbgfil, "ptr_obj ");
1080   if (targetsym.map_type & OMP_TGT_MAPTYPE_TARGET_PARAM)
1081     fprintf(gbl.dbgfil, "target ");
1082   if (targetsym.map_type & OMP_TGT_MAPTYPE_RETURN_PARAM)
1083     fprintf(gbl.dbgfil, "return ");
1084   if (targetsym.map_type & OMP_TGT_MAPTYPE_PRIVATE)
1085     fprintf(gbl.dbgfil, "private ");
1086   if (targetsym.map_type & OMP_TGT_MAPTYPE_LITERAL)
1087     fprintf(gbl.dbgfil, "literal ");
1088   if (targetsym.map_type & OMP_TGT_MAPTYPE_IMPLICIT)
1089     fprintf(gbl.dbgfil, "implicit ");
1090   if (targetsym.map_type & OMP_TGT_MAPTYPE_MEMBER_OF)
1091     fprintf(gbl.dbgfil, "member ");
1092   if (targetsym.map_type & OMP_TGT_MAPTYPE_NONE)
1093     fprintf(gbl.dbgfil, "none ");
1094   fprintf(gbl.dbgfil, " )\n");
1095 }
1096 
1097 INLINE static void
dumptargetreduction(OMPACCEL_RED_SYM targetred)1098 dumptargetreduction(OMPACCEL_RED_SYM targetred)
1099 {
1100   if (gbl.dbgfil == nullptr)
1101     return;
1102   switch (targetred.redop) {
1103   case 1:
1104     fprintf(gbl.dbgfil, "+:  ");
1105     break;
1106   case 2:
1107     fprintf(gbl.dbgfil, "-:  ");
1108     break;
1109   case 3:
1110     fprintf(gbl.dbgfil, "*:  ");
1111     break;
1112   case 346:
1113     fprintf(gbl.dbgfil, "max:");
1114     break;
1115   case 347:
1116     fprintf(gbl.dbgfil, "min:");
1117     break;
1118   case 327:
1119     fprintf(gbl.dbgfil, "iand:");
1120     break;
1121   case 328:
1122     fprintf(gbl.dbgfil, "ior:");
1123     break;
1124   case 329:
1125     fprintf(gbl.dbgfil, "ieor:");
1126     break;
1127   case 14:
1128     fprintf(gbl.dbgfil, "???:");
1129     break;
1130   default:
1131     break;
1132   }
1133 
1134   fprintf(gbl.dbgfil, "\t(shared:%d[%s], private:%d[%s] \n",
1135           targetred.shared_sym, SYMNAME(targetred.shared_sym),
1136           targetred.private_sym, SYMNAME(targetred.private_sym));
1137 }
1138 
1139 void
dumpomptarget(OMPACCEL_TINFO * tinfo)1140 dumpomptarget(OMPACCEL_TINFO *tinfo)
1141 {
1142   if (tinfo == nullptr)
1143     return;
1144   if (gbl.dbgfil == nullptr)
1145     return;
1146 
1147   switch (tinfo->mode) {
1148   case mode_none_target:
1149 
1150     fprintf(gbl.dbgfil, " <mode none>");
1151     break;
1152   case mode_target:
1153     fprintf(gbl.dbgfil, " <target>");
1154     break;
1155   case mode_target_teams:
1156     fprintf(gbl.dbgfil, " <target teams>");
1157     break;
1158   case mode_target_parallel_for_simd:
1159     fprintf(gbl.dbgfil, " <target parallel for simd>");
1160     break;
1161   case mode_target_parallel_for:
1162     fprintf(gbl.dbgfil, " <target parallel for>");
1163     break;
1164   case mode_target_teams_distribute_parallel_for:
1165     fprintf(gbl.dbgfil, " <target teams distribute parallel for>");
1166     break;
1167   case mode_target_teams_distribute_parallel_for_simd:
1168     fprintf(gbl.dbgfil, " <target teams distribute parallel for simd>");
1169     break;
1170   case mode_target_teams_distribute:
1171     fprintf(gbl.dbgfil, " <target teams distribute >");
1172     break;
1173   case mode_target_data_region:
1174     fprintf(gbl.dbgfil, " <target data>");
1175     break;
1176   case mode_target_data_enter_region:
1177     fprintf(gbl.dbgfil, " <target data enter>");
1178     break;
1179   case mode_target_data_exit_region:
1180     fprintf(gbl.dbgfil, " <target data exit>");
1181     break;
1182   }
1183   fprintf(gbl.dbgfil, " \n");
1184   //}
1185 
1186   if ((tinfo->mode != mode_target_data_region) &&
1187       (tinfo->mode != mode_target_data_enter_region) &&
1188       (tinfo->mode != mode_target_data_exit_region)) {
1189     if (OMPACCFUNCKERNELG(tinfo->func_sptr))
1190       fprintf(gbl.dbgfil, " (__global__) ");
1191     else if (OMPACCFUNCDEVG(tinfo->func_sptr))
1192       fprintf(gbl.dbgfil, " (__device__) ");
1193     else
1194       fprintf(gbl.dbgfil, " ??? ");
1195     fprintf(gbl.dbgfil, "%s\t sptr: %d \n", SYMNAME(tinfo->func_sptr),
1196             tinfo->func_sptr);
1197   }
1198 
1199   fprintf(gbl.dbgfil, " ** Active Symbols ** \n");
1200   for (int j = 0; j < tinfo->n_symbols; ++j) {
1201     dumptargetsym(tinfo->symbols[j]);
1202   }
1203   fprintf(gbl.dbgfil, " ** Passive Symbols ** \n");
1204   for (int j = 0; j < tinfo->n_quiet_symbols; ++j) {
1205     dumptargetsym(tinfo->quiet_symbols[j]);
1206   }
1207   fprintf(gbl.dbgfil, " ** Reductions ** \n");
1208   for (int j = 0; j < tinfo->n_reduction_symbols; ++j) {
1209     dumptargetreduction(tinfo->reduction_symbols[j]);
1210   }
1211   fprintf(gbl.dbgfil, "\n");
1212 }
1213 
1214 void
dumpomptargets()1215 dumpomptargets()
1216 {
1217   int i, j;
1218   if (gbl.dbgfil == NULL)
1219     return;
1220   fprintf(gbl.dbgfil, "------------OpenMP Target Regions ---------------\n");
1221   for (i = 0; i < num_tinfos; ++i) {
1222     dumpomptarget(tinfos[i]);
1223   }
1224 }
1225 
1226 void
dumpomptargetsymbols()1227 dumpomptargetsymbols()
1228 {
1229   int i, l, u;
1230   l = stb.firstusym;
1231   u = stb.stg_avail - 1;
1232   if (u >= stb.stg_avail)
1233     u = stb.stg_avail - 1;
1234   for (i = l; i <= u; ++i) {
1235     if (OMPACCDEVSYMG(i))
1236       fprintf(gbl.dbgfil, "(sym) sptr:%d [%s]\n", i, SYMNAME(i));
1237     if (OMPACCFUNCDEVG(i))
1238       fprintf(gbl.dbgfil, "(func) sptr:%d [%s]\n", i, SYMNAME(i));
1239     if (OMPACCFUNCKERNELG(i))
1240       fprintf(gbl.dbgfil, "(kernel) sptr:%d [%s]\n", i, SYMNAME(i));
1241   }
1242 }
1243 
1244 void
dumptargetsymbols(OMPACCEL_SYM * targetsyms,int n)1245 dumptargetsymbols(OMPACCEL_SYM *targetsyms, int n)
1246 {
1247   for (int i = 0; i < n; ++i) {
1248     dumptargetsym(targetsyms[i]);
1249   }
1250 }
1251 
1252 void
ompaccel_msg_interr(char * id,const char * message)1253 ompaccel_msg_interr(char *id, const char *message)
1254 {
1255   interr(message, MSGOMPACCEL, ERR_Fatal);
1256 }
1257 
1258 void
ompaccel_msg_info(char * id,const char * message)1259 ompaccel_msg_info(char *id, const char *message)
1260 {
1261   ccff_info(MSGOMPACCEL, id, gbl.findex, gbl.lineno, message, NULL);
1262 }
1263 
1264 bool
ompaccel_is_tgt_registered()1265 ompaccel_is_tgt_registered()
1266 {
1267   return isOmpaccelRegistered;
1268 }
1269 
1270 void
ompaccel_register_tgt()1271 ompaccel_register_tgt()
1272 {
1273   isOmpaccelRegistered = true;
1274 }
1275 
1276 void
ompaccel_emit_tgt_register()1277 ompaccel_emit_tgt_register()
1278 {
1279   int ilix;
1280   SPTR sptrFn;
1281   char *name = "ompaccel.register";
1282   sptrFn = mk_ompaccel_function(name, 0, NULL, false);
1283   CONSTRUCTORP(sptrFn, 1);
1284   TEXTSTARTUPP(sptrFn, 1);
1285   PRIORITYP(sptrFn, 65535 /* LLVM_DEFAULT_PRIORITY */);
1286   cr_block();
1287   ilix = ll_make_tgt_register_lib();
1288   iltb.callfg = 1;
1289   chk_block(ilix);
1290   wr_block();
1291   mk_ompaccel_function_end(sptrFn);
1292 }
1293 
1294 SPTR
ompaccel_nvvm_emit_reduce(OMPACCEL_RED_SYM * ReductionItems,int NumReductions)1295 ompaccel_nvvm_emit_reduce(OMPACCEL_RED_SYM *ReductionItems, int NumReductions)
1296 {
1297   int ili, bili, rili;
1298   SPTR sptrFn, sptrRhs, sptrReduceData, func_params[2];
1299   DTYPE dtypeReductionItem, dtypeReduceData;
1300   int nmeReduceData, nmeRhs;
1301   int params_dtypes[2] = {DT_ADDR, DT_ADDR};
1302   char name[30];
1303 
1304   /* Generate function parameters */
1305   dtypeReduceData = get_type(2, TY_PTR, DT_ANY);
1306   sptrReduceData = func_params[0] =
1307       mk_ompaccel_addsymbol(".reducedata", dtypeReduceData, SC_DUMMY, ST_ARRAY);
1308   sptrRhs = func_params[1] =
1309       mk_ompaccel_addsymbol(".rhs", dtypeReduceData, SC_DUMMY, ST_VAR);
1310 
1311   /* Generate function symbol */
1312   sprintf(name, "%s%d", "ompaccel_reduction", reductionFunctionCounter++);
1313   sptrFn = mk_ompaccel_function(name, 2, func_params, true);
1314   cr_block();
1315 
1316   for (int i = 0; i < NumReductions; ++i) {
1317     dtypeReductionItem = DTYPEG(ReductionItems[i].shared_sym);
1318 
1319     bili = mk_ompaccel_ldsptr(sptrReduceData);
1320     rili = mk_ompaccel_ldsptr(sptrRhs);
1321 
1322     if (i != 0) {
1323       bili = mk_ompaccel_add(bili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1324                              DT_ADDR);
1325       rili = mk_ompaccel_add(rili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1326                              DT_ADDR);
1327     }
1328 
1329     nmeReduceData =
1330         add_arrnme(NT_IND, SPTR_NULL, addnme(NT_VAR, sptrReduceData, 0, 0), 0,
1331                    ad_icon(i), FALSE);
1332     nmeRhs =
1333         add_arrnme(NT_IND, SPTR_NULL,
1334                    addnme(NT_IND, SPTR_NULL, addnme(NT_VAR, sptrRhs, 0, 0), 0),
1335                    i, ad_icon(i), FALSE);
1336 
1337     bili =
1338         mk_ompaccel_load(bili, DT_ADDR, addnme(NT_VAR, sptrReduceData, 0, 0));
1339     rili = mk_ompaccel_load(rili, DT_ADDR, addnme(NT_VAR, sptrRhs, 0, 0));
1340 
1341     rili = mk_ompaccel_load(rili, dtypeReductionItem, nmeRhs);
1342     ili = mk_ompaccel_load(bili, dtypeReductionItem, nmeReduceData);
1343 
1344     ili = mk_reduction_op(ReductionItems[i].redop, ili, dtypeReductionItem,
1345                           rili, dtypeReductionItem);
1346 
1347     ili = mk_ompaccel_store(ili, dtypeReductionItem, nmeReduceData, bili);
1348     chk_block(ili);
1349   }
1350   wr_block();
1351   mk_ompaccel_function_end(sptrFn);
1352   return sptrFn;
1353 }
1354 
1355 SPTR
ompaccel_nvvm_emit_shuffle_reduce(OMPACCEL_RED_SYM * ReductionItems,int NumReductions,SPTR sptrFnReduce)1356 ompaccel_nvvm_emit_shuffle_reduce(OMPACCEL_RED_SYM *ReductionItems,
1357                                   int NumReductions, SPTR sptrFnReduce)
1358 {
1359   int ili, rili, bili;
1360   SPTR sptrFn, sptrRhs, sptrReduceData, sptrShuffleReturn, sptrLaneOffset,
1361       func_params[4];
1362   DTYPE dtypeReductionItem, dtypeReduceData, dtypeRHS;
1363   int nmeReduceData, nmeRhs, params[2];
1364   char name[30];
1365   DTYPE params_dtypes[2] = {DT_ADDR, DT_ADDR};
1366 
1367   /* Generate function parameters */
1368   dtypeReduceData = get_type(2, TY_PTR, DT_ANY);
1369   sptrReduceData = func_params[0] = mk_ompaccel_addsymbol(
1370       ".reducedata2", dtypeReduceData, SC_DUMMY, ST_ARRAY);
1371   func_params[1] = mk_ompaccel_addsymbol(".laneid", DT_SINT, SC_DUMMY, ST_VAR);
1372   sptrLaneOffset = func_params[2] =
1373       mk_ompaccel_addsymbol(".laneoffset", DT_SINT, SC_DUMMY, ST_VAR);
1374   func_params[3] =
1375       mk_ompaccel_addsymbol(".shortcircuit", DT_SINT, SC_DUMMY, ST_VAR);
1376   PASSBYVALP(func_params[1], 1);
1377   PASSBYVALP(func_params[2], 1);
1378   PASSBYVALP(func_params[3], 1);
1379 
1380   /* Generate function symbol */
1381   sprintf(name, "%s%d", "ompaccel_shufflereduce", reductionFunctionCounter++);
1382   sptrFn = mk_ompaccel_function(name, 4, func_params, true);
1383   cr_block();
1384 
1385   dtypeRHS = mk_ompaccel_array_dtype(dtypeReduceData, NumReductions);
1386   sptrRhs = mk_ompaccel_addsymbol(".rhs", dtypeRHS, SC_LOCAL, ST_ARRAY);
1387 
1388   for (int i = 0; i < NumReductions; ++i) {
1389 
1390     dtypeReductionItem = DTYPEG(ReductionItems[i].shared_sym);
1391     sptrShuffleReturn =
1392         mk_ompaccel_getnewccsym('r', i, dtypeReductionItem, SC_LOCAL, ST_VAR);
1393 
1394     bili = mk_ompaccel_ldsptr(sptrReduceData);
1395     rili = mk_address(sptrRhs);
1396 
1397     nmeReduceData =
1398         add_arrnme(NT_IND, SPTR_NULL, addnme(NT_VAR, sptrReduceData, 0, 0), i,
1399                    ad_icon(i), FALSE);
1400 
1401     if (i != 0) {
1402       rili = mk_ompaccel_add(rili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1403                              DT_ADDR);
1404       bili = mk_ompaccel_add(bili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1405                              DT_ADDR);
1406     }
1407 
1408     ili = mk_ompaccel_load(bili, DT_ADDR, nmeReduceData);
1409     ili = mk_ompaccel_load(ili, dtypeReductionItem, nmeReduceData);
1410 
1411     if (dtypeReductionItem == DT_DBLE)
1412       ili =
1413           ll_make_kmpc_shuffle(ili, mk_ompaccel_ldsptr(func_params[2]),
1414                                ad_icon(size_of(dtypeReductionItem) * 8), true);
1415     else
1416       ili =
1417           ll_make_kmpc_shuffle(ili, mk_ompaccel_ldsptr(func_params[2]),
1418                                ad_icon(size_of(dtypeReductionItem) * 8), false);
1419 
1420     ili = mk_ompaccel_store(ili, dtypeReductionItem,
1421                             addnme(NT_VAR, sptrShuffleReturn, 0, 0),
1422                             ad_acon(sptrShuffleReturn, 0));
1423     chk_block(ili);
1424 
1425     nmeRhs = add_arrnme(NT_ARR, NME_NULL, addnme(NT_VAR, sptrRhs, 0, 0), i,
1426                         ad_icon(i), FALSE);
1427     ili =
1428         mk_ompaccel_store(ad_acon(sptrShuffleReturn, 0), DT_ADDR, nmeRhs, rili);
1429 
1430     chk_block(ili);
1431   }
1432 
1433   params[0] = mk_address(sptrRhs);
1434   params[1] = mk_address(sptrReduceData);
1435 
1436   /* Call reduce function */
1437   ili = mk_function_call(DT_NONE, 2, params_dtypes, params, sptrFnReduce);
1438 
1439   /* Write to block */
1440   iltb.callfg = 1;
1441   chk_block(ili);
1442 
1443   wr_block();
1444   mk_ompaccel_function_end(sptrFn);
1445 
1446   return sptrFn;
1447 }
1448 
1449 /**
1450    \brief This function emits code that gathers reduce_data from the first lane
1451    of every active warp to lanes in the first warp.
1452  */
1453 SPTR
ompaccel_nvvm_emit_inter_warp_copy(OMPACCEL_RED_SYM * ReductionItems,int NumReductions)1454 ompaccel_nvvm_emit_inter_warp_copy(OMPACCEL_RED_SYM *ReductionItems,
1455                                    int NumReductions)
1456 {
1457   int ili, rili;
1458   SPTR sptrFn, sptrReduceData, sptrWarpNum, sptrShmem, sptrWarpId,
1459       sptrMasterWarp, sptrRedItem, sptrRedItemAddress, func_params[2];
1460   SPTR lFirstLane, lBarrier, lFirstWarp, lFinalBarrier;
1461   int nmeShmem;
1462   DTYPE dtypeReductionItem;
1463   char name[30];
1464 
1465   sprintf(name, "%s%d", "ompaccel_InterWarpCopy", reductionFunctionCounter++);
1466   sptrReduceData = func_params[0] = mk_ompaccel_addsymbol(
1467       ".reduceData", mk_ompaccel_array_dtype(DT_INT8, NumReductions), SC_DUMMY,
1468       ST_ARRAY);
1469   sptrWarpNum = func_params[1] =
1470       mk_ompaccel_addsymbol(".warpNum", DT_INT, SC_DUMMY, ST_VAR);
1471   PASSBYVALP(sptrWarpNum, 1);
1472   /* Generate function symbol, Create a block */
1473   sptrFn = mk_ompaccel_function(name, 2, func_params, true);
1474   cr_block();
1475 
1476   sptrShmem = mk_ompaccel_addsymbol(
1477       "ompaccelshmem", mk_ompaccel_array_dtype(DT_INT8, NVVM_WARPSIZE),
1478       SC_EXTERN, ST_ARRAY);
1479   OMPACCSHMEMP(sptrShmem, true);
1480   SYMLKP(sptrShmem, gbl.externs);
1481   gbl.externs = sptrShmem;
1482 
1483   /* MasterWarp */
1484   sptrMasterWarp =
1485       mk_ompaccel_addsymbol(".masterwarp", DT_INT, SC_LOCAL, ST_VAR);
1486   ili = ompaccel_nvvm_get(threadIdX);
1487   ili = mk_ompaccel_iand(ili, ad_icon(31));
1488   ili = mk_ompaccel_stsptr(ili, sptrMasterWarp);
1489 
1490   chk_block(ili);
1491 
1492   /* MasterWarp */
1493   sptrWarpId = mk_ompaccel_addsymbol(".warpid", DT_INT, SC_LOCAL, ST_VAR);
1494   ili = ompaccel_nvvm_get(threadIdX);
1495   ili = mk_ompaccel_shift(ili, DT_UINT, ad_icon(5), DT_UINT);
1496   ili = mk_ompaccel_stsptr(ili, sptrWarpId);
1497 
1498   chk_block(ili);
1499   iltb.callfg = 1;
1500   wr_block();
1501 
1502   sptrRedItem =
1503       mk_ompaccel_addsymbol(".reductionitem", DT_ADDR, SC_LOCAL, ST_VAR);
1504   sptrRedItemAddress =
1505       mk_ompaccel_addsymbol(".reductionitemaddr", DT_ADDR, SC_LOCAL, ST_VAR);
1506 
1507   for (int i = 0; i < NumReductions; ++i) {
1508     cr_block();
1509     dtypeReductionItem = DTYPEG(ReductionItems[i].shared_sym);
1510     rili = mk_ompaccel_ldsptr(sptrReduceData);
1511     if (i != 0) {
1512       rili = mk_ompaccel_add(rili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1513                              DT_ADDR);
1514     }
1515     rili =
1516         mk_ompaccel_load(rili, DT_ADDR, addnme(NT_VAR, sptrReduceData, 0, 0));
1517 
1518     ili =
1519         mk_ompaccel_store(rili, DT_ADDR, addnme(NT_VAR, sptrRedItem, 0, (INT)0),
1520                           mk_address(sptrRedItem));
1521     chk_block(ili);
1522     ili = mk_ompaccel_store(rili, DT_ADDR,
1523                             addnme(NT_VAR, sptrRedItemAddress, 0, (INT)0),
1524                             mk_address(sptrRedItemAddress));
1525     chk_block(ili);
1526 
1527     ili = mk_ompaccel_ldsptr(sptrMasterWarp);
1528     lFirstLane = getlab();
1529     lBarrier = getlab();
1530     ili = ad3ili(IL_ICJMPZ, ili, CC_NE, lBarrier);
1531     RFCNTI(lFirstLane);
1532     chk_block(ili);
1533     iltb.callfg = 1;
1534     wr_block();
1535 
1536     cr_block();
1537     exp_label(lFirstLane);
1538 
1539     rili = ad1ili(IL_IKMV, mk_ompaccel_ldsptr(sptrWarpId));
1540     ili = mk_ompaccel_mul(rili, DT_INT8, ad_kconi(8), DT_INT8);
1541     ili = ad1ili(IL_KAMV, ili);
1542     ili = mk_ompaccel_add(mk_address(sptrShmem), DT_ADDR, ili, DT_ADDR);
1543     nmeShmem = add_arrnme(NT_ARR, NME_NULL, addnme(NT_VAR, sptrShmem, 0, 0), 0,
1544                           rili, FALSE);
1545 
1546     rili = mk_ompaccel_ldsptr(sptrRedItem);
1547     // todo ompaccel more
1548     if (dtypeReductionItem == DT_DBLE) {
1549       rili = mk_ompaccel_load(rili, DT_DBLE, addnme(NT_VAR, sptrRedItem, 0, 0));
1550     } else if (dtypeReductionItem == DT_INT) {
1551       rili = mk_ompaccel_ld(rili, addnme(NT_IND, SPTR_NULL,
1552                                          addnme(NT_VAR, sptrRedItem, 0, 0), 0));
1553       rili = ad1ili(IL_FLOAT, rili);
1554     } else if (dtypeReductionItem == DT_FLOAT) {
1555       rili =
1556           mk_ompaccel_load(rili, DT_FLOAT, addnme(NT_VAR, sptrRedItem, 0, 0));
1557       rili = ad1ili(IL_DBLE, rili);
1558     }
1559     ili = mk_ompaccel_store(rili, DT_DBLE, nmeShmem, ili);
1560     chk_block(ili);
1561     iltb.callfg = 1;
1562     wr_block();
1563 
1564     /* Sync */
1565     cr_block();
1566     RFCNTI(lBarrier);
1567     exp_label(lBarrier);
1568     ili = ompaccel_nvvm_mk_barrier(CTA_BARRIER);
1569     iltb.callfg = 1;
1570     chk_block(ili);
1571     wr_block();
1572 
1573     cr_block();
1574     ili = mk_ompaccel_ldsptr(sptrWarpNum);
1575     rili = ompaccel_nvvm_get(threadIdX);
1576     ili = mk_ompaccel_compare(rili, DT_INT, ili, DT_INT, CC_GE);
1577     lFirstWarp = getlab();
1578     lFinalBarrier = getlab();
1579     ili = ad3ili(IL_ICJMPZ, ili, CC_NE, lFinalBarrier);
1580     chk_block(ili);
1581     iltb.callfg = 1;
1582     wr_block();
1583 
1584     cr_block();
1585     RFCNTI(lFirstWarp);
1586     exp_label(lFirstWarp);
1587     rili = ad1ili(IL_IKMV, ompaccel_nvvm_get(threadIdX));
1588     ili = mk_ompaccel_mul(rili, DT_INT8, ad_kconi(8), DT_INT8);
1589     ili = ad1ili(IL_KAMV, ili);
1590     ili = mk_ompaccel_add(mk_address(sptrShmem), DT_ADDR, ili, DT_ADDR);
1591     nmeShmem = add_arrnme(NT_ARR, NME_NULL, addnme(NT_VAR, sptrShmem, 0, 0), 0,
1592                           rili, FALSE);
1593 
1594     ili = mk_ompaccel_load(ili, DT_DBLE, nmeShmem);
1595     rili = mk_ompaccel_ldsptr(sptrRedItemAddress);
1596 
1597     if (dtypeReductionItem == DT_DBLE) {
1598       ili = mk_ompaccel_store(
1599           ili, DT_DBLE,
1600           addnme(NT_IND, NME_NULL, addnme(NT_VAR, sptrRedItemAddress, 0, 0), 0),
1601           rili);
1602     } else if (dtypeReductionItem == DT_INT) {
1603       ili = ad1ili(IL_DFIX, ili);
1604       ili = mk_ompaccel_store(
1605           ili, DT_NONE,
1606           addnme(NT_IND, NME_NULL, addnme(NT_VAR, sptrRedItemAddress, 0, 0), 0),
1607           rili);
1608     } else if (dtypeReductionItem == DT_FLOAT) {
1609       ili = ad1ili(IL_SNGL, ili);
1610       ili = mk_ompaccel_store(
1611           ili, DT_FLOAT,
1612           addnme(NT_IND, NME_NULL, addnme(NT_VAR, sptrRedItemAddress, 0, 0), 0),
1613           rili);
1614     }
1615     chk_block(ili);
1616     iltb.callfg = 1;
1617     wr_block();
1618 
1619     /* Sync */
1620     cr_block();
1621     RFCNTI(lFinalBarrier);
1622     exp_label(lFinalBarrier);
1623     ili = ompaccel_nvvm_mk_barrier(CTA_BARRIER);
1624     iltb.callfg = 1;
1625     chk_block(ili);
1626     wr_block();
1627   }
1628   /* Finalize the function */
1629   mk_ompaccel_function_end(sptrFn);
1630   return sptrFn;
1631 }
1632 
1633 /* Expander - OpenMP Accelerator Model */
1634 void
exp_ompaccel_bpar(ILM * ilmp,int curilm,SPTR uplevel_sptr,SPTR scopeSptr,int (incrOutlinedCnt ()))1635 exp_ompaccel_bpar(ILM *ilmp, int curilm, SPTR uplevel_sptr, SPTR scopeSptr,
1636                   int(incrOutlinedCnt()))
1637 {
1638   int ili, outlinedCnt;
1639   SPTR sptr;
1640   if (flg.opt != 0) {
1641     wr_block();
1642     cr_block();
1643   }
1644   if (!XBIT(232, 0x1)) {
1645     ll_rewrite_ilms(-1, curilm, 0);
1646     return;
1647   }
1648   outlinedCnt = incrOutlinedCnt();
1649   BIH_FT(expb.curbih) = TRUE;
1650   BIH_QJSR(expb.curbih) = TRUE;
1651   BIH_NOMERGE(expb.curbih) = TRUE;
1652   if (gbl.outlined)
1653     expb.sc = SC_PRIVATE;
1654   if (outlinedCnt == 1) {
1655     sptr = ll_make_outlined_ompaccel_func(uplevel_sptr, scopeSptr, FALSE);
1656 
1657     if (!PARENCLFUNCG(scopeSptr))
1658       PARENCLFUNCP(scopeSptr, sptr);
1659     ll_write_ilm_header(sptr, curilm);
1660 
1661     ili = ompaccel_nvvm_get(threadIdX);
1662     ili = ll_make_kmpc_spmd_kernel_init(ili);
1663     iltb.callfg = 1;
1664     chk_block(ili);
1665 
1666     ili = ll_make_outlined_ompaccel_call(gbl.ompoutlinedfunc, sptr);
1667     iltb.callfg = 1;
1668     chk_block(ili);
1669     gbl.ompoutlinedfunc = sptr;
1670 
1671   } else if (outlinedCnt > 1) {
1672     ll_rewrite_ilms(-1, curilm, 0);
1673   }
1674 }
1675 
1676 void
exp_ompaccel_epar(ILM * ilmp,int curilm,int outlinedCnt,int (decrOutlinedCnt ()))1677 exp_ompaccel_epar(ILM *ilmp, int curilm, int outlinedCnt,
1678                   int(decrOutlinedCnt()))
1679 {
1680   if (XBIT(232, 0x1)) {
1681     if (flg.opt != 0) {
1682       wr_block();
1683       cr_block();
1684     }
1685 
1686     if (outlinedCnt == 1) {
1687       ilm_outlined_pad_ilm(curilm);
1688     }
1689     outlinedCnt = decrOutlinedCnt();
1690   }
1691   if (outlinedCnt >= 1)
1692     ll_rewrite_ilms(-1, curilm, 0);
1693 
1694   if (gbl.outlined)
1695     expb.sc = SC_AUTO;
1696   ccff_info(MSGOPENMP, "OMP002", gbl.findex, gbl.lineno,
1697             "Parallel region terminated", NULL);
1698 }
1699 
1700 void
exp_ompaccel_eteams(ILM * ilmp,int curilm,int outlinedCnt,int (decrOutlinedCnt ()))1701 exp_ompaccel_eteams(ILM *ilmp, int curilm, int outlinedCnt,
1702                     int(decrOutlinedCnt()))
1703 {
1704   if (XBIT(232, 0x1)) {
1705     if (outlinedCnt == 1) {
1706       ilm_outlined_pad_ilm(curilm);
1707     }
1708     outlinedCnt = decrOutlinedCnt();
1709   }
1710   if (outlinedCnt >= 1)
1711     ll_rewrite_ilms(-1, curilm, 0);
1712 
1713   if (gbl.outlined)
1714     expb.sc = SC_AUTO;
1715   ccff_info(MSGOPENMP, "OMP023", gbl.findex, gbl.lineno,
1716             "Teams region terminated", NULL);
1717 }
1718 
1719 void
exp_ompaccel_mploopfini(ILM * ilmp,int curilm,int outlinedCnt)1720 exp_ompaccel_mploopfini(ILM *ilmp, int curilm, int outlinedCnt)
1721 {
1722   int ili;
1723   if (outlinedCnt >= 1)
1724     return;
1725   const int sched = mp_sched_to_kmpc_sched(ILM_OPND(ilmp, 2));
1726   if (sched == KMP_ORD_STATIC || sched == KMP_ORD_DYNAMIC_CHUNKED) {
1727     ili = ll_make_kmpc_dispatch_fini((DTYPE)ILM_OPND(ilmp, 1));
1728     iltb.callfg = 1;
1729     chk_block(ili);
1730   } else if (sched == KMP_SCH_STATIC || sched == KMP_SCH_STATIC_CHUNKED) {
1731     ili = ll_make_kmpc_for_static_fini();
1732     iltb.callfg = 1;
1733     chk_block(ili);
1734   }
1735 }
1736 
1737 void
exp_ompaccel_mploop(ILM * ilmp,int curilm)1738 exp_ompaccel_mploop(ILM *ilmp, int curilm)
1739 {
1740   SPTR nlower, nupper, nstride;
1741   int sched, ili;
1742   char *doschedule;
1743   loop_args_t loop_args;
1744 #if LLVM_YKT
1745   /* frontend generates two MPLOOP ILM, one for distribute, other for parallel
1746    * If it is combined construct like ttdpf, I don't need to do something
1747    * special for distribute I need to pass different scheduling type to device
1748    * runtime.
1749    */
1750   if (mp_sched_to_kmpc_sched(ILM_OPND(ilmp, 7)) == KMP_DISTRIBUTE_STATIC) {
1751     if ((ompaccel_tinfo_current_target_mode() ==
1752              mode_target_teams_distribute_parallel_for ||
1753          ompaccel_tinfo_current_target_mode() ==
1754              mode_target_teams_distribute_parallel_for_simd))
1755       return;
1756   }
1757 #endif
1758   nlower = ILM_SymOPND(ilmp, 1);
1759   nupper = ILM_SymOPND(ilmp, 2);
1760   nstride = ILM_SymOPND(ilmp, 3);
1761   if (!XBIT(183, 0x100000)) {
1762     nlower = (SPTR)getccsym_copy(nlower);   // ???
1763     nupper = (SPTR)getccsym_copy(nupper);   // ???
1764     nstride = (SPTR)getccsym_copy(nstride); // ???
1765     SCP(nlower, SC_PRIVATE);
1766     ENCLFUNCP(nlower, GBL_CURRFUNC);
1767     ENCLFUNCP(nupper, GBL_CURRFUNC);
1768     ENCLFUNCP(nstride, GBL_CURRFUNC);
1769     exp_add_copy(nlower, ILM_SymOPND(ilmp, 1));
1770     exp_add_copy(nupper, ILM_SymOPND(ilmp, 2));
1771     exp_add_copy(nstride, ILM_SymOPND(ilmp, 3));
1772   }
1773   loop_args.lower = nlower;
1774   loop_args.upper = nupper;
1775   loop_args.stride = nstride;
1776   loop_args.chunk = ILM_SymOPND(ilmp, 4);
1777   loop_args.last = ILM_SymOPND(ilmp, 5);
1778   loop_args.dtype = (DTYPE)ILM_OPND(ilmp, 6); // ???
1779   loop_args.sched = (kmpc_sched_e)ILM_OPND(ilmp, 7);
1780   sched = mp_sched_to_kmpc_sched(loop_args.sched);
1781   switch (sched) {
1782   case KMP_SCH_STATIC:
1783   case KMP_SCH_STATIC_CHUNKED:
1784     if ((ILM_OPND(ilmp, 7) & 0xff00) == MP_SCH_CHUNK_1) {
1785       doschedule = "static cyclic";
1786       ccff_info(MSGOPENMP, "OMP014", gbl.findex, gbl.lineno,
1787                 "Parallel loop activated with %schedule schedule",
1788                 "schedule=%s", doschedule, NULL);
1789     }
1790   case KMP_DISTRIBUTE_STATIC_CHUNKED:
1791   case KMP_DISTRIBUTE_STATIC:
1792     ili = ll_make_kmpc_for_static_init_simple_spmd(&loop_args, sched);
1793     break;
1794   default:
1795     ili = ll_make_kmpc_dispatch_init(&loop_args);
1796   }
1797 
1798   iltb.callfg = 1;
1799   chk_block(ili);
1800   BIH_NOMERGE(expb.curbih) = TRUE;
1801   if (!XBIT(183, 0x100000)) {
1802     exp_add_copy(ILM_SymOPND(ilmp, 1), nlower);
1803     exp_add_copy(ILM_SymOPND(ilmp, 2), nupper);
1804     exp_add_copy(ILM_SymOPND(ilmp, 3), nstride);
1805   }
1806 
1807   /* constant propagation stop when it sees function call. We may have some
1808    * stride that needs to propagate for computation of tripcount. */
1809   if (flg.opt != 0) {
1810     wr_block();
1811     cr_block();
1812   }
1813 }
1814 
1815 void
exp_ompaccel_btarget(ILM * ilmp,int curilm,SPTR uplevel_sptr,SPTR scopeSptr,int (incrOutlinedCnt ()),SPTR * targetfunc_sptr,int * isTargetDevice)1816 exp_ompaccel_btarget(ILM *ilmp, int curilm, SPTR uplevel_sptr, SPTR scopeSptr,
1817                      int(incrOutlinedCnt()), SPTR *targetfunc_sptr,
1818                      int *isTargetDevice)
1819 {
1820   int ili, outlinedCnt;
1821   SPTR sptr;
1822   /* lexically nested begin parallel */
1823   outlinedCnt = incrOutlinedCnt();
1824   if (outlinedCnt > 1) {
1825     ll_rewrite_ilms(-1, curilm, 0);
1826     return;
1827   }
1828   ompaccel_symreplacer(false);
1829   if (flg.opt != 0) {
1830     wr_block();
1831     cr_block();
1832   }
1833 
1834   BIH_FT(expb.curbih) = TRUE;
1835   BIH_QJSR(expb.curbih) = TRUE;
1836   BIH_NOMERGE(expb.curbih) = TRUE;
1837   if (outlinedCnt == 1) {
1838     /* inomptarget used to figure out whether other directives, statements are
1839      * in target region or not */
1840     gbl.ompaccel_intarget = true;
1841     /* Outline function, create sptr as ptx kernel, duplicate all the sptrs*/
1842     sptr = ll_make_outlined_ompaccel_func(uplevel_sptr, scopeSptr, TRUE);
1843     /* set global outlined function with the latest */
1844     gbl.ompoutlinedfunc = sptr;
1845 
1846     if (!PARENCLFUNCG(scopeSptr))
1847       PARENCLFUNCP(scopeSptr, sptr);
1848     ll_write_ilm_header(sptr, curilm);
1849   }
1850   ccff_info(MSGOPENMP, "OMP020", gbl.findex, gbl.lineno,
1851             "Target region activated for offload", NULL);
1852   *targetfunc_sptr = sptr;
1853   *isTargetDevice = ILI_OF(ILM_OPND(ilmp, 1));
1854   return;
1855 }
1856 
1857 static void
exp_ompaccel_ereduction(ILM * ilmp,int curilm)1858 exp_ompaccel_ereduction(ILM *ilmp, int curilm)
1859 {
1860   int ili;
1861   cr_block();
1862   ili = ll_make_kmpc_nvptx_end_reduce_nowait();
1863 
1864   iltb.callfg = 1;
1865   chk_block(ili);
1866   wr_block();
1867 }
1868 
1869 void
exp_ompaccel_etarget(ILM * ilmp,int curilm,SPTR targetfunc_sptr,int outlinedCnt,SPTR uplevel_sptr,int (decrOutlinedCnt ()))1870 exp_ompaccel_etarget(ILM *ilmp, int curilm, SPTR targetfunc_sptr,
1871                      int outlinedCnt, SPTR uplevel_sptr, int(decrOutlinedCnt()))
1872 {
1873   int ili;
1874   if (outlinedCnt == 1) {
1875     ilm_outlined_pad_ilm(curilm);
1876   }
1877   outlinedCnt = decrOutlinedCnt();
1878   if (outlinedCnt >= 1) {
1879     ll_rewrite_ilms(-1, curilm, 0);
1880     return;
1881   }
1882   if (gbl.outlined)
1883     expb.sc = SC_AUTO;
1884 
1885   if (ompaccel_tinfo_current_target_mode() == mode_target) {
1886     ili = ll_make_tgt_target(gbl.ompoutlinedfunc, OMPACCEL_DEFAULT_DEVICEID,
1887                              uplevel_sptr);
1888   } else if (ompaccel_tinfo_current_target_mode() == mode_target_parallel_for ||
1889       ompaccel_tinfo_current_target_mode() ==
1890           mode_target_parallel_for_simd) {
1891     // Create kernel with single team.
1892     ili = ll_make_tgt_target_teams(
1893         gbl.ompoutlinedfunc, OMPACCEL_DEFAULT_DEVICEID, uplevel_sptr, 1, 0);
1894   } else {
1895     ili = ll_make_tgt_target_teams(
1896         gbl.ompoutlinedfunc, OMPACCEL_DEFAULT_DEVICEID, uplevel_sptr, 0, 0);
1897   }
1898 
1899   iltb.callfg = 1;
1900   chk_block(ili);
1901 
1902   gbl.ompaccel_intarget = false;
1903 
1904   ccff_info(MSGOPENMP, "OMP021", gbl.findex, gbl.lineno,
1905             "Target region terminated", NULL);
1906 }
1907 
1908 void
exp_ompaccel_reduction(ILM * ilmp,int curilm)1909 exp_ompaccel_reduction(ILM *ilmp, int curilm)
1910 {
1911   int ili, bili, nmeReduceData, sizeRed = 0;
1912   SPTR lAssignReduction, sptrReduceData, sptrReductionItem;
1913   DTYPE dtypeReduceData, dtypeReductionItem;
1914   dtypeReduceData = mk_ompaccel_array_dtype(
1915       get_type(2, TY_PTR, DT_ANY),
1916       ompaccel_tinfo_current_get()->n_reduction_symbols);
1917   sptrReduceData =
1918       mk_ompaccel_addsymbol(".reduceData", dtypeReduceData, SC_LOCAL, ST_ARRAY);
1919 
1920   cr_block();
1921   for (int i = 0; i < ompaccel_tinfo_current_get()->n_reduction_symbols; ++i) {
1922     sptrReductionItem =
1923         ompaccel_tinfo_current_get()->reduction_symbols[i].shared_sym;
1924     dtypeReductionItem = DTYPEG(sptrReductionItem);
1925 
1926     ili = mk_address(sptrReductionItem);
1927     nmeReduceData =
1928         add_arrnme(NT_ARR, NME_NULL, addnme(NT_VAR, sptrReduceData, 0, 0), i,
1929                    ad_icon(i), FALSE);
1930 
1931     bili = mk_address(sptrReduceData);
1932     if (i != 0)
1933       bili = mk_ompaccel_add(bili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1934                              DT_ADDR);
1935 
1936     ili = mk_ompaccel_store(ili, DT_ADDR, nmeReduceData, bili);
1937     chk_block(ili);
1938   }
1939   wr_block();
1940 
1941   cr_block();
1942   ili = ll_make_kmpc_nvptx_parallel_reduce_nowait_simple_spmd(
1943       ad_icon(ompaccel_tinfo_current_get()->n_reduction_symbols),
1944       ad_icon(sizeRed), mk_address(sptrReduceData),
1945       ompaccel_tinfo_current_get()->reduction_funcs.shuffleFn,
1946       ompaccel_tinfo_current_get()->reduction_funcs.interWarpCopy);
1947   iltb.callfg = 1;
1948   chk_block(ili);
1949   wr_block();
1950 
1951   exp_ompaccel_ereduction(ilmp, curilm);
1952 
1953   lAssignReduction = getlab();
1954   RFCNTI(lAssignReduction);
1955 
1956   ili = ompaccel_nvvm_get(threadIdX);
1957   ili = mk_ompaccel_compare(ili, DT_INT, ad_icon(0), DT_INT, CC_NE);
1958   ili = ad3ili(IL_ICJMPZ, ili, CC_NE, lAssignReduction);
1959   chk_block(ili);
1960 
1961   // Load reduced items to the origina laddress
1962   for (int i = 0; i < ompaccel_tinfo_current_get()->n_reduction_symbols; ++i) {
1963     bili = mk_address(sptrReduceData);
1964     sptrReductionItem =
1965         ompaccel_tinfo_current_get()->reduction_symbols[i].private_sym;
1966     dtypeReductionItem = DTYPEG(sptrReductionItem);
1967 
1968     if (i != 0) {
1969       bili = mk_ompaccel_add(bili, DT_ADDR, ad_aconi(i * size_of(DT_ADDR)),
1970                              DT_ADDR);
1971     }
1972 
1973     bili = mk_ompaccel_load(bili, DT_ADDR, nmeReduceData);
1974     bili = mk_ompaccel_load(bili, dtypeReductionItem, nmeReduceData);
1975 
1976     ili = mk_ompaccel_ldsptr(sptrReductionItem);
1977 
1978     switch (ompaccel_tinfo_current_get()->reduction_symbols[i].redop) {
1979     case 1:
1980     case 2:
1981       ili = mk_ompaccel_add(ili, dtypeReductionItem, bili, dtypeReductionItem);
1982       ili = mk_ompaccel_store(ili, dtypeReductionItem,
1983                               addnme(NT_VAR, sptrReductionItem, 0, 0),
1984                               mk_address(sptrReductionItem));
1985       break;
1986     }
1987 
1988     chk_block(ili);
1989   }
1990   wr_block();
1991   cr_block();
1992   exp_label(lAssignReduction);
1993 }
1994 
1995 void
exp_ompaccel_bteams(ILM * ilmp,int curilm,int outlinedCnt,SPTR uplevel_sptr,SPTR scopeSptr,int (incrOutlinedCnt ()))1996 exp_ompaccel_bteams(ILM *ilmp, int curilm, int outlinedCnt, SPTR uplevel_sptr,
1997                     SPTR scopeSptr, int(incrOutlinedCnt()))
1998 {
1999   int ili, opc;
2000   SPTR sptr;
2001   if (flg.opt != 0) {
2002     wr_block();
2003     cr_block();
2004   }
2005 
2006   if (flg.omptarget) {
2007     ll_rewrite_ilms(-1, curilm, 0);
2008     return;
2009   }
2010 
2011   if (XBIT(232, 0x1)) {
2012     outlinedCnt = incrOutlinedCnt();
2013   }
2014   BIH_FT(expb.curbih) = TRUE;
2015   BIH_QJSR(expb.curbih) = TRUE;
2016   BIH_NOMERGE(expb.curbih) = TRUE;
2017   if (gbl.outlined)
2018     expb.sc = SC_PRIVATE;
2019   if (outlinedCnt == 1) {
2020     if (flg.omptarget)
2021       sptr = ll_make_outlined_ompaccel_func(uplevel_sptr, scopeSptr, FALSE);
2022     else
2023       sptr = ll_make_outlined_func(uplevel_sptr, scopeSptr);
2024     if (!PARENCLFUNCG(scopeSptr))
2025       PARENCLFUNCP(scopeSptr, sptr);
2026     ll_write_ilm_header(sptr, curilm);
2027     if (flg.omptarget) {
2028       ili = ompaccel_nvvm_get(threadIdX);
2029       ili = ll_make_kmpc_spmd_kernel_init(ili);
2030       iltb.callfg = 1;
2031       chk_block(ili);
2032       ili = ll_make_outlined_ompaccel_call(gbl.ompoutlinedfunc, sptr);
2033       iltb.callfg = 1;
2034       chk_block(ili);
2035       gbl.ompoutlinedfunc = sptr;
2036       return;
2037     }
2038     ccff_info(MSGOPENMP, "OMP022", gbl.findex, gbl.lineno,
2039               "Teams region activated", NULL);
2040 
2041   } else if (outlinedCnt > 1) {
2042     ll_rewrite_ilms(-1, curilm, 0);
2043   }
2044 }
2045 void
exp_ompaccel_map(ILM * ilmp,int curilm,int outlinedCnt)2046 exp_ompaccel_map(ILM *ilmp, int curilm, int outlinedCnt)
2047 {
2048   int label, argilm;
2049   SPTR sptr;
2050   if (outlinedCnt >= 2)
2051     return;
2052   argilm = ILM_OPND(ilmp, 1);
2053   ILM *mapop = (ILM *)(ilmb.ilm_base + argilm);
2054   if (ILM_OPC(mapop) == IM_BASE) {
2055     sptr = ILM_SymOPND(mapop, 1); // make 2
2056     label = ILM_OPND(ilmp, 2);    /* map type */
2057   } else if (ILM_OPC(mapop) == IM_PLD) {
2058     sptr = ILM_SymOPND(mapop, 2); // make 2
2059     label = ILM_OPND(ilmp, 2);    /* map type */
2060   }
2061   ompaccel_tinfo_current_addupdate_mapitem(sptr, label);
2062 }
2063 
2064 void
exp_ompaccel_emap(ILM * ilmp,int curilm)2065 exp_ompaccel_emap(ILM *ilmp, int curilm)
2066 {
2067   int ili;
2068   OMPACCEL_TINFO *targetinfo;
2069   if (ompaccel_tinfo_has(gbl.currsub))
2070     return;
2071   ompaccel_symreplacer(true);
2072   targetinfo = ompaccel_tinfo_current_get();
2073   if (targetinfo != NULL) {
2074     if (ompaccel_tinfo_current_target_mode() == mode_target_data_enter_region ||
2075         ompaccel_tinfo_current_target_mode() == mode_target_data_region) {
2076       wr_block();
2077       cr_block();
2078       ili =
2079           ll_make_tgt_target_data_begin(OMPACCEL_DEFAULT_DEVICEID, targetinfo);
2080       iltb.callfg = 1;
2081       chk_block(ili);
2082     } else if (ompaccel_tinfo_current_target_mode() ==
2083         mode_target_data_exit_region) {
2084       wr_block();
2085       cr_block();
2086       ili = ll_make_tgt_target_data_end(OMPACCEL_DEFAULT_DEVICEID, targetinfo);
2087       iltb.callfg = 1;
2088       chk_block(ili);
2089     }
2090   }
2091 }
2092 
2093 void
exp_ompaccel_looptripcount(ILM * ilmp,int curilm)2094 exp_ompaccel_looptripcount(ILM *ilmp, int curilm)
2095 {
2096   /* push loop trip count is disabled because of performance issue */
2097   if (XBIT(232, 0x20)) {
2098     SPTR sptr;
2099     int ili;
2100     wr_block();
2101     cr_block();
2102     sptr = ILM_SymOPND(ilmp, 1);
2103     ili = ll_make_kmpc_push_target_tripcount(OMPACCEL_DEFAULT_DEVICEID, sptr);
2104     iltb.callfg = 1;
2105     chk_block(ili);
2106   }
2107 }
2108 
2109 void
exp_ompaccel_reductionitem(ILM * ilmp,int curilm)2110 exp_ompaccel_reductionitem(ILM *ilmp, int curilm)
2111 {
2112   ompaccel_tinfo_current_add_reductionitem(
2113           ILM_SymOPND(ilmp, 1), ILM_SymOPND(ilmp, 2), ILM_SymOPND(ilmp, 3));
2114 }
2115 
2116 void
exp_ompaccel_targetdata(ILM * ilmp,int curilm,ILM_OP opc)2117 exp_ompaccel_targetdata(ILM *ilmp, int curilm, ILM_OP opc)
2118 {
2119   int dotarget;
2120   SPTR beg_label, end_label;
2121   ompaccel_symreplacer(false);
2122   ompaccel_tinfo_create(OMPACCEL_DATA_FUNCTION, OMPACCEL_DATA_MAX_SYM);
2123   if (opc == IM_TARGETEXITDATA)
2124     ompaccel_tinfo_current_set_mode(mode_target_data_exit_region);
2125   else if (opc == IM_TARGETENTERDATA)
2126     ompaccel_tinfo_current_set_mode(mode_target_data_enter_region);
2127   else if (opc == IM_BTARGETDATA)
2128     ompaccel_tinfo_current_set_mode(mode_target_data_region);
2129   dotarget = ILI_OF(ILM_OPND(ilmp, 1));
2130   beg_label = getlab();
2131   end_label = getlab();
2132 
2133   dotarget = ad3ili(IL_ICJMPZ, dotarget, CC_EQ, end_label);
2134   RFCNTI(end_label);
2135   chk_block(dotarget);
2136 
2137   wr_block();
2138   cr_block();
2139   exp_label(beg_label);
2140 
2141   /* .... TODO: call to runtime target data here  */
2142 
2143   exp_label(end_label);
2144 }
2145 void
exp_ompaccel_etargetdata(ILM * ilmp,int curilm)2146 exp_ompaccel_etargetdata(ILM *ilmp, int curilm)
2147 {
2148   OMPACCEL_TINFO *targetinfo;
2149   int ili;
2150   if (gbl.outlined)
2151     return;
2152   ompaccel_symreplacer(true);
2153   targetinfo = ompaccel_tinfo_current_get_targetdata();
2154   wr_block();
2155   cr_block();
2156   ili = ll_make_tgt_target_data_end(OMPACCEL_DEFAULT_DEVICEID, targetinfo);
2157   iltb.callfg = 1;
2158   chk_block(ili);
2159 }
2160 
/**
 * \brief Thin wrapper that forwards to init_tgtutil().
 */
void
init_test()
{
  init_tgtutil();
}
2166 
2167 #endif
2168 /* Expander - OpenMP Accelerator Model */
2169