1 /*
2  * Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
/**
 *  \file
 *  \brief ompaccel.h - OpenMP GPU Offload for NVVM Targets. It uses
 * libomptarget.
 */
23 
24 #ifndef OMPACCEL_H_
25 #define OMPACCEL_H_
26 
27 #include "llmputil.h"
28 #include "expand.h"
29 
/* Non-zero when func_sptr is flagged as an OpenMP device kernel. */
#define IS_OMP_DEVICE_KERNEL(func_sptr) (OMPACCFUNCKERNELG(func_sptr))
/* Non-zero when func_sptr is flagged as an OpenMP device function. */
#define IS_OMP_DEVICE_FUNC(func_sptr) (OMPACCFUNCDEVG(func_sptr))
/* True when ILI is being built for an OpenMP target: flg.omptarget and
 * gbl.ompaccel_isdevice are both set and the current function is either a
 * device function or a device kernel. The two flag tests are joined with a
 * logical OR because only their truth values matter. */
#define IS_OMP_DEVICE_CG                     \
  (flg.omptarget && gbl.ompaccel_isdevice && \
   (IS_OMP_DEVICE_FUNC(GBL_CURRFUNC) || IS_OMP_DEVICE_KERNEL(GBL_CURRFUNC)))
38 
/* Function symbol placeholder; expands to NOSYM.
 * NOTE(review): presumably used for target data regions, which have no
 * outlined function of their own -- confirm against the users. */
#define OMPACCEL_DATA_FUNCTION NOSYM
/* NOTE(review): looks like the upper bound on symbols tracked for a target
 * data region -- confirm against the users. */
#define OMPACCEL_DATA_MAX_SYM 50
41 
42 typedef struct {
43   SPTR shared_sym;
44   SPTR private_sym;
45   int redop;
46 } OMPACCEL_RED_SYM;
47 
48 typedef struct {
49   SPTR shuffleFn;
50   SPTR interWarpCopy;
51   SPTR copyToScratchPad;
52   SPTR reduceScratchPad;
53 } OMPACCEL_RED_FUNCS;
54 
55 typedef struct {
56   SPTR host_sym;    /* host symbol */
57   SPTR device_sym;  /* device symbol */
58   int map_type;     /* map type */
59   bool in_map;          /* set if it occurs in map */
60   int ili_base;         /* symbol base */
61   int ili_lowerbound;   /* lower bound */
62   int ili_length;       /* length */
63 } OMPACCEL_SYM;
64 
65 /* Target Info is the main struct which keeps all the information about target
66  * or target data region. Each outlined function must have target info, if there
67  * several nested outlining their information are kept hierarchically. Also,
68  * each target data construct creates a target info. */
69 typedef struct _OMPACCEL_TARGET OMPACCEL_TINFO;
70 
71 struct _OMPACCEL_TARGET{
72   SPTR func_sptr;                         /*  Kernel or device function sptr          */
73   OMPACCEL_SYM *symbols;                  /*  Keeps host and device symbols along with map-type */
74   int n_symbols;                          /*  Number of parameters         */
75   int sz_symbols;                         /*  Size of symbols array */
76   OMPACCEL_SYM *quiet_symbols;            /*  Keeps sc_based symbols. They don't be passed to the device */
77   int n_quiet_symbols;                    /*  Number of quiet_symbols */
78   int sz_quiet_symbols;                   /*  Size of quite_symbols */
79   OMP_TARGET_MODE mode;                   /*  Combined construct mode */
80   OMPACCEL_TINFO* parent_tinfo;           /*  Parent tinfo is used for nested outlining in device. */
81   bool nowait;                            /*  async      */
82   int n_reduction_symbols;                /*  Number of reduction symbols */
83   OMPACCEL_RED_SYM *reduction_symbols;    /*  Reduction symbols along with the reduction operator */
84   OMPACCEL_RED_FUNCS reduction_funcs;     /*  Auxiliary functions for reduction */
85 };
86 
87 static bool isOmpaccelRegistered = false;
88 
89 extern OMPACCEL_TINFO **tinfos;
90 
/* NVVM warp size. */
#define NVVM_WARPSIZE 32

/* Identifiers of the NVVM special registers. The enumerators follow the same
 * order as the intrinsic names stored in NVVM_SREG. */
typedef enum NVVM_SREG_ENUM {
  /* thread index inside the block */
  threadIdX = 0,
  threadIdY,
  threadIdZ,
  /* block index inside the grid */
  blockIdX,
  blockIdY,
  blockIdZ,
  /* block dimensions */
  blockDimX,
  blockDimY,
  blockDimZ,
  /* grid dimensions */
  gridDimX,
  gridDimY,
  gridDimZ,
  /* warp size register */
  warpSize
} nvvm_sregs;
108 
/* LLVM intrinsic names that read the NVVM special registers, one entry per
 * nvvm_sregs enumerator and in the same order. */
static const char *NVVM_SREG[] = {
    "llvm.nvvm.read.ptx.sreg.tid.x",    /* threadIdX */
    "llvm.nvvm.read.ptx.sreg.tid.y",    /* threadIdY */
    "llvm.nvvm.read.ptx.sreg.tid.z",    /* threadIdZ */
    "llvm.nvvm.read.ptx.sreg.ctaid.x",  /* blockIdX */
    "llvm.nvvm.read.ptx.sreg.ctaid.y",  /* blockIdY */
    "llvm.nvvm.read.ptx.sreg.ctaid.z",  /* blockIdZ */
    "llvm.nvvm.read.ptx.sreg.ntid.x",   /* blockDimX */
    "llvm.nvvm.read.ptx.sreg.ntid.y",   /* blockDimY */
    "llvm.nvvm.read.ptx.sreg.ntid.z",   /* blockDimZ */
    "llvm.nvvm.read.ptx.sreg.nctaid.x", /* gridDimX */
    "llvm.nvvm.read.ptx.sreg.nctaid.y", /* gridDimY */
    "llvm.nvvm.read.ptx.sreg.nctaid.z", /* gridDimZ */
    "llvm.nvvm.read.ptx.sreg.warpsize"  /* warpSize */
};
117 
/* Identifiers of the NVVM barrier intrinsics; they index NVVM_INTRINSICS. */
typedef enum NVVM_INTRINSICS_ENUM {
  barrier0 = 0,
  barrier
} nvvm_intrinsics;

/* LLVM intrinsic names, in nvvm_intrinsics order. */
static const char *NVVM_INTRINSICS[] = {
    "llvm.nvvm.barrier0", /* barrier0 */
    "llvm.nvvm.barrier"   /* barrier */
};
122 
/* Barrier kinds. */
typedef enum NVVM_BARRIERS {
  CTA_BARRIER = 0,
  PARTIAL_BARRIER
} nvvm_barriers;
124 
125 /* keeps beginning of the nvvm special register symbols */
126 static SPTR init_nvvm_syms = NOSYM;
127 static SPTR init_nvvm_intrinsics = NOSYM;
128 
129 /* ################################################ */
130 /* OpenMP ACCEL - Utils                             */
131 /* ################################################ */
/**
   \brief Store LLVM's target triple, i.e. the value given on the command line
   as -fopenmp-target=<target-triple>.
 */
void ompaccel_set_targetriple(const char *);

/**
   \brief Get the fopenmp-targets triple.
 */
const char *ompaccel_get_targetriple(void);

/**
   \brief Tell whether the tgt runtime has been registered.
 */
bool ompaccel_is_tgt_registered(void);

/**
   \brief Mark the tgt runtime as registered.
 */
void ompaccel_register_tgt(void);

/**
   \brief Emit a ctor function which registers the tgt runtime.
 */
void ompaccel_emit_tgt_register(void);
157 
/* Internal-failure reporting helpers. They report a fatal error when DEBUG is
 * defined and expand to nothing otherwise. */
#ifdef DEBUG
/* Report fatal error 1204 with `message`, followed by a description of the
 * source location (file, line, function) of the macro use. */
#define ompaccelInternalFail(message)                               \
  do {                                                              \
    char buffer[10000];                                             \
    snprintf(buffer, sizeof buffer, "File:%s Line %d, Function:%s", \
             __FILE__, __LINE__, __FUNCTION__);                     \
    error((error_code_t)1204, ERR_Fatal, 0, message, buffer);       \
  } while (0)
/* printf-like variant: `format` is embedded into a message that also carries
 * the source location, and the result is used as the format string of
 * interrf together with the remaining arguments. */
#define ompaccelInternalFailure(format, ...)                                  \
  do {                                                                        \
    char buffer[10000];                                                       \
    snprintf(                                                                 \
        buffer, sizeof buffer,                                                \
        "[OpenMP-Offload] Error: [%s] at [File:%s Line %d, Function:%s]",     \
        format, __FILE__, __LINE__, __FUNCTION__);                            \
    interrf(ERR_Fatal, buffer, ##__VA_ARGS__);                                \
  } while (0)

#else
#define ompaccelInternalFail(message)
/* Must carry the same name as the DEBUG variant, otherwise uses of
 * ompaccelInternalFailure are left undefined in non-DEBUG builds. */
#define ompaccelInternalFailure(format, ...)
/* Misspelled name previously defined in non-DEBUG builds; kept so that any
 * existing use keeps compiling. */
#define ompaccelInternalFailures(format, ...)
#endif
179 
180 /* ################################################ */
181 /* OpenMP ACCEL - NVVM Helpers                      */
182 /* ################################################ */
183 /**
184    \brief Create NVVM special symbols and intrinsics
185  */
186 void ompaccel_initsyms(void);
187 
188 /**
189    \brief Get special register. (nvvm device only)
190  */
191 int ompaccel_nvvm_get(nvvm_sregs sreg);
192 
193 /**
194    \brief Get global thread id. It does not take into account master-warp. (nvvm
195    device only)
196  */
197 int ompaccel_nvvm_get_gbl_tid(void);
198 
199 /**
200    \brief Emit shuffle reduce for reduction. (nvvm device only)
201  */
202 SPTR ompaccel_nvvm_emit_shuffle_reduce(OMPACCEL_RED_SYM *, int, SPTR);
203 
204 /**
205    \brief Emit reduce for reduction. (nvvm device only)
206  */
207 SPTR ompaccel_nvvm_emit_reduce(OMPACCEL_RED_SYM *, int);
208 
209 /**
210    \brief Emit inter warp copy for reduction. (nvvm device only)
211  */
212 SPTR ompaccel_nvvm_emit_inter_warp_copy(OMPACCEL_RED_SYM *, int);
213 
214 /* ################################################ */
215 /* OpenMP ACCEL - Target Information data structure */
216 /* ################################################ */
217 /**
218    \brief Initialize ompaccel, which keeps target region and data information
219    structures
220  */
221 void ompaccel_init(void);
222 
223 /**
224    \brief Create target info with data. Target info is designed to keep all the
225    symbols which occur in the OpenMP construct's region along with their
226    map-types. It also keeps the hierarchy.
227  */
228 OMPACCEL_TINFO *ompaccel_tinfo_create(SPTR, int);
229 
230 /**
231    \brief Get target and data info of function
232  */
233 OMPACCEL_TINFO *ompaccel_tinfo_get(int);
234 /**
235    \brief Return whether parameter function sptr has target info or not.
236  */
237 bool ompaccel_tinfo_has(int);
238 /**
239    \brief Return current target info.
240  */
241 OMPACCEL_TINFO *ompaccel_tinfo_current_get(void);
242 /**
243    \brief Return current target data info.
244  */
245 OMPACCEL_TINFO *ompaccel_tinfo_current_get_targetdata(void);
246 /**
247    \brief Return current target region mode.
248  */
249 OMP_TARGET_MODE ompaccel_tinfo_current_target_mode(void);
250 /**
251    \brief Set the target region mode if it is combined construct.
252  */
253 void ompaccel_tinfo_current_set_mode(OMP_TARGET_MODE);
254 /**
255    \brief Set the target region mode if it is combined construct.
256  */
257 void ompaccel_tinfo_set_mode_next_target(OMP_TARGET_MODE);
258 /**
259    \brief Add a host symbol to the current target info.
260  */
261 void ompaccel_tinfo_current_add_sym(SPTR, SPTR, int);
262 /**
263    \brief Update map-type of the host symbol of the current target info.
264  */
265 void ompaccel_tinfo_current_addupdate_mapitem(SPTR, int);
266 /**
267    \brief Add reduction symbols to the current target info.
268  */
269 void ompaccel_tinfo_current_add_reductionitem(SPTR, SPTR, int);
270 
271 /**
272    \brief Return whether is the the symbol is current tinfo or not.
273  */
274 bool ompaccel_tinfo_current_is_registered(SPTR);
275 
276 /**
277    \brief Return device symbol of passed host symbol of current target info. It
278    is designed to replace host symbols of outlined function code with device
279    symbols.
280  */
281 SPTR ompaccel_tinfo_current_get_devsptr(SPTR);
282 /**
283    \brief Return device symbol's datatype of the passed host symbol's datatype
284    of current target info.
285  */
286 DTYPE ompaccel_tinfo_current_get_dev_dtype(DTYPE);
287 /**
288    \brief Return device symbol of parent target info of the passed host symbol.
289    It is used when there nested outlining in the device code.
290  */
291 SPTR ompaccel_tinfo_parent_get_devsptr(SPTR);
292 
293 /**
294    \brief Create device symbol from the host symbol.
295    Parameter count can be anything.
296  */
297 SPTR
298 ompaccel_create_device_symbol(SPTR sptr, int count);
299 
300 /* OpenMP ACCEL - Target Information data structure */
301 
302 /* ################################################ */
303 /* OpenMP ACCEL - Dump routines                     */
304 /* ################################################ */
305 /**
306    \brief Dump single target region.
307  */
308 void dumpomptarget(OMPACCEL_TINFO *);
309 
310 /**
311    \brief Dump all target regions.
312  */
313 void dumpomptargets(void);
314 
315 /* ################################################ */
316 /* OpenMP ACCEL - Error messages                    */
317 /* ################################################ */
/* Text identifying the OpenMP accelerator model in messages. */
#define OMPACCELMESSAGE "OpenMP Accelerator Model:"
/* Message routines, one per severity named by the suffix (internal error,
 * error, warning, info).
 * NOTE(review): the meaning of the two string parameters is not visible in
 * this header -- confirm against the definitions. */
void ompaccel_msg_interr(char *, const char *);
void ompaccel_msg_err(char *, const char *);
void ompaccel_msg_warn(char *, const char *);
void ompaccel_msg_info(char *, const char *);
323 
324 /* ################################################ */
325 /* OpenMP ACCEL - Expander                          */
326 /* ################################################ */
327 /**
328    \brief Expand ILM and emit code for mploop
329  */
330 void exp_ompaccel_mploop(ILM *ilmp, int);
331 /**
332    \brief Expand ILM and emit code for mploopfini
333  */
334 void exp_ompaccel_mploopfini(ILM *ilmp, int, int);
335 /**
336    \brief Expand ILM and emit code for bpar
337  */
338 void exp_ompaccel_bpar(ILM *ilmp, int, SPTR, SPTR, int(incrOutlinedCnt()));
339 /**
340    \brief Expand ILM and emit code for epar
341  */
342 void exp_ompaccel_epar(ILM *, int, int, int(decrOutlinedCnt()));
343 /**
344    \brief Expand ILM and emit code for bteams
345  */
346 void exp_ompaccel_bteams(ILM *ilmp, int, int, SPTR, SPTR,
347                          int(incrOutlinedCnt()));
348 /**
349    \brief Expand ILM and emit code for eteams
350  */
351 void exp_ompaccel_eteams(ILM *ilmp, int, int, int(decrOutlinedCnt()));
352 /**
353    \brief Expand ILM and emit code for btarget
354  */
355 void exp_ompaccel_btarget(ILM *, int, SPTR, SPTR, int(incrOutlinedCnt()),
356                           SPTR *, int *);
357 /**
358    \brief Expand ILM and emit code for etarget
359  */
360 void exp_ompaccel_etarget(ILM *, int, SPTR, int, SPTR, int(decrOutlinedCnt()));
361 /**
362    \brief Expand ILM and emit code for reduction
363  */
364 void exp_ompaccel_reduction(ILM *, int);
365 /**
366    \brief Expand ILM and emit code for map
367  */
368 void exp_ompaccel_map(ILM *, int, int);
369 /**
370    \brief Expand ILM and emit code for emap
371  */
372 void exp_ompaccel_emap(ILM *, int);
373 /**
374    \brief Expand ILM and emit code for looptripcount
375  */
376 void exp_ompaccel_looptripcount(ILM *, int);
377 /**
378    \brief Expand ILM and emit code for reductionitem
379  */
380 void exp_ompaccel_reductionitem(ILM *, int);
381 /**
382    \brief Expand ILM and emit code for targetdata
383  */
384 void exp_ompaccel_targetdata(ILM *, int, ILM_OP);
385 /**
386    \brief Expand ILM and emit code for etargetdata
387  */
388 void exp_ompaccel_etargetdata(ILM *, int);
389 
390 int mk_ompaccel_store(int ili_value, DTYPE dtype, int nme, int ili_address);
391 int mk_ompaccel_mul(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2);
392 int mk_ompaccel_add(int ili1, DTYPE dtype1, int ili2, DTYPE dtype2);
393 int mk_ompaccel_ldsptr(SPTR sptr);
/* Takes no arguments; declared with an explicit (void) parameter list like
 * the other prototypes in this header, so that calls with arguments are
 * rejected when compiled as C.
 * NOTE(review): the purpose of this routine is not visible here -- confirm. */
void init_test(void);
395 #endif
396