1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the interface to be used by Clang during the codegen of a
10 // target region.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "device.h"
15 #include "private.h"
16 #include "rtl.h"
17 
18 #include <cassert>
19 #include <cstdio>
20 #include <cstdlib>
21 #include <mutex>
22 
23 ////////////////////////////////////////////////////////////////////////////////
24 /// manage the success or failure of a target construct
HandleDefaultTargetOffload()25 static void HandleDefaultTargetOffload() {
26   PM->TargetOffloadMtx.lock();
27   if (PM->TargetOffloadPolicy == tgt_default) {
28     if (omp_get_num_devices() > 0) {
29       DP("Default TARGET OFFLOAD policy is now mandatory "
30          "(devices were found)\n");
31       PM->TargetOffloadPolicy = tgt_mandatory;
32     } else {
33       DP("Default TARGET OFFLOAD policy is now disabled "
34          "(no devices were found)\n");
35       PM->TargetOffloadPolicy = tgt_disabled;
36     }
37   }
38   PM->TargetOffloadMtx.unlock();
39 }
40 
IsOffloadDisabled()41 static int IsOffloadDisabled() {
42   if (PM->TargetOffloadPolicy == tgt_default)
43     HandleDefaultTargetOffload();
44   return PM->TargetOffloadPolicy == tgt_disabled;
45 }
46 
HandleTargetOutcome(bool success,ident_t * loc=nullptr)47 static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) {
48   switch (PM->TargetOffloadPolicy) {
49   case tgt_disabled:
50     if (success) {
51       FATAL_MESSAGE0(1, "expected no offloading while offloading is disabled");
52     }
53     break;
54   case tgt_default:
55     FATAL_MESSAGE0(1, "default offloading policy must be switched to "
56                       "mandatory or disabled");
57     break;
58   case tgt_mandatory:
59     if (!success) {
60       if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
61         for (auto &Device : PM->Devices)
62           dumpTargetPointerMappings(loc, Device);
63       else
64         FAILURE_MESSAGE("Run with LIBOMPTARGET_INFO=%d to dump host-target "
65                         "pointer mappings.\n",
66                         OMP_INFOTYPE_DUMP_TABLE);
67 
68       SourceInfo info(loc);
69       if (info.isAvailible())
70         fprintf(stderr, "%s:%d:%d: ", info.getFilename(), info.getLine(),
71                 info.getColumn());
72       else
73         FAILURE_MESSAGE("Source location information not present. Compile with "
74                         "-g or -gline-tables-only.\n");
75       FATAL_MESSAGE0(
76           1, "failure of target construct while offloading is mandatory");
77     } else {
78       if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
79         for (auto &Device : PM->Devices)
80           dumpTargetPointerMappings(loc, Device);
81     }
82     break;
83   }
84 }
85 
86 ////////////////////////////////////////////////////////////////////////////////
87 /// adds requires flags
__tgt_register_requires(int64_t flags)88 EXTERN void __tgt_register_requires(int64_t flags) {
89   TIMESCOPE();
90   PM->RTLs.RegisterRequires(flags);
91 }
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 /// adds a target shared library to the target execution image
__tgt_register_lib(__tgt_bin_desc * desc)95 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
96   TIMESCOPE();
97   std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs);
98   for (auto &RTL : PM->RTLs.AllRTLs) {
99     if (RTL.register_lib) {
100       if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) {
101         DP("Could not register library with %s", RTL.RTLName.c_str());
102       }
103     }
104   }
105   PM->RTLs.RegisterLib(desc);
106 }
107 
108 ////////////////////////////////////////////////////////////////////////////////
109 /// unloads a target shared library
__tgt_unregister_lib(__tgt_bin_desc * desc)110 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
111   TIMESCOPE();
112   PM->RTLs.UnregisterLib(desc);
113   for (auto &RTL : PM->RTLs.UsedRTLs) {
114     if (RTL->unregister_lib) {
115       if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) {
116         DP("Could not register library with %s", RTL->RTLName.c_str());
117       }
118     }
119   }
120 }
121 
122 /// creates host-to-target data mapping, stores it in the
123 /// libomptarget.so internal structure (an entry in a stack of data maps)
124 /// and passes the data to the device.
__tgt_target_data_begin(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types)125 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
126     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
127   TIMESCOPE();
128   __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
129                                  arg_sizes, arg_types, nullptr, nullptr);
130 }
131 
__tgt_target_data_begin_nowait(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)132 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
133     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
134     int32_t depNum, void *depList, int32_t noAliasDepNum,
135     void *noAliasDepList) {
136   TIMESCOPE();
137   if (depNum + noAliasDepNum > 0)
138     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
139 
140   __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
141                                  arg_sizes, arg_types, nullptr, nullptr);
142 }
143 
__tgt_target_data_begin_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers)144 EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
145                                            int32_t arg_num, void **args_base,
146                                            void **args, int64_t *arg_sizes,
147                                            int64_t *arg_types,
148                                            map_var_info_t *arg_names,
149                                            void **arg_mappers) {
150   TIMESCOPE_WITH_IDENT(loc);
151   if (IsOffloadDisabled()) return;
152 
153   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
154       device_id, arg_num);
155 
156   // No devices available?
157   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
158     device_id = omp_get_default_device();
159     DP("Use default device id %" PRId64 "\n", device_id);
160   }
161 
162   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
163     DP("Failed to get device %" PRId64 " ready\n", device_id);
164     HandleTargetOutcome(false, loc);
165     return;
166   }
167 
168   DeviceTy &Device = PM->Devices[device_id];
169 
170   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
171     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
172                          arg_names, "Entering OpenMP data region");
173 #ifdef OMPTARGET_DEBUG
174   for (int i = 0; i < arg_num; ++i) {
175     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
176        ", Type=0x%" PRIx64 ", Name=%s\n",
177        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
178        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
179   }
180 #endif
181 
182   int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes,
183                            arg_types, arg_names, arg_mappers, nullptr);
184   HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
185 }
186 
__tgt_target_data_begin_nowait_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)187 EXTERN void __tgt_target_data_begin_nowait_mapper(
188     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
189     void **args, int64_t *arg_sizes, int64_t *arg_types,
190     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
191     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
192   TIMESCOPE_WITH_IDENT(loc);
193   if (depNum + noAliasDepNum > 0)
194     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
195 
196   __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args,
197                                  arg_sizes, arg_types, arg_names, arg_mappers);
198 }
199 
200 /// passes data from the target, releases target memory and destroys
201 /// the host-target mapping (top entry from the stack of data maps)
202 /// created by the last __tgt_target_data_begin.
__tgt_target_data_end(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types)203 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
204     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
205   TIMESCOPE();
206   __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
207                                arg_sizes, arg_types, nullptr, nullptr);
208 }
209 
__tgt_target_data_end_nowait(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)210 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
211     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
212     int32_t depNum, void *depList, int32_t noAliasDepNum,
213     void *noAliasDepList) {
214   TIMESCOPE();
215   if (depNum + noAliasDepNum > 0)
216     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
217 
218   __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
219                                arg_sizes, arg_types, nullptr, nullptr);
220 }
221 
__tgt_target_data_end_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers)222 EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
223                                          int32_t arg_num, void **args_base,
224                                          void **args, int64_t *arg_sizes,
225                                          int64_t *arg_types,
226                                          map_var_info_t *arg_names,
227                                          void **arg_mappers) {
228   TIMESCOPE_WITH_IDENT(loc);
229   if (IsOffloadDisabled()) return;
230   DP("Entering data end region with %d mappings\n", arg_num);
231 
232   // No devices available?
233   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
234     device_id = omp_get_default_device();
235   }
236 
237   PM->RTLsMtx.lock();
238   size_t DevicesSize = PM->Devices.size();
239   PM->RTLsMtx.unlock();
240   if (DevicesSize <= (size_t)device_id) {
241     DP("Device ID  %" PRId64 " does not have a matching RTL.\n", device_id);
242     HandleTargetOutcome(false, loc);
243     return;
244   }
245 
246   DeviceTy &Device = PM->Devices[device_id];
247   if (!Device.IsInit) {
248     DP("Uninit device: ignore");
249     HandleTargetOutcome(false, loc);
250     return;
251   }
252 
253   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
254     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
255                          arg_names, "Exiting OpenMP data region");
256 #ifdef OMPTARGET_DEBUG
257   for (int i=0; i<arg_num; ++i) {
258     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
259        ", Type=0x%" PRIx64 ", Name=%s\n",
260        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
261        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
262   }
263 #endif
264 
265   int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes,
266                          arg_types, arg_names, arg_mappers, nullptr);
267   HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
268 }
269 
__tgt_target_data_end_nowait_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)270 EXTERN void __tgt_target_data_end_nowait_mapper(
271     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
272     void **args, int64_t *arg_sizes, int64_t *arg_types,
273     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
274     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
275   TIMESCOPE_WITH_IDENT(loc);
276   if (depNum + noAliasDepNum > 0)
277     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
278 
279   __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args,
280                                arg_sizes, arg_types, arg_names, arg_mappers);
281 }
282 
__tgt_target_data_update(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types)283 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
284     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
285   TIMESCOPE();
286   __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
287                                   arg_sizes, arg_types, nullptr, nullptr);
288 }
289 
__tgt_target_data_update_nowait(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)290 EXTERN void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
291     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
292     int32_t depNum, void *depList, int32_t noAliasDepNum,
293     void *noAliasDepList) {
294   TIMESCOPE();
295   if (depNum + noAliasDepNum > 0)
296     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
297 
298   __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
299                                   arg_sizes, arg_types, nullptr, nullptr);
300 }
301 
__tgt_target_data_update_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers)302 EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
303                                             int32_t arg_num, void **args_base,
304                                             void **args, int64_t *arg_sizes,
305                                             int64_t *arg_types,
306                                             map_var_info_t *arg_names,
307                                             void **arg_mappers) {
308   TIMESCOPE_WITH_IDENT(loc);
309   if (IsOffloadDisabled()) return;
310   DP("Entering data update with %d mappings\n", arg_num);
311 
312   // No devices available?
313   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
314     device_id = omp_get_default_device();
315   }
316 
317   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
318     DP("Failed to get device %" PRId64 " ready\n", device_id);
319     HandleTargetOutcome(false, loc);
320     return;
321   }
322 
323   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
324     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
325                          arg_names, "Updating OpenMP data");
326 
327   DeviceTy &Device = PM->Devices[device_id];
328   int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes,
329                             arg_types, arg_names, arg_mappers);
330   HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
331 }
332 
__tgt_target_data_update_nowait_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)333 EXTERN void __tgt_target_data_update_nowait_mapper(
334     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
335     void **args, int64_t *arg_sizes, int64_t *arg_types,
336     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
337     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
338   TIMESCOPE_WITH_IDENT(loc);
339   if (depNum + noAliasDepNum > 0)
340     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
341 
342   __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args,
343                                   arg_sizes, arg_types, arg_names, arg_mappers);
344 }
345 
__tgt_target(int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types)346 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
347     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
348   TIMESCOPE();
349   return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
350                              args, arg_sizes, arg_types, nullptr, nullptr);
351 }
352 
__tgt_target_nowait(int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)353 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
354     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
355     int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
356     void *noAliasDepList) {
357   TIMESCOPE();
358   if (depNum + noAliasDepNum > 0)
359     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
360 
361   return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
362                              args, arg_sizes, arg_types, nullptr, nullptr);
363 }
364 
__tgt_target_mapper(ident_t * loc,int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers)365 EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
366                                int32_t arg_num, void **args_base, void **args,
367                                int64_t *arg_sizes, int64_t *arg_types,
368                                map_var_info_t *arg_names, void **arg_mappers) {
369   TIMESCOPE_WITH_IDENT(loc);
370   if (IsOffloadDisabled()) return OFFLOAD_FAIL;
371   DP("Entering target region with entry point " DPxMOD " and device Id %"
372       PRId64 "\n", DPxPTR(host_ptr), device_id);
373 
374   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
375     device_id = omp_get_default_device();
376   }
377 
378   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
379     REPORT("Failed to get device %" PRId64 " ready\n", device_id);
380     HandleTargetOutcome(false, loc);
381     return OFFLOAD_FAIL;
382   }
383 
384   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
385     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
386                          arg_names, "Entering OpenMP kernel");
387 #ifdef OMPTARGET_DEBUG
388   for (int i=0; i<arg_num; ++i) {
389     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
390        ", Type=0x%" PRIx64 ", Name=%s\n",
391        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
392        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
393   }
394 #endif
395 
396   int rc = target(loc, device_id, host_ptr, arg_num, args_base, args, arg_sizes,
397                   arg_types, arg_names, arg_mappers, 0, 0, false /*team*/);
398   HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
399   return rc;
400 }
401 
__tgt_target_nowait_mapper(ident_t * loc,int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)402 EXTERN int __tgt_target_nowait_mapper(
403     ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
404     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
405     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
406     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
407   TIMESCOPE_WITH_IDENT(loc);
408   if (depNum + noAliasDepNum > 0)
409     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
410 
411   return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args,
412                              arg_sizes, arg_types, arg_names, arg_mappers);
413 }
414 
__tgt_target_teams(int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t team_num,int32_t thread_limit)415 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
416     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
417     int64_t *arg_types, int32_t team_num, int32_t thread_limit) {
418   TIMESCOPE();
419   return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
420                                    args_base, args, arg_sizes, arg_types,
421                                    nullptr, nullptr, team_num, thread_limit);
422 }
423 
__tgt_target_teams_nowait(int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t team_num,int32_t thread_limit,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)424 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
425     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
426     int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
427     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
428   TIMESCOPE();
429   if (depNum + noAliasDepNum > 0)
430     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
431 
432   return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
433                                    args_base, args, arg_sizes, arg_types,
434                                    nullptr, nullptr, team_num, thread_limit);
435 }
436 
__tgt_target_teams_mapper(ident_t * loc,int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t team_num,int32_t thread_limit)437 EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id,
438                                      void *host_ptr, int32_t arg_num,
439                                      void **args_base, void **args,
440                                      int64_t *arg_sizes, int64_t *arg_types,
441                                      map_var_info_t *arg_names,
442                                      void **arg_mappers, int32_t team_num,
443                                      int32_t thread_limit) {
444   if (IsOffloadDisabled()) return OFFLOAD_FAIL;
445   DP("Entering target region with entry point " DPxMOD " and device Id %"
446       PRId64 "\n", DPxPTR(host_ptr), device_id);
447 
448   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
449     device_id = omp_get_default_device();
450   }
451 
452   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
453     REPORT("Failed to get device %" PRId64 " ready\n", device_id);
454     HandleTargetOutcome(false, loc);
455     return OFFLOAD_FAIL;
456   }
457 
458   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
459     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
460                          arg_names, "Entering OpenMP kernel");
461 #ifdef OMPTARGET_DEBUG
462   for (int i=0; i<arg_num; ++i) {
463     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
464        ", Type=0x%" PRIx64 ", Name=%s\n",
465        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
466        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
467   }
468 #endif
469 
470   int rc = target(loc, device_id, host_ptr, arg_num, args_base, args, arg_sizes,
471                   arg_types, arg_names, arg_mappers, team_num, thread_limit,
472                   true /*team*/);
473   HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
474   return rc;
475 }
476 
__tgt_target_teams_nowait_mapper(ident_t * loc,int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t team_num,int32_t thread_limit,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)477 EXTERN int __tgt_target_teams_nowait_mapper(
478     ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
479     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
480     map_var_info_t *arg_names, void **arg_mappers, int32_t team_num,
481     int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
482     void *noAliasDepList) {
483   TIMESCOPE_WITH_IDENT(loc);
484   if (depNum + noAliasDepNum > 0)
485     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
486 
487   return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base,
488                                    args, arg_sizes, arg_types, arg_names,
489                                    arg_mappers, team_num, thread_limit);
490 }
491 
492 // Get the current number of components for a user-defined mapper.
__tgt_mapper_num_components(void * rt_mapper_handle)493 EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) {
494   TIMESCOPE();
495   auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
496   int64_t size = MapperComponentsPtr->Components.size();
497   DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
498      DPxPTR(rt_mapper_handle), size);
499   return size;
500 }
501 
502 // Push back one component for a user-defined mapper.
__tgt_push_mapper_component(void * rt_mapper_handle,void * base,void * begin,int64_t size,int64_t type,void * name)503 EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
504                                         void *begin, int64_t size, int64_t type,
505                                         void *name) {
506   TIMESCOPE();
507   DP("__tgt_push_mapper_component(Handle=" DPxMOD
508      ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
509      ", Type=0x%" PRIx64 ", Name=%s).\n",
510      DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type,
511      (name) ? getNameFromMapping(name).c_str() : "unknown");
512   auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
513   MapperComponentsPtr->Components.push_back(
514       MapComponentInfoTy(base, begin, size, type, name));
515 }
516 
__kmpc_push_target_tripcount(int64_t device_id,uint64_t loop_tripcount)517 EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
518                                          uint64_t loop_tripcount) {
519   __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount);
520 }
521 
__kmpc_push_target_tripcount_mapper(ident_t * loc,int64_t device_id,uint64_t loop_tripcount)522 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
523                                                 uint64_t loop_tripcount) {
524   TIMESCOPE_WITH_IDENT(loc);
525   if (IsOffloadDisabled())
526     return;
527 
528   if (device_id == OFFLOAD_DEVICE_DEFAULT) {
529     device_id = omp_get_default_device();
530   }
531 
532   if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) {
533     DP("Failed to get device %" PRId64 " ready\n", device_id);
534     HandleTargetOutcome(false, loc);
535     return;
536   }
537 
538   DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
539       loop_tripcount);
540   PM->TblMapMtx.lock();
541   PM->Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
542                                              loop_tripcount);
543   PM->TblMapMtx.unlock();
544 }
545