1 //===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the interface to be used by Clang during the codegen of a
10 // target region.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "device.h"
15 #include "omptarget.h"
16 #include "private.h"
17 #include "rtl.h"
18 
19 #include <cassert>
20 #include <cstdio>
21 #include <cstdlib>
22 #include <mutex>
23 
24 ////////////////////////////////////////////////////////////////////////////////
25 /// adds requires flags
__tgt_register_requires(int64_t flags)26 EXTERN void __tgt_register_requires(int64_t flags) {
27   TIMESCOPE();
28   PM->RTLs.RegisterRequires(flags);
29 }
30 
31 ////////////////////////////////////////////////////////////////////////////////
32 /// adds a target shared library to the target execution image
__tgt_register_lib(__tgt_bin_desc * desc)33 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
34   TIMESCOPE();
35   std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs);
36   for (auto &RTL : PM->RTLs.AllRTLs) {
37     if (RTL.register_lib) {
38       if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) {
39         DP("Could not register library with %s", RTL.RTLName.c_str());
40       }
41     }
42   }
43   PM->RTLs.RegisterLib(desc);
44 }
45 
46 ////////////////////////////////////////////////////////////////////////////////
47 /// Initialize all available devices without registering any image
__tgt_init_all_rtls()48 EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); }
49 
50 ////////////////////////////////////////////////////////////////////////////////
51 /// unloads a target shared library
__tgt_unregister_lib(__tgt_bin_desc * desc)52 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
53   TIMESCOPE();
54   PM->RTLs.UnregisterLib(desc);
55   for (auto &RTL : PM->RTLs.UsedRTLs) {
56     if (RTL->unregister_lib) {
57       if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) {
58         DP("Could not register library with %s", RTL->RTLName.c_str());
59       }
60     }
61   }
62 }
63 
64 /// creates host-to-target data mapping, stores it in the
65 /// libomptarget.so internal structure (an entry in a stack of data maps)
66 /// and passes the data to the device.
__tgt_target_data_begin(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types)67 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
68                                     void **args_base, void **args,
69                                     int64_t *arg_sizes, int64_t *arg_types) {
70   TIMESCOPE();
71   __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
72                                  arg_sizes, arg_types, nullptr, nullptr);
73 }
74 
__tgt_target_data_begin_nowait(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)75 EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
76                                            void **args_base, void **args,
77                                            int64_t *arg_sizes,
78                                            int64_t *arg_types, int32_t depNum,
79                                            void *depList, int32_t noAliasDepNum,
80                                            void *noAliasDepList) {
81   TIMESCOPE();
82   if (depNum + noAliasDepNum > 0)
83     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
84 
85   __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
86                                  arg_sizes, arg_types, nullptr, nullptr);
87 }
88 
__tgt_target_data_begin_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers)89 EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
90                                            int32_t arg_num, void **args_base,
91                                            void **args, int64_t *arg_sizes,
92                                            int64_t *arg_types,
93                                            map_var_info_t *arg_names,
94                                            void **arg_mappers) {
95   TIMESCOPE_WITH_IDENT(loc);
96   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
97      device_id, arg_num);
98   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
99     DP("Not offloading to device %" PRId64 "\n", device_id);
100     return;
101   }
102 
103   DeviceTy &Device = PM->Devices[device_id];
104 
105   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
106     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
107                          arg_names, "Entering OpenMP data region");
108 #ifdef OMPTARGET_DEBUG
109   for (int i = 0; i < arg_num; ++i) {
110     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
111        ", Type=0x%" PRIx64 ", Name=%s\n",
112        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
113        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
114   }
115 #endif
116 
117   AsyncInfoTy AsyncInfo(Device);
118   int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes,
119                            arg_types, arg_names, arg_mappers, AsyncInfo);
120   if (rc == OFFLOAD_SUCCESS)
121     rc = AsyncInfo.synchronize();
122   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
123 }
124 
__tgt_target_data_begin_nowait_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)125 EXTERN void __tgt_target_data_begin_nowait_mapper(
126     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
127     void **args, int64_t *arg_sizes, int64_t *arg_types,
128     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
129     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
130   TIMESCOPE_WITH_IDENT(loc);
131   if (depNum + noAliasDepNum > 0)
132     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
133 
134   __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args,
135                                  arg_sizes, arg_types, arg_names, arg_mappers);
136 }
137 
138 /// passes data from the target, releases target memory and destroys
139 /// the host-target mapping (top entry from the stack of data maps)
140 /// created by the last __tgt_target_data_begin.
__tgt_target_data_end(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types)141 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
142                                   void **args_base, void **args,
143                                   int64_t *arg_sizes, int64_t *arg_types) {
144   TIMESCOPE();
145   __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
146                                arg_sizes, arg_types, nullptr, nullptr);
147 }
148 
__tgt_target_data_end_nowait(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)149 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
150                                          void **args_base, void **args,
151                                          int64_t *arg_sizes, int64_t *arg_types,
152                                          int32_t depNum, void *depList,
153                                          int32_t noAliasDepNum,
154                                          void *noAliasDepList) {
155   TIMESCOPE();
156   if (depNum + noAliasDepNum > 0)
157     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
158 
159   __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
160                                arg_sizes, arg_types, nullptr, nullptr);
161 }
162 
__tgt_target_data_end_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers)163 EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
164                                          int32_t arg_num, void **args_base,
165                                          void **args, int64_t *arg_sizes,
166                                          int64_t *arg_types,
167                                          map_var_info_t *arg_names,
168                                          void **arg_mappers) {
169   TIMESCOPE_WITH_IDENT(loc);
170   DP("Entering data end region with %d mappings\n", arg_num);
171   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
172     DP("Not offloading to device %" PRId64 "\n", device_id);
173     return;
174   }
175 
176   DeviceTy &Device = PM->Devices[device_id];
177 
178   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
179     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
180                          arg_names, "Exiting OpenMP data region");
181 #ifdef OMPTARGET_DEBUG
182   for (int i = 0; i < arg_num; ++i) {
183     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
184        ", Type=0x%" PRIx64 ", Name=%s\n",
185        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
186        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
187   }
188 #endif
189 
190   AsyncInfoTy AsyncInfo(Device);
191   int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes,
192                          arg_types, arg_names, arg_mappers, AsyncInfo);
193   if (rc == OFFLOAD_SUCCESS)
194     rc = AsyncInfo.synchronize();
195   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
196 }
197 
__tgt_target_data_end_nowait_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)198 EXTERN void __tgt_target_data_end_nowait_mapper(
199     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
200     void **args, int64_t *arg_sizes, int64_t *arg_types,
201     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
202     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
203   TIMESCOPE_WITH_IDENT(loc);
204   if (depNum + noAliasDepNum > 0)
205     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
206 
207   __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args,
208                                arg_sizes, arg_types, arg_names, arg_mappers);
209 }
210 
__tgt_target_data_update(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types)211 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
212                                      void **args_base, void **args,
213                                      int64_t *arg_sizes, int64_t *arg_types) {
214   TIMESCOPE();
215   __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
216                                   arg_sizes, arg_types, nullptr, nullptr);
217 }
218 
__tgt_target_data_update_nowait(int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)219 EXTERN void __tgt_target_data_update_nowait(
220     int64_t device_id, int32_t arg_num, void **args_base, void **args,
221     int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
222     int32_t noAliasDepNum, void *noAliasDepList) {
223   TIMESCOPE();
224   if (depNum + noAliasDepNum > 0)
225     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
226 
227   __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
228                                   arg_sizes, arg_types, nullptr, nullptr);
229 }
230 
__tgt_target_data_update_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers)231 EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
232                                             int32_t arg_num, void **args_base,
233                                             void **args, int64_t *arg_sizes,
234                                             int64_t *arg_types,
235                                             map_var_info_t *arg_names,
236                                             void **arg_mappers) {
237   TIMESCOPE_WITH_IDENT(loc);
238   DP("Entering data update with %d mappings\n", arg_num);
239   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
240     DP("Not offloading to device %" PRId64 "\n", device_id);
241     return;
242   }
243 
244   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
245     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
246                          arg_names, "Updating OpenMP data");
247 
248   DeviceTy &Device = PM->Devices[device_id];
249   AsyncInfoTy AsyncInfo(Device);
250   int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes,
251                             arg_types, arg_names, arg_mappers, AsyncInfo);
252   if (rc == OFFLOAD_SUCCESS)
253     rc = AsyncInfo.synchronize();
254   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
255 }
256 
__tgt_target_data_update_nowait_mapper(ident_t * loc,int64_t device_id,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)257 EXTERN void __tgt_target_data_update_nowait_mapper(
258     ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
259     void **args, int64_t *arg_sizes, int64_t *arg_types,
260     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
261     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
262   TIMESCOPE_WITH_IDENT(loc);
263   if (depNum + noAliasDepNum > 0)
264     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
265 
266   __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args,
267                                   arg_sizes, arg_types, arg_names, arg_mappers);
268 }
269 
__tgt_target(int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types)270 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
271                         void **args_base, void **args, int64_t *arg_sizes,
272                         int64_t *arg_types) {
273   TIMESCOPE();
274   return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
275                              args, arg_sizes, arg_types, nullptr, nullptr);
276 }
277 
__tgt_target_nowait(int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)278 EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
279                                int32_t arg_num, void **args_base, void **args,
280                                int64_t *arg_sizes, int64_t *arg_types,
281                                int32_t depNum, void *depList,
282                                int32_t noAliasDepNum, void *noAliasDepList) {
283   TIMESCOPE();
284   if (depNum + noAliasDepNum > 0)
285     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
286 
287   return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
288                              args, arg_sizes, arg_types, nullptr, nullptr);
289 }
290 
__tgt_target_mapper(ident_t * loc,int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers)291 EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
292                                int32_t arg_num, void **args_base, void **args,
293                                int64_t *arg_sizes, int64_t *arg_types,
294                                map_var_info_t *arg_names, void **arg_mappers) {
295   TIMESCOPE_WITH_IDENT(loc);
296   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
297      "\n",
298      DPxPTR(host_ptr), device_id);
299   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
300     DP("Not offloading to device %" PRId64 "\n", device_id);
301     return OFFLOAD_FAIL;
302   }
303 
304   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
305     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
306                          arg_names, "Entering OpenMP kernel");
307 #ifdef OMPTARGET_DEBUG
308   for (int i = 0; i < arg_num; ++i) {
309     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
310        ", Type=0x%" PRIx64 ", Name=%s\n",
311        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
312        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
313   }
314 #endif
315 
316   DeviceTy &Device = PM->Devices[device_id];
317   AsyncInfoTy AsyncInfo(Device);
318   int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
319                   arg_types, arg_names, arg_mappers, 0, 0, false /*team*/,
320                   AsyncInfo);
321   if (rc == OFFLOAD_SUCCESS)
322     rc = AsyncInfo.synchronize();
323   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
324   return rc;
325 }
326 
__tgt_target_nowait_mapper(ident_t * loc,int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)327 EXTERN int __tgt_target_nowait_mapper(
328     ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
329     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
330     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
331     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
332   TIMESCOPE_WITH_IDENT(loc);
333   if (depNum + noAliasDepNum > 0)
334     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
335 
336   return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args,
337                              arg_sizes, arg_types, arg_names, arg_mappers);
338 }
339 
__tgt_target_teams(int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t team_num,int32_t thread_limit)340 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
341                               int32_t arg_num, void **args_base, void **args,
342                               int64_t *arg_sizes, int64_t *arg_types,
343                               int32_t team_num, int32_t thread_limit) {
344   TIMESCOPE();
345   return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
346                                    args_base, args, arg_sizes, arg_types,
347                                    nullptr, nullptr, team_num, thread_limit);
348 }
349 
__tgt_target_teams_nowait(int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,int32_t team_num,int32_t thread_limit,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)350 EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
351                                      int32_t arg_num, void **args_base,
352                                      void **args, int64_t *arg_sizes,
353                                      int64_t *arg_types, int32_t team_num,
354                                      int32_t thread_limit, int32_t depNum,
355                                      void *depList, int32_t noAliasDepNum,
356                                      void *noAliasDepList) {
357   TIMESCOPE();
358   if (depNum + noAliasDepNum > 0)
359     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
360 
361   return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
362                                    args_base, args, arg_sizes, arg_types,
363                                    nullptr, nullptr, team_num, thread_limit);
364 }
365 
__tgt_target_teams_mapper(ident_t * loc,int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t team_num,int32_t thread_limit)366 EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id,
367                                      void *host_ptr, int32_t arg_num,
368                                      void **args_base, void **args,
369                                      int64_t *arg_sizes, int64_t *arg_types,
370                                      map_var_info_t *arg_names,
371                                      void **arg_mappers, int32_t team_num,
372                                      int32_t thread_limit) {
373   DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64
374      "\n",
375      DPxPTR(host_ptr), device_id);
376   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
377     DP("Not offloading to device %" PRId64 "\n", device_id);
378     return OFFLOAD_FAIL;
379   }
380 
381   if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
382     printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
383                          arg_names, "Entering OpenMP kernel");
384 #ifdef OMPTARGET_DEBUG
385   for (int i = 0; i < arg_num; ++i) {
386     DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
387        ", Type=0x%" PRIx64 ", Name=%s\n",
388        i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
389        (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
390   }
391 #endif
392 
393   DeviceTy &Device = PM->Devices[device_id];
394   AsyncInfoTy AsyncInfo(Device);
395   int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
396                   arg_types, arg_names, arg_mappers, team_num, thread_limit,
397                   true /*team*/, AsyncInfo);
398   if (rc == OFFLOAD_SUCCESS)
399     rc = AsyncInfo.synchronize();
400   handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
401   return rc;
402 }
403 
__tgt_target_teams_nowait_mapper(ident_t * loc,int64_t device_id,void * host_ptr,int32_t arg_num,void ** args_base,void ** args,int64_t * arg_sizes,int64_t * arg_types,map_var_info_t * arg_names,void ** arg_mappers,int32_t team_num,int32_t thread_limit,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)404 EXTERN int __tgt_target_teams_nowait_mapper(
405     ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
406     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
407     map_var_info_t *arg_names, void **arg_mappers, int32_t team_num,
408     int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
409     void *noAliasDepList) {
410   TIMESCOPE_WITH_IDENT(loc);
411   if (depNum + noAliasDepNum > 0)
412     __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
413 
414   return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base,
415                                    args, arg_sizes, arg_types, arg_names,
416                                    arg_mappers, team_num, thread_limit);
417 }
418 
419 // Get the current number of components for a user-defined mapper.
__tgt_mapper_num_components(void * rt_mapper_handle)420 EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) {
421   TIMESCOPE();
422   auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
423   int64_t size = MapperComponentsPtr->Components.size();
424   DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
425      DPxPTR(rt_mapper_handle), size);
426   return size;
427 }
428 
429 // Push back one component for a user-defined mapper.
__tgt_push_mapper_component(void * rt_mapper_handle,void * base,void * begin,int64_t size,int64_t type,void * name)430 EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
431                                         void *begin, int64_t size, int64_t type,
432                                         void *name) {
433   TIMESCOPE();
434   DP("__tgt_push_mapper_component(Handle=" DPxMOD
435      ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
436      ", Type=0x%" PRIx64 ", Name=%s).\n",
437      DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type,
438      (name) ? getNameFromMapping(name).c_str() : "unknown");
439   auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
440   MapperComponentsPtr->Components.push_back(
441       MapComponentInfoTy(base, begin, size, type, name));
442 }
443 
__kmpc_push_target_tripcount(int64_t device_id,uint64_t loop_tripcount)444 EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
445                                          uint64_t loop_tripcount) {
446   __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount);
447 }
448 
__kmpc_push_target_tripcount_mapper(ident_t * loc,int64_t device_id,uint64_t loop_tripcount)449 EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
450                                                 uint64_t loop_tripcount) {
451   TIMESCOPE_WITH_IDENT(loc);
452   if (checkDeviceAndCtors(device_id, loc) != OFFLOAD_SUCCESS) {
453     DP("Not offloading to device %" PRId64 "\n", device_id);
454     return;
455   }
456 
457   DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
458      loop_tripcount);
459   PM->TblMapMtx.lock();
460   PM->Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
461                                              loop_tripcount);
462   PM->TblMapMtx.unlock();
463 }
464 
__tgt_set_info_flag(uint32_t NewInfoLevel)465 EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
466   std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
467   InfoLevel.store(NewInfoLevel);
468   for (auto &R : PM->RTLs.AllRTLs) {
469     if (R.set_info_flag)
470       R.set_info_flag(NewInfoLevel);
471   }
472 }
473 
__tgt_print_device_info(int64_t device_id)474 EXTERN int __tgt_print_device_info(int64_t device_id) {
475   return PM->Devices[device_id].printDeviceInfo(
476       PM->Devices[device_id].RTLDeviceID);
477 }
478