1 /* Plugin for HSAIL execution.
2 
3    Copyright (C) 2013-2018 Free Software Foundation, Inc.
4 
5    Contributed by Martin Jambor <mjambor@suse.cz> and
6    Martin Liska <mliska@suse.cz>.
7 
8    This file is part of the GNU Offloading and Multi Processing Library
9    (libgomp).
10 
11    Libgomp is free software; you can redistribute it and/or modify it
12    under the terms of the GNU General Public License as published by
13    the Free Software Foundation; either version 3, or (at your option)
14    any later version.
15 
16    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
19    more details.
20 
21    Under Section 7 of GPL version 3, you are granted additional
22    permissions described in the GCC Runtime Library Exception, version
23    3.1, as published by the Free Software Foundation.
24 
25    You should have received a copy of the GNU General Public License and
26    a copy of the GCC Runtime Library Exception along with this program;
27    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
28    <http://www.gnu.org/licenses/>.  */
29 
30 #include "config.h"
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <pthread.h>
35 #include <inttypes.h>
36 #include <stdbool.h>
37 #include <hsa.h>
38 #include <plugin/hsa_ext_finalize.h>
39 #include <dlfcn.h>
40 #include "libgomp-plugin.h"
41 #include "gomp-constants.h"
42 #include "secure_getenv.h"
43 
44 /* As an HSA runtime is dlopened, following structure defines function
45    pointers utilized by the HSA plug-in.  */
46 
47 struct hsa_runtime_fn_info
48 {
49   /* HSA runtime.  */
50   hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
51 					const char **status_string);
52   hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
53 					 hsa_agent_info_t attribute,
54 					 void *value);
55   hsa_status_t (*hsa_init_fn) (void);
56   hsa_status_t (*hsa_iterate_agents_fn)
57     (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
58   hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
59 					  hsa_region_info_t attribute,
60 					  void *value);
61   hsa_status_t (*hsa_queue_create_fn)
62     (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
63      void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
64      void *data, uint32_t private_segment_size,
65      uint32_t group_segment_size, hsa_queue_t **queue);
66   hsa_status_t (*hsa_agent_iterate_regions_fn)
67     (hsa_agent_t agent,
68      hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
69   hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
70   hsa_status_t (*hsa_executable_create_fn)
71     (hsa_profile_t profile, hsa_executable_state_t executable_state,
72      const char *options, hsa_executable_t *executable);
73   hsa_status_t (*hsa_executable_global_variable_define_fn)
74     (hsa_executable_t executable, const char *variable_name, void *address);
75   hsa_status_t (*hsa_executable_load_code_object_fn)
76     (hsa_executable_t executable, hsa_agent_t agent,
77      hsa_code_object_t code_object, const char *options);
78   hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
79 					   const char *options);
80   hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
81 					uint32_t num_consumers,
82 					const hsa_agent_t *consumers,
83 					hsa_signal_t *signal);
84   hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
85 					  void **ptr);
86   hsa_status_t (*hsa_memory_free_fn) (void *ptr);
87   hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
88   hsa_status_t (*hsa_executable_get_symbol_fn)
89     (hsa_executable_t executable, const char *module_name,
90      const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
91      hsa_executable_symbol_t *symbol);
92   hsa_status_t (*hsa_executable_symbol_get_info_fn)
93     (hsa_executable_symbol_t executable_symbol,
94      hsa_executable_symbol_info_t attribute, void *value);
95   uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
96 						    uint64_t value);
97   uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
98   void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
99 				       hsa_signal_value_t value);
100   void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
101 				       hsa_signal_value_t value);
102   hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
103     (hsa_signal_t signal, hsa_signal_condition_t condition,
104      hsa_signal_value_t compare_value, uint64_t timeout_hint,
105      hsa_wait_state_t wait_state_hint);
106   hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
107   hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
108 
109   /* HSA finalizer.  */
110   hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
111 						 hsa_ext_module_t module);
112   hsa_status_t (*hsa_ext_program_create_fn)
113     (hsa_machine_model_t machine_model, hsa_profile_t profile,
114      hsa_default_float_rounding_mode_t default_float_rounding_mode,
115      const char *options, hsa_ext_program_t *program);
116   hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
117   hsa_status_t (*hsa_ext_program_finalize_fn)
118     (hsa_ext_program_t program,hsa_isa_t isa,
119      int32_t call_convention, hsa_ext_control_directives_t control_directives,
120      const char *options, hsa_code_object_type_t code_object_type,
121      hsa_code_object_t *code_object);
122 };
123 
124 /* HSA runtime functions that are initialized in init_hsa_context.  */
125 
126 static struct hsa_runtime_fn_info hsa_fns;
127 
128 /* Keep the following GOMP prefixed structures in sync with respective parts of
129    the compiler.  */
130 
131 /* Structure describing the run-time and grid properties of an HSA kernel
132    lauch.  */
133 
134 struct GOMP_kernel_launch_attributes
135 {
136   /* Number of dimensions the workload has.  Maximum number is 3.  */
137   uint32_t ndim;
138   /* Size of the grid in the three respective dimensions.  */
139   uint32_t gdims[3];
140   /* Size of work-groups in the respective dimensions.  */
141   uint32_t wdims[3];
142 };
143 
144 /* Collection of information needed for a dispatch of a kernel from a
145    kernel.  */
146 
147 struct GOMP_hsa_kernel_dispatch
148 {
149   /* Pointer to a command queue associated with a kernel dispatch agent.  */
150   void *queue;
151   /* Pointer to reserved memory for OMP data struct copying.  */
152   void *omp_data_memory;
153   /* Pointer to a memory space used for kernel arguments passing.  */
154   void *kernarg_address;
155   /* Kernel object.  */
156   uint64_t object;
157   /* Synchronization signal used for dispatch synchronization.  */
158   uint64_t signal;
159   /* Private segment size.  */
160   uint32_t private_segment_size;
161   /* Group segment size.  */
162   uint32_t group_segment_size;
163   /* Number of children kernel dispatches.  */
164   uint64_t kernel_dispatch_count;
165   /* Debug purpose argument.  */
166   uint64_t debug;
167   /* Levels-var ICV.  */
168   uint64_t omp_level;
169   /* Kernel dispatch structures created for children kernel dispatches.  */
170   struct GOMP_hsa_kernel_dispatch **children_dispatches;
171   /* Number of threads.  */
172   uint32_t omp_num_threads;
173 };
174 
175 /* Part of the libgomp plugin interface.  Return the name of the accelerator,
176    which is "hsa".  */
177 
178 const char *
GOMP_OFFLOAD_get_name(void)179 GOMP_OFFLOAD_get_name (void)
180 {
181   return "hsa";
182 }
183 
184 /* Part of the libgomp plugin interface.  Return the specific capabilities the
185    HSA accelerator have.  */
186 
187 unsigned int
GOMP_OFFLOAD_get_caps(void)188 GOMP_OFFLOAD_get_caps (void)
189 {
190   return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
191 }
192 
193 /* Part of the libgomp plugin interface.  Identify as HSA accelerator.  */
194 
195 int
GOMP_OFFLOAD_get_type(void)196 GOMP_OFFLOAD_get_type (void)
197 {
198   return OFFLOAD_TARGET_TYPE_HSA;
199 }
200 
201 /* Return the libgomp version number we're compatible with.  There is
202    no requirement for cross-version compatibility.  */
203 
204 unsigned
GOMP_OFFLOAD_version(void)205 GOMP_OFFLOAD_version (void)
206 {
207   return GOMP_VERSION;
208 }
209 
210 /* Flag to decide whether print to stderr information about what is going on.
211    Set in init_debug depending on environment variables.  */
212 
213 static bool debug;
214 
215 /* Flag to decide if the runtime should suppress a possible fallback to host
216    execution.  */
217 
218 static bool suppress_host_fallback;
219 
220 /* Flag to locate HSA runtime shared library that is dlopened
221    by this plug-in.  */
222 
223 static const char *hsa_runtime_lib;
224 
225 /* Flag to decide if the runtime should support also CPU devices (can be
226    a simulator).  */
227 
228 static bool support_cpu_devices;
229 
230 /* Initialize debug and suppress_host_fallback according to the environment.  */
231 
232 static void
init_enviroment_variables(void)233 init_enviroment_variables (void)
234 {
235   if (secure_getenv ("HSA_DEBUG"))
236     debug = true;
237   else
238     debug = false;
239 
240   if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
241     suppress_host_fallback = true;
242   else
243     suppress_host_fallback = false;
244 
245   hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
246   if (hsa_runtime_lib == NULL)
247     hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so";
248 
249   support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
250 }
251 
252 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
253    is set to true.  */
254 
255 #define HSA_LOG(prefix, ...) \
256   do \
257   { \
258     if (debug) \
259       { \
260 	fprintf (stderr, prefix); \
261 	fprintf (stderr, __VA_ARGS__); \
262       } \
263   } \
264   while (false)
265 
266 /* Print a debugging message to stderr.  */
267 
268 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
269 
270 /* Print a warning message to stderr.  */
271 
272 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
273 
274 /* Print HSA warning STR with an HSA STATUS code.  */
275 
276 static void
hsa_warn(const char * str,hsa_status_t status)277 hsa_warn (const char *str, hsa_status_t status)
278 {
279   if (!debug)
280     return;
281 
282   const char *hsa_error_msg;
283   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
284 
285   fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
286 }
287 
288 /* Report a fatal error STR together with the HSA error corresponding to STATUS
289    and terminate execution of the current process.  */
290 
291 static void
hsa_fatal(const char * str,hsa_status_t status)292 hsa_fatal (const char *str, hsa_status_t status)
293 {
294   const char *hsa_error_msg;
295   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
296   GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
297 		     hsa_error_msg);
298 }
299 
300 /* Like hsa_fatal, except only report error message, and return FALSE
301    for propagating error processing to outside of plugin.  */
302 
303 static bool
hsa_error(const char * str,hsa_status_t status)304 hsa_error (const char *str, hsa_status_t status)
305 {
306   const char *hsa_error_msg;
307   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
308   GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
309 		     hsa_error_msg);
310   return false;
311 }
312 
313 struct hsa_kernel_description
314 {
315   const char *name;
316   unsigned omp_data_size;
317   bool gridified_kernel_p;
318   unsigned kernel_dependencies_count;
319   const char **kernel_dependencies;
320 };
321 
322 struct global_var_info
323 {
324   const char *name;
325   void *address;
326 };
327 
328 /* Data passed by the static initializer of a compilation unit containing BRIG
329    to GOMP_offload_register.  */
330 
331 struct brig_image_desc
332 {
333   hsa_ext_module_t brig_module;
334   const unsigned kernel_count;
335   struct hsa_kernel_description *kernel_infos;
336   const unsigned global_variable_count;
337   struct global_var_info *global_variables;
338 };
339 
340 struct agent_info;
341 
342 /* Information required to identify, finalize and run any given kernel.  */
343 
344 struct kernel_info
345 {
346   /* Name of the kernel, required to locate it within the brig module.  */
347   const char *name;
348   /* Size of memory space for OMP data.  */
349   unsigned omp_data_size;
350   /* The specific agent the kernel has been or will be finalized for and run
351      on.  */
352   struct agent_info *agent;
353   /* The specific module where the kernel takes place.  */
354   struct module_info *module;
355   /* Mutex enforcing that at most once thread ever initializes a kernel for
356      use.  A thread should have locked agent->modules_rwlock for reading before
357      acquiring it.  */
358   pthread_mutex_t init_mutex;
359   /* Flag indicating whether the kernel has been initialized and all fields
360      below it contain valid data.  */
361   bool initialized;
362   /* Flag indicating that the kernel has a problem that blocks an execution.  */
363   bool initialization_failed;
364   /* The object to be put into the dispatch queue.  */
365   uint64_t object;
366   /* Required size of kernel arguments.  */
367   uint32_t kernarg_segment_size;
368   /* Required size of group segment.  */
369   uint32_t group_segment_size;
370   /* Required size of private segment.  */
371   uint32_t private_segment_size;
372   /* List of all kernel dependencies.  */
373   const char **dependencies;
374   /* Number of dependencies.  */
375   unsigned dependencies_count;
376   /* Maximum OMP data size necessary for kernel from kernel dispatches.  */
377   unsigned max_omp_data_size;
378   /* True if the kernel is gridified.  */
379   bool gridified_kernel_p;
380 };
381 
382 /* Information about a particular brig module, its image and kernels.  */
383 
384 struct module_info
385 {
386   /* The next and previous module in the linked list of modules of an agent.  */
387   struct module_info *next, *prev;
388   /* The description with which the program has registered the image.  */
389   struct brig_image_desc *image_desc;
390 
391   /* Number of kernels in this module.  */
392   int kernel_count;
393   /* An array of kernel_info structures describing each kernel in this
394      module.  */
395   struct kernel_info kernels[];
396 };
397 
398 /* Information about shared brig library.  */
399 
400 struct brig_library_info
401 {
402   char *file_name;
403   hsa_ext_module_t image;
404 };
405 
406 /* Description of an HSA GPU agent and the program associated with it.  */
407 
408 struct agent_info
409 {
410   /* The HSA ID of the agent.  Assigned when hsa_context is initialized.  */
411   hsa_agent_t id;
412   /* Whether the agent has been initialized.  The fields below are usable only
413      if it has been.  */
414   bool initialized;
415   /* The HSA ISA of this agent.  */
416   hsa_isa_t isa;
417   /* Command queue of the agent.  */
418   hsa_queue_t *command_q;
419   /* Kernel from kernel dispatch command queue.  */
420   hsa_queue_t *kernel_dispatch_command_q;
421   /* The HSA memory region from which to allocate kernel arguments.  */
422   hsa_region_t kernarg_region;
423 
424   /* Read-write lock that protects kernels which are running or about to be run
425      from interference with loading and unloading of images.  Needs to be
426      locked for reading while a kernel is being run, and for writing if the
427      list of modules is manipulated (and thus the HSA program invalidated).  */
428   pthread_rwlock_t modules_rwlock;
429   /* The first module in a linked list of modules associated with this
430      kernel.  */
431   struct module_info *first_module;
432 
433   /* Mutex enforcing that only one thread will finalize the HSA program.  A
434      thread should have locked agent->modules_rwlock for reading before
435      acquiring it.  */
436   pthread_mutex_t prog_mutex;
437   /* Flag whether the HSA program that consists of all the modules has been
438      finalized.  */
439   bool prog_finalized;
440   /* Flag whether the program was finalized but with a failure.  */
441   bool prog_finalized_error;
442   /* HSA executable - the finalized program that is used to locate kernels.  */
443   hsa_executable_t executable;
444   /* List of BRIG libraries.  */
445   struct brig_library_info **brig_libraries;
446   /* Number of loaded shared BRIG libraries.  */
447   unsigned brig_libraries_count;
448 };
449 
450 /* Information about the whole HSA environment and all of its agents.  */
451 
452 struct hsa_context_info
453 {
454   /* Whether the structure has been initialized.  */
455   bool initialized;
456   /* Number of usable GPU HSA agents in the system.  */
457   int agent_count;
458   /* Array of agent_info structures describing the individual HSA agents.  */
459   struct agent_info *agents;
460 };
461 
462 /* Information about the whole HSA environment and all of its agents.  */
463 
464 static struct hsa_context_info hsa_context;
465 
466 #define DLSYM_FN(function) \
467   hsa_fns.function##_fn = dlsym (handle, #function); \
468   if (hsa_fns.function##_fn == NULL) \
469     goto dl_fail;
470 
471 static bool
init_hsa_runtime_functions(void)472 init_hsa_runtime_functions (void)
473 {
474   void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
475   if (handle == NULL)
476     goto dl_fail;
477 
478   DLSYM_FN (hsa_status_string)
479   DLSYM_FN (hsa_agent_get_info)
480   DLSYM_FN (hsa_init)
481   DLSYM_FN (hsa_iterate_agents)
482   DLSYM_FN (hsa_region_get_info)
483   DLSYM_FN (hsa_queue_create)
484   DLSYM_FN (hsa_agent_iterate_regions)
485   DLSYM_FN (hsa_executable_destroy)
486   DLSYM_FN (hsa_executable_create)
487   DLSYM_FN (hsa_executable_global_variable_define)
488   DLSYM_FN (hsa_executable_load_code_object)
489   DLSYM_FN (hsa_executable_freeze)
490   DLSYM_FN (hsa_signal_create)
491   DLSYM_FN (hsa_memory_allocate)
492   DLSYM_FN (hsa_memory_free)
493   DLSYM_FN (hsa_signal_destroy)
494   DLSYM_FN (hsa_executable_get_symbol)
495   DLSYM_FN (hsa_executable_symbol_get_info)
496   DLSYM_FN (hsa_queue_add_write_index_release)
497   DLSYM_FN (hsa_queue_load_read_index_acquire)
498   DLSYM_FN (hsa_signal_wait_acquire)
499   DLSYM_FN (hsa_signal_store_relaxed)
500   DLSYM_FN (hsa_signal_store_release)
501   DLSYM_FN (hsa_signal_load_acquire)
502   DLSYM_FN (hsa_queue_destroy)
503   DLSYM_FN (hsa_ext_program_add_module)
504   DLSYM_FN (hsa_ext_program_create)
505   DLSYM_FN (hsa_ext_program_destroy)
506   DLSYM_FN (hsa_ext_program_finalize)
507   return true;
508 
509  dl_fail:
510   HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib, dlerror ());
511   return false;
512 }
513 
514 /* Find kernel for an AGENT by name provided in KERNEL_NAME.  */
515 
516 static struct kernel_info *
get_kernel_for_agent(struct agent_info * agent,const char * kernel_name)517 get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
518 {
519   struct module_info *module = agent->first_module;
520 
521   while (module)
522     {
523       for (unsigned i = 0; i < module->kernel_count; i++)
524 	if (strcmp (module->kernels[i].name, kernel_name) == 0)
525 	  return &module->kernels[i];
526 
527       module = module->next;
528     }
529 
530   return NULL;
531 }
532 
533 /* Return true if the agent is a GPU and acceptable of concurrent submissions
534    from different threads.  */
535 
536 static bool
suitable_hsa_agent_p(hsa_agent_t agent)537 suitable_hsa_agent_p (hsa_agent_t agent)
538 {
539   hsa_device_type_t device_type;
540   hsa_status_t status
541     = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
542 				     &device_type);
543   if (status != HSA_STATUS_SUCCESS)
544     return false;
545 
546   switch (device_type)
547     {
548     case HSA_DEVICE_TYPE_GPU:
549       break;
550     case HSA_DEVICE_TYPE_CPU:
551       if (!support_cpu_devices)
552 	return false;
553       break;
554     default:
555       return false;
556     }
557 
558   uint32_t features = 0;
559   status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
560 					  &features);
561   if (status != HSA_STATUS_SUCCESS
562       || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
563     return false;
564   hsa_queue_type_t queue_type;
565   status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
566 					  &queue_type);
567   if (status != HSA_STATUS_SUCCESS
568       || (queue_type != HSA_QUEUE_TYPE_MULTI))
569     return false;
570 
571   return true;
572 }
573 
574 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
575    agent_count in hsa_context.  */
576 
577 static hsa_status_t
count_gpu_agents(hsa_agent_t agent,void * data)578 count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
579 {
580   if (suitable_hsa_agent_p (agent))
581     hsa_context.agent_count++;
582   return HSA_STATUS_SUCCESS;
583 }
584 
585 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
586    id to the describing structure in the hsa context.  The index of the
587    structure is pointed to by DATA, increment it afterwards.  */
588 
589 static hsa_status_t
assign_agent_ids(hsa_agent_t agent,void * data)590 assign_agent_ids (hsa_agent_t agent, void *data)
591 {
592   if (suitable_hsa_agent_p (agent))
593     {
594       int *agent_index = (int *) data;
595       hsa_context.agents[*agent_index].id = agent;
596       ++*agent_index;
597     }
598   return HSA_STATUS_SUCCESS;
599 }
600 
601 /* Initialize hsa_context if it has not already been done.
602    Return TRUE on success.  */
603 
604 static bool
init_hsa_context(void)605 init_hsa_context (void)
606 {
607   hsa_status_t status;
608   int agent_index = 0;
609 
610   if (hsa_context.initialized)
611     return true;
612   init_enviroment_variables ();
613   if (!init_hsa_runtime_functions ())
614     {
615       HSA_DEBUG ("Run-time could not be dynamically opened\n");
616       return false;
617     }
618   status = hsa_fns.hsa_init_fn ();
619   if (status != HSA_STATUS_SUCCESS)
620     return hsa_error ("Run-time could not be initialized", status);
621   HSA_DEBUG ("HSA run-time initialized\n");
622   status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
623   if (status != HSA_STATUS_SUCCESS)
624     return hsa_error ("HSA GPU devices could not be enumerated", status);
625   HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
626 
627   hsa_context.agents
628     = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
629 				  * sizeof (struct agent_info));
630   status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
631   if (agent_index != hsa_context.agent_count)
632     {
633       GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
634       return false;
635     }
636   hsa_context.initialized = true;
637   return true;
638 }
639 
640 /* Callback of dispatch queues to report errors.  */
641 
642 static void
queue_callback(hsa_status_t status,hsa_queue_t * queue,void * data)643 queue_callback (hsa_status_t status,
644 		hsa_queue_t *queue __attribute__ ((unused)),
645 		void *data __attribute__ ((unused)))
646 {
647   hsa_fatal ("Asynchronous queue error", status);
648 }
649 
650 /* Callback of hsa_agent_iterate_regions.  Determine if a memory REGION can be
651    used for kernarg allocations and if so write it to the memory pointed to by
652    DATA and break the query.  */
653 
654 static hsa_status_t
get_kernarg_memory_region(hsa_region_t region,void * data)655 get_kernarg_memory_region (hsa_region_t region, void *data)
656 {
657   hsa_status_t status;
658   hsa_region_segment_t segment;
659 
660   status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
661 					   &segment);
662   if (status != HSA_STATUS_SUCCESS)
663     return status;
664   if (segment != HSA_REGION_SEGMENT_GLOBAL)
665     return HSA_STATUS_SUCCESS;
666 
667   uint32_t flags;
668   status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
669 					   &flags);
670   if (status != HSA_STATUS_SUCCESS)
671     return status;
672   if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
673     {
674       hsa_region_t *ret = (hsa_region_t *) data;
675       *ret = region;
676       return HSA_STATUS_INFO_BREAK;
677     }
678   return HSA_STATUS_SUCCESS;
679 }
680 
681 /* Part of the libgomp plugin interface.  Return the number of HSA devices on
682    the system.  */
683 
684 int
GOMP_OFFLOAD_get_num_devices(void)685 GOMP_OFFLOAD_get_num_devices (void)
686 {
687   if (!init_hsa_context ())
688     return 0;
689   return hsa_context.agent_count;
690 }
691 
692 /* Part of the libgomp plugin interface.  Initialize agent number N so that it
693    can be used for computation.  Return TRUE on success.  */
694 
695 bool
GOMP_OFFLOAD_init_device(int n)696 GOMP_OFFLOAD_init_device (int n)
697 {
698   if (!init_hsa_context ())
699     return false;
700   if (n >= hsa_context.agent_count)
701     {
702       GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
703       return false;
704     }
705   struct agent_info *agent = &hsa_context.agents[n];
706 
707   if (agent->initialized)
708     return true;
709 
710   if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
711     {
712       GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
713       return false;
714     }
715   if (pthread_mutex_init (&agent->prog_mutex, NULL))
716     {
717       GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
718       return false;
719     }
720 
721   uint32_t queue_size;
722   hsa_status_t status;
723   status = hsa_fns.hsa_agent_get_info_fn (agent->id,
724 					  HSA_AGENT_INFO_QUEUE_MAX_SIZE,
725 					  &queue_size);
726   if (status != HSA_STATUS_SUCCESS)
727     return hsa_error ("Error requesting maximum queue size of the HSA agent",
728     	   	      status);
729   status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
730 					  &agent->isa);
731   if (status != HSA_STATUS_SUCCESS)
732     return hsa_error ("Error querying the ISA of the agent", status);
733   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
734 					HSA_QUEUE_TYPE_MULTI,
735 					queue_callback, NULL, UINT32_MAX,
736 					UINT32_MAX,
737 					&agent->command_q);
738   if (status != HSA_STATUS_SUCCESS)
739     return hsa_error ("Error creating command queue", status);
740 
741   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
742 					HSA_QUEUE_TYPE_MULTI,
743 					queue_callback, NULL, UINT32_MAX,
744 					UINT32_MAX,
745 					&agent->kernel_dispatch_command_q);
746   if (status != HSA_STATUS_SUCCESS)
747     return hsa_error ("Error creating kernel dispatch command queue", status);
748 
749   agent->kernarg_region.handle = (uint64_t) -1;
750   status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
751 						 get_kernarg_memory_region,
752 						 &agent->kernarg_region);
753   if (agent->kernarg_region.handle == (uint64_t) -1)
754     {
755       GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
756 			 "arguments");
757       return false;
758     }
759   HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
760 	     (long long unsigned) agent->command_q->id);
761   HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
762 	     (long long unsigned) agent->kernel_dispatch_command_q->id);
763   agent->initialized = true;
764   return true;
765 }
766 
767 /* Verify that hsa_context has already been initialized and return the
768    agent_info structure describing device number N.  Return NULL on error.  */
769 
770 static struct agent_info *
get_agent_info(int n)771 get_agent_info (int n)
772 {
773   if (!hsa_context.initialized)
774     {
775       GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
776       return NULL;
777     }
778   if (n >= hsa_context.agent_count)
779     {
780       GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
781       return NULL;
782     }
783   if (!hsa_context.agents[n].initialized)
784     {
785       GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
786       return NULL;
787     }
788   return &hsa_context.agents[n];
789 }
790 
791 /* Insert MODULE to the linked list of modules of AGENT.  */
792 
793 static void
add_module_to_agent(struct agent_info * agent,struct module_info * module)794 add_module_to_agent (struct agent_info *agent, struct module_info *module)
795 {
796   if (agent->first_module)
797     agent->first_module->prev = module;
798   module->next = agent->first_module;
799   module->prev = NULL;
800   agent->first_module = module;
801 }
802 
803 /* Remove MODULE from the linked list of modules of AGENT.  */
804 
805 static void
remove_module_from_agent(struct agent_info * agent,struct module_info * module)806 remove_module_from_agent (struct agent_info *agent, struct module_info *module)
807 {
808   if (agent->first_module == module)
809     agent->first_module = module->next;
810   if (module->prev)
811     module->prev->next = module->next;
812   if (module->next)
813     module->next->prev = module->prev;
814 }
815 
816 /* Free the HSA program in agent and everything associated with it and set
817    agent->prog_finalized and the initialized flags of all kernels to false.
818    Return TRUE on success.  */
819 
820 static bool
destroy_hsa_program(struct agent_info * agent)821 destroy_hsa_program (struct agent_info *agent)
822 {
823   if (!agent->prog_finalized || agent->prog_finalized_error)
824     return true;
825 
826   hsa_status_t status;
827 
828   HSA_DEBUG ("Destroying the current HSA program.\n");
829 
830   status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
831   if (status != HSA_STATUS_SUCCESS)
832     return hsa_error ("Could not destroy HSA executable", status);
833 
834   struct module_info *module;
835   for (module = agent->first_module; module; module = module->next)
836     {
837       int i;
838       for (i = 0; i < module->kernel_count; i++)
839 	module->kernels[i].initialized = false;
840     }
841   agent->prog_finalized = false;
842   return true;
843 }
844 
845 /* Initialize KERNEL from D and other parameters.  Return true on success. */
846 
847 static bool
init_basic_kernel_info(struct kernel_info * kernel,struct hsa_kernel_description * d,struct agent_info * agent,struct module_info * module)848 init_basic_kernel_info (struct kernel_info *kernel,
849 			struct hsa_kernel_description *d,
850 			struct agent_info *agent,
851 			struct module_info *module)
852 {
853   kernel->agent = agent;
854   kernel->module = module;
855   kernel->name = d->name;
856   kernel->omp_data_size = d->omp_data_size;
857   kernel->gridified_kernel_p = d->gridified_kernel_p;
858   kernel->dependencies_count = d->kernel_dependencies_count;
859   kernel->dependencies = d->kernel_dependencies;
860   if (pthread_mutex_init (&kernel->init_mutex, NULL))
861     {
862       GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
863       return false;
864     }
865   return true;
866 }
867 
868 /* Part of the libgomp plugin interface.  Load BRIG module described by struct
869    brig_image_desc in TARGET_DATA and return references to kernel descriptors
870    in TARGET_TABLE.  */
871 
872 int
GOMP_OFFLOAD_load_image(int ord,unsigned version,const void * target_data,struct addr_pair ** target_table)873 GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
874 			 struct addr_pair **target_table)
875 {
876   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
877     {
878       GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
879 			 " (expected %u, received %u)",
880 			 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
881       return -1;
882     }
883 
884   struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
885   struct agent_info *agent;
886   struct addr_pair *pair;
887   struct module_info *module;
888   struct kernel_info *kernel;
889   int kernel_count = image_desc->kernel_count;
890 
891   agent = get_agent_info (ord);
892   if (!agent)
893     return -1;
894 
895   if (pthread_rwlock_wrlock (&agent->modules_rwlock))
896     {
897       GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
898       return -1;
899     }
900   if (agent->prog_finalized
901       && !destroy_hsa_program (agent))
902     return -1;
903 
904   HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
905   pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
906   *target_table = pair;
907   module = (struct module_info *)
908     GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
909 				+ kernel_count * sizeof (struct kernel_info));
910   module->image_desc = image_desc;
911   module->kernel_count = kernel_count;
912 
913   kernel = &module->kernels[0];
914 
915   /* Allocate memory for kernel dependencies.  */
916   for (unsigned i = 0; i < kernel_count; i++)
917     {
918       pair->start = (uintptr_t) kernel;
919       pair->end = (uintptr_t) (kernel + 1);
920 
921       struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
922       if (!init_basic_kernel_info (kernel, d, agent, module))
923 	return -1;
924       kernel++;
925       pair++;
926     }
927 
928   add_module_to_agent (agent, module);
929   if (pthread_rwlock_unlock (&agent->modules_rwlock))
930     {
931       GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
932       return -1;
933     }
934   return kernel_count;
935 }
936 
937 /* Add a shared BRIG library from a FILE_NAME to an AGENT.  */
938 
939 static struct brig_library_info *
add_shared_library(const char * file_name,struct agent_info * agent)940 add_shared_library (const char *file_name, struct agent_info *agent)
941 {
942   struct brig_library_info *library = NULL;
943 
944   void *f = dlopen (file_name, RTLD_NOW);
945   void *start = dlsym (f, "__brig_start");
946   void *end = dlsym (f, "__brig_end");
947 
948   if (start == NULL || end == NULL)
949     return NULL;
950 
951   unsigned size = end - start;
952   char *buf = (char *) GOMP_PLUGIN_malloc (size);
953   memcpy (buf, start, size);
954 
955   library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
956   library->file_name = (char *) GOMP_PLUGIN_malloc
957     ((strlen (file_name) + 1));
958   strcpy (library->file_name, file_name);
959   library->image = (hsa_ext_module_t) buf;
960 
961   return library;
962 }
963 
964 /* Release memory used for BRIG shared libraries that correspond
965    to an AGENT.  */
966 
967 static void
release_agent_shared_libraries(struct agent_info * agent)968 release_agent_shared_libraries (struct agent_info *agent)
969 {
970   for (unsigned i = 0; i < agent->brig_libraries_count; i++)
971     if (agent->brig_libraries[i])
972       {
973 	free (agent->brig_libraries[i]->file_name);
974 	free (agent->brig_libraries[i]->image);
975 	free (agent->brig_libraries[i]);
976       }
977 
978   free (agent->brig_libraries);
979 }
980 
981 /* Create and finalize the program consisting of all loaded modules.  */
982 
983 static void
create_and_finalize_hsa_program(struct agent_info * agent)984 create_and_finalize_hsa_program (struct agent_info *agent)
985 {
986   hsa_status_t status;
987   hsa_ext_program_t prog_handle;
988   int mi = 0;
989 
990   if (pthread_mutex_lock (&agent->prog_mutex))
991     GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
992   if (agent->prog_finalized)
993     goto final;
994 
995   status = hsa_fns.hsa_ext_program_create_fn
996     (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
997      HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
998      NULL, &prog_handle);
999   if (status != HSA_STATUS_SUCCESS)
1000     hsa_fatal ("Could not create an HSA program", status);
1001 
1002   HSA_DEBUG ("Created a finalized program\n");
1003 
1004   struct module_info *module = agent->first_module;
1005   while (module)
1006     {
1007       status = hsa_fns.hsa_ext_program_add_module_fn
1008 	(prog_handle, module->image_desc->brig_module);
1009       if (status != HSA_STATUS_SUCCESS)
1010 	hsa_fatal ("Could not add a module to the HSA program", status);
1011       module = module->next;
1012       mi++;
1013     }
1014 
1015   /* Load all shared libraries.  */
1016   const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
1017   const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
1018 
1019   agent->brig_libraries_count = libraries_count;
1020   agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
1021     (sizeof (struct brig_library_info) * libraries_count);
1022 
1023   for (unsigned i = 0; i < libraries_count; i++)
1024     {
1025       struct brig_library_info *library = add_shared_library (libraries[i],
1026 							      agent);
1027       if (library == NULL)
1028 	{
1029 	  HSA_WARNING ("Could not open a shared BRIG library: %s\n",
1030 		       libraries[i]);
1031 	  continue;
1032 	}
1033 
1034       status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
1035 						      library->image);
1036       if (status != HSA_STATUS_SUCCESS)
1037 	hsa_warn ("Could not add a shared BRIG library the HSA program",
1038 		  status);
1039       else
1040 	HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
1041 		   libraries[i]);
1042     }
1043 
1044   hsa_ext_control_directives_t control_directives;
1045   memset (&control_directives, 0, sizeof (control_directives));
1046   hsa_code_object_t code_object;
1047   status = hsa_fns.hsa_ext_program_finalize_fn
1048     (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
1049      control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
1050   if (status != HSA_STATUS_SUCCESS)
1051     {
1052       hsa_warn ("Finalization of the HSA program failed", status);
1053       goto failure;
1054     }
1055 
1056   HSA_DEBUG ("Finalization done\n");
1057   hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
1058 
1059   status
1060     = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
1061 					HSA_EXECUTABLE_STATE_UNFROZEN,
1062 					"", &agent->executable);
1063   if (status != HSA_STATUS_SUCCESS)
1064     hsa_fatal ("Could not create HSA executable", status);
1065 
1066   module = agent->first_module;
1067   while (module)
1068     {
1069       /* Initialize all global variables declared in the module.  */
1070       for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
1071 	{
1072 	  struct global_var_info *var;
1073 	  var = &module->image_desc->global_variables[i];
1074 	  status = hsa_fns.hsa_executable_global_variable_define_fn
1075 	    (agent->executable, var->name, var->address);
1076 
1077 	  HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
1078 		     var->address);
1079 
1080 	  if (status != HSA_STATUS_SUCCESS)
1081 	    hsa_fatal ("Could not define a global variable in the HSA program",
1082 		       status);
1083 	}
1084 
1085       module = module->next;
1086     }
1087 
1088   status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
1089 						       agent->id,
1090 						       code_object, "");
1091   if (status != HSA_STATUS_SUCCESS)
1092     hsa_fatal ("Could not add a code object to the HSA executable", status);
1093   status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
1094   if (status != HSA_STATUS_SUCCESS)
1095     hsa_fatal ("Could not freeze the HSA executable", status);
1096 
1097   HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
1098 
1099   /* If all goes good, jump to final.  */
1100   goto final;
1101 
1102 failure:
1103   agent->prog_finalized_error = true;
1104 
1105 final:
1106   agent->prog_finalized = true;
1107 
1108   if (pthread_mutex_unlock (&agent->prog_mutex))
1109     GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
1110 }
1111 
1112 /* Create kernel dispatch data structure for given KERNEL.  */
1113 
1114 static struct GOMP_hsa_kernel_dispatch *
create_single_kernel_dispatch(struct kernel_info * kernel,unsigned omp_data_size)1115 create_single_kernel_dispatch (struct kernel_info *kernel,
1116 			       unsigned omp_data_size)
1117 {
1118   struct agent_info *agent = kernel->agent;
1119   struct GOMP_hsa_kernel_dispatch *shadow
1120     = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
1121 
1122   shadow->queue = agent->command_q;
1123   shadow->omp_data_memory
1124     = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
1125   unsigned dispatch_count = kernel->dependencies_count;
1126   shadow->kernel_dispatch_count = dispatch_count;
1127 
1128   shadow->children_dispatches
1129     = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
1130 
1131   shadow->object = kernel->object;
1132 
1133   hsa_signal_t sync_signal;
1134   hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
1135   if (status != HSA_STATUS_SUCCESS)
1136     hsa_fatal ("Error creating the HSA sync signal", status);
1137 
1138   shadow->signal = sync_signal.handle;
1139   shadow->private_segment_size = kernel->private_segment_size;
1140   shadow->group_segment_size = kernel->group_segment_size;
1141 
1142   status
1143     = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
1144 				      kernel->kernarg_segment_size,
1145 				      &shadow->kernarg_address);
1146   if (status != HSA_STATUS_SUCCESS)
1147     hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
1148 
1149   return shadow;
1150 }
1151 
1152 /* Release data structure created for a kernel dispatch in SHADOW argument.  */
1153 
1154 static void
release_kernel_dispatch(struct GOMP_hsa_kernel_dispatch * shadow)1155 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
1156 {
1157   HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow,
1158 	     shadow->debug, (void *) shadow->debug);
1159 
1160   hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
1161 
1162   hsa_signal_t s;
1163   s.handle = shadow->signal;
1164   hsa_fns.hsa_signal_destroy_fn (s);
1165 
1166   free (shadow->omp_data_memory);
1167 
1168   for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1169     release_kernel_dispatch (shadow->children_dispatches[i]);
1170 
1171   free (shadow->children_dispatches);
1172   free (shadow);
1173 }
1174 
1175 /* Initialize a KERNEL without its dependencies.  MAX_OMP_DATA_SIZE is used
1176    to calculate maximum necessary memory for OMP data allocation.  */
1177 
1178 static void
init_single_kernel(struct kernel_info * kernel,unsigned * max_omp_data_size)1179 init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
1180 {
1181   hsa_status_t status;
1182   struct agent_info *agent = kernel->agent;
1183   hsa_executable_symbol_t kernel_symbol;
1184   status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
1185 						 kernel->name, agent->id,
1186 						 0, &kernel_symbol);
1187   if (status != HSA_STATUS_SUCCESS)
1188     {
1189       hsa_warn ("Could not find symbol for kernel in the code object", status);
1190       goto failure;
1191     }
1192   HSA_DEBUG ("Located kernel %s\n", kernel->name);
1193   status = hsa_fns.hsa_executable_symbol_get_info_fn
1194     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
1195   if (status != HSA_STATUS_SUCCESS)
1196     hsa_fatal ("Could not extract a kernel object from its symbol", status);
1197   status = hsa_fns.hsa_executable_symbol_get_info_fn
1198     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
1199      &kernel->kernarg_segment_size);
1200   if (status != HSA_STATUS_SUCCESS)
1201     hsa_fatal ("Could not get info about kernel argument size", status);
1202   status = hsa_fns.hsa_executable_symbol_get_info_fn
1203     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
1204      &kernel->group_segment_size);
1205   if (status != HSA_STATUS_SUCCESS)
1206     hsa_fatal ("Could not get info about kernel group segment size", status);
1207   status = hsa_fns.hsa_executable_symbol_get_info_fn
1208     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
1209      &kernel->private_segment_size);
1210   if (status != HSA_STATUS_SUCCESS)
1211     hsa_fatal ("Could not get info about kernel private segment size",
1212 	       status);
1213 
1214   HSA_DEBUG ("Kernel structure for %s fully initialized with "
1215 	     "following segment sizes: \n", kernel->name);
1216   HSA_DEBUG ("  group_segment_size: %u\n",
1217 	     (unsigned) kernel->group_segment_size);
1218   HSA_DEBUG ("  private_segment_size: %u\n",
1219 	     (unsigned) kernel->private_segment_size);
1220   HSA_DEBUG ("  kernarg_segment_size: %u\n",
1221 	     (unsigned) kernel->kernarg_segment_size);
1222   HSA_DEBUG ("  omp_data_size: %u\n", kernel->omp_data_size);
1223   HSA_DEBUG ("  gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
1224 
1225   if (kernel->omp_data_size > *max_omp_data_size)
1226     *max_omp_data_size = kernel->omp_data_size;
1227 
1228   for (unsigned i = 0; i < kernel->dependencies_count; i++)
1229     {
1230       struct kernel_info *dependency
1231 	= get_kernel_for_agent (agent, kernel->dependencies[i]);
1232 
1233       if (dependency == NULL)
1234 	{
1235 	  HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1236 		     "dependency name: %s\n", kernel->name,
1237 		     kernel->dependencies[i]);
1238 	  goto failure;
1239 	}
1240 
1241       if (dependency->dependencies_count > 0)
1242 	{
1243 	  HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1244 		     "a depth bigger than one\n");
1245 	  goto failure;
1246 	}
1247 
1248       init_single_kernel (dependency, max_omp_data_size);
1249     }
1250 
1251   return;
1252 
1253 failure:
1254   kernel->initialization_failed = true;
1255 }
1256 
1257 /* Indent stream F by INDENT spaces.  */
1258 
1259 static void
indent_stream(FILE * f,unsigned indent)1260 indent_stream (FILE *f, unsigned indent)
1261 {
1262   fprintf (f, "%*s", indent, "");
1263 }
1264 
1265 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces.  */
1266 
1267 static void
print_kernel_dispatch(struct GOMP_hsa_kernel_dispatch * dispatch,unsigned indent)1268 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
1269 {
1270   indent_stream (stderr, indent);
1271   fprintf (stderr, "this: %p\n", dispatch);
1272   indent_stream (stderr, indent);
1273   fprintf (stderr, "queue: %p\n", dispatch->queue);
1274   indent_stream (stderr, indent);
1275   fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
1276   indent_stream (stderr, indent);
1277   fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
1278   indent_stream (stderr, indent);
1279   fprintf (stderr, "object: %lu\n", dispatch->object);
1280   indent_stream (stderr, indent);
1281   fprintf (stderr, "signal: %lu\n", dispatch->signal);
1282   indent_stream (stderr, indent);
1283   fprintf (stderr, "private_segment_size: %u\n",
1284 	   dispatch->private_segment_size);
1285   indent_stream (stderr, indent);
1286   fprintf (stderr, "group_segment_size: %u\n",
1287 	   dispatch->group_segment_size);
1288   indent_stream (stderr, indent);
1289   fprintf (stderr, "children dispatches: %lu\n",
1290 	   dispatch->kernel_dispatch_count);
1291   indent_stream (stderr, indent);
1292   fprintf (stderr, "omp_num_threads: %u\n",
1293 	   dispatch->omp_num_threads);
1294   fprintf (stderr, "\n");
1295 
1296   for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
1297     print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
1298 }
1299 
1300 /* Create kernel dispatch data structure for a KERNEL and all its
1301    dependencies.  */
1302 
1303 static struct GOMP_hsa_kernel_dispatch *
create_kernel_dispatch(struct kernel_info * kernel,unsigned omp_data_size)1304 create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
1305 {
1306   struct GOMP_hsa_kernel_dispatch *shadow
1307     = create_single_kernel_dispatch (kernel, omp_data_size);
1308   shadow->omp_num_threads = 64;
1309   shadow->debug = 0;
1310   shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
1311 
1312   /* Create kernel dispatch data structures.  We do not allow to have
1313      a kernel dispatch with depth bigger than one.  */
1314   for (unsigned i = 0; i < kernel->dependencies_count; i++)
1315     {
1316       struct kernel_info *dependency
1317 	= get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
1318       shadow->children_dispatches[i]
1319 	= create_single_kernel_dispatch (dependency, omp_data_size);
1320       shadow->children_dispatches[i]->queue
1321 	= kernel->agent->kernel_dispatch_command_q;
1322       shadow->children_dispatches[i]->omp_level = 1;
1323     }
1324 
1325   return shadow;
1326 }
1327 
1328 /* Do all the work that is necessary before running KERNEL for the first time.
1329    The function assumes the program has been created, finalized and frozen by
1330    create_and_finalize_hsa_program.  */
1331 
1332 static void
init_kernel(struct kernel_info * kernel)1333 init_kernel (struct kernel_info *kernel)
1334 {
1335   if (pthread_mutex_lock (&kernel->init_mutex))
1336     GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1337   if (kernel->initialized)
1338     {
1339       if (pthread_mutex_unlock (&kernel->init_mutex))
1340 	GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1341 			   "mutex");
1342 
1343       return;
1344     }
1345 
1346   /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1347      dispatch operation.  */
1348   init_single_kernel (kernel, &kernel->max_omp_data_size);
1349 
1350   if (!kernel->initialization_failed)
1351     HSA_DEBUG ("\n");
1352 
1353   kernel->initialized = true;
1354   if (pthread_mutex_unlock (&kernel->init_mutex))
1355     GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1356 		       "mutex");
1357 }
1358 
1359 /* Parse the target attributes INPUT provided by the compiler and return true
1360    if we should run anything all.  If INPUT is NULL, fill DEF with default
1361    values, then store INPUT or DEF into *RESULT.  */
1362 
1363 static bool
parse_target_attributes(void ** input,struct GOMP_kernel_launch_attributes * def,struct GOMP_kernel_launch_attributes ** result)1364 parse_target_attributes (void **input,
1365 			 struct GOMP_kernel_launch_attributes *def,
1366 			 struct GOMP_kernel_launch_attributes **result)
1367 {
1368   if (!input)
1369     GOMP_PLUGIN_fatal ("No target arguments provided");
1370 
1371   bool attrs_found = false;
1372   while (*input)
1373     {
1374       uintptr_t id = (uintptr_t) *input;
1375       if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
1376 	  && ((id & GOMP_TARGET_ARG_ID_MASK)
1377 	      == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
1378 	{
1379 	  input++;
1380 	  attrs_found = true;
1381 	  break;
1382 	}
1383 
1384       if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
1385 	input++;
1386       input++;
1387     }
1388 
1389   if (!attrs_found)
1390     {
1391       def->ndim = 1;
1392       def->gdims[0] = 1;
1393       def->gdims[1] = 1;
1394       def->gdims[2] = 1;
1395       def->wdims[0] = 1;
1396       def->wdims[1] = 1;
1397       def->wdims[2] = 1;
1398       *result = def;
1399       HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1400       return true;
1401     }
1402 
1403   struct GOMP_kernel_launch_attributes *kla;
1404   kla = (struct GOMP_kernel_launch_attributes *) *input;
1405   *result = kla;
1406   if (kla->ndim == 0 || kla->ndim > 3)
1407     GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
1408 
1409   HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
1410   unsigned i;
1411   for (i = 0; i < kla->ndim; i++)
1412     {
1413       HSA_DEBUG ("  Dimension %u: grid size %u and group size %u\n", i,
1414 		 kla->gdims[i], kla->wdims[i]);
1415       if (kla->gdims[i] == 0)
1416 	return false;
1417     }
1418   return true;
1419 }
1420 
1421 /* Return the group size given the requested GROUP size, GRID size and number
1422    of grid dimensions NDIM.  */
1423 
1424 static uint32_t
get_group_size(uint32_t ndim,uint32_t grid,uint32_t group)1425 get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
1426 {
1427   if (group == 0)
1428     {
1429       /* TODO: Provide a default via environment or device characteristics.  */
1430       if (ndim == 1)
1431 	group = 64;
1432       else if (ndim == 2)
1433 	group = 8;
1434       else
1435 	group = 4;
1436     }
1437 
1438   if (group > grid)
1439     group = grid;
1440   return group;
1441 }
1442 
1443 /* Return true if the HSA runtime can run function FN_PTR.  */
1444 
1445 bool
GOMP_OFFLOAD_can_run(void * fn_ptr)1446 GOMP_OFFLOAD_can_run (void *fn_ptr)
1447 {
1448   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1449   struct agent_info *agent = kernel->agent;
1450   create_and_finalize_hsa_program (agent);
1451 
1452   if (agent->prog_finalized_error)
1453     goto failure;
1454 
1455   init_kernel (kernel);
1456   if (kernel->initialization_failed)
1457     goto failure;
1458 
1459   return true;
1460 
1461 failure:
1462   if (suppress_host_fallback)
1463     GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1464   HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1465   return false;
1466 }
1467 
1468 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET.  */
1469 
1470 void
packet_store_release(uint32_t * packet,uint16_t header,uint16_t rest)1471 packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
1472 {
1473   __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
1474 }
1475 
1476 /* Run KERNEL on its agent, pass VARS to it as arguments and take
1477    launchattributes from KLA.  */
1478 
1479 void
run_kernel(struct kernel_info * kernel,void * vars,struct GOMP_kernel_launch_attributes * kla)1480 run_kernel (struct kernel_info *kernel, void *vars,
1481 	    struct GOMP_kernel_launch_attributes *kla)
1482 {
1483   struct agent_info *agent = kernel->agent;
1484   if (pthread_rwlock_rdlock (&agent->modules_rwlock))
1485     GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1486 
1487   if (!agent->initialized)
1488     GOMP_PLUGIN_fatal ("Agent must be initialized");
1489 
1490   if (!kernel->initialized)
1491     GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1492 
1493   struct GOMP_hsa_kernel_dispatch *shadow
1494     = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
1495 
1496   if (debug)
1497     {
1498       fprintf (stderr, "\nKernel has following dependencies:\n");
1499       print_kernel_dispatch (shadow, 2);
1500     }
1501 
1502   uint64_t index
1503     = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
1504   HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
1505 
1506   /* Wait until the queue is not full before writing the packet.   */
1507   while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
1508 	 >= agent->command_q->size)
1509     ;
1510 
1511   hsa_kernel_dispatch_packet_t *packet;
1512   packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
1513 	   + index % agent->command_q->size;
1514 
1515   memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
1516   packet->grid_size_x = kla->gdims[0];
1517   packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
1518 					     kla->wdims[0]);
1519 
1520   if (kla->ndim >= 2)
1521     {
1522       packet->grid_size_y = kla->gdims[1];
1523       packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
1524 						 kla->wdims[1]);
1525     }
1526   else
1527     {
1528       packet->grid_size_y = 1;
1529       packet->workgroup_size_y = 1;
1530     }
1531 
1532   if (kla->ndim == 3)
1533     {
1534       packet->grid_size_z = kla->gdims[2];
1535       packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
1536 					     kla->wdims[2]);
1537     }
1538   else
1539     {
1540       packet->grid_size_z = 1;
1541       packet->workgroup_size_z = 1;
1542     }
1543 
1544   packet->private_segment_size = kernel->private_segment_size;
1545   packet->group_segment_size = kernel->group_segment_size;
1546   packet->kernel_object = kernel->object;
1547   packet->kernarg_address = shadow->kernarg_address;
1548   hsa_signal_t s;
1549   s.handle = shadow->signal;
1550   packet->completion_signal = s;
1551   hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
1552   memcpy (shadow->kernarg_address, &vars, sizeof (vars));
1553 
1554   /* PR hsa/70337.  */
1555   size_t vars_size = sizeof (vars);
1556   if (kernel->kernarg_segment_size > vars_size)
1557     {
1558       if (kernel->kernarg_segment_size != vars_size
1559 	  + sizeof (struct hsa_kernel_runtime *))
1560 	GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1561       memcpy (packet->kernarg_address + vars_size, &shadow,
1562 	      sizeof (struct hsa_kernel_runtime *));
1563     }
1564 
1565   HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1566 
1567   uint16_t header;
1568   header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1569   header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1570   header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1571 
1572   HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
1573 
1574   packet_store_release ((uint32_t *) packet, header,
1575 			(uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
1576 
1577   hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
1578 				       index);
1579 
1580   /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1581      signal wait and signal load operations on their own and we need to
1582      periodically call the hsa_signal_load_acquire on completion signals of
1583      children kernels in the CPU to make that happen.  As soon the
1584      limitation will be resolved, this workaround can be removed.  */
1585 
1586   HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1587 
1588   /* Root signal waits with 1ms timeout.  */
1589   while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
1590 					     1000 * 1000,
1591 					     HSA_WAIT_STATE_BLOCKED) != 0)
1592     for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1593       {
1594 	hsa_signal_t child_s;
1595 	child_s.handle = shadow->children_dispatches[i]->signal;
1596 
1597 	HSA_DEBUG ("Waiting for children completion signal: %lu\n",
1598 		   shadow->children_dispatches[i]->signal);
1599 	hsa_fns.hsa_signal_load_acquire_fn (child_s);
1600       }
1601 
1602   release_kernel_dispatch (shadow);
1603 
1604   if (pthread_rwlock_unlock (&agent->modules_rwlock))
1605     GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1606 }
1607 
1608 /* Part of the libgomp plugin interface.  Run a kernel on device N (the number
1609    is actually ignored, we assume the FN_PTR has been mapped using the correct
1610    device) and pass it an array of pointers in VARS as a parameter.  The kernel
1611    is identified by FN_PTR which must point to a kernel_info structure.  */
1612 
1613 void
GOMP_OFFLOAD_run(int n,void * fn_ptr,void * vars,void ** args)1614 GOMP_OFFLOAD_run (int n __attribute__((unused)),
1615 		  void *fn_ptr, void *vars, void **args)
1616 {
1617   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1618   struct GOMP_kernel_launch_attributes def;
1619   struct GOMP_kernel_launch_attributes *kla;
1620   if (!parse_target_attributes (args, &def, &kla))
1621     {
1622       HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1623       return;
1624     }
1625   run_kernel (kernel, vars, kla);
1626 }
1627 
1628 /* Information to be passed to a thread running a kernel asycnronously.  */
1629 
1630 struct async_run_info
1631 {
1632   int device;
1633   void *tgt_fn;
1634   void *tgt_vars;
1635   void **args;
1636   void *async_data;
1637 };
1638 
1639 /* Thread routine to run a kernel asynchronously.  */
1640 
1641 static void *
run_kernel_asynchronously(void * thread_arg)1642 run_kernel_asynchronously (void *thread_arg)
1643 {
1644   struct async_run_info *info = (struct async_run_info *) thread_arg;
1645   int device = info->device;
1646   void *tgt_fn = info->tgt_fn;
1647   void *tgt_vars = info->tgt_vars;
1648   void **args = info->args;
1649   void *async_data = info->async_data;
1650 
1651   free (info);
1652   GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
1653   GOMP_PLUGIN_target_task_completion (async_data);
1654   return NULL;
1655 }
1656 
1657 /* Part of the libgomp plugin interface.  Run a kernel like GOMP_OFFLOAD_run
1658    does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1659    has finished.  */
1660 
1661 void
GOMP_OFFLOAD_async_run(int device,void * tgt_fn,void * tgt_vars,void ** args,void * async_data)1662 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
1663 			void **args, void *async_data)
1664 {
1665   pthread_t pt;
1666   struct async_run_info *info;
1667   HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
1668   info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
1669 
1670   info->device = device;
1671   info->tgt_fn = tgt_fn;
1672   info->tgt_vars = tgt_vars;
1673   info->args = args;
1674   info->async_data = async_data;
1675 
1676   int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
1677   if (err != 0)
1678     GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1679 		       strerror (err));
1680   err = pthread_detach (pt);
1681   if (err != 0)
1682     GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1683 		       "asynchronously: %s", strerror (err));
1684 }
1685 
1686 /* Deinitialize all information associated with MODULE and kernels within
1687    it.  Return TRUE on success.  */
1688 
1689 static bool
destroy_module(struct module_info * module)1690 destroy_module (struct module_info *module)
1691 {
1692   int i;
1693   for (i = 0; i < module->kernel_count; i++)
1694     if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
1695       {
1696 	GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1697 			   "mutex");
1698 	return false;
1699       }
1700   return true;
1701 }
1702 
1703 /* Part of the libgomp plugin interface.  Unload BRIG module described by
1704    struct brig_image_desc in TARGET_DATA from agent number N.  Return
1705    TRUE on success.  */
1706 
1707 bool
GOMP_OFFLOAD_unload_image(int n,unsigned version,const void * target_data)1708 GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data)
1709 {
1710   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
1711     {
1712       GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1713 			 " (expected %u, received %u)",
1714 			 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
1715       return false;
1716     }
1717 
1718   struct agent_info *agent;
1719   agent = get_agent_info (n);
1720   if (!agent)
1721     return false;
1722 
1723   if (pthread_rwlock_wrlock (&agent->modules_rwlock))
1724     {
1725       GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1726       return false;
1727     }
1728   struct module_info *module = agent->first_module;
1729   while (module)
1730     {
1731       if (module->image_desc == target_data)
1732 	break;
1733       module = module->next;
1734     }
1735   if (!module)
1736     {
1737       GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1738 			 "loaded before");
1739       return false;
1740     }
1741 
1742   remove_module_from_agent (agent, module);
1743   if (!destroy_module (module))
1744     return false;
1745   free (module);
1746   if (!destroy_hsa_program (agent))
1747     return false;
1748   if (pthread_rwlock_unlock (&agent->modules_rwlock))
1749     {
1750       GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1751       return false;
1752     }
1753   return true;
1754 }
1755 
1756 /* Part of the libgomp plugin interface.  Deinitialize all information and
1757    status associated with agent number N.  We do not attempt any
1758    synchronization, assuming the user and libgomp will not attempt
1759    deinitialization of a device that is in any way being used at the same
1760    time.  Return TRUE on success.  */
1761 
1762 bool
GOMP_OFFLOAD_fini_device(int n)1763 GOMP_OFFLOAD_fini_device (int n)
1764 {
1765   struct agent_info *agent = get_agent_info (n);
1766   if (!agent)
1767     return false;
1768 
1769   if (!agent->initialized)
1770     return true;
1771 
1772   struct module_info *next_module = agent->first_module;
1773   while (next_module)
1774     {
1775       struct module_info *module = next_module;
1776       next_module = module->next;
1777       if (!destroy_module (module))
1778 	return false;
1779       free (module);
1780     }
1781   agent->first_module = NULL;
1782   if (!destroy_hsa_program (agent))
1783     return false;
1784 
1785   release_agent_shared_libraries (agent);
1786 
1787   hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
1788   if (status != HSA_STATUS_SUCCESS)
1789     return hsa_error ("Error destroying command queue", status);
1790   status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
1791   if (status != HSA_STATUS_SUCCESS)
1792     return hsa_error ("Error destroying kernel dispatch command queue", status);
1793   if (pthread_mutex_destroy (&agent->prog_mutex))
1794     {
1795       GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1796       return false;
1797     }
1798   if (pthread_rwlock_destroy (&agent->modules_rwlock))
1799     {
1800       GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1801       return false;
1802     }
1803   agent->initialized = false;
1804   return true;
1805 }
1806 
1807 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1808    for HSA.  */
1809 
1810 void *
GOMP_OFFLOAD_alloc(int ord,size_t size)1811 GOMP_OFFLOAD_alloc (int ord, size_t size)
1812 {
1813   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1814 		     "it should never be called");
1815   return NULL;
1816 }
1817 
1818 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1819    for HSA.  */
1820 
1821 bool
GOMP_OFFLOAD_free(int ord,void * ptr)1822 GOMP_OFFLOAD_free (int ord, void *ptr)
1823 {
1824   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1825 		     "it should never be called");
1826   return false;
1827 }
1828 
1829 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1830    for HSA.  */
1831 
1832 bool
GOMP_OFFLOAD_dev2host(int ord,void * dst,const void * src,size_t n)1833 GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
1834 {
1835   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1836 		     "it should never be called");
1837   return false;
1838 }
1839 
1840 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1841    for HSA.  */
1842 
1843 bool
GOMP_OFFLOAD_host2dev(int ord,void * dst,const void * src,size_t n)1844 GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
1845 {
1846   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1847 		     "it should never be called");
1848   return false;
1849 }
1850 
1851 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1852    for HSA.  */
1853 
1854 bool
GOMP_OFFLOAD_dev2dev(int ord,void * dst,const void * src,size_t n)1855 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
1856 {
1857   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1858 		     "it should never be called");
1859   return false;
1860 }
1861