1 /* Plugin for HSAIL execution.
2 
3    Copyright (C) 2013-2019 Free Software Foundation, Inc.
4 
5    Contributed by Martin Jambor <mjambor@suse.cz> and
6    Martin Liska <mliska@suse.cz>.
7 
8    This file is part of the GNU Offloading and Multi Processing Library
9    (libgomp).
10 
11    Libgomp is free software; you can redistribute it and/or modify it
12    under the terms of the GNU General Public License as published by
13    the Free Software Foundation; either version 3, or (at your option)
14    any later version.
15 
16    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
19    more details.
20 
21    Under Section 7 of GPL version 3, you are granted additional
22    permissions described in the GCC Runtime Library Exception, version
23    3.1, as published by the Free Software Foundation.
24 
25    You should have received a copy of the GNU General Public License and
26    a copy of the GCC Runtime Library Exception along with this program;
27    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
28    <http://www.gnu.org/licenses/>.  */
29 
30 #include "config.h"
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <pthread.h>
35 #include <inttypes.h>
36 #include <stdbool.h>
37 #include <hsa.h>
38 #include <plugin/hsa_ext_finalize.h>
39 #include <dlfcn.h>
40 #include "libgomp-plugin.h"
41 #include "gomp-constants.h"
42 #include "secure_getenv.h"
43 
44 /* As an HSA runtime is dlopened, following structure defines function
45    pointers utilized by the HSA plug-in.  */
46 
47 struct hsa_runtime_fn_info
48 {
49   /* HSA runtime.  */
50   hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
51 					const char **status_string);
52   hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
53 					 hsa_agent_info_t attribute,
54 					 void *value);
55   hsa_status_t (*hsa_init_fn) (void);
56   hsa_status_t (*hsa_iterate_agents_fn)
57     (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
58   hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
59 					  hsa_region_info_t attribute,
60 					  void *value);
61   hsa_status_t (*hsa_queue_create_fn)
62     (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
63      void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
64      void *data, uint32_t private_segment_size,
65      uint32_t group_segment_size, hsa_queue_t **queue);
66   hsa_status_t (*hsa_agent_iterate_regions_fn)
67     (hsa_agent_t agent,
68      hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
69   hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
70   hsa_status_t (*hsa_executable_create_fn)
71     (hsa_profile_t profile, hsa_executable_state_t executable_state,
72      const char *options, hsa_executable_t *executable);
73   hsa_status_t (*hsa_executable_global_variable_define_fn)
74     (hsa_executable_t executable, const char *variable_name, void *address);
75   hsa_status_t (*hsa_executable_load_code_object_fn)
76     (hsa_executable_t executable, hsa_agent_t agent,
77      hsa_code_object_t code_object, const char *options);
78   hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
79 					   const char *options);
80   hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
81 					uint32_t num_consumers,
82 					const hsa_agent_t *consumers,
83 					hsa_signal_t *signal);
84   hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
85 					  void **ptr);
86   hsa_status_t (*hsa_memory_free_fn) (void *ptr);
87   hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
88   hsa_status_t (*hsa_executable_get_symbol_fn)
89     (hsa_executable_t executable, const char *module_name,
90      const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
91      hsa_executable_symbol_t *symbol);
92   hsa_status_t (*hsa_executable_symbol_get_info_fn)
93     (hsa_executable_symbol_t executable_symbol,
94      hsa_executable_symbol_info_t attribute, void *value);
95   uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
96 						    uint64_t value);
97   uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
98   void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
99 				       hsa_signal_value_t value);
100   void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
101 				       hsa_signal_value_t value);
102   hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
103     (hsa_signal_t signal, hsa_signal_condition_t condition,
104      hsa_signal_value_t compare_value, uint64_t timeout_hint,
105      hsa_wait_state_t wait_state_hint);
106   hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
107   hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
108 
109   /* HSA finalizer.  */
110   hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
111 						 hsa_ext_module_t module);
112   hsa_status_t (*hsa_ext_program_create_fn)
113     (hsa_machine_model_t machine_model, hsa_profile_t profile,
114      hsa_default_float_rounding_mode_t default_float_rounding_mode,
115      const char *options, hsa_ext_program_t *program);
116   hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
117   hsa_status_t (*hsa_ext_program_finalize_fn)
118     (hsa_ext_program_t program,hsa_isa_t isa,
119      int32_t call_convention, hsa_ext_control_directives_t control_directives,
120      const char *options, hsa_code_object_type_t code_object_type,
121      hsa_code_object_t *code_object);
122 };
123 
124 /* HSA runtime functions that are initialized in init_hsa_context.  */
125 
126 static struct hsa_runtime_fn_info hsa_fns;
127 
128 /* Keep the following GOMP prefixed structures in sync with respective parts of
129    the compiler.  */
130 
131 /* Structure describing the run-time and grid properties of an HSA kernel
132    lauch.  */
133 
134 struct GOMP_kernel_launch_attributes
135 {
136   /* Number of dimensions the workload has.  Maximum number is 3.  */
137   uint32_t ndim;
138   /* Size of the grid in the three respective dimensions.  */
139   uint32_t gdims[3];
140   /* Size of work-groups in the respective dimensions.  */
141   uint32_t wdims[3];
142 };
143 
144 /* Collection of information needed for a dispatch of a kernel from a
145    kernel.  */
146 
147 struct GOMP_hsa_kernel_dispatch
148 {
149   /* Pointer to a command queue associated with a kernel dispatch agent.  */
150   void *queue;
151   /* Pointer to reserved memory for OMP data struct copying.  */
152   void *omp_data_memory;
153   /* Pointer to a memory space used for kernel arguments passing.  */
154   void *kernarg_address;
155   /* Kernel object.  */
156   uint64_t object;
157   /* Synchronization signal used for dispatch synchronization.  */
158   uint64_t signal;
159   /* Private segment size.  */
160   uint32_t private_segment_size;
161   /* Group segment size.  */
162   uint32_t group_segment_size;
163   /* Number of children kernel dispatches.  */
164   uint64_t kernel_dispatch_count;
165   /* Debug purpose argument.  */
166   uint64_t debug;
167   /* Levels-var ICV.  */
168   uint64_t omp_level;
169   /* Kernel dispatch structures created for children kernel dispatches.  */
170   struct GOMP_hsa_kernel_dispatch **children_dispatches;
171   /* Number of threads.  */
172   uint32_t omp_num_threads;
173 };
174 
175 /* Part of the libgomp plugin interface.  Return the name of the accelerator,
176    which is "hsa".  */
177 
178 const char *
GOMP_OFFLOAD_get_name(void)179 GOMP_OFFLOAD_get_name (void)
180 {
181   return "hsa";
182 }
183 
184 /* Part of the libgomp plugin interface.  Return the specific capabilities the
185    HSA accelerator have.  */
186 
187 unsigned int
GOMP_OFFLOAD_get_caps(void)188 GOMP_OFFLOAD_get_caps (void)
189 {
190   return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
191 }
192 
193 /* Part of the libgomp plugin interface.  Identify as HSA accelerator.  */
194 
195 int
GOMP_OFFLOAD_get_type(void)196 GOMP_OFFLOAD_get_type (void)
197 {
198   return OFFLOAD_TARGET_TYPE_HSA;
199 }
200 
201 /* Return the libgomp version number we're compatible with.  There is
202    no requirement for cross-version compatibility.  */
203 
204 unsigned
GOMP_OFFLOAD_version(void)205 GOMP_OFFLOAD_version (void)
206 {
207   return GOMP_VERSION;
208 }
209 
210 /* Flag to decide whether print to stderr information about what is going on.
211    Set in init_debug depending on environment variables.  */
212 
213 static bool debug;
214 
215 /* Flag to decide if the runtime should suppress a possible fallback to host
216    execution.  */
217 
218 static bool suppress_host_fallback;
219 
220 /* Flag to locate HSA runtime shared library that is dlopened
221    by this plug-in.  */
222 
223 static const char *hsa_runtime_lib;
224 
225 /* Flag to decide if the runtime should support also CPU devices (can be
226    a simulator).  */
227 
228 static bool support_cpu_devices;
229 
230 /* Initialize debug and suppress_host_fallback according to the environment.  */
231 
232 static void
init_enviroment_variables(void)233 init_enviroment_variables (void)
234 {
235   if (secure_getenv ("HSA_DEBUG"))
236     debug = true;
237   else
238     debug = false;
239 
240   if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
241     suppress_host_fallback = true;
242   else
243     suppress_host_fallback = false;
244 
245   hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
246   if (hsa_runtime_lib == NULL)
247     hsa_runtime_lib = "libhsa-runtime64.so";
248 
249   support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
250 }
251 
252 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
253    is set to true.  */
254 
255 #define HSA_LOG(prefix, ...) \
256   do \
257   { \
258     if (debug) \
259       { \
260 	fprintf (stderr, prefix); \
261 	fprintf (stderr, __VA_ARGS__); \
262       } \
263   } \
264   while (false)
265 
266 /* Print a debugging message to stderr.  */
267 
268 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
269 
270 /* Print a warning message to stderr.  */
271 
272 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
273 
274 /* Print HSA warning STR with an HSA STATUS code.  */
275 
276 static void
hsa_warn(const char * str,hsa_status_t status)277 hsa_warn (const char *str, hsa_status_t status)
278 {
279   if (!debug)
280     return;
281 
282   const char *hsa_error_msg;
283   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
284 
285   fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
286 }
287 
288 /* Report a fatal error STR together with the HSA error corresponding to STATUS
289    and terminate execution of the current process.  */
290 
291 static void
hsa_fatal(const char * str,hsa_status_t status)292 hsa_fatal (const char *str, hsa_status_t status)
293 {
294   const char *hsa_error_msg;
295   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
296   GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
297 		     hsa_error_msg);
298 }
299 
300 /* Like hsa_fatal, except only report error message, and return FALSE
301    for propagating error processing to outside of plugin.  */
302 
303 static bool
hsa_error(const char * str,hsa_status_t status)304 hsa_error (const char *str, hsa_status_t status)
305 {
306   const char *hsa_error_msg;
307   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
308   GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
309 		     hsa_error_msg);
310   return false;
311 }
312 
313 struct hsa_kernel_description
314 {
315   const char *name;
316   unsigned omp_data_size;
317   bool gridified_kernel_p;
318   unsigned kernel_dependencies_count;
319   const char **kernel_dependencies;
320 };
321 
322 struct global_var_info
323 {
324   const char *name;
325   void *address;
326 };
327 
328 /* Data passed by the static initializer of a compilation unit containing BRIG
329    to GOMP_offload_register.  */
330 
331 struct brig_image_desc
332 {
333   hsa_ext_module_t brig_module;
334   const unsigned kernel_count;
335   struct hsa_kernel_description *kernel_infos;
336   const unsigned global_variable_count;
337   struct global_var_info *global_variables;
338 };
339 
340 struct agent_info;
341 
342 /* Information required to identify, finalize and run any given kernel.  */
343 
344 struct kernel_info
345 {
346   /* Name of the kernel, required to locate it within the brig module.  */
347   const char *name;
348   /* Size of memory space for OMP data.  */
349   unsigned omp_data_size;
350   /* The specific agent the kernel has been or will be finalized for and run
351      on.  */
352   struct agent_info *agent;
353   /* The specific module where the kernel takes place.  */
354   struct module_info *module;
355   /* Mutex enforcing that at most once thread ever initializes a kernel for
356      use.  A thread should have locked agent->modules_rwlock for reading before
357      acquiring it.  */
358   pthread_mutex_t init_mutex;
359   /* Flag indicating whether the kernel has been initialized and all fields
360      below it contain valid data.  */
361   bool initialized;
362   /* Flag indicating that the kernel has a problem that blocks an execution.  */
363   bool initialization_failed;
364   /* The object to be put into the dispatch queue.  */
365   uint64_t object;
366   /* Required size of kernel arguments.  */
367   uint32_t kernarg_segment_size;
368   /* Required size of group segment.  */
369   uint32_t group_segment_size;
370   /* Required size of private segment.  */
371   uint32_t private_segment_size;
372   /* List of all kernel dependencies.  */
373   const char **dependencies;
374   /* Number of dependencies.  */
375   unsigned dependencies_count;
376   /* Maximum OMP data size necessary for kernel from kernel dispatches.  */
377   unsigned max_omp_data_size;
378   /* True if the kernel is gridified.  */
379   bool gridified_kernel_p;
380 };
381 
382 /* Information about a particular brig module, its image and kernels.  */
383 
384 struct module_info
385 {
386   /* The next and previous module in the linked list of modules of an agent.  */
387   struct module_info *next, *prev;
388   /* The description with which the program has registered the image.  */
389   struct brig_image_desc *image_desc;
390 
391   /* Number of kernels in this module.  */
392   int kernel_count;
393   /* An array of kernel_info structures describing each kernel in this
394      module.  */
395   struct kernel_info kernels[];
396 };
397 
398 /* Information about shared brig library.  */
399 
400 struct brig_library_info
401 {
402   char *file_name;
403   hsa_ext_module_t image;
404 };
405 
406 /* Description of an HSA GPU agent and the program associated with it.  */
407 
408 struct agent_info
409 {
410   /* The HSA ID of the agent.  Assigned when hsa_context is initialized.  */
411   hsa_agent_t id;
412   /* Whether the agent has been initialized.  The fields below are usable only
413      if it has been.  */
414   bool initialized;
415   /* The HSA ISA of this agent.  */
416   hsa_isa_t isa;
417   /* Command queue of the agent.  */
418   hsa_queue_t *command_q;
419   /* Kernel from kernel dispatch command queue.  */
420   hsa_queue_t *kernel_dispatch_command_q;
421   /* The HSA memory region from which to allocate kernel arguments.  */
422   hsa_region_t kernarg_region;
423 
424   /* Read-write lock that protects kernels which are running or about to be run
425      from interference with loading and unloading of images.  Needs to be
426      locked for reading while a kernel is being run, and for writing if the
427      list of modules is manipulated (and thus the HSA program invalidated).  */
428   pthread_rwlock_t modules_rwlock;
429   /* The first module in a linked list of modules associated with this
430      kernel.  */
431   struct module_info *first_module;
432 
433   /* Mutex enforcing that only one thread will finalize the HSA program.  A
434      thread should have locked agent->modules_rwlock for reading before
435      acquiring it.  */
436   pthread_mutex_t prog_mutex;
437   /* Flag whether the HSA program that consists of all the modules has been
438      finalized.  */
439   bool prog_finalized;
440   /* Flag whether the program was finalized but with a failure.  */
441   bool prog_finalized_error;
442   /* HSA executable - the finalized program that is used to locate kernels.  */
443   hsa_executable_t executable;
444   /* List of BRIG libraries.  */
445   struct brig_library_info **brig_libraries;
446   /* Number of loaded shared BRIG libraries.  */
447   unsigned brig_libraries_count;
448 };
449 
450 /* Information about the whole HSA environment and all of its agents.  */
451 
452 struct hsa_context_info
453 {
454   /* Whether the structure has been initialized.  */
455   bool initialized;
456   /* Number of usable GPU HSA agents in the system.  */
457   int agent_count;
458   /* Array of agent_info structures describing the individual HSA agents.  */
459   struct agent_info *agents;
460 };
461 
462 /* Information about the whole HSA environment and all of its agents.  */
463 
464 static struct hsa_context_info hsa_context;
465 
466 #define DLSYM_FN(function) \
467   hsa_fns.function##_fn = dlsym (handle, #function); \
468   if (hsa_fns.function##_fn == NULL) \
469     goto dl_fail;
470 
471 static bool
init_hsa_runtime_functions(void)472 init_hsa_runtime_functions (void)
473 {
474   void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
475   if (handle == NULL)
476     goto dl_fail;
477 
478   DLSYM_FN (hsa_status_string)
479   DLSYM_FN (hsa_agent_get_info)
480   DLSYM_FN (hsa_init)
481   DLSYM_FN (hsa_iterate_agents)
482   DLSYM_FN (hsa_region_get_info)
483   DLSYM_FN (hsa_queue_create)
484   DLSYM_FN (hsa_agent_iterate_regions)
485   DLSYM_FN (hsa_executable_destroy)
486   DLSYM_FN (hsa_executable_create)
487   DLSYM_FN (hsa_executable_global_variable_define)
488   DLSYM_FN (hsa_executable_load_code_object)
489   DLSYM_FN (hsa_executable_freeze)
490   DLSYM_FN (hsa_signal_create)
491   DLSYM_FN (hsa_memory_allocate)
492   DLSYM_FN (hsa_memory_free)
493   DLSYM_FN (hsa_signal_destroy)
494   DLSYM_FN (hsa_executable_get_symbol)
495   DLSYM_FN (hsa_executable_symbol_get_info)
496   DLSYM_FN (hsa_queue_add_write_index_release)
497   DLSYM_FN (hsa_queue_load_read_index_acquire)
498   DLSYM_FN (hsa_signal_wait_acquire)
499   DLSYM_FN (hsa_signal_store_relaxed)
500   DLSYM_FN (hsa_signal_store_release)
501   DLSYM_FN (hsa_signal_load_acquire)
502   DLSYM_FN (hsa_queue_destroy)
503   DLSYM_FN (hsa_ext_program_add_module)
504   DLSYM_FN (hsa_ext_program_create)
505   DLSYM_FN (hsa_ext_program_destroy)
506   DLSYM_FN (hsa_ext_program_finalize)
507   return true;
508 
509  dl_fail:
510   HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib, dlerror ());
511   return false;
512 }
513 
514 /* Find kernel for an AGENT by name provided in KERNEL_NAME.  */
515 
516 static struct kernel_info *
get_kernel_for_agent(struct agent_info * agent,const char * kernel_name)517 get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
518 {
519   struct module_info *module = agent->first_module;
520 
521   while (module)
522     {
523       for (unsigned i = 0; i < module->kernel_count; i++)
524 	if (strcmp (module->kernels[i].name, kernel_name) == 0)
525 	  return &module->kernels[i];
526 
527       module = module->next;
528     }
529 
530   return NULL;
531 }
532 
533 /* Return true if the agent is a GPU and acceptable of concurrent submissions
534    from different threads.  */
535 
536 static bool
suitable_hsa_agent_p(hsa_agent_t agent)537 suitable_hsa_agent_p (hsa_agent_t agent)
538 {
539   hsa_device_type_t device_type;
540   hsa_status_t status
541     = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
542 				     &device_type);
543   if (status != HSA_STATUS_SUCCESS)
544     return false;
545 
546   switch (device_type)
547     {
548     case HSA_DEVICE_TYPE_GPU:
549       break;
550     case HSA_DEVICE_TYPE_CPU:
551       if (!support_cpu_devices)
552 	return false;
553       break;
554     default:
555       return false;
556     }
557 
558   uint32_t features = 0;
559   status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
560 					  &features);
561   if (status != HSA_STATUS_SUCCESS
562       || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
563     return false;
564   hsa_queue_type_t queue_type;
565   status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
566 					  &queue_type);
567   if (status != HSA_STATUS_SUCCESS
568       || (queue_type != HSA_QUEUE_TYPE_MULTI))
569     return false;
570 
571   return true;
572 }
573 
574 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
575    agent_count in hsa_context.  */
576 
577 static hsa_status_t
count_gpu_agents(hsa_agent_t agent,void * data)578 count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
579 {
580   if (suitable_hsa_agent_p (agent))
581     hsa_context.agent_count++;
582   return HSA_STATUS_SUCCESS;
583 }
584 
585 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
586    id to the describing structure in the hsa context.  The index of the
587    structure is pointed to by DATA, increment it afterwards.  */
588 
589 static hsa_status_t
assign_agent_ids(hsa_agent_t agent,void * data)590 assign_agent_ids (hsa_agent_t agent, void *data)
591 {
592   if (suitable_hsa_agent_p (agent))
593     {
594       int *agent_index = (int *) data;
595       hsa_context.agents[*agent_index].id = agent;
596       ++*agent_index;
597     }
598   return HSA_STATUS_SUCCESS;
599 }
600 
601 /* Initialize hsa_context if it has not already been done.
602    Return TRUE on success.  */
603 
604 static bool
init_hsa_context(void)605 init_hsa_context (void)
606 {
607   hsa_status_t status;
608   int agent_index = 0;
609 
610   if (hsa_context.initialized)
611     return true;
612   init_enviroment_variables ();
613   if (!init_hsa_runtime_functions ())
614     {
615       HSA_DEBUG ("Run-time could not be dynamically opened\n");
616       return false;
617     }
618   status = hsa_fns.hsa_init_fn ();
619   if (status != HSA_STATUS_SUCCESS)
620     return hsa_error ("Run-time could not be initialized", status);
621   HSA_DEBUG ("HSA run-time initialized\n");
622   status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
623   if (status != HSA_STATUS_SUCCESS)
624     return hsa_error ("HSA GPU devices could not be enumerated", status);
625   HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
626 
627   hsa_context.agents
628     = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
629 				  * sizeof (struct agent_info));
630   status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
631   if (agent_index != hsa_context.agent_count)
632     {
633       GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
634       return false;
635     }
636   hsa_context.initialized = true;
637   return true;
638 }
639 
640 /* Callback of dispatch queues to report errors.  */
641 
642 static void
queue_callback(hsa_status_t status,hsa_queue_t * queue,void * data)643 queue_callback (hsa_status_t status,
644 		hsa_queue_t *queue __attribute__ ((unused)),
645 		void *data __attribute__ ((unused)))
646 {
647   hsa_fatal ("Asynchronous queue error", status);
648 }
649 
650 /* Callback of hsa_agent_iterate_regions.  Determine if a memory REGION can be
651    used for kernarg allocations and if so write it to the memory pointed to by
652    DATA and break the query.  */
653 
654 static hsa_status_t
get_kernarg_memory_region(hsa_region_t region,void * data)655 get_kernarg_memory_region (hsa_region_t region, void *data)
656 {
657   hsa_status_t status;
658   hsa_region_segment_t segment;
659 
660   status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
661 					   &segment);
662   if (status != HSA_STATUS_SUCCESS)
663     return status;
664   if (segment != HSA_REGION_SEGMENT_GLOBAL)
665     return HSA_STATUS_SUCCESS;
666 
667   uint32_t flags;
668   status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
669 					   &flags);
670   if (status != HSA_STATUS_SUCCESS)
671     return status;
672   if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
673     {
674       hsa_region_t *ret = (hsa_region_t *) data;
675       *ret = region;
676       return HSA_STATUS_INFO_BREAK;
677     }
678   return HSA_STATUS_SUCCESS;
679 }
680 
681 /* Part of the libgomp plugin interface.  Return the number of HSA devices on
682    the system.  */
683 
684 int
GOMP_OFFLOAD_get_num_devices(void)685 GOMP_OFFLOAD_get_num_devices (void)
686 {
687   if (!init_hsa_context ())
688     return 0;
689   return hsa_context.agent_count;
690 }
691 
692 /* Part of the libgomp plugin interface.  Initialize agent number N so that it
693    can be used for computation.  Return TRUE on success.  */
694 
695 bool
GOMP_OFFLOAD_init_device(int n)696 GOMP_OFFLOAD_init_device (int n)
697 {
698   if (!init_hsa_context ())
699     return false;
700   if (n >= hsa_context.agent_count)
701     {
702       GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
703       return false;
704     }
705   struct agent_info *agent = &hsa_context.agents[n];
706 
707   if (agent->initialized)
708     return true;
709 
710   if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
711     {
712       GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
713       return false;
714     }
715   if (pthread_mutex_init (&agent->prog_mutex, NULL))
716     {
717       GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
718       return false;
719     }
720 
721   uint32_t queue_size;
722   hsa_status_t status;
723   status = hsa_fns.hsa_agent_get_info_fn (agent->id,
724 					  HSA_AGENT_INFO_QUEUE_MAX_SIZE,
725 					  &queue_size);
726   if (status != HSA_STATUS_SUCCESS)
727     return hsa_error ("Error requesting maximum queue size of the HSA agent",
728     	   	      status);
729   status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
730 					  &agent->isa);
731   if (status != HSA_STATUS_SUCCESS)
732     return hsa_error ("Error querying the ISA of the agent", status);
733   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
734 					HSA_QUEUE_TYPE_MULTI,
735 					queue_callback, NULL, UINT32_MAX,
736 					UINT32_MAX,
737 					&agent->command_q);
738   if (status != HSA_STATUS_SUCCESS)
739     return hsa_error ("Error creating command queue", status);
740 
741   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
742 					HSA_QUEUE_TYPE_MULTI,
743 					queue_callback, NULL, UINT32_MAX,
744 					UINT32_MAX,
745 					&agent->kernel_dispatch_command_q);
746   if (status != HSA_STATUS_SUCCESS)
747     return hsa_error ("Error creating kernel dispatch command queue", status);
748 
749   agent->kernarg_region.handle = (uint64_t) -1;
750   status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
751 						 get_kernarg_memory_region,
752 						 &agent->kernarg_region);
753   if (agent->kernarg_region.handle == (uint64_t) -1)
754     {
755       GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
756 			 "arguments");
757       return false;
758     }
759   HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
760 	     (long long unsigned) agent->command_q->id);
761   HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
762 	     (long long unsigned) agent->kernel_dispatch_command_q->id);
763   agent->initialized = true;
764   return true;
765 }
766 
767 /* Verify that hsa_context has already been initialized and return the
768    agent_info structure describing device number N.  Return NULL on error.  */
769 
770 static struct agent_info *
get_agent_info(int n)771 get_agent_info (int n)
772 {
773   if (!hsa_context.initialized)
774     {
775       GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
776       return NULL;
777     }
778   if (n >= hsa_context.agent_count)
779     {
780       GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
781       return NULL;
782     }
783   if (!hsa_context.agents[n].initialized)
784     {
785       GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
786       return NULL;
787     }
788   return &hsa_context.agents[n];
789 }
790 
791 /* Insert MODULE to the linked list of modules of AGENT.  */
792 
793 static void
add_module_to_agent(struct agent_info * agent,struct module_info * module)794 add_module_to_agent (struct agent_info *agent, struct module_info *module)
795 {
796   if (agent->first_module)
797     agent->first_module->prev = module;
798   module->next = agent->first_module;
799   module->prev = NULL;
800   agent->first_module = module;
801 }
802 
803 /* Remove MODULE from the linked list of modules of AGENT.  */
804 
805 static void
remove_module_from_agent(struct agent_info * agent,struct module_info * module)806 remove_module_from_agent (struct agent_info *agent, struct module_info *module)
807 {
808   if (agent->first_module == module)
809     agent->first_module = module->next;
810   if (module->prev)
811     module->prev->next = module->next;
812   if (module->next)
813     module->next->prev = module->prev;
814 }
815 
816 /* Free the HSA program in agent and everything associated with it and set
817    agent->prog_finalized and the initialized flags of all kernels to false.
818    Return TRUE on success.  */
819 
820 static bool
destroy_hsa_program(struct agent_info * agent)821 destroy_hsa_program (struct agent_info *agent)
822 {
823   if (!agent->prog_finalized || agent->prog_finalized_error)
824     return true;
825 
826   hsa_status_t status;
827 
828   HSA_DEBUG ("Destroying the current HSA program.\n");
829 
830   status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
831   if (status != HSA_STATUS_SUCCESS)
832     return hsa_error ("Could not destroy HSA executable", status);
833 
834   struct module_info *module;
835   for (module = agent->first_module; module; module = module->next)
836     {
837       int i;
838       for (i = 0; i < module->kernel_count; i++)
839 	module->kernels[i].initialized = false;
840     }
841   agent->prog_finalized = false;
842   return true;
843 }
844 
845 /* Initialize KERNEL from D and other parameters.  Return true on success. */
846 
847 static bool
init_basic_kernel_info(struct kernel_info * kernel,struct hsa_kernel_description * d,struct agent_info * agent,struct module_info * module)848 init_basic_kernel_info (struct kernel_info *kernel,
849 			struct hsa_kernel_description *d,
850 			struct agent_info *agent,
851 			struct module_info *module)
852 {
853   kernel->agent = agent;
854   kernel->module = module;
855   kernel->name = d->name;
856   kernel->omp_data_size = d->omp_data_size;
857   kernel->gridified_kernel_p = d->gridified_kernel_p;
858   kernel->dependencies_count = d->kernel_dependencies_count;
859   kernel->dependencies = d->kernel_dependencies;
860   if (pthread_mutex_init (&kernel->init_mutex, NULL))
861     {
862       GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
863       return false;
864     }
865   return true;
866 }
867 
868 /* Part of the libgomp plugin interface.  Load BRIG module described by struct
869    brig_image_desc in TARGET_DATA and return references to kernel descriptors
870    in TARGET_TABLE.  */
871 
872 int
GOMP_OFFLOAD_load_image(int ord,unsigned version,const void * target_data,struct addr_pair ** target_table)873 GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
874 			 struct addr_pair **target_table)
875 {
876   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
877     {
878       GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
879 			 " (expected %u, received %u)",
880 			 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
881       return -1;
882     }
883 
884   struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
885   struct agent_info *agent;
886   struct addr_pair *pair;
887   struct module_info *module;
888   struct kernel_info *kernel;
889   int kernel_count = image_desc->kernel_count;
890 
891   agent = get_agent_info (ord);
892   if (!agent)
893     return -1;
894 
895   if (pthread_rwlock_wrlock (&agent->modules_rwlock))
896     {
897       GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
898       return -1;
899     }
900   if (agent->prog_finalized
901       && !destroy_hsa_program (agent))
902     return -1;
903 
904   HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
905   pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
906   *target_table = pair;
907   module = (struct module_info *)
908     GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
909 				+ kernel_count * sizeof (struct kernel_info));
910   module->image_desc = image_desc;
911   module->kernel_count = kernel_count;
912 
913   kernel = &module->kernels[0];
914 
915   /* Allocate memory for kernel dependencies.  */
916   for (unsigned i = 0; i < kernel_count; i++)
917     {
918       pair->start = (uintptr_t) kernel;
919       pair->end = (uintptr_t) (kernel + 1);
920 
921       struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
922       if (!init_basic_kernel_info (kernel, d, agent, module))
923 	return -1;
924       kernel++;
925       pair++;
926     }
927 
928   add_module_to_agent (agent, module);
929   if (pthread_rwlock_unlock (&agent->modules_rwlock))
930     {
931       GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
932       return -1;
933     }
934   return kernel_count;
935 }
936 
937 /* Add a shared BRIG library from a FILE_NAME to an AGENT.  */
938 
939 static struct brig_library_info *
add_shared_library(const char * file_name,struct agent_info * agent)940 add_shared_library (const char *file_name, struct agent_info *agent)
941 {
942   struct brig_library_info *library = NULL;
943 
944   void *f = dlopen (file_name, RTLD_NOW);
945   void *start = dlsym (f, "__brig_start");
946   void *end = dlsym (f, "__brig_end");
947 
948   if (start == NULL || end == NULL)
949     return NULL;
950 
951   unsigned size = end - start;
952   char *buf = (char *) GOMP_PLUGIN_malloc (size);
953   memcpy (buf, start, size);
954 
955   library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
956   library->file_name = (char *) GOMP_PLUGIN_malloc
957     ((strlen (file_name) + 1));
958   strcpy (library->file_name, file_name);
959   library->image = (hsa_ext_module_t) buf;
960 
961   return library;
962 }
963 
964 /* Release memory used for BRIG shared libraries that correspond
965    to an AGENT.  */
966 
967 static void
release_agent_shared_libraries(struct agent_info * agent)968 release_agent_shared_libraries (struct agent_info *agent)
969 {
970   for (unsigned i = 0; i < agent->brig_libraries_count; i++)
971     if (agent->brig_libraries[i])
972       {
973 	free (agent->brig_libraries[i]->file_name);
974 	free (agent->brig_libraries[i]->image);
975 	free (agent->brig_libraries[i]);
976       }
977 
978   free (agent->brig_libraries);
979 }
980 
981 /* Create and finalize the program consisting of all loaded modules.  */
982 
983 static void
create_and_finalize_hsa_program(struct agent_info * agent)984 create_and_finalize_hsa_program (struct agent_info *agent)
985 {
986   hsa_status_t status;
987   hsa_ext_program_t prog_handle;
988   int mi = 0;
989 
990   if (pthread_mutex_lock (&agent->prog_mutex))
991     GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
992   if (agent->prog_finalized)
993     goto final;
994 
995   status = hsa_fns.hsa_ext_program_create_fn
996     (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
997      HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
998      NULL, &prog_handle);
999   if (status != HSA_STATUS_SUCCESS)
1000     hsa_fatal ("Could not create an HSA program", status);
1001 
1002   HSA_DEBUG ("Created a finalized program\n");
1003 
1004   struct module_info *module = agent->first_module;
1005   while (module)
1006     {
1007       status = hsa_fns.hsa_ext_program_add_module_fn
1008 	(prog_handle, module->image_desc->brig_module);
1009       if (status != HSA_STATUS_SUCCESS)
1010 	hsa_fatal ("Could not add a module to the HSA program", status);
1011       module = module->next;
1012       mi++;
1013     }
1014 
1015   /* Load all shared libraries.  */
1016   const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
1017   const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
1018 
1019   agent->brig_libraries_count = libraries_count;
1020   agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
1021     (sizeof (struct brig_library_info) * libraries_count);
1022 
1023   for (unsigned i = 0; i < libraries_count; i++)
1024     {
1025       struct brig_library_info *library = add_shared_library (libraries[i],
1026 							      agent);
1027       if (library == NULL)
1028 	{
1029 	  HSA_WARNING ("Could not open a shared BRIG library: %s\n",
1030 		       libraries[i]);
1031 	  continue;
1032 	}
1033 
1034       status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
1035 						      library->image);
1036       if (status != HSA_STATUS_SUCCESS)
1037 	hsa_warn ("Could not add a shared BRIG library the HSA program",
1038 		  status);
1039       else
1040 	HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
1041 		   libraries[i]);
1042     }
1043 
1044   hsa_ext_control_directives_t control_directives;
1045   memset (&control_directives, 0, sizeof (control_directives));
1046   hsa_code_object_t code_object;
1047   status = hsa_fns.hsa_ext_program_finalize_fn
1048     (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
1049      control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
1050   if (status != HSA_STATUS_SUCCESS)
1051     {
1052       hsa_warn ("Finalization of the HSA program failed", status);
1053       goto failure;
1054     }
1055 
1056   HSA_DEBUG ("Finalization done\n");
1057   hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
1058 
1059   status
1060     = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
1061 					HSA_EXECUTABLE_STATE_UNFROZEN,
1062 					"", &agent->executable);
1063   if (status != HSA_STATUS_SUCCESS)
1064     hsa_fatal ("Could not create HSA executable", status);
1065 
1066   module = agent->first_module;
1067   while (module)
1068     {
1069       /* Initialize all global variables declared in the module.  */
1070       for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
1071 	{
1072 	  struct global_var_info *var;
1073 	  var = &module->image_desc->global_variables[i];
1074 	  status = hsa_fns.hsa_executable_global_variable_define_fn
1075 	    (agent->executable, var->name, var->address);
1076 
1077 	  HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
1078 		     var->address);
1079 
1080 	  if (status != HSA_STATUS_SUCCESS)
1081 	    hsa_fatal ("Could not define a global variable in the HSA program",
1082 		       status);
1083 	}
1084 
1085       module = module->next;
1086     }
1087 
1088   status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
1089 						       agent->id,
1090 						       code_object, "");
1091   if (status != HSA_STATUS_SUCCESS)
1092     hsa_fatal ("Could not add a code object to the HSA executable", status);
1093   status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
1094   if (status != HSA_STATUS_SUCCESS)
1095     hsa_fatal ("Could not freeze the HSA executable", status);
1096 
1097   HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
1098 
1099   /* If all goes good, jump to final.  */
1100   goto final;
1101 
1102 failure:
1103   agent->prog_finalized_error = true;
1104 
1105 final:
1106   agent->prog_finalized = true;
1107 
1108   if (pthread_mutex_unlock (&agent->prog_mutex))
1109     GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
1110 }
1111 
1112 /* Create kernel dispatch data structure for given KERNEL.  */
1113 
1114 static struct GOMP_hsa_kernel_dispatch *
create_single_kernel_dispatch(struct kernel_info * kernel,unsigned omp_data_size)1115 create_single_kernel_dispatch (struct kernel_info *kernel,
1116 			       unsigned omp_data_size)
1117 {
1118   struct agent_info *agent = kernel->agent;
1119   struct GOMP_hsa_kernel_dispatch *shadow
1120     = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
1121 
1122   shadow->queue = agent->command_q;
1123   shadow->omp_data_memory
1124     = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
1125   unsigned dispatch_count = kernel->dependencies_count;
1126   shadow->kernel_dispatch_count = dispatch_count;
1127 
1128   shadow->children_dispatches
1129     = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
1130 
1131   shadow->object = kernel->object;
1132 
1133   hsa_signal_t sync_signal;
1134   hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
1135   if (status != HSA_STATUS_SUCCESS)
1136     hsa_fatal ("Error creating the HSA sync signal", status);
1137 
1138   shadow->signal = sync_signal.handle;
1139   shadow->private_segment_size = kernel->private_segment_size;
1140   shadow->group_segment_size = kernel->group_segment_size;
1141 
1142   status
1143     = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
1144 				      kernel->kernarg_segment_size,
1145 				      &shadow->kernarg_address);
1146   if (status != HSA_STATUS_SUCCESS)
1147     hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
1148 
1149   return shadow;
1150 }
1151 
1152 /* Release data structure created for a kernel dispatch in SHADOW argument.  */
1153 
1154 static void
release_kernel_dispatch(struct GOMP_hsa_kernel_dispatch * shadow)1155 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
1156 {
1157   HSA_DEBUG ("Released kernel dispatch: %p has value: %" PRIu64 " (%p)\n",
1158 	     shadow, shadow->debug,
1159 	     (void *) (uintptr_t) shadow->debug);
1160 
1161   hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
1162 
1163   hsa_signal_t s;
1164   s.handle = shadow->signal;
1165   hsa_fns.hsa_signal_destroy_fn (s);
1166 
1167   free (shadow->omp_data_memory);
1168 
1169   for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1170     release_kernel_dispatch (shadow->children_dispatches[i]);
1171 
1172   free (shadow->children_dispatches);
1173   free (shadow);
1174 }
1175 
1176 /* Initialize a KERNEL without its dependencies.  MAX_OMP_DATA_SIZE is used
1177    to calculate maximum necessary memory for OMP data allocation.  */
1178 
1179 static void
init_single_kernel(struct kernel_info * kernel,unsigned * max_omp_data_size)1180 init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
1181 {
1182   hsa_status_t status;
1183   struct agent_info *agent = kernel->agent;
1184   hsa_executable_symbol_t kernel_symbol;
1185   status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
1186 						 kernel->name, agent->id,
1187 						 0, &kernel_symbol);
1188   if (status != HSA_STATUS_SUCCESS)
1189     {
1190       hsa_warn ("Could not find symbol for kernel in the code object", status);
1191       goto failure;
1192     }
1193   HSA_DEBUG ("Located kernel %s\n", kernel->name);
1194   status = hsa_fns.hsa_executable_symbol_get_info_fn
1195     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
1196   if (status != HSA_STATUS_SUCCESS)
1197     hsa_fatal ("Could not extract a kernel object from its symbol", status);
1198   status = hsa_fns.hsa_executable_symbol_get_info_fn
1199     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
1200      &kernel->kernarg_segment_size);
1201   if (status != HSA_STATUS_SUCCESS)
1202     hsa_fatal ("Could not get info about kernel argument size", status);
1203   status = hsa_fns.hsa_executable_symbol_get_info_fn
1204     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
1205      &kernel->group_segment_size);
1206   if (status != HSA_STATUS_SUCCESS)
1207     hsa_fatal ("Could not get info about kernel group segment size", status);
1208   status = hsa_fns.hsa_executable_symbol_get_info_fn
1209     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
1210      &kernel->private_segment_size);
1211   if (status != HSA_STATUS_SUCCESS)
1212     hsa_fatal ("Could not get info about kernel private segment size",
1213 	       status);
1214 
1215   HSA_DEBUG ("Kernel structure for %s fully initialized with "
1216 	     "following segment sizes: \n", kernel->name);
1217   HSA_DEBUG ("  group_segment_size: %u\n",
1218 	     (unsigned) kernel->group_segment_size);
1219   HSA_DEBUG ("  private_segment_size: %u\n",
1220 	     (unsigned) kernel->private_segment_size);
1221   HSA_DEBUG ("  kernarg_segment_size: %u\n",
1222 	     (unsigned) kernel->kernarg_segment_size);
1223   HSA_DEBUG ("  omp_data_size: %u\n", kernel->omp_data_size);
1224   HSA_DEBUG ("  gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
1225 
1226   if (kernel->omp_data_size > *max_omp_data_size)
1227     *max_omp_data_size = kernel->omp_data_size;
1228 
1229   for (unsigned i = 0; i < kernel->dependencies_count; i++)
1230     {
1231       struct kernel_info *dependency
1232 	= get_kernel_for_agent (agent, kernel->dependencies[i]);
1233 
1234       if (dependency == NULL)
1235 	{
1236 	  HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1237 		     "dependency name: %s\n", kernel->name,
1238 		     kernel->dependencies[i]);
1239 	  goto failure;
1240 	}
1241 
1242       if (dependency->dependencies_count > 0)
1243 	{
1244 	  HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1245 		     "a depth bigger than one\n");
1246 	  goto failure;
1247 	}
1248 
1249       init_single_kernel (dependency, max_omp_data_size);
1250     }
1251 
1252   return;
1253 
1254 failure:
1255   kernel->initialization_failed = true;
1256 }
1257 
1258 /* Indent stream F by INDENT spaces.  */
1259 
1260 static void
indent_stream(FILE * f,unsigned indent)1261 indent_stream (FILE *f, unsigned indent)
1262 {
1263   fprintf (f, "%*s", indent, "");
1264 }
1265 
1266 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces.  */
1267 
1268 static void
print_kernel_dispatch(struct GOMP_hsa_kernel_dispatch * dispatch,unsigned indent)1269 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
1270 {
1271   indent_stream (stderr, indent);
1272   fprintf (stderr, "this: %p\n", dispatch);
1273   indent_stream (stderr, indent);
1274   fprintf (stderr, "queue: %p\n", dispatch->queue);
1275   indent_stream (stderr, indent);
1276   fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
1277   indent_stream (stderr, indent);
1278   fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
1279   indent_stream (stderr, indent);
1280   fprintf (stderr, "object: %" PRIu64 "\n", dispatch->object);
1281   indent_stream (stderr, indent);
1282   fprintf (stderr, "signal: %" PRIu64 "\n", dispatch->signal);
1283   indent_stream (stderr, indent);
1284   fprintf (stderr, "private_segment_size: %u\n",
1285 	   dispatch->private_segment_size);
1286   indent_stream (stderr, indent);
1287   fprintf (stderr, "group_segment_size: %u\n",
1288 	   dispatch->group_segment_size);
1289   indent_stream (stderr, indent);
1290   fprintf (stderr, "children dispatches: %" PRIu64 "\n",
1291 	   dispatch->kernel_dispatch_count);
1292   indent_stream (stderr, indent);
1293   fprintf (stderr, "omp_num_threads: %u\n",
1294 	   dispatch->omp_num_threads);
1295   fprintf (stderr, "\n");
1296 
1297   for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
1298     print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
1299 }
1300 
1301 /* Create kernel dispatch data structure for a KERNEL and all its
1302    dependencies.  */
1303 
1304 static struct GOMP_hsa_kernel_dispatch *
create_kernel_dispatch(struct kernel_info * kernel,unsigned omp_data_size)1305 create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
1306 {
1307   struct GOMP_hsa_kernel_dispatch *shadow
1308     = create_single_kernel_dispatch (kernel, omp_data_size);
1309   shadow->omp_num_threads = 64;
1310   shadow->debug = 0;
1311   shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
1312 
1313   /* Create kernel dispatch data structures.  We do not allow to have
1314      a kernel dispatch with depth bigger than one.  */
1315   for (unsigned i = 0; i < kernel->dependencies_count; i++)
1316     {
1317       struct kernel_info *dependency
1318 	= get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
1319       shadow->children_dispatches[i]
1320 	= create_single_kernel_dispatch (dependency, omp_data_size);
1321       shadow->children_dispatches[i]->queue
1322 	= kernel->agent->kernel_dispatch_command_q;
1323       shadow->children_dispatches[i]->omp_level = 1;
1324     }
1325 
1326   return shadow;
1327 }
1328 
1329 /* Do all the work that is necessary before running KERNEL for the first time.
1330    The function assumes the program has been created, finalized and frozen by
1331    create_and_finalize_hsa_program.  */
1332 
1333 static void
init_kernel(struct kernel_info * kernel)1334 init_kernel (struct kernel_info *kernel)
1335 {
1336   if (pthread_mutex_lock (&kernel->init_mutex))
1337     GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1338   if (kernel->initialized)
1339     {
1340       if (pthread_mutex_unlock (&kernel->init_mutex))
1341 	GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1342 			   "mutex");
1343 
1344       return;
1345     }
1346 
1347   /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1348      dispatch operation.  */
1349   init_single_kernel (kernel, &kernel->max_omp_data_size);
1350 
1351   if (!kernel->initialization_failed)
1352     HSA_DEBUG ("\n");
1353 
1354   kernel->initialized = true;
1355   if (pthread_mutex_unlock (&kernel->init_mutex))
1356     GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1357 		       "mutex");
1358 }
1359 
1360 /* Parse the target attributes INPUT provided by the compiler and return true
1361    if we should run anything all.  If INPUT is NULL, fill DEF with default
1362    values, then store INPUT or DEF into *RESULT.  */
1363 
1364 static bool
parse_target_attributes(void ** input,struct GOMP_kernel_launch_attributes * def,struct GOMP_kernel_launch_attributes ** result)1365 parse_target_attributes (void **input,
1366 			 struct GOMP_kernel_launch_attributes *def,
1367 			 struct GOMP_kernel_launch_attributes **result)
1368 {
1369   if (!input)
1370     GOMP_PLUGIN_fatal ("No target arguments provided");
1371 
1372   bool attrs_found = false;
1373   while (*input)
1374     {
1375       uintptr_t id = (uintptr_t) *input;
1376       if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
1377 	  && ((id & GOMP_TARGET_ARG_ID_MASK)
1378 	      == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
1379 	{
1380 	  input++;
1381 	  attrs_found = true;
1382 	  break;
1383 	}
1384 
1385       if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
1386 	input++;
1387       input++;
1388     }
1389 
1390   if (!attrs_found)
1391     {
1392       def->ndim = 1;
1393       def->gdims[0] = 1;
1394       def->gdims[1] = 1;
1395       def->gdims[2] = 1;
1396       def->wdims[0] = 1;
1397       def->wdims[1] = 1;
1398       def->wdims[2] = 1;
1399       *result = def;
1400       HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1401       return true;
1402     }
1403 
1404   struct GOMP_kernel_launch_attributes *kla;
1405   kla = (struct GOMP_kernel_launch_attributes *) *input;
1406   *result = kla;
1407   if (kla->ndim == 0 || kla->ndim > 3)
1408     GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
1409 
1410   HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
1411   unsigned i;
1412   for (i = 0; i < kla->ndim; i++)
1413     {
1414       HSA_DEBUG ("  Dimension %u: grid size %u and group size %u\n", i,
1415 		 kla->gdims[i], kla->wdims[i]);
1416       if (kla->gdims[i] == 0)
1417 	return false;
1418     }
1419   return true;
1420 }
1421 
1422 /* Return the group size given the requested GROUP size, GRID size and number
1423    of grid dimensions NDIM.  */
1424 
1425 static uint32_t
get_group_size(uint32_t ndim,uint32_t grid,uint32_t group)1426 get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
1427 {
1428   if (group == 0)
1429     {
1430       /* TODO: Provide a default via environment or device characteristics.  */
1431       if (ndim == 1)
1432 	group = 64;
1433       else if (ndim == 2)
1434 	group = 8;
1435       else
1436 	group = 4;
1437     }
1438 
1439   if (group > grid)
1440     group = grid;
1441   return group;
1442 }
1443 
1444 /* Return true if the HSA runtime can run function FN_PTR.  */
1445 
1446 bool
GOMP_OFFLOAD_can_run(void * fn_ptr)1447 GOMP_OFFLOAD_can_run (void *fn_ptr)
1448 {
1449   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1450   struct agent_info *agent = kernel->agent;
1451   create_and_finalize_hsa_program (agent);
1452 
1453   if (agent->prog_finalized_error)
1454     goto failure;
1455 
1456   init_kernel (kernel);
1457   if (kernel->initialization_failed)
1458     goto failure;
1459 
1460   return true;
1461 
1462 failure:
1463   if (suppress_host_fallback)
1464     GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1465   HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1466   return false;
1467 }
1468 
1469 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET.  */
1470 
1471 void
packet_store_release(uint32_t * packet,uint16_t header,uint16_t rest)1472 packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
1473 {
1474   __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
1475 }
1476 
1477 /* Run KERNEL on its agent, pass VARS to it as arguments and take
1478    launchattributes from KLA.  */
1479 
1480 void
run_kernel(struct kernel_info * kernel,void * vars,struct GOMP_kernel_launch_attributes * kla)1481 run_kernel (struct kernel_info *kernel, void *vars,
1482 	    struct GOMP_kernel_launch_attributes *kla)
1483 {
1484   struct agent_info *agent = kernel->agent;
1485   if (pthread_rwlock_rdlock (&agent->modules_rwlock))
1486     GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1487 
1488   if (!agent->initialized)
1489     GOMP_PLUGIN_fatal ("Agent must be initialized");
1490 
1491   if (!kernel->initialized)
1492     GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1493 
1494   struct GOMP_hsa_kernel_dispatch *shadow
1495     = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
1496 
1497   if (debug)
1498     {
1499       fprintf (stderr, "\nKernel has following dependencies:\n");
1500       print_kernel_dispatch (shadow, 2);
1501     }
1502 
1503   uint64_t index
1504     = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
1505   HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
1506 
1507   /* Wait until the queue is not full before writing the packet.   */
1508   while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
1509 	 >= agent->command_q->size)
1510     ;
1511 
1512   hsa_kernel_dispatch_packet_t *packet;
1513   packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
1514 	   + index % agent->command_q->size;
1515 
1516   memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
1517   packet->grid_size_x = kla->gdims[0];
1518   packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
1519 					     kla->wdims[0]);
1520 
1521   if (kla->ndim >= 2)
1522     {
1523       packet->grid_size_y = kla->gdims[1];
1524       packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
1525 						 kla->wdims[1]);
1526     }
1527   else
1528     {
1529       packet->grid_size_y = 1;
1530       packet->workgroup_size_y = 1;
1531     }
1532 
1533   if (kla->ndim == 3)
1534     {
1535       packet->grid_size_z = kla->gdims[2];
1536       packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
1537 					     kla->wdims[2]);
1538     }
1539   else
1540     {
1541       packet->grid_size_z = 1;
1542       packet->workgroup_size_z = 1;
1543     }
1544 
1545   packet->private_segment_size = kernel->private_segment_size;
1546   packet->group_segment_size = kernel->group_segment_size;
1547   packet->kernel_object = kernel->object;
1548   packet->kernarg_address = shadow->kernarg_address;
1549   hsa_signal_t s;
1550   s.handle = shadow->signal;
1551   packet->completion_signal = s;
1552   hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
1553   memcpy (shadow->kernarg_address, &vars, sizeof (vars));
1554 
1555   /* PR hsa/70337.  */
1556   size_t vars_size = sizeof (vars);
1557   if (kernel->kernarg_segment_size > vars_size)
1558     {
1559       if (kernel->kernarg_segment_size != vars_size
1560 	  + sizeof (struct hsa_kernel_runtime *))
1561 	GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1562       memcpy (packet->kernarg_address + vars_size, &shadow,
1563 	      sizeof (struct hsa_kernel_runtime *));
1564     }
1565 
1566   HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1567 
1568   uint16_t header;
1569   header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1570   header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1571   header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1572 
1573   HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
1574 
1575   packet_store_release ((uint32_t *) packet, header,
1576 			(uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
1577 
1578   hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
1579 				       index);
1580 
1581   /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1582      signal wait and signal load operations on their own and we need to
1583      periodically call the hsa_signal_load_acquire on completion signals of
1584      children kernels in the CPU to make that happen.  As soon the
1585      limitation will be resolved, this workaround can be removed.  */
1586 
1587   HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1588 
1589   /* Root signal waits with 1ms timeout.  */
1590   while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
1591 					     1000 * 1000,
1592 					     HSA_WAIT_STATE_BLOCKED) != 0)
1593     for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1594       {
1595 	hsa_signal_t child_s;
1596 	child_s.handle = shadow->children_dispatches[i]->signal;
1597 
1598 	HSA_DEBUG ("Waiting for children completion signal: %" PRIu64 "\n",
1599 		   shadow->children_dispatches[i]->signal);
1600 	hsa_fns.hsa_signal_load_acquire_fn (child_s);
1601       }
1602 
1603   release_kernel_dispatch (shadow);
1604 
1605   if (pthread_rwlock_unlock (&agent->modules_rwlock))
1606     GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1607 }
1608 
1609 /* Part of the libgomp plugin interface.  Run a kernel on device N (the number
1610    is actually ignored, we assume the FN_PTR has been mapped using the correct
1611    device) and pass it an array of pointers in VARS as a parameter.  The kernel
1612    is identified by FN_PTR which must point to a kernel_info structure.  */
1613 
1614 void
GOMP_OFFLOAD_run(int n,void * fn_ptr,void * vars,void ** args)1615 GOMP_OFFLOAD_run (int n __attribute__((unused)),
1616 		  void *fn_ptr, void *vars, void **args)
1617 {
1618   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1619   struct GOMP_kernel_launch_attributes def;
1620   struct GOMP_kernel_launch_attributes *kla;
1621   if (!parse_target_attributes (args, &def, &kla))
1622     {
1623       HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1624       return;
1625     }
1626   run_kernel (kernel, vars, kla);
1627 }
1628 
1629 /* Information to be passed to a thread running a kernel asycnronously.  */
1630 
1631 struct async_run_info
1632 {
1633   int device;
1634   void *tgt_fn;
1635   void *tgt_vars;
1636   void **args;
1637   void *async_data;
1638 };
1639 
1640 /* Thread routine to run a kernel asynchronously.  */
1641 
1642 static void *
run_kernel_asynchronously(void * thread_arg)1643 run_kernel_asynchronously (void *thread_arg)
1644 {
1645   struct async_run_info *info = (struct async_run_info *) thread_arg;
1646   int device = info->device;
1647   void *tgt_fn = info->tgt_fn;
1648   void *tgt_vars = info->tgt_vars;
1649   void **args = info->args;
1650   void *async_data = info->async_data;
1651 
1652   free (info);
1653   GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
1654   GOMP_PLUGIN_target_task_completion (async_data);
1655   return NULL;
1656 }
1657 
1658 /* Part of the libgomp plugin interface.  Run a kernel like GOMP_OFFLOAD_run
1659    does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1660    has finished.  */
1661 
1662 void
GOMP_OFFLOAD_async_run(int device,void * tgt_fn,void * tgt_vars,void ** args,void * async_data)1663 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
1664 			void **args, void *async_data)
1665 {
1666   pthread_t pt;
1667   struct async_run_info *info;
1668   HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
1669   info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
1670 
1671   info->device = device;
1672   info->tgt_fn = tgt_fn;
1673   info->tgt_vars = tgt_vars;
1674   info->args = args;
1675   info->async_data = async_data;
1676 
1677   int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
1678   if (err != 0)
1679     GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1680 		       strerror (err));
1681   err = pthread_detach (pt);
1682   if (err != 0)
1683     GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1684 		       "asynchronously: %s", strerror (err));
1685 }
1686 
1687 /* Deinitialize all information associated with MODULE and kernels within
1688    it.  Return TRUE on success.  */
1689 
1690 static bool
destroy_module(struct module_info * module)1691 destroy_module (struct module_info *module)
1692 {
1693   int i;
1694   for (i = 0; i < module->kernel_count; i++)
1695     if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
1696       {
1697 	GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1698 			   "mutex");
1699 	return false;
1700       }
1701   return true;
1702 }
1703 
1704 /* Part of the libgomp plugin interface.  Unload BRIG module described by
1705    struct brig_image_desc in TARGET_DATA from agent number N.  Return
1706    TRUE on success.  */
1707 
1708 bool
GOMP_OFFLOAD_unload_image(int n,unsigned version,const void * target_data)1709 GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data)
1710 {
1711   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
1712     {
1713       GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1714 			 " (expected %u, received %u)",
1715 			 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
1716       return false;
1717     }
1718 
1719   struct agent_info *agent;
1720   agent = get_agent_info (n);
1721   if (!agent)
1722     return false;
1723 
1724   if (pthread_rwlock_wrlock (&agent->modules_rwlock))
1725     {
1726       GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1727       return false;
1728     }
1729   struct module_info *module = agent->first_module;
1730   while (module)
1731     {
1732       if (module->image_desc == target_data)
1733 	break;
1734       module = module->next;
1735     }
1736   if (!module)
1737     {
1738       GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1739 			 "loaded before");
1740       return false;
1741     }
1742 
1743   remove_module_from_agent (agent, module);
1744   if (!destroy_module (module))
1745     return false;
1746   free (module);
1747   if (!destroy_hsa_program (agent))
1748     return false;
1749   if (pthread_rwlock_unlock (&agent->modules_rwlock))
1750     {
1751       GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1752       return false;
1753     }
1754   return true;
1755 }
1756 
1757 /* Part of the libgomp plugin interface.  Deinitialize all information and
1758    status associated with agent number N.  We do not attempt any
1759    synchronization, assuming the user and libgomp will not attempt
1760    deinitialization of a device that is in any way being used at the same
1761    time.  Return TRUE on success.  */
1762 
1763 bool
GOMP_OFFLOAD_fini_device(int n)1764 GOMP_OFFLOAD_fini_device (int n)
1765 {
1766   struct agent_info *agent = get_agent_info (n);
1767   if (!agent)
1768     return false;
1769 
1770   if (!agent->initialized)
1771     return true;
1772 
1773   struct module_info *next_module = agent->first_module;
1774   while (next_module)
1775     {
1776       struct module_info *module = next_module;
1777       next_module = module->next;
1778       if (!destroy_module (module))
1779 	return false;
1780       free (module);
1781     }
1782   agent->first_module = NULL;
1783   if (!destroy_hsa_program (agent))
1784     return false;
1785 
1786   release_agent_shared_libraries (agent);
1787 
1788   hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
1789   if (status != HSA_STATUS_SUCCESS)
1790     return hsa_error ("Error destroying command queue", status);
1791   status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
1792   if (status != HSA_STATUS_SUCCESS)
1793     return hsa_error ("Error destroying kernel dispatch command queue", status);
1794   if (pthread_mutex_destroy (&agent->prog_mutex))
1795     {
1796       GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1797       return false;
1798     }
1799   if (pthread_rwlock_destroy (&agent->modules_rwlock))
1800     {
1801       GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1802       return false;
1803     }
1804   agent->initialized = false;
1805   return true;
1806 }
1807 
1808 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1809    for HSA.  */
1810 
1811 void *
GOMP_OFFLOAD_alloc(int ord,size_t size)1812 GOMP_OFFLOAD_alloc (int ord, size_t size)
1813 {
1814   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1815 		     "it should never be called");
1816   return NULL;
1817 }
1818 
1819 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1820    for HSA.  */
1821 
1822 bool
GOMP_OFFLOAD_free(int ord,void * ptr)1823 GOMP_OFFLOAD_free (int ord, void *ptr)
1824 {
1825   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1826 		     "it should never be called");
1827   return false;
1828 }
1829 
1830 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1831    for HSA.  */
1832 
1833 bool
GOMP_OFFLOAD_dev2host(int ord,void * dst,const void * src,size_t n)1834 GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
1835 {
1836   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1837 		     "it should never be called");
1838   return false;
1839 }
1840 
1841 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1842    for HSA.  */
1843 
1844 bool
GOMP_OFFLOAD_host2dev(int ord,void * dst,const void * src,size_t n)1845 GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
1846 {
1847   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1848 		     "it should never be called");
1849   return false;
1850 }
1851 
1852 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1853    for HSA.  */
1854 
1855 bool
GOMP_OFFLOAD_dev2dev(int ord,void * dst,const void * src,size_t n)1856 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
1857 {
1858   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1859 		     "it should never be called");
1860   return false;
1861 }
1862