1 /* Plugin for HSAIL execution.
2
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
4
5 Contributed by Martin Jambor <mjambor@suse.cz> and
6 Martin Liska <mliska@suse.cz>.
7
8 This file is part of the GNU Offloading and Multi Processing Library
9 (libgomp).
10
11 Libgomp is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3, or (at your option)
14 any later version.
15
16 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 more details.
20
21 Under Section 7 of GPL version 3, you are granted additional
22 permissions described in the GCC Runtime Library Exception, version
23 3.1, as published by the Free Software Foundation.
24
25 You should have received a copy of the GNU General Public License and
26 a copy of the GCC Runtime Library Exception along with this program;
27 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
28 <http://www.gnu.org/licenses/>. */
29
30 #include "config.h"
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <pthread.h>
35 #include <inttypes.h>
36 #include <stdbool.h>
37 #include <hsa.h>
38 #include <plugin/hsa_ext_finalize.h>
39 #include <dlfcn.h>
40 #include "libgomp-plugin.h"
41 #include "gomp-constants.h"
42 #include "secure_getenv.h"
43
44 /* As an HSA runtime is dlopened, following structure defines function
45 pointers utilized by the HSA plug-in. */
46
47 struct hsa_runtime_fn_info
48 {
49 /* HSA runtime. */
50 hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
51 const char **status_string);
52 hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
53 hsa_agent_info_t attribute,
54 void *value);
55 hsa_status_t (*hsa_init_fn) (void);
56 hsa_status_t (*hsa_iterate_agents_fn)
57 (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
58 hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
59 hsa_region_info_t attribute,
60 void *value);
61 hsa_status_t (*hsa_queue_create_fn)
62 (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
63 void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
64 void *data, uint32_t private_segment_size,
65 uint32_t group_segment_size, hsa_queue_t **queue);
66 hsa_status_t (*hsa_agent_iterate_regions_fn)
67 (hsa_agent_t agent,
68 hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
69 hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
70 hsa_status_t (*hsa_executable_create_fn)
71 (hsa_profile_t profile, hsa_executable_state_t executable_state,
72 const char *options, hsa_executable_t *executable);
73 hsa_status_t (*hsa_executable_global_variable_define_fn)
74 (hsa_executable_t executable, const char *variable_name, void *address);
75 hsa_status_t (*hsa_executable_load_code_object_fn)
76 (hsa_executable_t executable, hsa_agent_t agent,
77 hsa_code_object_t code_object, const char *options);
78 hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
79 const char *options);
80 hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
81 uint32_t num_consumers,
82 const hsa_agent_t *consumers,
83 hsa_signal_t *signal);
84 hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
85 void **ptr);
86 hsa_status_t (*hsa_memory_free_fn) (void *ptr);
87 hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
88 hsa_status_t (*hsa_executable_get_symbol_fn)
89 (hsa_executable_t executable, const char *module_name,
90 const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
91 hsa_executable_symbol_t *symbol);
92 hsa_status_t (*hsa_executable_symbol_get_info_fn)
93 (hsa_executable_symbol_t executable_symbol,
94 hsa_executable_symbol_info_t attribute, void *value);
95 uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
96 uint64_t value);
97 uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
98 void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
99 hsa_signal_value_t value);
100 void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
101 hsa_signal_value_t value);
102 hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
103 (hsa_signal_t signal, hsa_signal_condition_t condition,
104 hsa_signal_value_t compare_value, uint64_t timeout_hint,
105 hsa_wait_state_t wait_state_hint);
106 hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
107 hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
108
109 /* HSA finalizer. */
110 hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
111 hsa_ext_module_t module);
112 hsa_status_t (*hsa_ext_program_create_fn)
113 (hsa_machine_model_t machine_model, hsa_profile_t profile,
114 hsa_default_float_rounding_mode_t default_float_rounding_mode,
115 const char *options, hsa_ext_program_t *program);
116 hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
117 hsa_status_t (*hsa_ext_program_finalize_fn)
118 (hsa_ext_program_t program,hsa_isa_t isa,
119 int32_t call_convention, hsa_ext_control_directives_t control_directives,
120 const char *options, hsa_code_object_type_t code_object_type,
121 hsa_code_object_t *code_object);
122 };
123
124 /* HSA runtime functions that are initialized in init_hsa_context. */
125
126 static struct hsa_runtime_fn_info hsa_fns;
127
128 /* Keep the following GOMP prefixed structures in sync with respective parts of
129 the compiler. */
130
131 /* Structure describing the run-time and grid properties of an HSA kernel
132 lauch. */
133
134 struct GOMP_kernel_launch_attributes
135 {
136 /* Number of dimensions the workload has. Maximum number is 3. */
137 uint32_t ndim;
138 /* Size of the grid in the three respective dimensions. */
139 uint32_t gdims[3];
140 /* Size of work-groups in the respective dimensions. */
141 uint32_t wdims[3];
142 };
143
144 /* Collection of information needed for a dispatch of a kernel from a
145 kernel. */
146
147 struct GOMP_hsa_kernel_dispatch
148 {
149 /* Pointer to a command queue associated with a kernel dispatch agent. */
150 void *queue;
151 /* Pointer to reserved memory for OMP data struct copying. */
152 void *omp_data_memory;
153 /* Pointer to a memory space used for kernel arguments passing. */
154 void *kernarg_address;
155 /* Kernel object. */
156 uint64_t object;
157 /* Synchronization signal used for dispatch synchronization. */
158 uint64_t signal;
159 /* Private segment size. */
160 uint32_t private_segment_size;
161 /* Group segment size. */
162 uint32_t group_segment_size;
163 /* Number of children kernel dispatches. */
164 uint64_t kernel_dispatch_count;
165 /* Debug purpose argument. */
166 uint64_t debug;
167 /* Levels-var ICV. */
168 uint64_t omp_level;
169 /* Kernel dispatch structures created for children kernel dispatches. */
170 struct GOMP_hsa_kernel_dispatch **children_dispatches;
171 /* Number of threads. */
172 uint32_t omp_num_threads;
173 };
174
175 /* Part of the libgomp plugin interface. Return the name of the accelerator,
176 which is "hsa". */
177
178 const char *
GOMP_OFFLOAD_get_name(void)179 GOMP_OFFLOAD_get_name (void)
180 {
181 return "hsa";
182 }
183
184 /* Part of the libgomp plugin interface. Return the specific capabilities the
185 HSA accelerator have. */
186
187 unsigned int
GOMP_OFFLOAD_get_caps(void)188 GOMP_OFFLOAD_get_caps (void)
189 {
190 return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
191 }
192
193 /* Part of the libgomp plugin interface. Identify as HSA accelerator. */
194
195 int
GOMP_OFFLOAD_get_type(void)196 GOMP_OFFLOAD_get_type (void)
197 {
198 return OFFLOAD_TARGET_TYPE_HSA;
199 }
200
201 /* Return the libgomp version number we're compatible with. There is
202 no requirement for cross-version compatibility. */
203
204 unsigned
GOMP_OFFLOAD_version(void)205 GOMP_OFFLOAD_version (void)
206 {
207 return GOMP_VERSION;
208 }
209
210 /* Flag to decide whether print to stderr information about what is going on.
211 Set in init_debug depending on environment variables. */
212
213 static bool debug;
214
215 /* Flag to decide if the runtime should suppress a possible fallback to host
216 execution. */
217
218 static bool suppress_host_fallback;
219
220 /* Flag to locate HSA runtime shared library that is dlopened
221 by this plug-in. */
222
223 static const char *hsa_runtime_lib;
224
225 /* Flag to decide if the runtime should support also CPU devices (can be
226 a simulator). */
227
228 static bool support_cpu_devices;
229
230 /* Initialize debug and suppress_host_fallback according to the environment. */
231
232 static void
init_enviroment_variables(void)233 init_enviroment_variables (void)
234 {
235 if (secure_getenv ("HSA_DEBUG"))
236 debug = true;
237 else
238 debug = false;
239
240 if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
241 suppress_host_fallback = true;
242 else
243 suppress_host_fallback = false;
244
245 hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
246 if (hsa_runtime_lib == NULL)
247 hsa_runtime_lib = "libhsa-runtime64.so";
248
249 support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
250 }
251
252 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
253 is set to true. */
254
255 #define HSA_LOG(prefix, ...) \
256 do \
257 { \
258 if (debug) \
259 { \
260 fprintf (stderr, prefix); \
261 fprintf (stderr, __VA_ARGS__); \
262 } \
263 } \
264 while (false)
265
266 /* Print a debugging message to stderr. */
267
268 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
269
270 /* Print a warning message to stderr. */
271
272 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
273
274 /* Print HSA warning STR with an HSA STATUS code. */
275
276 static void
hsa_warn(const char * str,hsa_status_t status)277 hsa_warn (const char *str, hsa_status_t status)
278 {
279 if (!debug)
280 return;
281
282 const char *hsa_error_msg;
283 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
284
285 fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
286 }
287
288 /* Report a fatal error STR together with the HSA error corresponding to STATUS
289 and terminate execution of the current process. */
290
291 static void
hsa_fatal(const char * str,hsa_status_t status)292 hsa_fatal (const char *str, hsa_status_t status)
293 {
294 const char *hsa_error_msg;
295 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
296 GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
297 hsa_error_msg);
298 }
299
300 /* Like hsa_fatal, except only report error message, and return FALSE
301 for propagating error processing to outside of plugin. */
302
303 static bool
hsa_error(const char * str,hsa_status_t status)304 hsa_error (const char *str, hsa_status_t status)
305 {
306 const char *hsa_error_msg;
307 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
308 GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
309 hsa_error_msg);
310 return false;
311 }
312
313 struct hsa_kernel_description
314 {
315 const char *name;
316 unsigned omp_data_size;
317 bool gridified_kernel_p;
318 unsigned kernel_dependencies_count;
319 const char **kernel_dependencies;
320 };
321
322 struct global_var_info
323 {
324 const char *name;
325 void *address;
326 };
327
328 /* Data passed by the static initializer of a compilation unit containing BRIG
329 to GOMP_offload_register. */
330
331 struct brig_image_desc
332 {
333 hsa_ext_module_t brig_module;
334 const unsigned kernel_count;
335 struct hsa_kernel_description *kernel_infos;
336 const unsigned global_variable_count;
337 struct global_var_info *global_variables;
338 };
339
340 struct agent_info;
341
342 /* Information required to identify, finalize and run any given kernel. */
343
344 struct kernel_info
345 {
346 /* Name of the kernel, required to locate it within the brig module. */
347 const char *name;
348 /* Size of memory space for OMP data. */
349 unsigned omp_data_size;
350 /* The specific agent the kernel has been or will be finalized for and run
351 on. */
352 struct agent_info *agent;
353 /* The specific module where the kernel takes place. */
354 struct module_info *module;
355 /* Mutex enforcing that at most once thread ever initializes a kernel for
356 use. A thread should have locked agent->modules_rwlock for reading before
357 acquiring it. */
358 pthread_mutex_t init_mutex;
359 /* Flag indicating whether the kernel has been initialized and all fields
360 below it contain valid data. */
361 bool initialized;
362 /* Flag indicating that the kernel has a problem that blocks an execution. */
363 bool initialization_failed;
364 /* The object to be put into the dispatch queue. */
365 uint64_t object;
366 /* Required size of kernel arguments. */
367 uint32_t kernarg_segment_size;
368 /* Required size of group segment. */
369 uint32_t group_segment_size;
370 /* Required size of private segment. */
371 uint32_t private_segment_size;
372 /* List of all kernel dependencies. */
373 const char **dependencies;
374 /* Number of dependencies. */
375 unsigned dependencies_count;
376 /* Maximum OMP data size necessary for kernel from kernel dispatches. */
377 unsigned max_omp_data_size;
378 /* True if the kernel is gridified. */
379 bool gridified_kernel_p;
380 };
381
382 /* Information about a particular brig module, its image and kernels. */
383
384 struct module_info
385 {
386 /* The next and previous module in the linked list of modules of an agent. */
387 struct module_info *next, *prev;
388 /* The description with which the program has registered the image. */
389 struct brig_image_desc *image_desc;
390
391 /* Number of kernels in this module. */
392 int kernel_count;
393 /* An array of kernel_info structures describing each kernel in this
394 module. */
395 struct kernel_info kernels[];
396 };
397
398 /* Information about shared brig library. */
399
400 struct brig_library_info
401 {
402 char *file_name;
403 hsa_ext_module_t image;
404 };
405
406 /* Description of an HSA GPU agent and the program associated with it. */
407
408 struct agent_info
409 {
410 /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
411 hsa_agent_t id;
412 /* Whether the agent has been initialized. The fields below are usable only
413 if it has been. */
414 bool initialized;
415 /* The HSA ISA of this agent. */
416 hsa_isa_t isa;
417 /* Command queue of the agent. */
418 hsa_queue_t *command_q;
419 /* Kernel from kernel dispatch command queue. */
420 hsa_queue_t *kernel_dispatch_command_q;
421 /* The HSA memory region from which to allocate kernel arguments. */
422 hsa_region_t kernarg_region;
423
424 /* Read-write lock that protects kernels which are running or about to be run
425 from interference with loading and unloading of images. Needs to be
426 locked for reading while a kernel is being run, and for writing if the
427 list of modules is manipulated (and thus the HSA program invalidated). */
428 pthread_rwlock_t modules_rwlock;
429 /* The first module in a linked list of modules associated with this
430 kernel. */
431 struct module_info *first_module;
432
433 /* Mutex enforcing that only one thread will finalize the HSA program. A
434 thread should have locked agent->modules_rwlock for reading before
435 acquiring it. */
436 pthread_mutex_t prog_mutex;
437 /* Flag whether the HSA program that consists of all the modules has been
438 finalized. */
439 bool prog_finalized;
440 /* Flag whether the program was finalized but with a failure. */
441 bool prog_finalized_error;
442 /* HSA executable - the finalized program that is used to locate kernels. */
443 hsa_executable_t executable;
444 /* List of BRIG libraries. */
445 struct brig_library_info **brig_libraries;
446 /* Number of loaded shared BRIG libraries. */
447 unsigned brig_libraries_count;
448 };
449
450 /* Information about the whole HSA environment and all of its agents. */
451
452 struct hsa_context_info
453 {
454 /* Whether the structure has been initialized. */
455 bool initialized;
456 /* Number of usable GPU HSA agents in the system. */
457 int agent_count;
458 /* Array of agent_info structures describing the individual HSA agents. */
459 struct agent_info *agents;
460 };
461
462 /* Information about the whole HSA environment and all of its agents. */
463
464 static struct hsa_context_info hsa_context;
465
466 #define DLSYM_FN(function) \
467 hsa_fns.function##_fn = dlsym (handle, #function); \
468 if (hsa_fns.function##_fn == NULL) \
469 goto dl_fail;
470
471 static bool
init_hsa_runtime_functions(void)472 init_hsa_runtime_functions (void)
473 {
474 void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
475 if (handle == NULL)
476 goto dl_fail;
477
478 DLSYM_FN (hsa_status_string)
479 DLSYM_FN (hsa_agent_get_info)
480 DLSYM_FN (hsa_init)
481 DLSYM_FN (hsa_iterate_agents)
482 DLSYM_FN (hsa_region_get_info)
483 DLSYM_FN (hsa_queue_create)
484 DLSYM_FN (hsa_agent_iterate_regions)
485 DLSYM_FN (hsa_executable_destroy)
486 DLSYM_FN (hsa_executable_create)
487 DLSYM_FN (hsa_executable_global_variable_define)
488 DLSYM_FN (hsa_executable_load_code_object)
489 DLSYM_FN (hsa_executable_freeze)
490 DLSYM_FN (hsa_signal_create)
491 DLSYM_FN (hsa_memory_allocate)
492 DLSYM_FN (hsa_memory_free)
493 DLSYM_FN (hsa_signal_destroy)
494 DLSYM_FN (hsa_executable_get_symbol)
495 DLSYM_FN (hsa_executable_symbol_get_info)
496 DLSYM_FN (hsa_queue_add_write_index_release)
497 DLSYM_FN (hsa_queue_load_read_index_acquire)
498 DLSYM_FN (hsa_signal_wait_acquire)
499 DLSYM_FN (hsa_signal_store_relaxed)
500 DLSYM_FN (hsa_signal_store_release)
501 DLSYM_FN (hsa_signal_load_acquire)
502 DLSYM_FN (hsa_queue_destroy)
503 DLSYM_FN (hsa_ext_program_add_module)
504 DLSYM_FN (hsa_ext_program_create)
505 DLSYM_FN (hsa_ext_program_destroy)
506 DLSYM_FN (hsa_ext_program_finalize)
507 return true;
508
509 dl_fail:
510 HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib, dlerror ());
511 return false;
512 }
513
514 /* Find kernel for an AGENT by name provided in KERNEL_NAME. */
515
516 static struct kernel_info *
get_kernel_for_agent(struct agent_info * agent,const char * kernel_name)517 get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
518 {
519 struct module_info *module = agent->first_module;
520
521 while (module)
522 {
523 for (unsigned i = 0; i < module->kernel_count; i++)
524 if (strcmp (module->kernels[i].name, kernel_name) == 0)
525 return &module->kernels[i];
526
527 module = module->next;
528 }
529
530 return NULL;
531 }
532
533 /* Return true if the agent is a GPU and acceptable of concurrent submissions
534 from different threads. */
535
536 static bool
suitable_hsa_agent_p(hsa_agent_t agent)537 suitable_hsa_agent_p (hsa_agent_t agent)
538 {
539 hsa_device_type_t device_type;
540 hsa_status_t status
541 = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
542 &device_type);
543 if (status != HSA_STATUS_SUCCESS)
544 return false;
545
546 switch (device_type)
547 {
548 case HSA_DEVICE_TYPE_GPU:
549 break;
550 case HSA_DEVICE_TYPE_CPU:
551 if (!support_cpu_devices)
552 return false;
553 break;
554 default:
555 return false;
556 }
557
558 uint32_t features = 0;
559 status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
560 &features);
561 if (status != HSA_STATUS_SUCCESS
562 || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
563 return false;
564 hsa_queue_type_t queue_type;
565 status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
566 &queue_type);
567 if (status != HSA_STATUS_SUCCESS
568 || (queue_type != HSA_QUEUE_TYPE_MULTI))
569 return false;
570
571 return true;
572 }
573
574 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
575 agent_count in hsa_context. */
576
577 static hsa_status_t
count_gpu_agents(hsa_agent_t agent,void * data)578 count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
579 {
580 if (suitable_hsa_agent_p (agent))
581 hsa_context.agent_count++;
582 return HSA_STATUS_SUCCESS;
583 }
584
585 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
586 id to the describing structure in the hsa context. The index of the
587 structure is pointed to by DATA, increment it afterwards. */
588
589 static hsa_status_t
assign_agent_ids(hsa_agent_t agent,void * data)590 assign_agent_ids (hsa_agent_t agent, void *data)
591 {
592 if (suitable_hsa_agent_p (agent))
593 {
594 int *agent_index = (int *) data;
595 hsa_context.agents[*agent_index].id = agent;
596 ++*agent_index;
597 }
598 return HSA_STATUS_SUCCESS;
599 }
600
601 /* Initialize hsa_context if it has not already been done.
602 Return TRUE on success. */
603
604 static bool
init_hsa_context(void)605 init_hsa_context (void)
606 {
607 hsa_status_t status;
608 int agent_index = 0;
609
610 if (hsa_context.initialized)
611 return true;
612 init_enviroment_variables ();
613 if (!init_hsa_runtime_functions ())
614 {
615 HSA_DEBUG ("Run-time could not be dynamically opened\n");
616 return false;
617 }
618 status = hsa_fns.hsa_init_fn ();
619 if (status != HSA_STATUS_SUCCESS)
620 return hsa_error ("Run-time could not be initialized", status);
621 HSA_DEBUG ("HSA run-time initialized\n");
622 status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
623 if (status != HSA_STATUS_SUCCESS)
624 return hsa_error ("HSA GPU devices could not be enumerated", status);
625 HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
626
627 hsa_context.agents
628 = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
629 * sizeof (struct agent_info));
630 status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
631 if (agent_index != hsa_context.agent_count)
632 {
633 GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
634 return false;
635 }
636 hsa_context.initialized = true;
637 return true;
638 }
639
640 /* Callback of dispatch queues to report errors. */
641
642 static void
queue_callback(hsa_status_t status,hsa_queue_t * queue,void * data)643 queue_callback (hsa_status_t status,
644 hsa_queue_t *queue __attribute__ ((unused)),
645 void *data __attribute__ ((unused)))
646 {
647 hsa_fatal ("Asynchronous queue error", status);
648 }
649
650 /* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
651 used for kernarg allocations and if so write it to the memory pointed to by
652 DATA and break the query. */
653
654 static hsa_status_t
get_kernarg_memory_region(hsa_region_t region,void * data)655 get_kernarg_memory_region (hsa_region_t region, void *data)
656 {
657 hsa_status_t status;
658 hsa_region_segment_t segment;
659
660 status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
661 &segment);
662 if (status != HSA_STATUS_SUCCESS)
663 return status;
664 if (segment != HSA_REGION_SEGMENT_GLOBAL)
665 return HSA_STATUS_SUCCESS;
666
667 uint32_t flags;
668 status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
669 &flags);
670 if (status != HSA_STATUS_SUCCESS)
671 return status;
672 if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
673 {
674 hsa_region_t *ret = (hsa_region_t *) data;
675 *ret = region;
676 return HSA_STATUS_INFO_BREAK;
677 }
678 return HSA_STATUS_SUCCESS;
679 }
680
681 /* Part of the libgomp plugin interface. Return the number of HSA devices on
682 the system. */
683
684 int
GOMP_OFFLOAD_get_num_devices(void)685 GOMP_OFFLOAD_get_num_devices (void)
686 {
687 if (!init_hsa_context ())
688 return 0;
689 return hsa_context.agent_count;
690 }
691
692 /* Part of the libgomp plugin interface. Initialize agent number N so that it
693 can be used for computation. Return TRUE on success. */
694
695 bool
GOMP_OFFLOAD_init_device(int n)696 GOMP_OFFLOAD_init_device (int n)
697 {
698 if (!init_hsa_context ())
699 return false;
700 if (n >= hsa_context.agent_count)
701 {
702 GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
703 return false;
704 }
705 struct agent_info *agent = &hsa_context.agents[n];
706
707 if (agent->initialized)
708 return true;
709
710 if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
711 {
712 GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
713 return false;
714 }
715 if (pthread_mutex_init (&agent->prog_mutex, NULL))
716 {
717 GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
718 return false;
719 }
720
721 uint32_t queue_size;
722 hsa_status_t status;
723 status = hsa_fns.hsa_agent_get_info_fn (agent->id,
724 HSA_AGENT_INFO_QUEUE_MAX_SIZE,
725 &queue_size);
726 if (status != HSA_STATUS_SUCCESS)
727 return hsa_error ("Error requesting maximum queue size of the HSA agent",
728 status);
729 status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
730 &agent->isa);
731 if (status != HSA_STATUS_SUCCESS)
732 return hsa_error ("Error querying the ISA of the agent", status);
733 status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
734 HSA_QUEUE_TYPE_MULTI,
735 queue_callback, NULL, UINT32_MAX,
736 UINT32_MAX,
737 &agent->command_q);
738 if (status != HSA_STATUS_SUCCESS)
739 return hsa_error ("Error creating command queue", status);
740
741 status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
742 HSA_QUEUE_TYPE_MULTI,
743 queue_callback, NULL, UINT32_MAX,
744 UINT32_MAX,
745 &agent->kernel_dispatch_command_q);
746 if (status != HSA_STATUS_SUCCESS)
747 return hsa_error ("Error creating kernel dispatch command queue", status);
748
749 agent->kernarg_region.handle = (uint64_t) -1;
750 status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
751 get_kernarg_memory_region,
752 &agent->kernarg_region);
753 if (agent->kernarg_region.handle == (uint64_t) -1)
754 {
755 GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
756 "arguments");
757 return false;
758 }
759 HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
760 (long long unsigned) agent->command_q->id);
761 HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
762 (long long unsigned) agent->kernel_dispatch_command_q->id);
763 agent->initialized = true;
764 return true;
765 }
766
767 /* Verify that hsa_context has already been initialized and return the
768 agent_info structure describing device number N. Return NULL on error. */
769
770 static struct agent_info *
get_agent_info(int n)771 get_agent_info (int n)
772 {
773 if (!hsa_context.initialized)
774 {
775 GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
776 return NULL;
777 }
778 if (n >= hsa_context.agent_count)
779 {
780 GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
781 return NULL;
782 }
783 if (!hsa_context.agents[n].initialized)
784 {
785 GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
786 return NULL;
787 }
788 return &hsa_context.agents[n];
789 }
790
791 /* Insert MODULE to the linked list of modules of AGENT. */
792
793 static void
add_module_to_agent(struct agent_info * agent,struct module_info * module)794 add_module_to_agent (struct agent_info *agent, struct module_info *module)
795 {
796 if (agent->first_module)
797 agent->first_module->prev = module;
798 module->next = agent->first_module;
799 module->prev = NULL;
800 agent->first_module = module;
801 }
802
803 /* Remove MODULE from the linked list of modules of AGENT. */
804
805 static void
remove_module_from_agent(struct agent_info * agent,struct module_info * module)806 remove_module_from_agent (struct agent_info *agent, struct module_info *module)
807 {
808 if (agent->first_module == module)
809 agent->first_module = module->next;
810 if (module->prev)
811 module->prev->next = module->next;
812 if (module->next)
813 module->next->prev = module->prev;
814 }
815
816 /* Free the HSA program in agent and everything associated with it and set
817 agent->prog_finalized and the initialized flags of all kernels to false.
818 Return TRUE on success. */
819
820 static bool
destroy_hsa_program(struct agent_info * agent)821 destroy_hsa_program (struct agent_info *agent)
822 {
823 if (!agent->prog_finalized || agent->prog_finalized_error)
824 return true;
825
826 hsa_status_t status;
827
828 HSA_DEBUG ("Destroying the current HSA program.\n");
829
830 status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
831 if (status != HSA_STATUS_SUCCESS)
832 return hsa_error ("Could not destroy HSA executable", status);
833
834 struct module_info *module;
835 for (module = agent->first_module; module; module = module->next)
836 {
837 int i;
838 for (i = 0; i < module->kernel_count; i++)
839 module->kernels[i].initialized = false;
840 }
841 agent->prog_finalized = false;
842 return true;
843 }
844
845 /* Initialize KERNEL from D and other parameters. Return true on success. */
846
847 static bool
init_basic_kernel_info(struct kernel_info * kernel,struct hsa_kernel_description * d,struct agent_info * agent,struct module_info * module)848 init_basic_kernel_info (struct kernel_info *kernel,
849 struct hsa_kernel_description *d,
850 struct agent_info *agent,
851 struct module_info *module)
852 {
853 kernel->agent = agent;
854 kernel->module = module;
855 kernel->name = d->name;
856 kernel->omp_data_size = d->omp_data_size;
857 kernel->gridified_kernel_p = d->gridified_kernel_p;
858 kernel->dependencies_count = d->kernel_dependencies_count;
859 kernel->dependencies = d->kernel_dependencies;
860 if (pthread_mutex_init (&kernel->init_mutex, NULL))
861 {
862 GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
863 return false;
864 }
865 return true;
866 }
867
868 /* Part of the libgomp plugin interface. Load BRIG module described by struct
869 brig_image_desc in TARGET_DATA and return references to kernel descriptors
870 in TARGET_TABLE. */
871
872 int
GOMP_OFFLOAD_load_image(int ord,unsigned version,const void * target_data,struct addr_pair ** target_table)873 GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
874 struct addr_pair **target_table)
875 {
876 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
877 {
878 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
879 " (expected %u, received %u)",
880 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
881 return -1;
882 }
883
884 struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
885 struct agent_info *agent;
886 struct addr_pair *pair;
887 struct module_info *module;
888 struct kernel_info *kernel;
889 int kernel_count = image_desc->kernel_count;
890
891 agent = get_agent_info (ord);
892 if (!agent)
893 return -1;
894
895 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
896 {
897 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
898 return -1;
899 }
900 if (agent->prog_finalized
901 && !destroy_hsa_program (agent))
902 return -1;
903
904 HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
905 pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
906 *target_table = pair;
907 module = (struct module_info *)
908 GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
909 + kernel_count * sizeof (struct kernel_info));
910 module->image_desc = image_desc;
911 module->kernel_count = kernel_count;
912
913 kernel = &module->kernels[0];
914
915 /* Allocate memory for kernel dependencies. */
916 for (unsigned i = 0; i < kernel_count; i++)
917 {
918 pair->start = (uintptr_t) kernel;
919 pair->end = (uintptr_t) (kernel + 1);
920
921 struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
922 if (!init_basic_kernel_info (kernel, d, agent, module))
923 return -1;
924 kernel++;
925 pair++;
926 }
927
928 add_module_to_agent (agent, module);
929 if (pthread_rwlock_unlock (&agent->modules_rwlock))
930 {
931 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
932 return -1;
933 }
934 return kernel_count;
935 }
936
937 /* Add a shared BRIG library from a FILE_NAME to an AGENT. */
938
939 static struct brig_library_info *
add_shared_library(const char * file_name,struct agent_info * agent)940 add_shared_library (const char *file_name, struct agent_info *agent)
941 {
942 struct brig_library_info *library = NULL;
943
944 void *f = dlopen (file_name, RTLD_NOW);
945 void *start = dlsym (f, "__brig_start");
946 void *end = dlsym (f, "__brig_end");
947
948 if (start == NULL || end == NULL)
949 return NULL;
950
951 unsigned size = end - start;
952 char *buf = (char *) GOMP_PLUGIN_malloc (size);
953 memcpy (buf, start, size);
954
955 library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
956 library->file_name = (char *) GOMP_PLUGIN_malloc
957 ((strlen (file_name) + 1));
958 strcpy (library->file_name, file_name);
959 library->image = (hsa_ext_module_t) buf;
960
961 return library;
962 }
963
964 /* Release memory used for BRIG shared libraries that correspond
965 to an AGENT. */
966
967 static void
release_agent_shared_libraries(struct agent_info * agent)968 release_agent_shared_libraries (struct agent_info *agent)
969 {
970 for (unsigned i = 0; i < agent->brig_libraries_count; i++)
971 if (agent->brig_libraries[i])
972 {
973 free (agent->brig_libraries[i]->file_name);
974 free (agent->brig_libraries[i]->image);
975 free (agent->brig_libraries[i]);
976 }
977
978 free (agent->brig_libraries);
979 }
980
981 /* Create and finalize the program consisting of all loaded modules. */
982
983 static void
create_and_finalize_hsa_program(struct agent_info * agent)984 create_and_finalize_hsa_program (struct agent_info *agent)
985 {
986 hsa_status_t status;
987 hsa_ext_program_t prog_handle;
988 int mi = 0;
989
990 if (pthread_mutex_lock (&agent->prog_mutex))
991 GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
992 if (agent->prog_finalized)
993 goto final;
994
995 status = hsa_fns.hsa_ext_program_create_fn
996 (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
997 HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
998 NULL, &prog_handle);
999 if (status != HSA_STATUS_SUCCESS)
1000 hsa_fatal ("Could not create an HSA program", status);
1001
1002 HSA_DEBUG ("Created a finalized program\n");
1003
1004 struct module_info *module = agent->first_module;
1005 while (module)
1006 {
1007 status = hsa_fns.hsa_ext_program_add_module_fn
1008 (prog_handle, module->image_desc->brig_module);
1009 if (status != HSA_STATUS_SUCCESS)
1010 hsa_fatal ("Could not add a module to the HSA program", status);
1011 module = module->next;
1012 mi++;
1013 }
1014
1015 /* Load all shared libraries. */
1016 const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
1017 const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
1018
1019 agent->brig_libraries_count = libraries_count;
1020 agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
1021 (sizeof (struct brig_library_info) * libraries_count);
1022
1023 for (unsigned i = 0; i < libraries_count; i++)
1024 {
1025 struct brig_library_info *library = add_shared_library (libraries[i],
1026 agent);
1027 if (library == NULL)
1028 {
1029 HSA_WARNING ("Could not open a shared BRIG library: %s\n",
1030 libraries[i]);
1031 continue;
1032 }
1033
1034 status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
1035 library->image);
1036 if (status != HSA_STATUS_SUCCESS)
1037 hsa_warn ("Could not add a shared BRIG library the HSA program",
1038 status);
1039 else
1040 HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
1041 libraries[i]);
1042 }
1043
1044 hsa_ext_control_directives_t control_directives;
1045 memset (&control_directives, 0, sizeof (control_directives));
1046 hsa_code_object_t code_object;
1047 status = hsa_fns.hsa_ext_program_finalize_fn
1048 (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
1049 control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
1050 if (status != HSA_STATUS_SUCCESS)
1051 {
1052 hsa_warn ("Finalization of the HSA program failed", status);
1053 goto failure;
1054 }
1055
1056 HSA_DEBUG ("Finalization done\n");
1057 hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
1058
1059 status
1060 = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
1061 HSA_EXECUTABLE_STATE_UNFROZEN,
1062 "", &agent->executable);
1063 if (status != HSA_STATUS_SUCCESS)
1064 hsa_fatal ("Could not create HSA executable", status);
1065
1066 module = agent->first_module;
1067 while (module)
1068 {
1069 /* Initialize all global variables declared in the module. */
1070 for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
1071 {
1072 struct global_var_info *var;
1073 var = &module->image_desc->global_variables[i];
1074 status = hsa_fns.hsa_executable_global_variable_define_fn
1075 (agent->executable, var->name, var->address);
1076
1077 HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
1078 var->address);
1079
1080 if (status != HSA_STATUS_SUCCESS)
1081 hsa_fatal ("Could not define a global variable in the HSA program",
1082 status);
1083 }
1084
1085 module = module->next;
1086 }
1087
1088 status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
1089 agent->id,
1090 code_object, "");
1091 if (status != HSA_STATUS_SUCCESS)
1092 hsa_fatal ("Could not add a code object to the HSA executable", status);
1093 status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
1094 if (status != HSA_STATUS_SUCCESS)
1095 hsa_fatal ("Could not freeze the HSA executable", status);
1096
1097 HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
1098
1099 /* If all goes good, jump to final. */
1100 goto final;
1101
1102 failure:
1103 agent->prog_finalized_error = true;
1104
1105 final:
1106 agent->prog_finalized = true;
1107
1108 if (pthread_mutex_unlock (&agent->prog_mutex))
1109 GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
1110 }
1111
1112 /* Create kernel dispatch data structure for given KERNEL. */
1113
1114 static struct GOMP_hsa_kernel_dispatch *
create_single_kernel_dispatch(struct kernel_info * kernel,unsigned omp_data_size)1115 create_single_kernel_dispatch (struct kernel_info *kernel,
1116 unsigned omp_data_size)
1117 {
1118 struct agent_info *agent = kernel->agent;
1119 struct GOMP_hsa_kernel_dispatch *shadow
1120 = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
1121
1122 shadow->queue = agent->command_q;
1123 shadow->omp_data_memory
1124 = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
1125 unsigned dispatch_count = kernel->dependencies_count;
1126 shadow->kernel_dispatch_count = dispatch_count;
1127
1128 shadow->children_dispatches
1129 = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
1130
1131 shadow->object = kernel->object;
1132
1133 hsa_signal_t sync_signal;
1134 hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
1135 if (status != HSA_STATUS_SUCCESS)
1136 hsa_fatal ("Error creating the HSA sync signal", status);
1137
1138 shadow->signal = sync_signal.handle;
1139 shadow->private_segment_size = kernel->private_segment_size;
1140 shadow->group_segment_size = kernel->group_segment_size;
1141
1142 status
1143 = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
1144 kernel->kernarg_segment_size,
1145 &shadow->kernarg_address);
1146 if (status != HSA_STATUS_SUCCESS)
1147 hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
1148
1149 return shadow;
1150 }
1151
1152 /* Release data structure created for a kernel dispatch in SHADOW argument. */
1153
1154 static void
release_kernel_dispatch(struct GOMP_hsa_kernel_dispatch * shadow)1155 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
1156 {
1157 HSA_DEBUG ("Released kernel dispatch: %p has value: %" PRIu64 " (%p)\n",
1158 shadow, shadow->debug,
1159 (void *) (uintptr_t) shadow->debug);
1160
1161 hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
1162
1163 hsa_signal_t s;
1164 s.handle = shadow->signal;
1165 hsa_fns.hsa_signal_destroy_fn (s);
1166
1167 free (shadow->omp_data_memory);
1168
1169 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1170 release_kernel_dispatch (shadow->children_dispatches[i]);
1171
1172 free (shadow->children_dispatches);
1173 free (shadow);
1174 }
1175
1176 /* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
1177 to calculate maximum necessary memory for OMP data allocation. */
1178
1179 static void
init_single_kernel(struct kernel_info * kernel,unsigned * max_omp_data_size)1180 init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
1181 {
1182 hsa_status_t status;
1183 struct agent_info *agent = kernel->agent;
1184 hsa_executable_symbol_t kernel_symbol;
1185 status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
1186 kernel->name, agent->id,
1187 0, &kernel_symbol);
1188 if (status != HSA_STATUS_SUCCESS)
1189 {
1190 hsa_warn ("Could not find symbol for kernel in the code object", status);
1191 goto failure;
1192 }
1193 HSA_DEBUG ("Located kernel %s\n", kernel->name);
1194 status = hsa_fns.hsa_executable_symbol_get_info_fn
1195 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
1196 if (status != HSA_STATUS_SUCCESS)
1197 hsa_fatal ("Could not extract a kernel object from its symbol", status);
1198 status = hsa_fns.hsa_executable_symbol_get_info_fn
1199 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
1200 &kernel->kernarg_segment_size);
1201 if (status != HSA_STATUS_SUCCESS)
1202 hsa_fatal ("Could not get info about kernel argument size", status);
1203 status = hsa_fns.hsa_executable_symbol_get_info_fn
1204 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
1205 &kernel->group_segment_size);
1206 if (status != HSA_STATUS_SUCCESS)
1207 hsa_fatal ("Could not get info about kernel group segment size", status);
1208 status = hsa_fns.hsa_executable_symbol_get_info_fn
1209 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
1210 &kernel->private_segment_size);
1211 if (status != HSA_STATUS_SUCCESS)
1212 hsa_fatal ("Could not get info about kernel private segment size",
1213 status);
1214
1215 HSA_DEBUG ("Kernel structure for %s fully initialized with "
1216 "following segment sizes: \n", kernel->name);
1217 HSA_DEBUG (" group_segment_size: %u\n",
1218 (unsigned) kernel->group_segment_size);
1219 HSA_DEBUG (" private_segment_size: %u\n",
1220 (unsigned) kernel->private_segment_size);
1221 HSA_DEBUG (" kernarg_segment_size: %u\n",
1222 (unsigned) kernel->kernarg_segment_size);
1223 HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size);
1224 HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
1225
1226 if (kernel->omp_data_size > *max_omp_data_size)
1227 *max_omp_data_size = kernel->omp_data_size;
1228
1229 for (unsigned i = 0; i < kernel->dependencies_count; i++)
1230 {
1231 struct kernel_info *dependency
1232 = get_kernel_for_agent (agent, kernel->dependencies[i]);
1233
1234 if (dependency == NULL)
1235 {
1236 HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1237 "dependency name: %s\n", kernel->name,
1238 kernel->dependencies[i]);
1239 goto failure;
1240 }
1241
1242 if (dependency->dependencies_count > 0)
1243 {
1244 HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1245 "a depth bigger than one\n");
1246 goto failure;
1247 }
1248
1249 init_single_kernel (dependency, max_omp_data_size);
1250 }
1251
1252 return;
1253
1254 failure:
1255 kernel->initialization_failed = true;
1256 }
1257
1258 /* Indent stream F by INDENT spaces. */
1259
1260 static void
indent_stream(FILE * f,unsigned indent)1261 indent_stream (FILE *f, unsigned indent)
1262 {
1263 fprintf (f, "%*s", indent, "");
1264 }
1265
1266 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
1267
1268 static void
print_kernel_dispatch(struct GOMP_hsa_kernel_dispatch * dispatch,unsigned indent)1269 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
1270 {
1271 indent_stream (stderr, indent);
1272 fprintf (stderr, "this: %p\n", dispatch);
1273 indent_stream (stderr, indent);
1274 fprintf (stderr, "queue: %p\n", dispatch->queue);
1275 indent_stream (stderr, indent);
1276 fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
1277 indent_stream (stderr, indent);
1278 fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
1279 indent_stream (stderr, indent);
1280 fprintf (stderr, "object: %" PRIu64 "\n", dispatch->object);
1281 indent_stream (stderr, indent);
1282 fprintf (stderr, "signal: %" PRIu64 "\n", dispatch->signal);
1283 indent_stream (stderr, indent);
1284 fprintf (stderr, "private_segment_size: %u\n",
1285 dispatch->private_segment_size);
1286 indent_stream (stderr, indent);
1287 fprintf (stderr, "group_segment_size: %u\n",
1288 dispatch->group_segment_size);
1289 indent_stream (stderr, indent);
1290 fprintf (stderr, "children dispatches: %" PRIu64 "\n",
1291 dispatch->kernel_dispatch_count);
1292 indent_stream (stderr, indent);
1293 fprintf (stderr, "omp_num_threads: %u\n",
1294 dispatch->omp_num_threads);
1295 fprintf (stderr, "\n");
1296
1297 for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
1298 print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
1299 }
1300
1301 /* Create kernel dispatch data structure for a KERNEL and all its
1302 dependencies. */
1303
1304 static struct GOMP_hsa_kernel_dispatch *
create_kernel_dispatch(struct kernel_info * kernel,unsigned omp_data_size)1305 create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
1306 {
1307 struct GOMP_hsa_kernel_dispatch *shadow
1308 = create_single_kernel_dispatch (kernel, omp_data_size);
1309 shadow->omp_num_threads = 64;
1310 shadow->debug = 0;
1311 shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
1312
1313 /* Create kernel dispatch data structures. We do not allow to have
1314 a kernel dispatch with depth bigger than one. */
1315 for (unsigned i = 0; i < kernel->dependencies_count; i++)
1316 {
1317 struct kernel_info *dependency
1318 = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
1319 shadow->children_dispatches[i]
1320 = create_single_kernel_dispatch (dependency, omp_data_size);
1321 shadow->children_dispatches[i]->queue
1322 = kernel->agent->kernel_dispatch_command_q;
1323 shadow->children_dispatches[i]->omp_level = 1;
1324 }
1325
1326 return shadow;
1327 }
1328
1329 /* Do all the work that is necessary before running KERNEL for the first time.
1330 The function assumes the program has been created, finalized and frozen by
1331 create_and_finalize_hsa_program. */
1332
1333 static void
init_kernel(struct kernel_info * kernel)1334 init_kernel (struct kernel_info *kernel)
1335 {
1336 if (pthread_mutex_lock (&kernel->init_mutex))
1337 GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1338 if (kernel->initialized)
1339 {
1340 if (pthread_mutex_unlock (&kernel->init_mutex))
1341 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1342 "mutex");
1343
1344 return;
1345 }
1346
1347 /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1348 dispatch operation. */
1349 init_single_kernel (kernel, &kernel->max_omp_data_size);
1350
1351 if (!kernel->initialization_failed)
1352 HSA_DEBUG ("\n");
1353
1354 kernel->initialized = true;
1355 if (pthread_mutex_unlock (&kernel->init_mutex))
1356 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1357 "mutex");
1358 }
1359
1360 /* Parse the target attributes INPUT provided by the compiler and return true
1361 if we should run anything all. If INPUT is NULL, fill DEF with default
1362 values, then store INPUT or DEF into *RESULT. */
1363
1364 static bool
parse_target_attributes(void ** input,struct GOMP_kernel_launch_attributes * def,struct GOMP_kernel_launch_attributes ** result)1365 parse_target_attributes (void **input,
1366 struct GOMP_kernel_launch_attributes *def,
1367 struct GOMP_kernel_launch_attributes **result)
1368 {
1369 if (!input)
1370 GOMP_PLUGIN_fatal ("No target arguments provided");
1371
1372 bool attrs_found = false;
1373 while (*input)
1374 {
1375 uintptr_t id = (uintptr_t) *input;
1376 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
1377 && ((id & GOMP_TARGET_ARG_ID_MASK)
1378 == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
1379 {
1380 input++;
1381 attrs_found = true;
1382 break;
1383 }
1384
1385 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
1386 input++;
1387 input++;
1388 }
1389
1390 if (!attrs_found)
1391 {
1392 def->ndim = 1;
1393 def->gdims[0] = 1;
1394 def->gdims[1] = 1;
1395 def->gdims[2] = 1;
1396 def->wdims[0] = 1;
1397 def->wdims[1] = 1;
1398 def->wdims[2] = 1;
1399 *result = def;
1400 HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1401 return true;
1402 }
1403
1404 struct GOMP_kernel_launch_attributes *kla;
1405 kla = (struct GOMP_kernel_launch_attributes *) *input;
1406 *result = kla;
1407 if (kla->ndim == 0 || kla->ndim > 3)
1408 GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
1409
1410 HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
1411 unsigned i;
1412 for (i = 0; i < kla->ndim; i++)
1413 {
1414 HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i,
1415 kla->gdims[i], kla->wdims[i]);
1416 if (kla->gdims[i] == 0)
1417 return false;
1418 }
1419 return true;
1420 }
1421
1422 /* Return the group size given the requested GROUP size, GRID size and number
1423 of grid dimensions NDIM. */
1424
1425 static uint32_t
get_group_size(uint32_t ndim,uint32_t grid,uint32_t group)1426 get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
1427 {
1428 if (group == 0)
1429 {
1430 /* TODO: Provide a default via environment or device characteristics. */
1431 if (ndim == 1)
1432 group = 64;
1433 else if (ndim == 2)
1434 group = 8;
1435 else
1436 group = 4;
1437 }
1438
1439 if (group > grid)
1440 group = grid;
1441 return group;
1442 }
1443
1444 /* Return true if the HSA runtime can run function FN_PTR. */
1445
1446 bool
GOMP_OFFLOAD_can_run(void * fn_ptr)1447 GOMP_OFFLOAD_can_run (void *fn_ptr)
1448 {
1449 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1450 struct agent_info *agent = kernel->agent;
1451 create_and_finalize_hsa_program (agent);
1452
1453 if (agent->prog_finalized_error)
1454 goto failure;
1455
1456 init_kernel (kernel);
1457 if (kernel->initialization_failed)
1458 goto failure;
1459
1460 return true;
1461
1462 failure:
1463 if (suppress_host_fallback)
1464 GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1465 HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1466 return false;
1467 }
1468
1469 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */
1470
1471 void
packet_store_release(uint32_t * packet,uint16_t header,uint16_t rest)1472 packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
1473 {
1474 __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
1475 }
1476
1477 /* Run KERNEL on its agent, pass VARS to it as arguments and take
1478 launchattributes from KLA. */
1479
1480 void
run_kernel(struct kernel_info * kernel,void * vars,struct GOMP_kernel_launch_attributes * kla)1481 run_kernel (struct kernel_info *kernel, void *vars,
1482 struct GOMP_kernel_launch_attributes *kla)
1483 {
1484 struct agent_info *agent = kernel->agent;
1485 if (pthread_rwlock_rdlock (&agent->modules_rwlock))
1486 GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1487
1488 if (!agent->initialized)
1489 GOMP_PLUGIN_fatal ("Agent must be initialized");
1490
1491 if (!kernel->initialized)
1492 GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1493
1494 struct GOMP_hsa_kernel_dispatch *shadow
1495 = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
1496
1497 if (debug)
1498 {
1499 fprintf (stderr, "\nKernel has following dependencies:\n");
1500 print_kernel_dispatch (shadow, 2);
1501 }
1502
1503 uint64_t index
1504 = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
1505 HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
1506
1507 /* Wait until the queue is not full before writing the packet. */
1508 while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
1509 >= agent->command_q->size)
1510 ;
1511
1512 hsa_kernel_dispatch_packet_t *packet;
1513 packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
1514 + index % agent->command_q->size;
1515
1516 memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
1517 packet->grid_size_x = kla->gdims[0];
1518 packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
1519 kla->wdims[0]);
1520
1521 if (kla->ndim >= 2)
1522 {
1523 packet->grid_size_y = kla->gdims[1];
1524 packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
1525 kla->wdims[1]);
1526 }
1527 else
1528 {
1529 packet->grid_size_y = 1;
1530 packet->workgroup_size_y = 1;
1531 }
1532
1533 if (kla->ndim == 3)
1534 {
1535 packet->grid_size_z = kla->gdims[2];
1536 packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
1537 kla->wdims[2]);
1538 }
1539 else
1540 {
1541 packet->grid_size_z = 1;
1542 packet->workgroup_size_z = 1;
1543 }
1544
1545 packet->private_segment_size = kernel->private_segment_size;
1546 packet->group_segment_size = kernel->group_segment_size;
1547 packet->kernel_object = kernel->object;
1548 packet->kernarg_address = shadow->kernarg_address;
1549 hsa_signal_t s;
1550 s.handle = shadow->signal;
1551 packet->completion_signal = s;
1552 hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
1553 memcpy (shadow->kernarg_address, &vars, sizeof (vars));
1554
1555 /* PR hsa/70337. */
1556 size_t vars_size = sizeof (vars);
1557 if (kernel->kernarg_segment_size > vars_size)
1558 {
1559 if (kernel->kernarg_segment_size != vars_size
1560 + sizeof (struct hsa_kernel_runtime *))
1561 GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1562 memcpy (packet->kernarg_address + vars_size, &shadow,
1563 sizeof (struct hsa_kernel_runtime *));
1564 }
1565
1566 HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1567
1568 uint16_t header;
1569 header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1570 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1571 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1572
1573 HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
1574
1575 packet_store_release ((uint32_t *) packet, header,
1576 (uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
1577
1578 hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
1579 index);
1580
1581 /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1582 signal wait and signal load operations on their own and we need to
1583 periodically call the hsa_signal_load_acquire on completion signals of
1584 children kernels in the CPU to make that happen. As soon the
1585 limitation will be resolved, this workaround can be removed. */
1586
1587 HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1588
1589 /* Root signal waits with 1ms timeout. */
1590 while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
1591 1000 * 1000,
1592 HSA_WAIT_STATE_BLOCKED) != 0)
1593 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1594 {
1595 hsa_signal_t child_s;
1596 child_s.handle = shadow->children_dispatches[i]->signal;
1597
1598 HSA_DEBUG ("Waiting for children completion signal: %" PRIu64 "\n",
1599 shadow->children_dispatches[i]->signal);
1600 hsa_fns.hsa_signal_load_acquire_fn (child_s);
1601 }
1602
1603 release_kernel_dispatch (shadow);
1604
1605 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1606 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1607 }
1608
1609 /* Part of the libgomp plugin interface. Run a kernel on device N (the number
1610 is actually ignored, we assume the FN_PTR has been mapped using the correct
1611 device) and pass it an array of pointers in VARS as a parameter. The kernel
1612 is identified by FN_PTR which must point to a kernel_info structure. */
1613
1614 void
GOMP_OFFLOAD_run(int n,void * fn_ptr,void * vars,void ** args)1615 GOMP_OFFLOAD_run (int n __attribute__((unused)),
1616 void *fn_ptr, void *vars, void **args)
1617 {
1618 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1619 struct GOMP_kernel_launch_attributes def;
1620 struct GOMP_kernel_launch_attributes *kla;
1621 if (!parse_target_attributes (args, &def, &kla))
1622 {
1623 HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1624 return;
1625 }
1626 run_kernel (kernel, vars, kla);
1627 }
1628
1629 /* Information to be passed to a thread running a kernel asycnronously. */
1630
1631 struct async_run_info
1632 {
1633 int device;
1634 void *tgt_fn;
1635 void *tgt_vars;
1636 void **args;
1637 void *async_data;
1638 };
1639
1640 /* Thread routine to run a kernel asynchronously. */
1641
1642 static void *
run_kernel_asynchronously(void * thread_arg)1643 run_kernel_asynchronously (void *thread_arg)
1644 {
1645 struct async_run_info *info = (struct async_run_info *) thread_arg;
1646 int device = info->device;
1647 void *tgt_fn = info->tgt_fn;
1648 void *tgt_vars = info->tgt_vars;
1649 void **args = info->args;
1650 void *async_data = info->async_data;
1651
1652 free (info);
1653 GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
1654 GOMP_PLUGIN_target_task_completion (async_data);
1655 return NULL;
1656 }
1657
1658 /* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
1659 does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1660 has finished. */
1661
1662 void
GOMP_OFFLOAD_async_run(int device,void * tgt_fn,void * tgt_vars,void ** args,void * async_data)1663 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
1664 void **args, void *async_data)
1665 {
1666 pthread_t pt;
1667 struct async_run_info *info;
1668 HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
1669 info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
1670
1671 info->device = device;
1672 info->tgt_fn = tgt_fn;
1673 info->tgt_vars = tgt_vars;
1674 info->args = args;
1675 info->async_data = async_data;
1676
1677 int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
1678 if (err != 0)
1679 GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1680 strerror (err));
1681 err = pthread_detach (pt);
1682 if (err != 0)
1683 GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1684 "asynchronously: %s", strerror (err));
1685 }
1686
1687 /* Deinitialize all information associated with MODULE and kernels within
1688 it. Return TRUE on success. */
1689
1690 static bool
destroy_module(struct module_info * module)1691 destroy_module (struct module_info *module)
1692 {
1693 int i;
1694 for (i = 0; i < module->kernel_count; i++)
1695 if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
1696 {
1697 GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1698 "mutex");
1699 return false;
1700 }
1701 return true;
1702 }
1703
1704 /* Part of the libgomp plugin interface. Unload BRIG module described by
1705 struct brig_image_desc in TARGET_DATA from agent number N. Return
1706 TRUE on success. */
1707
1708 bool
GOMP_OFFLOAD_unload_image(int n,unsigned version,const void * target_data)1709 GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data)
1710 {
1711 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
1712 {
1713 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1714 " (expected %u, received %u)",
1715 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
1716 return false;
1717 }
1718
1719 struct agent_info *agent;
1720 agent = get_agent_info (n);
1721 if (!agent)
1722 return false;
1723
1724 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
1725 {
1726 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1727 return false;
1728 }
1729 struct module_info *module = agent->first_module;
1730 while (module)
1731 {
1732 if (module->image_desc == target_data)
1733 break;
1734 module = module->next;
1735 }
1736 if (!module)
1737 {
1738 GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1739 "loaded before");
1740 return false;
1741 }
1742
1743 remove_module_from_agent (agent, module);
1744 if (!destroy_module (module))
1745 return false;
1746 free (module);
1747 if (!destroy_hsa_program (agent))
1748 return false;
1749 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1750 {
1751 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1752 return false;
1753 }
1754 return true;
1755 }
1756
1757 /* Part of the libgomp plugin interface. Deinitialize all information and
1758 status associated with agent number N. We do not attempt any
1759 synchronization, assuming the user and libgomp will not attempt
1760 deinitialization of a device that is in any way being used at the same
1761 time. Return TRUE on success. */
1762
1763 bool
GOMP_OFFLOAD_fini_device(int n)1764 GOMP_OFFLOAD_fini_device (int n)
1765 {
1766 struct agent_info *agent = get_agent_info (n);
1767 if (!agent)
1768 return false;
1769
1770 if (!agent->initialized)
1771 return true;
1772
1773 struct module_info *next_module = agent->first_module;
1774 while (next_module)
1775 {
1776 struct module_info *module = next_module;
1777 next_module = module->next;
1778 if (!destroy_module (module))
1779 return false;
1780 free (module);
1781 }
1782 agent->first_module = NULL;
1783 if (!destroy_hsa_program (agent))
1784 return false;
1785
1786 release_agent_shared_libraries (agent);
1787
1788 hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
1789 if (status != HSA_STATUS_SUCCESS)
1790 return hsa_error ("Error destroying command queue", status);
1791 status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
1792 if (status != HSA_STATUS_SUCCESS)
1793 return hsa_error ("Error destroying kernel dispatch command queue", status);
1794 if (pthread_mutex_destroy (&agent->prog_mutex))
1795 {
1796 GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1797 return false;
1798 }
1799 if (pthread_rwlock_destroy (&agent->modules_rwlock))
1800 {
1801 GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1802 return false;
1803 }
1804 agent->initialized = false;
1805 return true;
1806 }
1807
1808 /* Part of the libgomp plugin interface. Not implemented as it is not required
1809 for HSA. */
1810
1811 void *
GOMP_OFFLOAD_alloc(int ord,size_t size)1812 GOMP_OFFLOAD_alloc (int ord, size_t size)
1813 {
1814 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1815 "it should never be called");
1816 return NULL;
1817 }
1818
1819 /* Part of the libgomp plugin interface. Not implemented as it is not required
1820 for HSA. */
1821
1822 bool
GOMP_OFFLOAD_free(int ord,void * ptr)1823 GOMP_OFFLOAD_free (int ord, void *ptr)
1824 {
1825 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1826 "it should never be called");
1827 return false;
1828 }
1829
1830 /* Part of the libgomp plugin interface. Not implemented as it is not required
1831 for HSA. */
1832
1833 bool
GOMP_OFFLOAD_dev2host(int ord,void * dst,const void * src,size_t n)1834 GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
1835 {
1836 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1837 "it should never be called");
1838 return false;
1839 }
1840
1841 /* Part of the libgomp plugin interface. Not implemented as it is not required
1842 for HSA. */
1843
1844 bool
GOMP_OFFLOAD_host2dev(int ord,void * dst,const void * src,size_t n)1845 GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
1846 {
1847 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1848 "it should never be called");
1849 return false;
1850 }
1851
1852 /* Part of the libgomp plugin interface. Not implemented as it is not required
1853 for HSA. */
1854
1855 bool
GOMP_OFFLOAD_dev2dev(int ord,void * dst,const void * src,size_t n)1856 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
1857 {
1858 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1859 "it should never be called");
1860 return false;
1861 }
1862