1 /*
2 * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
3 * See file LICENSE for terms.
4 */
5
6 #ifdef HAVE_CONFIG_H
7 # include "config.h"
8 #endif
9
10 #include <ucm/rocm/rocmmem.h>
11
12 #include <ucm/event/event.h>
13 #include <ucm/util/log.h>
14 #include <ucm/util/reloc.h>
15 #include <ucm/util/replace.h>
16 #include <ucs/debug/assert.h>
17 #include <ucs/sys/compiler.h>
18 #include <ucs/sys/preprocessor.h>
19
20 #include <unistd.h>
21 #include <pthread.h>
22 #include <stdlib.h>
23 #include <string.h>
24
/*
 * Replacement-function definitions for the two HSA memory-pool entry points
 * hooked by this file. The ucm_orig_hsa_amd_memory_pool_*() functions called
 * further down are presumably generated here - confirm against
 * UCM_DEFINE_REPLACE_DLSYM_FUNC in ucm/util/replace.h.
 * Macro arguments: hooked symbol, return type, HSA_STATUS_ERROR, then the
 * argument types of the hooked function.
 */
UCM_DEFINE_REPLACE_DLSYM_FUNC(hsa_amd_memory_pool_allocate,
                              hsa_status_t, HSA_STATUS_ERROR,
                              hsa_amd_memory_pool_t, size_t, uint32_t, void**)
UCM_DEFINE_REPLACE_DLSYM_FUNC(hsa_amd_memory_pool_free,
                              hsa_status_t, HSA_STATUS_ERROR,
                              void*)
30
#if ENABLE_SYMBOL_OVERRIDE
/* Emitted only when the build enables symbol override. */
UCM_OVERRIDE_FUNC(hsa_amd_memory_pool_allocate, hsa_status_t)
UCM_OVERRIDE_FUNC(hsa_amd_memory_pool_free,     hsa_status_t)
#endif
35
36 static UCS_F_ALWAYS_INLINE void
37 ucm_dispatch_mem_type_alloc(void *addr, size_t length, ucs_memory_type_t mem_type)
38 {
39 ucm_event_t event;
40
41 event.mem_type.address = addr;
42 event.mem_type.size = length;
43 event.mem_type.mem_type = mem_type;
44 ucm_event_dispatch(UCM_EVENT_MEM_TYPE_ALLOC, &event);
45 }
46
47 static UCS_F_ALWAYS_INLINE void
ucm_dispatch_mem_type_free(void * addr,size_t length,ucs_memory_type_t mem_type)48 ucm_dispatch_mem_type_free(void *addr, size_t length, ucs_memory_type_t mem_type)
49 {
50 ucm_event_t event;
51
52 event.mem_type.address = addr;
53 event.mem_type.size = length;
54 event.mem_type.mem_type = mem_type;
55 ucm_event_dispatch(UCM_EVENT_MEM_TYPE_FREE, &event);
56 }
57
ucm_hsa_amd_memory_pool_free_dispatch_events(void * ptr)58 static void ucm_hsa_amd_memory_pool_free_dispatch_events(void *ptr)
59 {
60 size_t size;
61 hsa_status_t status;
62 hsa_device_type_t dev_type;
63 ucs_memory_type_t mem_type = UCS_MEMORY_TYPE_ROCM;
64 hsa_amd_pointer_info_t info = {
65 .size = sizeof(hsa_amd_pointer_info_t),
66 };
67
68 if (ptr == NULL) {
69 return;
70 }
71
72 status = hsa_amd_pointer_info(ptr, &info, NULL, NULL, NULL);
73 if (status != HSA_STATUS_SUCCESS) {
74 ucm_warn("hsa_amd_pointer_info(dptr=%p) failed", ptr);
75 size = 1; /* set minimum length */
76 }
77 else {
78 size = info.sizeInBytes;
79 }
80
81 status = hsa_agent_get_info(info.agentOwner, HSA_AGENT_INFO_DEVICE, &dev_type);
82 if (status == HSA_STATUS_SUCCESS) {
83 if (info.type != HSA_EXT_POINTER_TYPE_HSA) {
84 ucm_warn("ucm free non HSA managed memory %p", ptr);
85 return;
86 }
87
88 if (dev_type != HSA_DEVICE_TYPE_GPU) {
89 mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED;
90 }
91 }
92
93 ucm_dispatch_mem_type_free(ptr, size, mem_type);
94 }
95
ucm_hsa_amd_memory_pool_free(void * ptr)96 hsa_status_t ucm_hsa_amd_memory_pool_free(void* ptr)
97 {
98 hsa_status_t status;
99
100 ucm_event_enter();
101
102 ucm_trace("ucm_hsa_amd_memory_pool_free(ptr=%p)", ptr);
103
104 ucm_hsa_amd_memory_pool_free_dispatch_events(ptr);
105
106 status = ucm_orig_hsa_amd_memory_pool_free(ptr);
107
108 ucm_event_leave();
109 return status;
110 }
111
ucm_hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool,size_t size,uint32_t flags,void ** ptr)112 hsa_status_t ucm_hsa_amd_memory_pool_allocate(
113 hsa_amd_memory_pool_t memory_pool, size_t size,
114 uint32_t flags, void** ptr)
115 {
116 ucs_memory_type_t type = UCS_MEMORY_TYPE_ROCM;
117 uint32_t pool_flags = 0;
118 hsa_status_t status;
119
120 status = hsa_amd_memory_pool_get_info(memory_pool,
121 HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS,
122 &pool_flags);
123 if (status == HSA_STATUS_SUCCESS &&
124 !(pool_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)) {
125 type = UCS_MEMORY_TYPE_ROCM_MANAGED;
126 }
127
128 ucm_event_enter();
129
130 status = ucm_orig_hsa_amd_memory_pool_allocate(memory_pool, size, flags, ptr);
131 if (status == HSA_STATUS_SUCCESS) {
132 ucm_trace("ucm_hsa_amd_memory_pool_allocate(ptr=%p size:%lu)", *ptr, size);
133 ucm_dispatch_mem_type_alloc(*ptr, size, type);
134 }
135
136 ucm_event_leave();
137 return status;
138 }
139
140 static ucm_reloc_patch_t patches[] = {
141 {UCS_PP_MAKE_STRING(hsa_amd_memory_pool_allocate),
142 ucm_override_hsa_amd_memory_pool_allocate},
143 {UCS_PP_MAKE_STRING(hsa_amd_memory_pool_free),
144 ucm_override_hsa_amd_memory_pool_free},
145 {NULL, NULL}
146 };
147
ucm_rocmmem_install(int events)148 static ucs_status_t ucm_rocmmem_install(int events)
149 {
150 static int ucm_rocmmem_installed = 0;
151 static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER;
152 ucm_reloc_patch_t *patch;
153 ucs_status_t status = UCS_OK;
154
155 if (!(events & (UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE))) {
156 goto out;
157 }
158
159 /* TODO: check mem reloc */
160
161 pthread_mutex_lock(&install_mutex);
162
163 if (ucm_rocmmem_installed) {
164 goto out_unlock;
165 }
166
167 for (patch = patches; patch->symbol != NULL; ++patch) {
168 status = ucm_reloc_modify(patch);
169 if (status != UCS_OK) {
170 ucm_warn("failed to install relocation table entry for '%s'", patch->symbol);
171 goto out_unlock;
172 }
173 }
174
175 ucm_debug("rocm hooks are ready");
176 ucm_rocmmem_installed = 1;
177
178 out_unlock:
179 pthread_mutex_unlock(&install_mutex);
180 out:
181 return status;
182 }
183
/*
 * get_existing_alloc callback of the ROCm installer. Intentionally empty:
 * this implementation reports nothing to the handler.
 */
static void ucm_rocmmem_get_existing_alloc(ucm_event_handler_t *handler)
{
    /* no-op */
}
187
188 static ucm_event_installer_t ucm_rocm_initializer = {
189 .install = ucm_rocmmem_install,
190 .get_existing_alloc = ucm_rocmmem_get_existing_alloc,
191 .get_mem_type_current_device_info = NULL
192 };
193
194 UCS_STATIC_INIT {
195 ucs_list_add_tail(&ucm_event_installer_list, &ucm_rocm_initializer.list);
196 }
197
198 UCS_STATIC_CLEANUP {
199 ucs_list_del(&ucm_rocm_initializer.list);
200 }
201