1 /*
2  * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
3  * See file LICENSE for terms.
4  */
5 
6 #ifdef HAVE_CONFIG_H
7 #  include "config.h"
8 #endif
9 
10 #include <ucm/rocm/rocmmem.h>
11 
12 #include <ucm/event/event.h>
13 #include <ucm/util/log.h>
14 #include <ucm/util/reloc.h>
15 #include <ucm/util/replace.h>
16 #include <ucs/debug/assert.h>
17 #include <ucs/sys/compiler.h>
18 #include <ucs/sys/preprocessor.h>
19 
20 #include <unistd.h>
21 #include <pthread.h>
22 #include <stdlib.h>
23 #include <string.h>
24 
/*
 * Hook declarations for the two HSA memory pool entry points intercepted by
 * this file. Macro arguments are: the hooked function name, its return type,
 * an error value of that type (presumably returned when the original symbol
 * cannot be resolved - see ucm/util/replace.h), followed by the argument types.
 */
UCM_DEFINE_REPLACE_DLSYM_FUNC(hsa_amd_memory_pool_allocate,
                              hsa_status_t, HSA_STATUS_ERROR,
                              hsa_amd_memory_pool_t, size_t, uint32_t, void **)
UCM_DEFINE_REPLACE_DLSYM_FUNC(hsa_amd_memory_pool_free,
                              hsa_status_t, HSA_STATUS_ERROR,
                              void *)

/* Symbol overrides are emitted only when the build enables them */
#if ENABLE_SYMBOL_OVERRIDE
UCM_OVERRIDE_FUNC(hsa_amd_memory_pool_allocate, hsa_status_t)
UCM_OVERRIDE_FUNC(hsa_amd_memory_pool_free, hsa_status_t)
#endif
35 
36 static UCS_F_ALWAYS_INLINE void
37 ucm_dispatch_mem_type_alloc(void *addr, size_t length, ucs_memory_type_t mem_type)
38 {
39     ucm_event_t event;
40 
41     event.mem_type.address  = addr;
42     event.mem_type.size     = length;
43     event.mem_type.mem_type = mem_type;
44     ucm_event_dispatch(UCM_EVENT_MEM_TYPE_ALLOC, &event);
45 }
46 
47 static UCS_F_ALWAYS_INLINE void
ucm_dispatch_mem_type_free(void * addr,size_t length,ucs_memory_type_t mem_type)48 ucm_dispatch_mem_type_free(void *addr, size_t length, ucs_memory_type_t mem_type)
49 {
50     ucm_event_t event;
51 
52     event.mem_type.address  = addr;
53     event.mem_type.size     = length;
54     event.mem_type.mem_type = mem_type;
55     ucm_event_dispatch(UCM_EVENT_MEM_TYPE_FREE, &event);
56 }
57 
ucm_hsa_amd_memory_pool_free_dispatch_events(void * ptr)58 static void ucm_hsa_amd_memory_pool_free_dispatch_events(void *ptr)
59 {
60     size_t size;
61     hsa_status_t status;
62     hsa_device_type_t dev_type;
63     ucs_memory_type_t mem_type = UCS_MEMORY_TYPE_ROCM;
64     hsa_amd_pointer_info_t info = {
65         .size = sizeof(hsa_amd_pointer_info_t),
66     };
67 
68     if (ptr == NULL) {
69         return;
70     }
71 
72     status = hsa_amd_pointer_info(ptr, &info, NULL, NULL, NULL);
73     if (status != HSA_STATUS_SUCCESS) {
74         ucm_warn("hsa_amd_pointer_info(dptr=%p) failed", ptr);
75         size = 1; /* set minimum length */
76     }
77     else {
78         size = info.sizeInBytes;
79     }
80 
81     status = hsa_agent_get_info(info.agentOwner, HSA_AGENT_INFO_DEVICE, &dev_type);
82     if (status == HSA_STATUS_SUCCESS) {
83         if (info.type != HSA_EXT_POINTER_TYPE_HSA) {
84             ucm_warn("ucm free non HSA managed memory %p", ptr);
85             return;
86         }
87 
88         if (dev_type != HSA_DEVICE_TYPE_GPU) {
89             mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED;
90         }
91     }
92 
93     ucm_dispatch_mem_type_free(ptr, size, mem_type);
94 }
95 
ucm_hsa_amd_memory_pool_free(void * ptr)96 hsa_status_t ucm_hsa_amd_memory_pool_free(void* ptr)
97 {
98     hsa_status_t status;
99 
100     ucm_event_enter();
101 
102     ucm_trace("ucm_hsa_amd_memory_pool_free(ptr=%p)", ptr);
103 
104     ucm_hsa_amd_memory_pool_free_dispatch_events(ptr);
105 
106     status = ucm_orig_hsa_amd_memory_pool_free(ptr);
107 
108     ucm_event_leave();
109     return status;
110 }
111 
ucm_hsa_amd_memory_pool_allocate(hsa_amd_memory_pool_t memory_pool,size_t size,uint32_t flags,void ** ptr)112 hsa_status_t ucm_hsa_amd_memory_pool_allocate(
113     hsa_amd_memory_pool_t memory_pool, size_t size,
114     uint32_t flags, void** ptr)
115 {
116     ucs_memory_type_t type = UCS_MEMORY_TYPE_ROCM;
117     uint32_t pool_flags    = 0;
118     hsa_status_t status;
119 
120     status = hsa_amd_memory_pool_get_info(memory_pool,
121                                           HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS,
122                                           &pool_flags);
123     if (status == HSA_STATUS_SUCCESS &&
124         !(pool_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)) {
125         type = UCS_MEMORY_TYPE_ROCM_MANAGED;
126     }
127 
128     ucm_event_enter();
129 
130     status = ucm_orig_hsa_amd_memory_pool_allocate(memory_pool, size, flags, ptr);
131     if (status == HSA_STATUS_SUCCESS) {
132         ucm_trace("ucm_hsa_amd_memory_pool_allocate(ptr=%p size:%lu)", *ptr, size);
133         ucm_dispatch_mem_type_alloc(*ptr, size, type);
134     }
135 
136     ucm_event_leave();
137     return status;
138 }
139 
140 static ucm_reloc_patch_t patches[] = {
141     {UCS_PP_MAKE_STRING(hsa_amd_memory_pool_allocate),
142      ucm_override_hsa_amd_memory_pool_allocate},
143     {UCS_PP_MAKE_STRING(hsa_amd_memory_pool_free),
144      ucm_override_hsa_amd_memory_pool_free},
145     {NULL, NULL}
146 };
147 
ucm_rocmmem_install(int events)148 static ucs_status_t ucm_rocmmem_install(int events)
149 {
150     static int ucm_rocmmem_installed = 0;
151     static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER;
152     ucm_reloc_patch_t *patch;
153     ucs_status_t status = UCS_OK;
154 
155     if (!(events & (UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE))) {
156         goto out;
157     }
158 
159     /* TODO: check mem reloc */
160 
161     pthread_mutex_lock(&install_mutex);
162 
163     if (ucm_rocmmem_installed) {
164         goto out_unlock;
165     }
166 
167     for (patch = patches; patch->symbol != NULL; ++patch) {
168         status = ucm_reloc_modify(patch);
169         if (status != UCS_OK) {
170             ucm_warn("failed to install relocation table entry for '%s'", patch->symbol);
171             goto out_unlock;
172         }
173     }
174 
175     ucm_debug("rocm hooks are ready");
176     ucm_rocmmem_installed = 1;
177 
178 out_unlock:
179     pthread_mutex_unlock(&install_mutex);
180 out:
181     return status;
182 }
183 
/*
 * get_existing_alloc callback of the ROCm event installer.
 * Currently a no-op: nothing is reported to @a handler.
 */
static void ucm_rocmmem_get_existing_alloc(ucm_event_handler_t *handler)
{
    /* nothing to report */
}
187 
188 static ucm_event_installer_t ucm_rocm_initializer = {
189     .install                          = ucm_rocmmem_install,
190     .get_existing_alloc               = ucm_rocmmem_get_existing_alloc,
191     .get_mem_type_current_device_info = NULL
192 };
193 
194 UCS_STATIC_INIT {
195     ucs_list_add_tail(&ucm_event_installer_list, &ucm_rocm_initializer.list);
196 }
197 
198 UCS_STATIC_CLEANUP {
199     ucs_list_del(&ucm_rocm_initializer.list);
200 }
201