1 /**
2  * Copyright (C) Mellanox Technologies Ltd. 2018.  ALL RIGHTS RESERVED.
3  * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
4  * See file LICENSE for terms.
5  */
6 
7 #ifdef HAVE_CONFIG_H
8 #  include "config.h"
9 #endif
10 
11 #include "cuda_md.h"
12 
13 #include <ucs/sys/module.h>
14 #include <ucs/profile/profile.h>
15 #include <ucs/debug/log.h>
16 #include <cuda_runtime.h>
17 #include <cuda.h>
18 
19 
20 UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_base_detect_memory_type,
21                  (md, addr, length, mem_type_p),
22                  uct_md_h md, const void *addr, size_t length,
23                  ucs_memory_type_t *mem_type_p)
24 {
25     CUmemorytype memType = (CUmemorytype)0;
26     uint32_t isManaged   = 0;
27     unsigned value       = 1;
28     void *attrdata[] = {(void *)&memType, (void *)&isManaged};
29     CUpointer_attribute attributes[2] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
30                                          CU_POINTER_ATTRIBUTE_IS_MANAGED};
31     CUresult cu_err;
32     const char *cu_err_str;
33 
34     if (addr == NULL) {
35         *mem_type_p = UCS_MEMORY_TYPE_HOST;
36         return UCS_OK;
37     }
38 
39     cu_err = cuPointerGetAttributes(2, attributes, attrdata, (CUdeviceptr)addr);
40     if ((cu_err == CUDA_SUCCESS) && (memType == CU_MEMORYTYPE_DEVICE)) {
41         if (isManaged) {
42             *mem_type_p = UCS_MEMORY_TYPE_CUDA_MANAGED;
43         } else {
44             *mem_type_p = UCS_MEMORY_TYPE_CUDA;
45             cu_err = cuPointerSetAttribute(&value, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
46                                            (CUdeviceptr)addr);
47             if (cu_err != CUDA_SUCCESS) {
48                 cuGetErrorString(cu_err, &cu_err_str);
49                 ucs_warn("cuPointerSetAttribute(%p) error: %s", (void*) addr, cu_err_str);
50             }
51         }
52         return UCS_OK;
53     }
54 
55     return UCS_ERR_INVALID_ADDR;
56 }
57 
58 ucs_status_t
uct_cuda_base_query_md_resources(uct_component_t * component,uct_md_resource_desc_t ** resources_p,unsigned * num_resources_p)59 uct_cuda_base_query_md_resources(uct_component_t *component,
60                                  uct_md_resource_desc_t **resources_p,
61                                  unsigned *num_resources_p)
62 {
63     cudaError_t cudaErr;
64     int num_gpus;
65 
66     cudaErr = cudaGetDeviceCount(&num_gpus);
67     if ((cudaErr != cudaSuccess) || (num_gpus == 0)) {
68         return uct_md_query_empty_md_resource(resources_p, num_resources_p);
69     }
70 
71     return uct_md_query_single_md_resource(component, resources_p,
72                                            num_resources_p);
73 }
74 
UCS_MODULE_INIT()75 UCS_MODULE_INIT() {
76     /* TODO make gdrcopy independent of cuda */
77     UCS_MODULE_FRAMEWORK_DECLARE(uct_cuda);
78     UCS_MODULE_FRAMEWORK_LOAD(uct_cuda, 0);
79     return UCS_OK;
80 }
81