1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #ifndef YAKSURI_CUDAI_H_INCLUDED
7 #define YAKSURI_CUDAI_H_INCLUDED
8 
9 #include "yaksi.h"
10 #include <stdint.h>
11 #include <pthread.h>
12 #include <cuda_runtime_api.h>
13 
/*
 * Distinct integer codes for a CUDA peer-to-peer setting.  Where the
 * setting is chosen and consumed is not visible in this header; judging
 * by the names, they mean "enabled", "disabled" and "cliques" -- confirm
 * against the code that reads them.
 */
#define CUDA_P2P_ENABLED (1)
#define CUDA_P2P_DISABLED (2)
#define CUDA_P2P_CLIQUES (3)
17 
18 /* *INDENT-OFF* */
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 /* *INDENT-ON* */
23 
24 #include <yaksuri_cudai_base.h>
25 
/* Alias for NULL; presumably marks a missing pack/unpack kernel -- confirm
 * against the code that uses it. */
#define YAKSURI_KERNEL_NULL NULL
27 
/*
 * Check the result of a CUDA runtime call and jump to an error label on
 * failure.
 *
 *   cerr    - expression yielding a cudaError_t.  It is evaluated exactly
 *             once, so passing the CUDA call itself is safe.
 *   rc      - lvalue that is set to YAKSA_ERR__INTERNAL on failure.
 *   fn_fail - label in the calling function that is jumped to on failure.
 *
 * On failure a diagnostic containing the calling function, file, line and
 * the cudaGetErrorString() text is written to stderr.  On success nothing
 * is printed and rc is left untouched.
 */
#define YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail)            \
    do {                                                                \
        /* capture once: the value is used for both test and message */ \
        cudaError_t yaksuri_cudai_cerr_ = (cerr);                       \
        if (yaksuri_cudai_cerr_ != cudaSuccess) {                       \
            fprintf(stderr, "CUDA Error (%s:%s,%d): %s\n", __func__, __FILE__, __LINE__, \
                    cudaGetErrorString(yaksuri_cudai_cerr_));           \
            (rc) = YAKSA_ERR__INTERNAL;                                 \
            goto fn_fail;                                               \
        }                                                               \
    } while (0)
36 
37 typedef struct yaksuri_cudai_type_s {
38     void (*pack) (const void *inbuf, void *outbuf, uintptr_t count, yaksuri_cudai_md_s * md,
39                   int n_threads, int n_blocks_x, int n_blocks_y, int n_blocks_z, int device);
40     void (*unpack) (const void *inbuf, void *outbuf, uintptr_t count, yaksuri_cudai_md_s * md,
41                     int n_threads, int n_blocks_x, int n_blocks_y, int n_blocks_z, int device);
42     yaksuri_cudai_md_s *md;
43     pthread_mutex_t mdmutex;
44     uintptr_t num_elements;
45 } yaksuri_cudai_type_s;
46 
/*
 * Default threshold value (16384).  Presumably the initial value of the
 * iov_pack_threshold / iov_unpack_threshold info fields -- confirm against
 * the info create hook.
 */
#define YAKSURI_CUDAI_INFO__DEFAULT_IOV_PUP_THRESHOLD (16 * 1024)
48 
49 typedef struct {
50     uintptr_t iov_pack_threshold;
51     uintptr_t iov_unpack_threshold;
52     struct {
53         bool is_valid;
54         struct cudaPointerAttributes attr;
55     } inbuf, outbuf;
56 } yaksuri_cudai_info_s;
57 
58 typedef struct {
59     cudaEvent_t event;
60 } yaksuri_cudai_event_s;
61 
62 int yaksuri_cudai_finalize_hook(void);
63 int yaksuri_cudai_type_create_hook(yaksi_type_s * type);
64 int yaksuri_cudai_type_free_hook(yaksi_type_s * type);
65 int yaksuri_cudai_info_create_hook(yaksi_info_s * info);
66 int yaksuri_cudai_info_free_hook(yaksi_info_s * info);
67 int yaksuri_cudai_info_keyval_append(yaksi_info_s * info, const char *key, const void *val,
68                                      unsigned int vallen);
69 
/*
 * Event helpers.  Each returns an int, presumably a YAKSA status code --
 * confirm against the implementations.
 */

/* Takes a device number; the event is handed back through an opaque
 * void pointer. */
int yaksuri_cudai_event_record(int device, void **event);

/* Takes an opaque event pointer; the result is written to *completed. */
int yaksuri_cudai_event_query(void *event, int *completed);

/* Takes two device numbers; which one waits on which is defined by the
 * implementation (not visible in this header). */
int yaksuri_cudai_add_dependency(int device1, int device2);
73 
74 int yaksuri_cudai_get_ptr_attr(const void *inbuf, void *outbuf, yaksi_info_s * info,
75                                yaksur_ptr_attr_s * inattr, yaksur_ptr_attr_s * outattr);
76 
77 int yaksuri_cudai_md_alloc(yaksi_type_s * type);
78 int yaksuri_cudai_populate_pupfns(yaksi_type_s * type);
79 
80 int yaksuri_cudai_ipack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type,
81                         yaksi_info_s * info, int target);
82 int yaksuri_cudai_iunpack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type,
83                           yaksi_info_s * info, int target);
84 int yaksuri_cudai_pup_is_supported(yaksi_type_s * type, bool * is_supported);
85 uintptr_t yaksuri_cudai_get_iov_pack_threshold(yaksi_info_s * info);
86 uintptr_t yaksuri_cudai_get_iov_unpack_threshold(yaksi_info_s * info);
87 
88 /* *INDENT-OFF* */
89 #ifdef __cplusplus
90 }
91 #endif
92 /* *INDENT-ON* */
93 
94 #endif /* YAKSURI_CUDAI_H_INCLUDED */
95