1 /*
2  * QEMU paravirtual RDMA - Resource Manager Implementation
3  *
4  * Copyright (C) 2018 Oracle
5  * Copyright (C) 2018 Red Hat Inc
6  *
7  * Authors:
8  *     Yuval Shaia <yuval.shaia@oracle.com>
9  *     Marcel Apfelbaum <marcel@redhat.com>
10  *
11  * This work is licensed under the terms of the GNU GPL, version 2 or later.
12  * See the COPYING file in the top-level directory.
13  *
14  */
15 
16 #include "qemu/osdep.h"
17 #include "qapi/error.h"
18 #include "cpu.h"
19 #include "monitor/monitor.h"
20 
21 #include "trace.h"
22 #include "rdma_utils.h"
23 #include "rdma_backend.h"
24 #include "rdma_rm.h"
25 
/*
 * Page directory and page tables: number of __u64 entries that fit in one
 * target page.  The expansions are parenthesised so the macros can be used
 * inside larger expressions.
 */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
29 
/*
 * Print the device statistics counters, followed by the number of entries
 * currently in use in the PD, MR, UC, QP, CQ and CQE-context tables, to the
 * given monitor.  Each value is printed on its own "label : value" line.
 */
void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
{
/* Emit one line: the label, then the value using conversion 'pri'. */
#define DUMP_COUNTER(label, pri, value) \
    monitor_printf(mon, label "%" pri "\n", (value))

    /* Data path counters */
    DUMP_COUNTER("\ttx               : ", PRId64, dev_res->stats.tx);
    DUMP_COUNTER("\ttx_len           : ", PRId64, dev_res->stats.tx_len);
    DUMP_COUNTER("\ttx_err           : ", PRId64, dev_res->stats.tx_err);
    DUMP_COUNTER("\trx_bufs          : ", PRId64, dev_res->stats.rx_bufs);
    DUMP_COUNTER("\trx_srq           : ", PRId64, dev_res->stats.rx_srq);
    DUMP_COUNTER("\trx_bufs_len      : ", PRId64, dev_res->stats.rx_bufs_len);
    DUMP_COUNTER("\trx_bufs_err      : ", PRId64, dev_res->stats.rx_bufs_err);

    /* Completion handling counters */
    DUMP_COUNTER("\tcomps            : ", PRId64, dev_res->stats.completions);
    DUMP_COUNTER("\tmissing_comps    : ", PRId32, dev_res->stats.missing_cqe);
    DUMP_COUNTER("\tpoll_cq (bk)     : ", PRId64,
                 dev_res->stats.poll_cq_from_bk);
    DUMP_COUNTER("\tpoll_cq_ppoll_to : ", PRId64,
                 dev_res->stats.poll_cq_ppoll_to);
    DUMP_COUNTER("\tpoll_cq (fe)     : ", PRId64,
                 dev_res->stats.poll_cq_from_guest);
    DUMP_COUNTER("\tpoll_cq_empty    : ", PRId64,
                 dev_res->stats.poll_cq_from_guest_empty);

    /* MAD counters */
    DUMP_COUNTER("\tmad_tx           : ", PRId64, dev_res->stats.mad_tx);
    DUMP_COUNTER("\tmad_tx_err       : ", PRId64, dev_res->stats.mad_tx_err);
    DUMP_COUNTER("\tmad_rx           : ", PRId64, dev_res->stats.mad_rx);
    DUMP_COUNTER("\tmad_rx_err       : ", PRId64, dev_res->stats.mad_rx_err);
    DUMP_COUNTER("\tmad_rx_bufs      : ", PRId64, dev_res->stats.mad_rx_bufs);
    DUMP_COUNTER("\tmad_rx_bufs_err  : ", PRId64,
                 dev_res->stats.mad_rx_bufs_err);

    /* Resource table occupancy */
    DUMP_COUNTER("\tPDs              : ", PRId32, dev_res->pd_tbl.used);
    DUMP_COUNTER("\tMRs              : ", PRId32, dev_res->mr_tbl.used);
    DUMP_COUNTER("\tUCs              : ", PRId32, dev_res->uc_tbl.used);
    DUMP_COUNTER("\tQPs              : ", PRId32, dev_res->qp_tbl.used);
    DUMP_COUNTER("\tCQs              : ", PRId32, dev_res->cq_tbl.used);
    DUMP_COUNTER("\tCEQ_CTXs         : ", PRId32, dev_res->cqe_ctx_tbl.used);

#undef DUMP_COUNTER
}
83 
/*
 * Initialise a resource table that can hold up to tbl_sz entries of res_sz
 * bytes each.
 *
 * @name:   table name, copied into tbl->name (truncated if needed so that it
 *          fits MAX_RM_TBL_NAME bytes including the terminating NUL)
 * @tbl:    table to initialise
 * @tbl_sz: number of entries
 * @res_sz: size in bytes of one entry
 *
 * Allocates the entry storage and the allocation bitmap, resets the usage
 * counter and initialises the table lock.
 */
static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
                                uint32_t tbl_sz, uint32_t res_sz)
{
    /*
     * Both factors are 32 bits wide, so "tbl_sz * res_sz" would be evaluated
     * in 32 bits and could silently wrap, leaving a buffer smaller than the
     * table it backs.  g_malloc_n() performs the multiplication with an
     * overflow check instead.
     */
    tbl->tbl = g_malloc_n(tbl_sz, res_sz);

    /* Bounded copy that always NUL-terminates the destination */
    g_strlcpy(tbl->name, name, MAX_RM_TBL_NAME);

    tbl->bitmap = bitmap_new(tbl_sz);
    tbl->tbl_sz = tbl_sz;
    tbl->res_sz = res_sz;
    tbl->used = 0;
    qemu_mutex_init(&tbl->lock);
}
98 
/*
 * Release the lock, entry storage and bitmap of a resource table.
 *
 * A table whose bitmap pointer is NULL is treated as not initialised (or
 * already freed) and is left untouched.  The pointers are cleared after
 * being freed so that this check also holds on a repeated call, instead of
 * freeing the same memory twice.
 */
static inline void res_tbl_free(RdmaRmResTbl *tbl)
{
    if (!tbl->bitmap) {
        return;
    }

    qemu_mutex_destroy(&tbl->lock);

    g_free(tbl->tbl);
    tbl->tbl = NULL;

    g_free(tbl->bitmap);
    tbl->bitmap = NULL;
}
108 
/*
 * Look up the entry identified by handle.
 *
 * Returns a pointer to the entry when handle is within the table and its
 * bit is set in the allocation bitmap; otherwise reports an error and
 * returns NULL.
 */
static inline void *rdma_res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
{
    trace_rdma_res_tbl_get(tbl->name, handle);

    if (handle >= tbl->tbl_sz || !test_bit(handle, tbl->bitmap)) {
        /* handle is unsigned; print it as such so large values stay legible */
        rdma_error_report("Table %s, invalid handle %u", tbl->name, handle);
        return NULL;
    }

    /* Compute the byte offset in size_t so it cannot wrap at 32 bits */
    return (char *)tbl->tbl + (size_t)handle * tbl->res_sz;
}
120 
/*
 * Allocate a free entry from the table.
 *
 * On success the index of the entry is stored in *handle, the entry is
 * zeroed and a pointer to it is returned.  When no entry is free an error is
 * reported and NULL is returned; *handle then holds an out-of-range value
 * and must not be used.
 *
 * The bitmap search, bit update and usage counter update are done under the
 * table lock; zeroing the entry is done after the lock is dropped.
 */
static inline void *rdma_res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
{
    void *res;

    qemu_mutex_lock(&tbl->lock);

    /*
     * find_first_zero_bit() returns the bitmap size when every bit is set,
     * so a full table yields exactly tbl_sz.  The check must therefore be
     * ">=": with ">" a full table would go on to set a bit and zero memory
     * one entry past the end of the table.
     */
    *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
    if (*handle >= tbl->tbl_sz) {
        rdma_error_report("Table %s, failed to allocate, bitmap is full",
                          tbl->name);
        qemu_mutex_unlock(&tbl->lock);
        return NULL;
    }

    set_bit(*handle, tbl->bitmap);

    tbl->used++;

    qemu_mutex_unlock(&tbl->lock);

    /* Compute the byte offset in size_t so it cannot wrap at 32 bits */
    res = (char *)tbl->tbl + (size_t)*handle * tbl->res_sz;
    memset(res, 0, tbl->res_sz);

    trace_rdma_res_tbl_alloc(tbl->name, *handle);

    return res;
}
145 
/*
 * Return the entry identified by handle to the table.
 *
 * Out-of-range handles and handles that are not currently allocated are
 * ignored.  Only an entry whose bit is actually set is released, so the
 * usage counter cannot be decremented twice for the same entry or
 * underflow on a stray handle.
 */
static inline void rdma_res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
{
    trace_rdma_res_tbl_dealloc(tbl->name, handle);

    qemu_mutex_lock(&tbl->lock);

    if (handle < tbl->tbl_sz && test_bit(handle, tbl->bitmap)) {
        clear_bit(handle, tbl->bitmap);
        tbl->used--;
    }

    qemu_mutex_unlock(&tbl->lock);
}
159 
/*
 * Allocate a PD entry and create its backend counterpart.
 *
 * On success stores the new handle in *pd_handle, records ctx_handle in the
 * entry and returns 0.  Returns -ENOMEM when the PD table has no free entry
 * and -EIO when the backend call fails (the table entry is released again in
 * that case).
 */
int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t *pd_handle, uint32_t ctx_handle)
{
    RdmaRmPD *new_pd;
    int rc;

    new_pd = rdma_res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
    if (new_pd == NULL) {
        return -ENOMEM;
    }

    rc = rdma_backend_create_pd(backend_dev, &new_pd->backend_pd);
    if (rc != 0) {
        /* Undo the table allocation before reporting the failure */
        rdma_res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);
        return -EIO;
    }

    new_pd->ctx_handle = ctx_handle;

    return 0;
}
187 
rdma_rm_get_pd(RdmaDeviceResources * dev_res,uint32_t pd_handle)188 RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
189 {
190     return rdma_res_tbl_get(&dev_res->pd_tbl, pd_handle);
191 }
192 
/*
 * Destroy the backend PD behind pd_handle and release its table entry.
 * Does nothing when the handle does not resolve to a PD.
 */
void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    RdmaRmPD *victim = rdma_rm_get_pd(dev_res, pd_handle);

    if (victim == NULL) {
        return;
    }

    rdma_backend_destroy_pd(&victim->backend_pd);
    rdma_res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
}
202 
/*
 * Allocate an MR entry that belongs to the PD identified by pd_handle.
 *
 * When host_virt is non-NULL the region is also registered with the backend
 * and *lkey is filled in; when it is NULL no backend MR is created and *lkey
 * is left untouched.  *rkey is always set to -1 on success.
 *
 * Returns 0 on success, -EINVAL for an invalid pd_handle, -ENOMEM when the
 * MR table has no free entry and -EIO when the backend registration fails
 * (the table entry is released again in that case).
 */
int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint64_t guest_start, uint64_t guest_length,
                     void *host_virt, int access_flags, uint32_t *mr_handle,
                     uint32_t *lkey, uint32_t *rkey)
{
    RdmaRmPD *owner_pd;
    RdmaRmMR *new_mr;
    int rc;

    owner_pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (owner_pd == NULL) {
        return -EINVAL;
    }

    new_mr = rdma_res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
    if (new_mr == NULL) {
        return -ENOMEM;
    }
    trace_rdma_rm_alloc_mr(*mr_handle, host_virt, guest_start, guest_length,
                           access_flags);

    if (host_virt != NULL) {
        new_mr->start = guest_start;
        new_mr->length = guest_length;

        /* Advance the host pointer by the guest address's in-page offset */
        new_mr->virt = host_virt;
        new_mr->virt += (new_mr->start & (TARGET_PAGE_SIZE - 1));

        rc = rdma_backend_create_mr(&new_mr->backend_mr, &owner_pd->backend_pd,
                                    new_mr->virt, new_mr->length, guest_start,
                                    access_flags);
        if (rc != 0) {
            rdma_res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);
            return -EIO;
        }
#ifdef LEGACY_RDMA_REG_MR
        /* The MR handle doubles as lkey so send and recv can find the MR */
        *lkey = *mr_handle;
#else
        *lkey = rdma_backend_mr_lkey(&new_mr->backend_mr);
#endif
    }

    *rkey = -1;

    new_mr->pd_handle = pd_handle;

    return 0;
}
255 
rdma_rm_get_mr(RdmaDeviceResources * dev_res,uint32_t mr_handle)256 RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
257 {
258     return rdma_res_tbl_get(&dev_res->mr_tbl, mr_handle);
259 }
260 
/*
 * Destroy the backend MR behind mr_handle and release its table entry.
 *
 * When the MR has a non-zero start address, the in-page offset that was
 * added to mr->virt is removed again and the mapping of mr->length bytes is
 * unmapped.  Does nothing when the handle does not resolve to an MR.
 */
void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    RdmaRmMR *victim = rdma_rm_get_mr(dev_res, mr_handle);

    if (victim == NULL) {
        return;
    }

    rdma_backend_destroy_mr(&victim->backend_mr);
    trace_rdma_rm_dealloc_mr(mr_handle, victim->start);

    if (victim->start != 0) {
        victim->virt -= (victim->start & (TARGET_PAGE_SIZE - 1));
        munmap(victim->virt, victim->length);
    }

    rdma_res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
}
275 
/*
 * Allocate a UC entry and store its handle in *uc_handle.
 *
 * Returns 0 on success or -ENOMEM when the UC table has no free entry.
 *
 * TODO: pfn is currently not validated.  It needs to be checked to lie
 * between the BAR start address and start + RDMA_BAR2_UAR_SIZE.
 */
int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
                     uint32_t *uc_handle)
{
    RdmaRmUC *new_uc = rdma_res_tbl_alloc(&dev_res->uc_tbl, uc_handle);

    return new_uc ? 0 : -ENOMEM;
}
297 
rdma_rm_get_uc(RdmaDeviceResources * dev_res,uint32_t uc_handle)298 RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
299 {
300     return rdma_res_tbl_get(&dev_res->uc_tbl, uc_handle);
301 }
302 
/*
 * Release the UC table entry behind uc_handle.
 * Does nothing when the handle does not resolve to a UC.
 */
void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    if (rdma_rm_get_uc(dev_res, uc_handle) == NULL) {
        return;
    }

    rdma_res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
}
311 
rdma_rm_get_cq(RdmaDeviceResources * dev_res,uint32_t cq_handle)312 RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
313 {
314     return rdma_res_tbl_get(&dev_res->cq_tbl, cq_handle);
315 }
316 
/*
 * Allocate a CQ entry and create its backend counterpart with room for cqe
 * entries.
 *
 * The new entry stores opaque and starts with notification state CNT_CLEAR.
 * On success the handle is stored in *cq_handle and 0 is returned.  Returns
 * -ENOMEM when the CQ table has no free entry and -EIO when the backend call
 * fails; in the latter case the entry is torn down via rdma_rm_dealloc_cq().
 */
int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t cqe, uint32_t *cq_handle, void *opaque)
{
    RdmaRmCQ *new_cq;
    int rc;

    new_cq = rdma_res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
    if (new_cq == NULL) {
        return -ENOMEM;
    }

    new_cq->opaque = opaque;
    new_cq->notify = CNT_CLEAR;

    rc = rdma_backend_create_cq(backend_dev, &new_cq->backend_cq, cqe);
    if (rc == 0) {
        return 0;
    }

    rdma_rm_dealloc_cq(dev_res, *cq_handle);

    return -EIO;
}
344 
/*
 * Update the notification state of the CQ behind cq_handle: CNT_ARM when
 * notify is true, CNT_CLEAR otherwise.
 *
 * A CQ whose state is CNT_SET is left unchanged, and an invalid handle is
 * ignored.
 */
void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
                           bool notify)
{
    RdmaRmCQ *target = rdma_rm_get_cq(dev_res, cq_handle);

    if (target == NULL || target->notify == CNT_SET) {
        return;
    }

    if (notify) {
        target->notify = CNT_ARM;
    } else {
        target->notify = CNT_CLEAR;
    }
}
359 
/*
 * Destroy the backend CQ behind cq_handle and release its table entry.
 * Does nothing when the handle does not resolve to a CQ.
 */
void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *victim = rdma_rm_get_cq(dev_res, cq_handle);

    if (victim != NULL) {
        rdma_backend_destroy_cq(&victim->backend_cq);
        rdma_res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
    }
}
373 
rdma_rm_get_qp(RdmaDeviceResources * dev_res,uint32_t qpn)374 RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
375 {
376     GBytes *key = g_bytes_new(&qpn, sizeof(qpn));
377 
378     RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);
379 
380     g_bytes_unref(key);
381 
382     if (!qp) {
383         rdma_error_report("Invalid QP handle %d", qpn);
384     }
385 
386     return qp;
387 }
388 
/*
 * Allocate a QP entry, create its backend counterpart and register it in
 * the QP hash table under the backend QP number.
 *
 * The PD, both CQs and (when is_srq is set) the SRQ are resolved from their
 * handles first.  For an SRQ-attached QP the SRQ records recv_cq_handle; for
 * a GSI QP both CQs are switched to notification state CNT_SET.  The new QP
 * starts in state IBV_QPS_RESET.
 *
 * On success the backend QP number is stored in *qpn and 0 is returned.
 * Returns -EINVAL for an invalid PD, CQ or SRQ handle, -ENOMEM when the QP
 * table has no free entry and -EIO when the backend call fails (the table
 * entry is released again in that case).
 */
int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
                     uint8_t is_srq, uint32_t srq_handle)
{
    RdmaRmPD *owner_pd;
    RdmaRmCQ *send_cq, *recv_cq;
    RdmaRmSRQ *shared_rq = NULL;
    RdmaRmQP *new_qp;
    uint32_t tbl_idx;
    int rc;

    owner_pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (owner_pd == NULL) {
        return -EINVAL;
    }

    /* Both lookups are performed before either result is checked */
    send_cq = rdma_rm_get_cq(dev_res, send_cq_handle);
    recv_cq = rdma_rm_get_cq(dev_res, recv_cq_handle);
    if (send_cq == NULL || recv_cq == NULL) {
        rdma_error_report("Invalid send_cqn or recv_cqn (%d, %d)",
                          send_cq_handle, recv_cq_handle);
        return -EINVAL;
    }

    if (is_srq) {
        shared_rq = rdma_rm_get_srq(dev_res, srq_handle);
        if (shared_rq == NULL) {
            rdma_error_report("Invalid srqn %d", srq_handle);
            return -EINVAL;
        }

        shared_rq->recv_cq_handle = recv_cq_handle;
    }

    if (qp_type == IBV_QPT_GSI) {
        send_cq->notify = CNT_SET;
        recv_cq->notify = CNT_SET;
    }

    new_qp = rdma_res_tbl_alloc(&dev_res->qp_tbl, &tbl_idx);
    if (new_qp == NULL) {
        return -ENOMEM;
    }

    new_qp->qpn = tbl_idx;
    new_qp->qp_state = IBV_QPS_RESET;
    new_qp->qp_type = qp_type;
    new_qp->send_cq_handle = send_cq_handle;
    new_qp->recv_cq_handle = recv_cq_handle;
    new_qp->opaque = opaque;
    new_qp->is_srq = is_srq;

    rc = rdma_backend_create_qp(&new_qp->backend_qp, qp_type,
                                &owner_pd->backend_pd,
                                &send_cq->backend_cq, &recv_cq->backend_cq,
                                is_srq ? &shared_rq->backend_srq : NULL,
                                max_send_wr, max_recv_wr, max_send_sge,
                                max_recv_sge);
    if (rc != 0) {
        rdma_res_tbl_dealloc(&dev_res->qp_tbl, new_qp->qpn);
        return -EIO;
    }

    /* Callers refer to the QP by the backend QP number from here on */
    *qpn = rdma_backend_qpn(&new_qp->backend_qp);
    trace_rdma_rm_alloc_qp(tbl_idx, *qpn, qp_type);
    g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)),
                        new_qp);

    return 0;
}
467 
/*
 * Apply a state change to the QP identified by qp_handle.
 *
 * Only the IBV_QP_STATE bit of attr_mask triggers any work: the new state is
 * recorded and, for INIT, RTR and RTS, the matching backend transition is
 * performed.  GSI QPs are accepted without any action; SMI (QP0) requests
 * are rejected.
 *
 * Returns 0 on success, -EINVAL for an invalid qp_handle, -EPERM for an SMI
 * QP and -EIO when the backend GID index cannot be obtained or a backend
 * transition fails.
 */
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
                      union ibv_gid *dgid, uint32_t dqpn,
                      enum ibv_qp_state qp_state, uint32_t qkey,
                      uint32_t rq_psn, uint32_t sq_psn)
{
    RdmaRmQP *qp;
    int ret;

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    if (qp->qp_type == IBV_QPT_SMI) {
        rdma_error_report("Got QP0 request");
        return -EPERM;
    } else if (qp->qp_type == IBV_QPT_GSI) {
        return 0;
    }

    trace_rdma_rm_modify_qp(qp_handle, attr_mask, qp_state, sgid_idx);

    if (!(attr_mask & IBV_QP_STATE)) {
        return 0;
    }

    qp->qp_state = qp_state;

    if (qp->qp_state == IBV_QPS_INIT) {
        ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
                                         qp->qp_type, qkey);
        if (ret) {
            return -EIO;
        }
    }

    if (qp->qp_state == IBV_QPS_RTR) {
        /*
         * Get backend gid index.  The result is kept in an int: storing it
         * back into the uint8_t sgid_idx would turn a negative error code
         * into a positive value that passes the check below.
         */
        int bk_sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
                                                        sgid_idx);

        if (bk_sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */
            rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d",
                              sgid_idx);
            return -EIO;
        }

        ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
                                        qp->qp_type, bk_sgid_idx, dgid, dqpn,
                                        rq_psn, qkey,
                                        attr_mask & IBV_QP_QKEY);
        if (ret) {
            return -EIO;
        }
    }

    if (qp->qp_state == IBV_QPS_RTS) {
        ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
                                        sq_psn, qkey,
                                        attr_mask & IBV_QP_QKEY);
        if (ret) {
            return -EIO;
        }
    }

    return 0;
}
533 
/*
 * Query the backend QP behind qp_handle.
 *
 * Returns -EINVAL for an invalid handle, otherwise whatever
 * rdma_backend_query_qp() returns.
 */
int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t qp_handle, struct ibv_qp_attr *attr,
                     int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    RdmaRmQP *target = rdma_rm_get_qp(dev_res, qp_handle);

    if (target == NULL) {
        return -EINVAL;
    }

    return rdma_backend_query_qp(&target->backend_qp, attr, attr_mask,
                                 init_attr);
}
547 
/*
 * Remove the QP registered under qp_handle from the QP hash table, destroy
 * its backend QP and release its table entry.
 *
 * The hash removal is attempted regardless of whether the lookup found a
 * QP; the rest is skipped when it did not.
 */
void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
{
    GBytes *lookup_key = g_bytes_new(&qp_handle, sizeof(qp_handle));
    RdmaRmQP *victim = g_hash_table_lookup(dev_res->qp_hash, lookup_key);

    g_hash_table_remove(dev_res->qp_hash, lookup_key);
    g_bytes_unref(lookup_key);

    if (victim != NULL) {
        rdma_backend_destroy_qp(&victim->backend_qp, dev_res);
        rdma_res_tbl_dealloc(&dev_res->qp_tbl, victim->qpn);
    }
}
566 
rdma_rm_get_srq(RdmaDeviceResources * dev_res,uint32_t srq_handle)567 RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
568 {
569     return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
570 }
571 
/*
 * Allocate an SRQ entry under the PD identified by pd_handle and create its
 * backend counterpart.
 *
 * On success stores the handle in *srq_handle, records opaque in the entry
 * and returns 0.  Returns -EINVAL for an invalid pd_handle, -ENOMEM when the
 * SRQ table has no free entry and -EIO when the backend call fails (the
 * table entry is released again in that case).
 */
int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
                      uint32_t *srq_handle, void *opaque)
{
    RdmaRmPD *owner_pd;
    RdmaRmSRQ *new_srq;
    int rc;

    owner_pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (owner_pd == NULL) {
        return -EINVAL;
    }

    new_srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
    if (new_srq == NULL) {
        return -ENOMEM;
    }

    rc = rdma_backend_create_srq(&new_srq->backend_srq, &owner_pd->backend_pd,
                                 max_wr, max_sge, srq_limit);
    if (rc != 0) {
        rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);
        return -EIO;
    }

    new_srq->opaque = opaque;

    return 0;
}
606 
/*
 * Query the backend SRQ behind srq_handle into srq_attr.
 *
 * Returns -EINVAL for an invalid handle, otherwise whatever
 * rdma_backend_query_srq() returns.
 */
int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                      struct ibv_srq_attr *srq_attr)
{
    RdmaRmSRQ *target = rdma_rm_get_srq(dev_res, srq_handle);

    if (target == NULL) {
        return -EINVAL;
    }

    return rdma_backend_query_srq(&target->backend_srq, srq_attr);
}
619 
/*
 * Modify the backend SRQ behind srq_handle.
 *
 * A request that selects IBV_SRQ_LIMIT with srq_limit == 0, or
 * IBV_SRQ_MAX_WR with max_wr == 0, is rejected.
 *
 * Returns -EINVAL for an invalid handle or a rejected request, otherwise
 * whatever rdma_backend_modify_srq() returns.
 */
int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                       struct ibv_srq_attr *srq_attr, int srq_attr_mask)
{
    RdmaRmSRQ *target;
    bool zero_limit, zero_max_wr;

    target = rdma_rm_get_srq(dev_res, srq_handle);
    if (target == NULL) {
        return -EINVAL;
    }

    zero_limit = (srq_attr_mask & IBV_SRQ_LIMIT) && srq_attr->srq_limit == 0;
    zero_max_wr = (srq_attr_mask & IBV_SRQ_MAX_WR) && srq_attr->max_wr == 0;
    if (zero_limit || zero_max_wr) {
        return -EINVAL;
    }

    return rdma_backend_modify_srq(&target->backend_srq, srq_attr,
                                   srq_attr_mask);
}
643 
/*
 * Destroy the backend SRQ behind srq_handle and release its table entry.
 * Does nothing when the handle does not resolve to an SRQ.
 */
void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
    RdmaRmSRQ *victim = rdma_rm_get_srq(dev_res, srq_handle);

    if (victim != NULL) {
        rdma_backend_destroy_srq(&victim->backend_srq, dev_res);
        rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
    }
}
656 
/*
 * Return the context pointer stored in the CQE-context table under
 * cqe_ctx_id, or NULL when the id does not resolve to an allocated slot.
 */
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    void **slot = rdma_res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);

    return slot ? *slot : NULL;
}
668 
/*
 * Allocate a slot in the CQE-context table and store ctx in it.
 *
 * On success the slot id is written to *cqe_ctx_id and 0 is returned;
 * returns -ENOMEM when the table has no free slot.
 */
int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                          void *ctx)
{
    void **slot = rdma_res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);

    if (slot == NULL) {
        return -ENOMEM;
    }

    *slot = ctx;

    return 0;
}
683 
/* Release the CQE-context table slot identified by cqe_ctx_id. */
void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    RdmaRmResTbl *ctx_tbl = &dev_res->cqe_ctx_tbl;

    rdma_res_tbl_dealloc(ctx_tbl, cqe_ctx_id);
}
688 
/*
 * Add gid to the backend device and record it in slot gid_idx of the port's
 * GID table.
 *
 * gid_idx is validated against MAX_PORT_GIDS before anything else is done,
 * so an out-of-range index can neither reach the backend nor write outside
 * the GID table.
 *
 * Returns 0 on success, or -EINVAL when gid_idx is out of range or the
 * backend call fails.
 */
int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, union ibv_gid *gid, int gid_idx)
{
    int rc;

    if (gid_idx < 0 || gid_idx >= MAX_PORT_GIDS) {
        rdma_error_report("Got invalid gid_idx %d", gid_idx);
        return -EINVAL;
    }

    rc = rdma_backend_add_gid(backend_dev, ifname, gid);
    if (rc) {
        return -EINVAL;
    }

    memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid));

    return 0;
}
703 
/*
 * Remove the GID stored in slot gid_idx of the port's GID table from the
 * backend device and clear the slot.
 *
 * gid_idx is validated against MAX_PORT_GIDS first so that an out-of-range
 * index cannot read or write outside the GID table.  A slot whose
 * interface_id is zero is treated as empty and left alone.
 *
 * Returns 0 on success or for an empty slot, or -EINVAL when gid_idx is out
 * of range or the backend call fails.
 */
int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, int gid_idx)
{
    int rc;

    if (gid_idx < 0 || gid_idx >= MAX_PORT_GIDS) {
        rdma_error_report("Got invalid gid_idx %d", gid_idx);
        return -EINVAL;
    }

    if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) {
        return 0;
    }

    rc = rdma_backend_del_gid(backend_dev, ifname,
                              &dev_res->port.gid_tbl[gid_idx].gid);
    if (rc) {
        return -EINVAL;
    }

    memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0,
           sizeof(dev_res->port.gid_tbl[gid_idx].gid));
    /* -1 marks the backend index as not resolved */
    dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1;

    return 0;
}
725 
/*
 * Return the backend GID index that corresponds to slot sgid_idx of the
 * port's GID table.
 *
 * The index is cached in the slot: a cached value of -1 means it has not
 * been resolved yet, in which case the backend is asked for it and the
 * answer is stored before being returned.
 *
 * Returns -EINVAL (after reporting an error) when sgid_idx is outside
 * [0, MAX_PORT_GIDS).
 */
int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
                                  RdmaBackendDev *backend_dev, int sgid_idx)
{
    bool idx_in_range = sgid_idx >= 0 && sgid_idx < MAX_PORT_GIDS;

    if (unlikely(!idx_in_range)) {
        rdma_error_report("Got invalid sgid_idx %d", sgid_idx);
        return -EINVAL;
    }

    if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) {
        dev_res->port.gid_tbl[sgid_idx].backend_gid_index =
            rdma_backend_get_gid_index(backend_dev,
                                       &dev_res->port.gid_tbl[sgid_idx].gid);
    }

    return dev_res->port.gid_tbl[sgid_idx].backend_gid_index;
}
742 
/* Key destructor for the QP hash table: drops the reference on a GBytes key. */
static void destroy_qp_hash_key(gpointer data)
{
    GBytes *key = data;

    g_bytes_unref(key);
}
747 
/*
 * Reset the port description: zero it, mark the port as down and flag the
 * backend index of every GID table slot as unresolved (-1).
 */
static void init_ports(RdmaDeviceResources *dev_res)
{
    int slot = 0;

    memset(&dev_res->port, 0, sizeof(dev_res->port));
    dev_res->port.state = IBV_PORT_DOWN;

    while (slot < MAX_PORT_GIDS) {
        dev_res->port.gid_tbl[slot].backend_gid_index = -1;
        slot++;
    }
}
759 
/*
 * Mark the port as down and delete every slot of its GID table via
 * rdma_rm_del_gid().  The per-slot return values are not checked.
 */
static void fini_ports(RdmaDeviceResources *dev_res,
                       RdmaBackendDev *backend_dev, const char *ifname)
{
    int slot = 0;

    dev_res->port.state = IBV_PORT_DOWN;

    while (slot < MAX_PORT_GIDS) {
        rdma_rm_del_gid(dev_res, backend_dev, ifname, slot);
        slot++;
    }
}
770 
/*
 * Initialise the device resources.
 *
 * Creates the QP hash table (GBytes keys, released through
 * destroy_qp_hash_key(); values are not freed by the hash), sizes the
 * resource tables from dev_attr, resets the port description, initialises
 * the device lock and clears the statistics.
 *
 * Returns 0 on success or -ENOMEM when the QP hash table cannot be created.
 */
int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
{
    uint32_t cqe_ctx_cnt;

    dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
                                             destroy_qp_hash_key, NULL);
    if (dev_res->qp_hash == NULL) {
        return -ENOMEM;
    }

    res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
    res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
    res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
    res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));

    /* One context slot per work request of every QP */
    cqe_ctx_cnt = dev_attr->max_qp * dev_attr->max_qp_wr;
    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, cqe_ctx_cnt,
                 sizeof(void *));

    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
    res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
                 sizeof(RdmaRmSRQ));

    init_ports(dev_res);

    qemu_mutex_init(&dev_res->lock);

    memset(&dev_res->stats, 0, sizeof(dev_res->stats));
    atomic_set(&dev_res->stats.missing_cqe, 0);

    return 0;
}
798 
/*
 * Tear down the device resources: destroy the device lock, shut down the
 * port (deleting its GIDs), free every resource table and finally destroy
 * the QP hash table if one exists.
 */
void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                  const char *ifname)
{
    /* Tables in the order in which they are released */
    RdmaRmResTbl *tables[] = {
        &dev_res->srq_tbl,
        &dev_res->uc_tbl,
        &dev_res->cqe_ctx_tbl,
        &dev_res->qp_tbl,
        &dev_res->mr_tbl,
        &dev_res->cq_tbl,
        &dev_res->pd_tbl,
    };
    size_t idx;

    qemu_mutex_destroy(&dev_res->lock);

    fini_ports(dev_res, backend_dev, ifname);

    for (idx = 0; idx < sizeof(tables) / sizeof(tables[0]); idx++) {
        res_tbl_free(tables[idx]);
    }

    if (dev_res->qp_hash != NULL) {
        g_hash_table_destroy(dev_res->qp_hash);
    }
}
818