1 /*
2 * QEMU paravirtual RDMA - Resource Manager Implementation
3 *
4 * Copyright (C) 2018 Oracle
5 * Copyright (C) 2018 Red Hat Inc
6 *
7 * Authors:
8 * Yuval Shaia <yuval.shaia@oracle.com>
9 * Marcel Apfelbaum <marcel@redhat.com>
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12 * See the COPYING file in the top-level directory.
13 *
14 */
15
16 #include "qemu/osdep.h"
17 #include "qapi/error.h"
18 #include "cpu.h"
19
20 #include "rdma_utils.h"
21 #include "rdma_backend.h"
22 #include "rdma_rm.h"
23
/*
 * Page directory and page tables: number of 64-bit entries that fit in one
 * target page.  The expansions are parenthesised so they can be used as
 * ordinary integer constant expressions (array sizes, arithmetic).
 */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(uint64_t))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(uint64_t))
27
/*
 * Initialise a resource table.
 *
 * @name:   label stored in the table and used in debug prints; truncated to
 *          MAX_RM_TBL_NAME - 1 characters
 * @tbl:    table to initialise
 * @tbl_sz: number of entries (also the number of bits in the usage bitmap)
 * @res_sz: size in bytes of one entry
 *
 * Allocates the entry storage and the usage bitmap and initialises the
 * table lock.
 */
static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
                                uint32_t tbl_sz, uint32_t res_sz)
{
    /*
     * g_malloc_n() checks tbl_sz * res_sz for overflow instead of letting a
     * 32-bit multiplication wrap and silently under-allocate the table.
     */
    tbl->tbl = g_malloc_n(tbl_sz, res_sz);

    /* strncpy() does not terminate on truncation, so terminate explicitly */
    strncpy(tbl->name, name, MAX_RM_TBL_NAME);
    tbl->name[MAX_RM_TBL_NAME - 1] = 0;

    tbl->bitmap = bitmap_new(tbl_sz);
    tbl->tbl_sz = tbl_sz;
    tbl->res_sz = res_sz;
    qemu_mutex_init(&tbl->lock);
}
41
/*
 * Release everything res_tbl_init() set up for @tbl: the lock, the entry
 * storage and the usage bitmap.  The table must not be used afterwards.
 */
static inline void res_tbl_free(RdmaRmResTbl *tbl)
{
    qemu_mutex_destroy(&tbl->lock);

    g_free(tbl->tbl);
    tbl->tbl = NULL;

    /* The bitmap was obtained from bitmap_new(); free it directly */
    g_free(tbl->bitmap);
    tbl->bitmap = NULL;
}
48
/*
 * Look up the entry for @handle in @tbl.
 *
 * Returns a pointer to the entry, or NULL when @handle is outside the table
 * or its bit is not set in the usage bitmap.
 */
static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
{
    pr_dbg("%s, handle=%d\n", tbl->name, handle);

    /* The range check comes first so test_bit() never sees a bad index */
    if (handle >= tbl->tbl_sz || !test_bit(handle, tbl->bitmap)) {
        pr_dbg("Invalid handle %d\n", handle);
        return NULL;
    }

    return tbl->tbl + handle * tbl->res_sz;
}
60
/*
 * Reserve the first free entry of @tbl.
 *
 * On success the entry's index is stored in *@handle and a pointer to the
 * zero-filled entry is returned.  Returns NULL when every entry is in use;
 * in that case *@handle does not hold a valid index.
 */
static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
{
    void *res;

    qemu_mutex_lock(&tbl->lock);

    /*
     * find_first_zero_bit() returns the bitmap size when no bit is clear,
     * so "full" is handle == tbl_sz and must be caught with >=.
     */
    *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
    if (*handle >= tbl->tbl_sz) {
        pr_dbg("Failed to alloc, bitmap is full\n");
        qemu_mutex_unlock(&tbl->lock);
        return NULL;
    }

    set_bit(*handle, tbl->bitmap);

    qemu_mutex_unlock(&tbl->lock);

    /* Compute the byte offset in size_t so it cannot wrap at 32 bits */
    res = tbl->tbl + (size_t)*handle * tbl->res_sz;
    memset(res, 0, tbl->res_sz);

    pr_dbg("%s, handle=%d\n", tbl->name, *handle);

    return res;
}
82
/*
 * Mark entry @handle of @tbl as free again.  Handles outside the table are
 * ignored.  The entry contents are left untouched; only the usage bit is
 * cleared, under the table lock.
 */
static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
{
    pr_dbg("%s, handle=%d\n", tbl->name, handle);

    qemu_mutex_lock(&tbl->lock);
    if (handle < tbl->tbl_sz) {
        clear_bit(handle, tbl->bitmap);
    }
    qemu_mutex_unlock(&tbl->lock);
}
95
/*
 * Allocate a protection domain.
 *
 * Reserves an entry in the PD table (its index is written to *@pd_handle),
 * creates the backend PD for it and records @ctx_handle in the entry.
 *
 * Returns 0 on success, -ENOMEM when the PD table has no free entry, or
 * -EIO when the backend fails to create the PD (the table entry is released
 * again in that case).
 */
int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t *pd_handle, uint32_t ctx_handle)
{
    RdmaRmPD *new_pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle);

    if (!new_pd) {
        return -ENOMEM;
    }

    if (rdma_backend_create_pd(backend_dev, &new_pd->backend_pd)) {
        res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);
        return -EIO;
    }

    new_pd->ctx_handle = ctx_handle;

    return 0;
}
123
rdma_rm_get_pd(RdmaDeviceResources * dev_res,uint32_t pd_handle)124 RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
125 {
126 return res_tbl_get(&dev_res->pd_tbl, pd_handle);
127 }
128
/*
 * Destroy the backend PD behind @pd_handle and free its table entry.
 * Does nothing when the handle does not refer to an allocated PD.
 */
void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);

    if (!pd) {
        return;
    }

    rdma_backend_destroy_pd(&pd->backend_pd);
    res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
}
138
/*
 * Allocate a memory region that belongs to the PD @pd_handle.
 *
 * Reserves an entry in the MR table (index written to *@mr_handle).  When
 * @host_virt is non-NULL the entry records the mapping (@host_virt adjusted
 * by the in-page offset of @guest_start, plus @guest_start and
 * @guest_length) and a backend MR is created for it with @access_flags.
 * When @host_virt is NULL no backend MR is created.
 *
 * On success *@lkey is set to the MR handle and *@rkey to -1.
 *
 * Returns 0 on success, -EINVAL for an invalid PD handle, -ENOMEM when the
 * MR table is full, or -EIO when the backend fails to create the MR (the
 * table entry is released again in that case).
 */
int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint64_t guest_start, size_t guest_length, void *host_virt,
                     int access_flags, uint32_t *mr_handle, uint32_t *lkey,
                     uint32_t *rkey)
{
    RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);
    RdmaRmMR *mr;

    if (!pd) {
        pr_dbg("Invalid PD\n");
        return -EINVAL;
    }

    mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
    if (!mr) {
        pr_dbg("Failed to allocate obj in table\n");
        return -ENOMEM;
    }
    pr_dbg("mr_handle=%d\n", *mr_handle);

    pr_dbg("host_virt=0x%p\n", host_virt);
    pr_dbg("guest_start=0x%" PRIx64 "\n", guest_start);
    pr_dbg("length=%zu\n", guest_length);

    if (host_virt) {
        int rc;

        mr->virt = host_virt;
        mr->start = guest_start;
        mr->length = guest_length;
        /* Point at the first byte of the region inside its page */
        mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));

        rc = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
                                    mr->length, access_flags);
        if (rc) {
            pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", rc);
            res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);
            return -EIO;
        }
    }

    /* We keep mr_handle in lkey so send and recv can get the mr ptr */
    *lkey = *mr_handle;
    *rkey = -1;

    mr->pd_handle = pd_handle;

    return 0;
}
193
rdma_rm_get_mr(RdmaDeviceResources * dev_res,uint32_t mr_handle)194 RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
195 {
196 return res_tbl_get(&dev_res->mr_tbl, mr_handle);
197 }
198
/*
 * Destroy the backend MR behind @mr_handle, unmap its host mapping when the
 * entry has a non-zero start address, and free the table entry.
 * Does nothing when the handle does not refer to an allocated MR.
 */
void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);

    if (!mr) {
        return;
    }

    rdma_backend_destroy_mr(&mr->backend_mr);
    pr_dbg("start=0x%" PRIx64 "\n", mr->start);

    if (mr->start) {
        /* Step back over the in-page offset to the start of the mapping */
        mr->virt -= (mr->start & (TARGET_PAGE_SIZE - 1));
        munmap(mr->virt, mr->length);
    }

    res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
}
213
/*
 * Allocate a user context entry; its index is written to *@uc_handle.
 *
 * Returns 0 on success or -ENOMEM when the UC table has no free entry.
 *
 * TODO: @pfn is currently not used or validated.  It should be checked
 * against the UAR BAR range (bar start address .. +RDMA_BAR2_UAR_SIZE).
 */
int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
                     uint32_t *uc_handle)
{
    return res_tbl_alloc(&dev_res->uc_tbl, uc_handle) ? 0 : -ENOMEM;
}
234
rdma_rm_get_uc(RdmaDeviceResources * dev_res,uint32_t uc_handle)235 RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
236 {
237 return res_tbl_get(&dev_res->uc_tbl, uc_handle);
238 }
239
/*
 * Free the UC table entry behind @uc_handle.
 * Does nothing when the handle does not refer to an allocated UC.
 */
void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    if (!rdma_rm_get_uc(dev_res, uc_handle)) {
        return;
    }

    res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
}
248
rdma_rm_get_cq(RdmaDeviceResources * dev_res,uint32_t cq_handle)249 RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
250 {
251 return res_tbl_get(&dev_res->cq_tbl, cq_handle);
252 }
253
/*
 * Allocate a completion queue with room for @cqe entries.
 *
 * Reserves an entry in the CQ table (index written to *@cq_handle), stores
 * @opaque in it, clears its notify flag and creates the backend CQ.
 *
 * Returns 0 on success, -ENOMEM when the CQ table is full, or -EIO when the
 * backend fails to create the CQ.
 */
int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t cqe, uint32_t *cq_handle, void *opaque)
{
    RdmaRmCQ *new_cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle);

    if (!new_cq) {
        return -ENOMEM;
    }

    new_cq->opaque = opaque;
    new_cq->notify = false;

    if (rdma_backend_create_cq(backend_dev, &new_cq->backend_cq, cqe)) {
        /*
         * NOTE(review): the full dealloc path is used here, so
         * rdma_backend_destroy_cq() is also called on a CQ whose backend
         * creation failed - confirm the backend tolerates that.
         */
        rdma_rm_dealloc_cq(dev_res, *cq_handle);
        return -EIO;
    }

    return 0;
}
281
/*
 * Set the notify flag of the CQ behind @cq_handle to @notify.
 * Does nothing when the handle does not refer to an allocated CQ.
 */
void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
                           bool notify)
{
    RdmaRmCQ *target;

    pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify);

    target = rdma_rm_get_cq(dev_res, cq_handle);
    if (target) {
        target->notify = notify;
        pr_dbg("notify=%d\n", target->notify);
    }
}
297
/*
 * Destroy the backend CQ behind @cq_handle and free its table entry.
 * Does nothing when the handle does not refer to an allocated CQ.
 */
void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq = rdma_rm_get_cq(dev_res, cq_handle);

    if (cq) {
        rdma_backend_destroy_cq(&cq->backend_cq);
        res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
    }
}
311
rdma_rm_get_qp(RdmaDeviceResources * dev_res,uint32_t qpn)312 RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
313 {
314 GBytes *key = g_bytes_new(&qpn, sizeof(qpn));
315
316 RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);
317
318 g_bytes_unref(key);
319
320 return qp;
321 }
322
/*
 * Allocate a queue pair on PD @pd_handle that uses the CQs
 * @send_cq_handle and @recv_cq_handle.
 *
 * Reserves an entry in the QP table, fills it in (state IBV_QPS_RESET),
 * creates the backend QP and registers the entry in the QP hash under the
 * backend QP number, which is also returned in *@qpn.
 *
 * Returns 0 on success, -EINVAL for an invalid PD or CQ handle, -ENOMEM when
 * the QP table is full, or -EIO when the backend fails to create the QP (the
 * table entry is released again in that case).
 */
int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
{
    RdmaRmPD *pd;
    RdmaRmCQ *send_cq, *recv_cq;
    RdmaRmQP *new_qp;
    uint32_t tbl_idx;

    pr_dbg("qp_type=%d\n", qp_type);

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        pr_err("Invalid pd handle (%d)\n", pd_handle);
        return -EINVAL;
    }

    send_cq = rdma_rm_get_cq(dev_res, send_cq_handle);
    recv_cq = rdma_rm_get_cq(dev_res, recv_cq_handle);
    if (!(send_cq && recv_cq)) {
        pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
               send_cq_handle, recv_cq_handle);
        return -EINVAL;
    }

    new_qp = res_tbl_alloc(&dev_res->qp_tbl, &tbl_idx);
    if (!new_qp) {
        return -ENOMEM;
    }
    pr_dbg("rm_qpn=%d\n", tbl_idx);

    new_qp->qpn = tbl_idx;
    new_qp->qp_state = IBV_QPS_RESET;
    new_qp->qp_type = qp_type;
    new_qp->send_cq_handle = send_cq_handle;
    new_qp->recv_cq_handle = recv_cq_handle;
    new_qp->opaque = opaque;

    if (rdma_backend_create_qp(&new_qp->backend_qp, qp_type, &pd->backend_pd,
                               &send_cq->backend_cq, &recv_cq->backend_cq,
                               max_send_wr, max_recv_wr, max_send_sge,
                               max_recv_sge)) {
        res_tbl_dealloc(&dev_res->qp_tbl, new_qp->qpn);
        return -EIO;
    }

    /* Callers see the backend QP number, not the table index */
    *qpn = rdma_backend_qpn(&new_qp->backend_qp);
    pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", tbl_idx, *qpn);
    g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)),
                        new_qp);

    return 0;
}
384
/*
 * Apply a state change to the QP registered under @qp_handle.
 *
 * QPs of type IBV_QPT_SMI are rejected with -EPERM and QPs of type
 * IBV_QPT_GSI are accepted without doing anything.  For other QPs, when
 * @attr_mask contains IBV_QP_STATE the new @qp_state is stored and the
 * matching backend transition (INIT, RTR or RTS) is performed.
 *
 * Returns 0 on success, -EINVAL when no QP is registered under @qp_handle,
 * -EPERM for an SMI QP, or -EIO when the backend transition fails.
 */
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask,
                      union ibv_gid *dgid, uint32_t dqpn,
                      enum ibv_qp_state qp_state, uint32_t qkey,
                      uint32_t rq_psn, uint32_t sq_psn)
{
    RdmaRmQP *qp;

    pr_dbg("qpn=0x%x\n", qp_handle);

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    pr_dbg("qp_type=%d\n", qp->qp_type);
    pr_dbg("attr_mask=0x%x\n", attr_mask);

    if (qp->qp_type == IBV_QPT_SMI) {
        pr_dbg("QP0 unsupported\n");
        return -EPERM;
    }
    if (qp->qp_type == IBV_QPT_GSI) {
        pr_dbg("QP1\n");
        return 0;
    }

    /* Only state transitions are handled here */
    if (!(attr_mask & IBV_QP_STATE)) {
        return 0;
    }

    qp->qp_state = qp_state;
    pr_dbg("qp_state=%d\n", qp->qp_state);

    if (qp->qp_state == IBV_QPS_INIT &&
        rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
                                   qp->qp_type, qkey)) {
        return -EIO;
    }

    if (qp->qp_state == IBV_QPS_RTR &&
        rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
                                  qp->qp_type, dgid, dqpn, rq_psn,
                                  qkey, attr_mask & IBV_QP_QKEY)) {
        return -EIO;
    }

    if (qp->qp_state == IBV_QPS_RTS &&
        rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
                                  sq_psn, qkey,
                                  attr_mask & IBV_QP_QKEY)) {
        return -EIO;
    }

    return 0;
}
445
/*
 * Query the backend for the attributes of the QP registered under
 * @qp_handle.
 *
 * Returns -EINVAL when no QP is registered under @qp_handle; otherwise the
 * result of rdma_backend_query_qp(), which fills @attr and @init_attr.
 */
int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t qp_handle, struct ibv_qp_attr *attr,
                     int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    RdmaRmQP *target;

    pr_dbg("qpn=0x%x\n", qp_handle);

    target = rdma_rm_get_qp(dev_res, qp_handle);
    if (target == NULL) {
        return -EINVAL;
    }

    pr_dbg("qp_type=%d\n", target->qp_type);

    return rdma_backend_query_qp(&target->backend_qp, attr, attr_mask,
                                 init_attr);
}
463
/*
 * Remove the QP registered under @qp_handle from the QP hash, destroy its
 * backend QP and free its QP table entry.
 * Does nothing further when no QP is registered under @qp_handle.
 */
void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
{
    GBytes *lookup_key = g_bytes_new(&qp_handle, sizeof(qp_handle));
    RdmaRmQP *victim = g_hash_table_lookup(dev_res->qp_hash, lookup_key);

    /* Fetch the entry before removing it; the hash only owns the key */
    g_hash_table_remove(dev_res->qp_hash, lookup_key);
    g_bytes_unref(lookup_key);

    if (victim) {
        rdma_backend_destroy_qp(&victim->backend_qp);
        /* The table is indexed by the internal qpn, not by the hash key */
        res_tbl_dealloc(&dev_res->qp_tbl, victim->qpn);
    }
}
482
/*
 * Return the context pointer stored under @cqe_ctx_id, or NULL when the id
 * does not refer to an allocated entry.
 */
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    /* Each table entry holds a single pointer */
    void **slot = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);

    if (slot == NULL) {
        return NULL;
    }

    pr_dbg("ctx=%p\n", *slot);

    return *slot;
}
496
/*
 * Store @ctx in a newly allocated CQE context entry; the entry's id is
 * written to *@cqe_ctx_id.
 *
 * Returns 0 on success or -ENOMEM when the CQE context table is full.
 */
int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                          void *ctx)
{
    void **slot = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);

    if (slot == NULL) {
        return -ENOMEM;
    }

    pr_dbg("ctx=%p\n", ctx);
    *slot = ctx;

    return 0;
}
512
/*
 * Free the CQE context entry @cqe_ctx_id.  The stored context pointer itself
 * is not touched; only the table entry is marked free.
 */
void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    RdmaRmResTbl *ctx_tbl = &dev_res->cqe_ctx_tbl;

    res_tbl_dealloc(ctx_tbl, cqe_ctx_id);
}
517
/* Key destructor for the QP hash: drops the reference held on the GBytes key */
static void destroy_qp_hash_key(gpointer data)
{
    GBytes *key = data;

    g_bytes_unref(key);
}
522
rdma_rm_init(RdmaDeviceResources * dev_res,struct ibv_device_attr * dev_attr,Error ** errp)523 int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
524 Error **errp)
525 {
526 dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
527 destroy_qp_hash_key, NULL);
528 if (!dev_res->qp_hash) {
529 return -ENOMEM;
530 }
531
532 res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
533 res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
534 res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
535 res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
536 res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
537 dev_attr->max_qp_wr, sizeof(void *));
538 res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
539
540 return 0;
541 }
542
/*
 * Tear down the state created by rdma_rm_init(): free every resource table
 * (in reverse order of their initialisation) and then destroy the QP hash.
 */
void rdma_rm_fini(RdmaDeviceResources *dev_res)
{
    RdmaRmResTbl *const tables[] = {
        &dev_res->uc_tbl,
        &dev_res->cqe_ctx_tbl,
        &dev_res->qp_tbl,
        &dev_res->mr_tbl,
        &dev_res->cq_tbl,
        &dev_res->pd_tbl,
    };
    size_t i;

    for (i = 0; i < sizeof(tables) / sizeof(tables[0]); i++) {
        res_tbl_free(tables[i]);
    }

    g_hash_table_destroy(dev_res->qp_hash);
}
554