1 /*
2 * QEMU paravirtual RDMA - QP implementation
3 *
4 * Copyright (C) 2018 Oracle
5 * Copyright (C) 2018 Red Hat Inc
6 *
7 * Authors:
8 * Yuval Shaia <yuval.shaia@oracle.com>
9 * Marcel Apfelbaum <marcel@redhat.com>
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12 * See the COPYING file in the top-level directory.
13 *
14 */
15
16 #include "qemu/osdep.h"
17
18 #include "../rdma_utils.h"
19 #include "../rdma_rm.h"
20 #include "../rdma_backend.h"
21
22 #include "pvrdma.h"
23 #include "standard-headers/rdma/vmw_pvrdma-abi.h"
24 #include "pvrdma_qp_ops.h"
25
26 typedef struct CompHandlerCtx {
27 PVRDMADev *dev;
28 uint32_t cq_handle;
29 struct pvrdma_cqe cqe;
30 } CompHandlerCtx;
31
32 /* Send Queue WQE */
33 typedef struct PvrdmaSqWqe {
34 struct pvrdma_sq_wqe_hdr hdr;
35 struct pvrdma_sge sge[0];
36 } PvrdmaSqWqe;
37
38 /* Recv Queue WQE */
39 typedef struct PvrdmaRqWqe {
40 struct pvrdma_rq_wqe_hdr hdr;
41 struct pvrdma_sge sge[0];
42 } PvrdmaRqWqe;
43
44 /*
45 * 1. Put CQE on send CQ ring
46 * 2. Put CQ number on dsr completion ring
47 * 3. Interrupt host
48 */
pvrdma_post_cqe(PVRDMADev * dev,uint32_t cq_handle,struct pvrdma_cqe * cqe)49 static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
50 struct pvrdma_cqe *cqe)
51 {
52 struct pvrdma_cqe *cqe1;
53 struct pvrdma_cqne *cqne;
54 PvrdmaRing *ring;
55 RdmaRmCQ *cq = rdma_rm_get_cq(&dev->rdma_dev_res, cq_handle);
56
57 if (unlikely(!cq)) {
58 pr_dbg("Invalid cqn %d\n", cq_handle);
59 return -EINVAL;
60 }
61
62 ring = (PvrdmaRing *)cq->opaque;
63 pr_dbg("ring=%p\n", ring);
64
65 /* Step #1: Put CQE on CQ ring */
66 pr_dbg("Writing CQE\n");
67 cqe1 = pvrdma_ring_next_elem_write(ring);
68 if (unlikely(!cqe1)) {
69 return -EINVAL;
70 }
71
72 memset(cqe1, 0, sizeof(*cqe1));
73 cqe1->wr_id = cqe->wr_id;
74 cqe1->qp = cqe->qp;
75 cqe1->opcode = cqe->opcode;
76 cqe1->status = cqe->status;
77 cqe1->vendor_err = cqe->vendor_err;
78
79 pvrdma_ring_write_inc(ring);
80
81 /* Step #2: Put CQ number on dsr completion ring */
82 pr_dbg("Writing CQNE\n");
83 cqne = pvrdma_ring_next_elem_write(&dev->dsr_info.cq);
84 if (unlikely(!cqne)) {
85 return -EINVAL;
86 }
87
88 cqne->info = cq_handle;
89 pvrdma_ring_write_inc(&dev->dsr_info.cq);
90
91 pr_dbg("cq->notify=%d\n", cq->notify);
92 if (cq->notify) {
93 cq->notify = false;
94 post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q);
95 }
96
97 return 0;
98 }
99
pvrdma_qp_ops_comp_handler(int status,unsigned int vendor_err,void * ctx)100 static void pvrdma_qp_ops_comp_handler(int status, unsigned int vendor_err,
101 void *ctx)
102 {
103 CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx;
104
105 pr_dbg("cq_handle=%d\n", comp_ctx->cq_handle);
106 pr_dbg("wr_id=%" PRIx64 "\n", comp_ctx->cqe.wr_id);
107 pr_dbg("status=%d\n", status);
108 pr_dbg("vendor_err=0x%x\n", vendor_err);
109 comp_ctx->cqe.status = status;
110 comp_ctx->cqe.vendor_err = vendor_err;
111 pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe);
112 g_free(ctx);
113 }
114
/*
 * Tear down QP operations: calls rdma_backend_unregister_comp_handler(),
 * the counterpart of the registration done in pvrdma_qp_ops_init().
 */
void pvrdma_qp_ops_fini(void)
{
    rdma_backend_unregister_comp_handler();
}
119
pvrdma_qp_ops_init(void)120 int pvrdma_qp_ops_init(void)
121 {
122 rdma_backend_register_comp_handler(pvrdma_qp_ops_comp_handler);
123
124 return 0;
125 }
126
/*
 * Drain the send ring of the QP identified by qp_handle.
 *
 * qp->opaque is used as the send ring. For every WQE read from it a
 * CompHandlerCtx is allocated and pre-filled with the QP's send CQ handle
 * and the CQE fields known at post time (wr_id, qp, opcode); the WQE is then
 * handed to rdma_backend_post_send() with that context and the ring's read
 * position is advanced.
 *
 * The context is not freed here; pvrdma_qp_ops_comp_handler() frees the
 * context it is invoked with (presumably the backend hands it back there -
 * confirm against rdma_backend_post_send()).
 *
 * Returns 0, or -EINVAL if qp_handle does not resolve to a QP.
 */
int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    PvrdmaRing *sring;
    PvrdmaSqWqe *wqe;

    pr_dbg("qp_handle=0x%x\n", qp_handle);

    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
    if (unlikely(qp == NULL)) {
        return -EINVAL;
    }

    sring = (PvrdmaRing *)qp->opaque;
    pr_dbg("sring=%p\n", sring);

    for (wqe = pvrdma_ring_next_elem_read(sring);
         wqe != NULL;
         wqe = pvrdma_ring_next_elem_read(sring)) {
        struct pvrdma_sq_wqe_hdr *hdr = &wqe->hdr;
        CompHandlerCtx *comp_ctx;

        pr_dbg("wr_id=%" PRIx64 "\n", hdr->wr_id);

        /* Prepare CQE */
        comp_ctx = g_malloc(sizeof(*comp_ctx));
        comp_ctx->dev = dev;
        comp_ctx->cq_handle = qp->send_cq_handle;
        comp_ctx->cqe.wr_id = hdr->wr_id;
        comp_ctx->cqe.qp = qp_handle;
        comp_ctx->cqe.opcode = hdr->opcode;

        rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp,
                               qp->qp_type,
                               (struct ibv_sge *)&wqe->sge[0], hdr->num_sge,
                               (union ibv_gid *)hdr->wr.ud.av.dgid,
                               hdr->wr.ud.remote_qpn,
                               hdr->wr.ud.remote_qkey, comp_ctx);

        /* Consume the WQE before looking for the next one */
        pvrdma_ring_read_inc(sring);
    }

    return 0;
}
170
/*
 * Drain the receive ring of the QP identified by qp_handle.
 *
 * The receive ring is the second PvrdmaRing behind qp->opaque. For every WQE
 * read from it a CompHandlerCtx is allocated and pre-filled with the QP's
 * recv CQ handle and the CQE fields known at post time (wr_id, qp, opcode);
 * the WQE is then handed to rdma_backend_post_recv() with that context and
 * the ring's read position is advanced.
 *
 * The context is not freed here; pvrdma_qp_ops_comp_handler() frees the
 * context it is invoked with (presumably the backend hands it back there -
 * confirm against rdma_backend_post_recv()).
 *
 * Returns 0, or -EINVAL if qp_handle does not resolve to a QP.
 */
int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    PvrdmaRqWqe *wqe;
    PvrdmaRing *ring;

    pr_dbg("qp_handle=0x%x\n", qp_handle);

    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
    if (unlikely(!qp)) {
        return -EINVAL;
    }

    ring = &((PvrdmaRing *)qp->opaque)[1];
    pr_dbg("rring=%p\n", ring);

    wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
    while (wqe) {
        CompHandlerCtx *comp_ctx;

        pr_dbg("wr_id=%" PRIx64 "\n", wqe->hdr.wr_id);

        /* Prepare CQE */
        comp_ctx = g_malloc(sizeof(*comp_ctx));
        comp_ctx->dev = dev;
        comp_ctx->cq_handle = qp->recv_cq_handle;
        comp_ctx->cqe.qp = qp_handle;
        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
        /*
         * The context comes from g_malloc() and is not zeroed, while
         * pvrdma_post_cqe() copies cqe.opcode into the CQ ring. Set it
         * explicitly so uninitialised heap bytes are never reported as the
         * completion opcode.
         */
        comp_ctx->cqe.opcode = PVRDMA_WC_RECV;

        rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res,
                               &qp->backend_qp, qp->qp_type,
                               (struct ibv_sge *)&wqe->sge[0],
                               wqe->hdr.num_sge, comp_ctx);

        pvrdma_ring_read_inc(ring);

        wqe = pvrdma_ring_next_elem_read(ring);
    }

    return 0;
}
212
/*
 * Ask the backend to poll the backend CQ behind cq_handle.
 *
 * If cq_handle does not resolve to a CQ, only a debug message is emitted.
 */
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq = rdma_rm_get_cq(dev_res, cq_handle);

    if (cq) {
        rdma_backend_poll_cq(dev_res, &cq->backend_cq);
    } else {
        pr_dbg("Invalid CQ# %d\n", cq_handle);
    }
}
225