/*
 * QEMU paravirtual RDMA - QP implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"

#include "../rdma_utils.h"
#include "../rdma_rm.h"
#include "../rdma_backend.h"

#include "pvrdma.h"
#include "standard-headers/rdma/vmw_pvrdma-abi.h"
#include "pvrdma_qp_ops.h"

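/*
 * Context handed to the backend with each posted work request; it carries
 * the destination CQ handle and a partially filled CQE until the completion
 * handler fills in the status and posts it to the guest.
 */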
typedef struct CompHandlerCtx {
    PVRDMADev *dev;
    uint32_t cq_handle;
    struct pvrdma_cqe cqe;
} CompHandlerCtx;

/* Send Queue WQE */
typedef struct PvrdmaSqWqe {
    struct pvrdma_sq_wqe_hdr hdr;
    struct pvrdma_sge sge[0];
} PvrdmaSqWqe;

/* Recv Queue WQE */
typedef struct PvrdmaRqWqe {
    struct pvrdma_rq_wqe_hdr hdr;
    struct pvrdma_sge sge[0];
} PvrdmaRqWqe;

/*
 * 1. Put CQE on send CQ ring
 * 2. Put CQ number on dsr completion ring
 * 3. Interrupt host
 */
static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
                           struct pvrdma_cqe *cqe)
{
    struct pvrdma_cqe *cqe1;
    struct pvrdma_cqne *cqne;
    PvrdmaRing *ring;
    RdmaRmCQ *cq = rdma_rm_get_cq(&dev->rdma_dev_res, cq_handle);

    if (unlikely(!cq)) {
        pr_dbg("Invalid cqn %d\n", cq_handle);
        return -EINVAL;
    }

    ring = (PvrdmaRing *)cq->opaque;
    pr_dbg("ring=%p\n", ring);

    /* Step #1: Put CQE on CQ ring */
    pr_dbg("Writing CQE\n");
    cqe1 = pvrdma_ring_next_elem_write(ring);
    if (unlikely(!cqe1)) {
        return -EINVAL;
    }

    memset(cqe1, 0, sizeof(*cqe1));
    cqe1->wr_id = cqe->wr_id;
    cqe1->qp = cqe->qp;
    cqe1->opcode = cqe->opcode;
    cqe1->status = cqe->status;
    cqe1->vendor_err = cqe->vendor_err;

    pvrdma_ring_write_inc(ring);

    /* Step #2: Put CQ number on dsr completion ring */
    pr_dbg("Writing CQNE\n");
    cqne = pvrdma_ring_next_elem_write(&dev->dsr_info.cq);
    if (unlikely(!cqne)) {
        return -EINVAL;
    }

    cqne->info = cq_handle;
    pvrdma_ring_write_inc(&dev->dsr_info.cq);

    pr_dbg("cq->notify=%d\n", cq->notify);
    if (cq->notify) {
        cq->notify = false;
        post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q);
    }

    return 0;
}

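/*
 * Completion callback registered with the backend (see pvrdma_qp_ops_init):
 * completes the CQE that was prepared at post time with the reported status
 * and vendor error, posts it to the guest CQ and releases the context.
 */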
static void pvrdma_qp_ops_comp_handler(int status, unsigned int vendor_err,
                                       void *ctx)
{
    CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx;

    pr_dbg("cq_handle=%d\n", comp_ctx->cq_handle);
    pr_dbg("wr_id=%" PRIx64 "\n", comp_ctx->cqe.wr_id);
    pr_dbg("status=%d\n", status);
    pr_dbg("vendor_err=0x%x\n", vendor_err);
    comp_ctx->cqe.status = status;
    comp_ctx->cqe.vendor_err = vendor_err;
    pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe);
    g_free(ctx);
}

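/* Unregister the completion handler from the backend */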
void pvrdma_qp_ops_fini(void)
{
    rdma_backend_unregister_comp_handler();
}

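/* Register pvrdma_qp_ops_comp_handler as the backend completion handler */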
int pvrdma_qp_ops_init(void)
{
    rdma_backend_register_comp_handler(pvrdma_qp_ops_comp_handler);

    return 0;
}

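/*
 * Drain the guest's send ring: for each WQE prepare a completion context
 * targeting the QP's send CQ and hand the work request to the backend.
 */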
int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    PvrdmaSqWqe *wqe;
    PvrdmaRing *ring;

    pr_dbg("qp_handle=0x%x\n", qp_handle);

    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
    if (unlikely(!qp)) {
        return -EINVAL;
    }

    ring = (PvrdmaRing *)qp->opaque;
    pr_dbg("sring=%p\n", ring);

    wqe = (struct PvrdmaSqWqe *)pvrdma_ring_next_elem_read(ring);
    while (wqe) {
        CompHandlerCtx *comp_ctx;

        pr_dbg("wr_id=%" PRIx64 "\n", wqe->hdr.wr_id);

        /* Prepare CQE */
        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
        comp_ctx->dev = dev;
        comp_ctx->cq_handle = qp->send_cq_handle;
        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
        comp_ctx->cqe.qp = qp_handle;
        comp_ctx->cqe.opcode = wqe->hdr.opcode;

        rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
                               (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
                               (union ibv_gid *)wqe->hdr.wr.ud.av.dgid,
                               wqe->hdr.wr.ud.remote_qpn,
                               wqe->hdr.wr.ud.remote_qkey, comp_ctx);

        pvrdma_ring_read_inc(ring);

        wqe = pvrdma_ring_next_elem_read(ring);
    }

    return 0;
}

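/*
 * Drain the guest's receive ring (the second ring in the QP's opaque ring
 * pair): for each WQE prepare a completion context targeting the QP's
 * receive CQ and hand the work request to the backend.
 */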
int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    PvrdmaRqWqe *wqe;
    PvrdmaRing *ring;

    pr_dbg("qp_handle=0x%x\n", qp_handle);

    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
    if (unlikely(!qp)) {
        return -EINVAL;
    }

    ring = &((PvrdmaRing *)qp->opaque)[1];
    pr_dbg("rring=%p\n", ring);

    wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
    while (wqe) {
        CompHandlerCtx *comp_ctx;

        pr_dbg("wr_id=%" PRIx64 "\n", wqe->hdr.wr_id);

        /* Prepare CQE */
        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
        comp_ctx->dev = dev;
        comp_ctx->cq_handle = qp->recv_cq_handle;
        comp_ctx->cqe.qp = qp_handle;
        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;

        rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res,
                               &qp->backend_qp, qp->qp_type,
                               (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
                               comp_ctx);

        pvrdma_ring_read_inc(ring);

        wqe = pvrdma_ring_next_elem_read(ring);
    }

    return 0;
}

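/* Poll the backend CQ associated with the given CQ handle */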
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        pr_dbg("Invalid CQ# %d\n", cq_handle);
        return;
    }

    rdma_backend_poll_cq(dev_res, &cq->backend_cq);
}