1 /*
2  * Copyright (c) 2014-2017, Cisco Systems, Inc. All rights reserved.
3  *
4  * LICENSE_BEGIN
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
29  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  *
38  * LICENSE_END
39  *
40  *
41  */
42 
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <unistd.h>
46 #include <string.h>
47 #include <fcntl.h>
48 #include <dirent.h>
49 #include <errno.h>
50 #include <sys/stat.h>
51 #include <inttypes.h>
52 #include <sys/mman.h>
53 #include <sched.h>
54 
55 #include <infiniband/verbs.h>
56 
57 #include "kcompat.h"
58 #include "usnic_ib_abi.h"
59 
60 #include "usnic_direct.h"
61 #include "usd.h"
62 #include "usd_ib_cmd.h"
63 
64 int
usd_ib_cmd_get_context(struct usd_context * uctx)65 usd_ib_cmd_get_context(struct usd_context *uctx)
66 {
67     struct usnic_get_context cmd;
68     struct usnic_get_context_resp resp;
69     struct ib_uverbs_cmd_hdr *ich;
70     struct ib_uverbs_get_context *icp;
71     struct ib_uverbs_get_context_resp *irp;
72     struct usnic_ib_get_context_cmd *ucp;
73     struct usnic_ib_get_context_resp *urp;
74     int n;
75 
76     /* clear cmd and response */
77     memset(&cmd, 0, sizeof(cmd));
78     memset(&resp, 0, sizeof(resp));
79 
80     /* fill in the command struct */
81     ich = &cmd.ibv_cmd_hdr;
82     ich->command = IB_USER_VERBS_CMD_GET_CONTEXT;
83     ich->in_words = sizeof(cmd) / 4;
84     ich->out_words = sizeof(resp) / 4;
85 
86     icp = &cmd.ibv_cmd;
87     icp->response = (uintptr_t) & resp;
88 
89     ucp = &cmd.usnic_cmd;
90 
91 /*
92  *  Because usnic_verbs kernel module with USNIC_CTX_RESP_VERSION as 1
93  *  silently returns success even it receives resp_version larger than 1,
94  *  without filling in capbility information, here we still fill in
95  *  command with resp_version as 1 in order to retrive cababiltiy information.
96  *  Later when we decide to drop support for this version of kernel
97  *  module, we should replace the next two lines of code with commented-out
98  *  code below.
99     ucp->resp_version = USNIC_CTX_RESP_VERSION;
100     ucp->v2.encap_subcmd = 0;
101     ucp->v2.num_caps = USNIC_CAP_CNT;
102 */
103     ucp->resp_version = 1;
104     ucp->v1.num_caps = USNIC_CAP_CNT;
105 
106     n = write(uctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
107     if (n != sizeof(cmd)) {
108         return -errno;
109     }
110 
111     irp = &resp.ibv_resp;
112     uctx->event_fd = irp->async_fd;
113     uctx->num_comp_vectors = irp->num_comp_vectors;
114 
115     urp = &resp.usnic_resp;
116 
117 /*
118  * Replace the code below with the commented-out line if dropping
119  * support for kernel module with resp_version support as 1
120     if (urp->resp_version == USNIC_CTX_RESP_VERSION) {
121  */
122     if (urp->resp_version == 1) {
123         if (urp->num_caps > USNIC_CAP_CQ_SHARING &&
124             urp->cap_info[USNIC_CAP_CQ_SHARING] > 0) {
125             uctx->ucx_caps[USD_CAP_CQ_SHARING] = 1;
126         }
127         if (urp->num_caps > USNIC_CAP_MAP_PER_RES &&
128             urp->cap_info[USNIC_CAP_MAP_PER_RES] > 0) {
129             uctx->ucx_caps[USD_CAP_MAP_PER_RES] = 1;
130         }
131         if (urp->num_caps > USNIC_CAP_PIO &&
132             urp->cap_info[USNIC_CAP_PIO] > 0) {
133             uctx->ucx_caps[USD_CAP_PIO] = 1;
134         }
135         if (urp->num_caps > USNIC_CAP_CQ_INTR &&
136             urp->cap_info[USNIC_CAP_CQ_INTR] > 0) {
137             uctx->ucx_caps[USD_CAP_CQ_INTR] = 1;
138         }
139         if (urp->num_caps > USNIC_CAP_GRP_INTR &&
140             urp->cap_info[USNIC_CAP_GRP_INTR] > 0) {
141             uctx->ucx_caps[USD_CAP_GRP_INTR] = 1;
142         }
143     }
144 
145     return 0;
146 }
147 
148 int
usd_ib_cmd_devcmd(struct usd_device * dev,enum vnic_devcmd_cmd devcmd,u64 * a0,u64 * a1,int wait)149 usd_ib_cmd_devcmd(
150     struct usd_device *dev,
151     enum vnic_devcmd_cmd devcmd,
152     u64 *a0, u64 *a1, int wait)
153 {
154     struct usnic_get_context cmd;
155     struct usnic_get_context_resp resp;
156     struct ib_uverbs_cmd_hdr *ich;
157     struct ib_uverbs_get_context *icp;
158     struct usnic_ib_get_context_cmd *ucp;
159     struct usnic_ib_get_context_resp *urp;
160     struct usnic_udevcmd_cmd udevcmd;
161     struct usnic_udevcmd_resp udevcmd_resp;
162     int n;
163 
164     if (dev->ud_ctx->ucmd_ib_dev_fd < 0)
165         return -ENOENT;
166 
167     /* clear cmd and response */
168     memset(&cmd, 0, sizeof(cmd));
169     memset(&resp, 0, sizeof(resp));
170     memset(&udevcmd, 0, sizeof(udevcmd));
171     memset(&udevcmd_resp, 0, sizeof(udevcmd_resp));
172 
173     /* fill in the command struct */
174     ich = &cmd.ibv_cmd_hdr;
175     ich->command = IB_USER_VERBS_CMD_GET_CONTEXT;
176     ich->in_words = sizeof(cmd) / 4;
177     ich->out_words = sizeof(resp) / 4;
178 
179     icp = &cmd.ibv_cmd;
180     icp->response = (uintptr_t) & resp;
181 
182     /* fill in usnic devcmd struct */
183     udevcmd.vnic_idx = dev->ud_vf_list->vf_id;
184     udevcmd.devcmd = devcmd;
185     udevcmd.wait = wait;
186     udevcmd.num_args = 2;
187     udevcmd.args[0] = *a0;
188     udevcmd.args[1] = *a1;
189 
190     ucp = &cmd.usnic_cmd;
191     ucp->resp_version = USNIC_CTX_RESP_VERSION;
192     ucp->v2.encap_subcmd = 1;
193     ucp->v2.usnic_ucmd.ucmd = USNIC_USER_CMD_DEVCMD;
194     ucp->v2.usnic_ucmd.inbuf = (uintptr_t) &udevcmd;
195     ucp->v2.usnic_ucmd.inlen = (u32)sizeof(udevcmd);
196     ucp->v2.usnic_ucmd.outbuf = (uintptr_t) &udevcmd_resp;
197     ucp->v2.usnic_ucmd.outlen = (u32)sizeof(udevcmd_resp);
198 
199     n = write(dev->ud_ctx->ucmd_ib_dev_fd, &cmd, sizeof(cmd));
200     urp = &resp.usnic_resp;
201     /*
202      * If returns success, it's an old kernel who does not understand
203      * version 2 command, then we need to close the command FD to
204      * release the created ucontext object
205      */
206     if (n == sizeof(cmd)) {
207         usd_err(
208             "The running usnic_verbs kernel module does not support "
209             "encapsulating devcmd through IB GET_CONTEXT command\n");
210         close(dev->ud_ctx->ucmd_ib_dev_fd);
211         dev->ud_ctx->ucmd_ib_dev_fd = -1;
212         return -ENOTSUP;
213     } else if (errno != ECHILD) {
214         return -errno;
215     } else if (urp->resp_version != USNIC_CTX_RESP_VERSION) {
216         /* Kernel needs to make sure it returns response with a format
217          * understandable by the library. */
218         usd_err(
219             "The returned resp version does not match with requested\n");
220         return -ENOTSUP;
221     }
222 
223     *a0 = udevcmd_resp.args[0];
224     *a1 = udevcmd_resp.args[1];
225 
226     return 0;
227 }
228 
229 /*
230  * Issue IB DEALLOC_PD command to alloc a PD in kernel
231  */
232 static int
_usd_ib_cmd_dealloc_pd(struct usd_device * dev,uint32_t pd_handle)233 _usd_ib_cmd_dealloc_pd(
234     struct usd_device *dev,
235     uint32_t pd_handle)
236 {
237     struct usnic_dealloc_pd cmd;
238     struct ib_uverbs_cmd_hdr *ich;
239     struct ib_uverbs_dealloc_pd *icp;
240     int n;
241 
242     memset(&cmd, 0, sizeof(cmd));
243 
244     ich = &cmd.ibv_cmd_hdr;
245     ich->command = IB_USER_VERBS_CMD_DEALLOC_PD;
246     ich->in_words = sizeof(cmd) / 4;
247     ich->out_words = 0;
248 
249     icp = &cmd.ibv_cmd;
250     icp->pd_handle = pd_handle;
251 
252     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
253     if (n != sizeof(cmd)) {
254         return -errno;
255     }
256 
257     return 0;
258 }
259 
260 /*
261  * Issue IB ALLOC_PD command to alloc a PD in kernel
262  */
263 static int
_usd_ib_cmd_alloc_pd(struct usd_device * dev,uint32_t * handle_o,uint32_t * vfid,uint32_t * grp_vect_buf_len)264 _usd_ib_cmd_alloc_pd(
265     struct usd_device *dev,
266     uint32_t *handle_o,
267     uint32_t *vfid,
268     uint32_t *grp_vect_buf_len)
269 {
270     struct usnic_alloc_pd cmd;
271     struct usnic_alloc_pd_resp resp;
272     struct ib_uverbs_cmd_hdr *ich;
273     struct ib_uverbs_alloc_pd *icp;
274     struct usnic_ib_alloc_pd_cmd *ucp;
275     struct ib_uverbs_alloc_pd_resp *irp;
276     struct usnic_ib_alloc_pd_resp *urp;
277     int n;
278 
279     memset(&cmd, 0, sizeof(cmd));
280     memset(&resp, 0, sizeof(resp));
281 
282     /* fill in command */
283     ich = &cmd.ibv_cmd_hdr;
284     ich->command = IB_USER_VERBS_CMD_ALLOC_PD;
285     ich->in_words = sizeof(cmd) / 4;
286     ich->out_words = sizeof(resp) / 4;
287 
288     icp = &cmd.ibv_cmd;
289     icp->response = (uintptr_t) & resp;
290 
291     /*
292      * Only need to get group vector size and vf information
293      * if group interrupt is enabled
294      */
295     if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] > 0) {
296         ucp = &cmd.usnic_cmd;
297         ucp->resp_version = USNIC_IB_ALLOC_PD_VERSION;
298     }
299 
300     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
301     if (n != sizeof(cmd)) {
302         return -errno;
303     }
304 
305     /* process response */
306     irp = &resp.ibv_resp;
307     *handle_o = irp->pd_handle;
308     urp = &resp.usnic_resp;
309     if (urp->resp_version >= 1) {
310         *vfid = urp->cur.vfid;
311         *grp_vect_buf_len = urp->cur.grp_vect_buf_len;
312     }
313 
314     return 0;
315 }
316 
317 /*
318  * Create a protection domain
319  */
320 int
usd_ib_cmd_alloc_pd(struct usd_device * dev,uint32_t * handle_o)321 usd_ib_cmd_alloc_pd(
322     struct usd_device *dev,
323     uint32_t *handle_o)
324 {
325     uint32_t vfid = 0;
326     uint32_t grp_vect_buf_len = 0;
327     int err;
328 
329     /* Issue IB alloc_pd command, get assigned VF id and group vector size */
330     err = _usd_ib_cmd_alloc_pd(dev, handle_o, &vfid, &grp_vect_buf_len);
331     if (err) {
332         return err;
333     }
334 
335     /* MAP group vector address to userspace
336      * Kernel module then maps group vector user address to IOMMU and
337      * program VIC HW register
338      */
339     if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] > 0) {
340         void *va;
341         off64_t offset;
342 
343         offset = USNIC_ENCODE_PGOFF(vfid, USNIC_MMAP_GRPVECT, 0);
344         va = mmap64(NULL, grp_vect_buf_len, PROT_READ + PROT_WRITE,
345                     MAP_SHARED, dev->ud_ctx->ucx_ib_dev_fd, offset);
346 
347         if (va == MAP_FAILED) {
348             usd_err("Failed to map group vector for vf %u, grp_vect_size %u, "
349                     "error %d\n",
350                     vfid, grp_vect_buf_len, errno);
351             _usd_ib_cmd_dealloc_pd(dev, *handle_o);
352             return -errno;
353         }
354 
355         dev->grp_vect_map.va = va;
356         dev->grp_vect_map.len = grp_vect_buf_len;
357         dev->grp_vect_map.vfid = vfid;
358     }
359 
360     return 0;
361 }
362 
363 int
usd_ib_cmd_reg_mr(struct usd_device * dev,void * vaddr,size_t length,struct usd_mr * mr)364 usd_ib_cmd_reg_mr(
365     struct usd_device *dev,
366     void *vaddr,
367     size_t length,
368     struct usd_mr *mr)
369 {
370     struct usnic_reg_mr cmd;
371     struct usnic_reg_mr_resp resp;
372     struct ib_uverbs_cmd_hdr *ich;
373     struct ib_uverbs_reg_mr *icp;
374     struct ib_uverbs_reg_mr_resp *irp;
375     int n;
376 
377     memset(&cmd, 0, sizeof(cmd));
378     memset(&resp, 0, sizeof(resp));
379 
380     ich = &cmd.ibv_cmd_hdr;
381     ich->command = IB_USER_VERBS_CMD_REG_MR;
382     ich->in_words = sizeof(cmd) / 4;
383     ich->out_words = sizeof(resp) / 4;
384 
385     icp = &cmd.ibv_cmd;
386     icp->response = (uintptr_t) & resp;
387     icp->start = (uintptr_t) vaddr;
388     icp->length = length;
389     icp->hca_va = (uintptr_t) vaddr;
390     icp->pd_handle = dev->ud_pd_handle;
391     icp->access_flags = IBV_ACCESS_LOCAL_WRITE;
392 
393     /* Issue command to IB driver */
394     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
395     if (n != sizeof(cmd)) {
396         return errno;
397     }
398 
399     /* process response */
400     irp = &resp.ibv_resp;
401     mr->umr_handle = irp->mr_handle;
402     mr->umr_lkey = irp->lkey;
403     mr->umr_rkey = irp->rkey;
404 
405     return 0;
406 }
407 
408 int
usd_ib_cmd_dereg_mr(struct usd_device * dev,struct usd_mr * mr)409 usd_ib_cmd_dereg_mr(
410     struct usd_device *dev,
411     struct usd_mr *mr)
412 {
413     struct usnic_dereg_mr cmd;
414     struct ib_uverbs_cmd_hdr *ich;
415     struct ib_uverbs_dereg_mr *icp;
416     int n;
417 
418     memset(&cmd, 0, sizeof(cmd));
419 
420     ich = &cmd.ibv_cmd_hdr;
421     ich->command = IB_USER_VERBS_CMD_DEREG_MR;
422     ich->in_words = sizeof(cmd) / 4;
423     ich->out_words = 0;
424 
425     icp = &cmd.ibv_cmd;
426     icp->mr_handle = mr->umr_handle;
427 
428     /* Issue command to IB driver */
429     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
430     if (n != sizeof(cmd)) {
431         return -errno;
432     }
433 
434     return 0;
435 }
436 
437 /*
438  * Make the verbs call to create a CQ
439  */
440 int
usd_ib_cmd_create_cq(struct usd_device * dev,struct usd_cq_impl * cq,void * ibv_cq,int comp_channel,int comp_vector)441 usd_ib_cmd_create_cq(
442     struct usd_device *dev,
443     struct usd_cq_impl *cq,
444     void *ibv_cq,
445     int comp_channel,
446     int comp_vector)
447 {
448     struct usnic_create_cq cmd;
449     struct usnic_create_cq_resp resp;
450     struct ib_uverbs_cmd_hdr *ich;
451     struct ib_uverbs_create_cq *icp;
452     struct ib_uverbs_create_cq_resp *irp;
453     cpu_set_t *affinity_mask = NULL;
454     int flags = 0;
455     int n;
456 
457     memset(&cmd, 0, sizeof(cmd));
458     memset(&resp, 0, sizeof(resp));
459 
460     ich = &cmd.ibv_cmd_hdr;
461     ich->command = IB_USER_VERBS_CMD_CREATE_CQ;
462     ich->in_words = sizeof(cmd) / 4;
463     ich->out_words = sizeof(resp) / 4;
464 
465     icp = &cmd.ibv_cmd;
466     icp->response = (uintptr_t) & resp;
467 
468     if (ibv_cq == NULL) {
469         icp->user_handle = (uintptr_t) cq;
470     } else {
471         icp->user_handle = (uintptr_t) ibv_cq;  /* Pass real verbs cq pointer to kernel
472                                                  * to make ibv_get_cq_event happy */
473         flags |= USNIC_CQ_COMP_SIGNAL_VERBS;
474     }
475     icp->cqe = cq->ucq_num_entries;
476     icp->comp_channel = comp_channel;
477     icp->comp_vector = comp_vector;
478 
479     if (comp_channel != -1) {
480         if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] != 1) {
481             usd_err("usd_create_cq failed. No interrupt support\n");
482             return -ENOTSUP;
483         }
484         cmd.usnic_cmd.resp_version = USNIC_IB_CREATE_CQ_VERSION;
485         cmd.usnic_cmd.cur.flags = flags;
486         cmd.usnic_cmd.cur.comp_event_fd = comp_channel;
487         if ((affinity_mask = CPU_ALLOC(sysconf(_SC_NPROCESSORS_ONLN)))
488                 != NULL &&
489             sched_getaffinity(getpid(),
490                         CPU_ALLOC_SIZE(sysconf(_SC_NPROCESSORS_ONLN)),
491                         affinity_mask) == 0) {
492             cmd.usnic_cmd.cur.affinity_mask_ptr = (u64)affinity_mask;
493             cmd.usnic_cmd.cur.affinity_mask_len =
494                             CPU_ALLOC_SIZE(sysconf(_SC_NPROCESSORS_ONLN));
495         } else {
496             cmd.usnic_cmd.cur.affinity_mask_ptr = (u64)NULL;
497             cmd.usnic_cmd.cur.affinity_mask_len = 0;
498         }
499     } else {
500         /*
501          * If appliation does not request cq completion event support,
502          * send command with version 0 to allow compatibility with
503          * old kernel library
504          */
505         cmd.usnic_cmd.resp_version = 0;
506     }
507 
508     /* Issue command to IB driver */
509     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
510     if (n != sizeof(cmd)) {
511         return -errno;
512     }
513 
514     /* process response */
515     irp = &resp.ibv_resp;
516     cq->ucq_handle = irp->cq_handle;
517 
518     if (affinity_mask != NULL)
519         CPU_FREE(affinity_mask);
520 
521     return 0;
522 }
523 
524 /*
525  * Make the verbs call to destroy a CQ
526  */
527 int
usd_ib_cmd_destroy_cq(struct usd_device * dev,struct usd_cq_impl * cq)528 usd_ib_cmd_destroy_cq(
529     struct usd_device *dev,
530     struct usd_cq_impl *cq)
531 {
532     struct usnic_destroy_cq cmd;
533     struct ib_uverbs_cmd_hdr *ich;
534     struct ib_uverbs_destroy_cq *icp;
535     int n;
536 
537     memset(&cmd, 0, sizeof(cmd));
538 
539     ich = &cmd.ibv_cmd_hdr;
540     ich->command = IB_USER_VERBS_CMD_DESTROY_CQ;
541     ich->in_words = sizeof(cmd) / 4;
542     ich->out_words = 0;
543 
544     icp = &cmd.ibv_cmd;
545     icp->cq_handle = cq->ucq_handle;
546 
547     /* Issue command to IB driver */
548     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
549     if (n != sizeof(cmd)) {
550         return -errno;
551     }
552 
553     return 0;
554 }
555 
556 /*
557  * Create a verbs QP without attaching any real resources to it yet
558  */
559 int
usd_ib_cmd_create_qp(struct usd_device * dev,struct usd_qp_impl * qp,struct usd_vf_info * vfip)560 usd_ib_cmd_create_qp(
561     struct usd_device *dev,
562     struct usd_qp_impl *qp,
563     struct usd_vf_info *vfip)
564 {
565     struct usnic_create_qp cmd;
566     struct usnic_create_qp_resp *resp;
567     struct ib_uverbs_cmd_hdr *ich;
568     struct ib_uverbs_create_qp *icp;
569     struct ib_uverbs_create_qp_resp *irp = NULL;
570     struct usnic_ib_create_qp_cmd *ucp;
571     struct usnic_ib_create_qp_resp *urp;
572     struct usd_qp_filter *qfilt;
573     int ret;
574     int n;
575     uint32_t i;
576     struct usnic_vnic_barres_info *resources;
577 
578     ucp = NULL;
579     resources = NULL;
580     irp = NULL;
581     memset(&cmd, 0, sizeof(cmd));
582 
583     resp = calloc(1, sizeof(*resp));
584     if (resp == NULL) {
585         usd_err("Failed to allocate memory for create_qp_resp\n");
586         return -ENOMEM;
587     }
588 
589     ich = &cmd.ibv_cmd_hdr;
590     ich->command = IB_USER_VERBS_CMD_CREATE_QP;
591     ich->in_words = sizeof(cmd) / 4;
592     ich->out_words = sizeof(*resp) / 4;
593 
594     icp = &cmd.ibv_cmd;
595     icp->response = (uintptr_t) resp;
596     icp->user_handle = (uintptr_t) qp;
597     icp->pd_handle = dev->ud_pd_handle;
598     icp->send_cq_handle = qp->uq_wq.uwq_cq->ucq_handle;
599     icp->recv_cq_handle = qp->uq_rq.urq_cq->ucq_handle;
600     icp->srq_handle = 0;
601     icp->max_send_wr = qp->uq_wq.uwq_num_entries;
602     icp->max_recv_wr = qp->uq_rq.urq_num_entries;
603     icp->max_send_sge = 1;
604     icp->max_recv_sge = 1;
605     icp->max_inline_data = 1024;
606     icp->sq_sig_all = 0;
607     icp->qp_type = IBV_QPT_UD;
608     icp->is_srq = 0;
609     icp->reserved = 0;
610 
611     ucp = &cmd.usnic_cmd;
612 
613     if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR]) {
614         ucp->cmd_version = 2;
615     } else {
616             /*
617              * Allow compatibility with old kernel module when
618              * application does not require cq completion notification
619              */
620             ucp->cmd_version = 1;
621     }
622 
623     qfilt = &qp->uq_filter;
624     if (qfilt->qf_type == USD_FTY_UDP ||
625             qfilt->qf_type == USD_FTY_UDP_SOCK) {
626         /*
627          * Command versions 0,1,2 need to fill in the spec_v2 struct.
628          * Newer versions need to fill in the spec struct.
629          */
630         if (ucp->cmd_version <= 2) {
631             ucp->spec_v2.trans_type = USNIC_TRANSPORT_IPV4_UDP;
632             ucp->spec_v2.ip.sock_fd = qfilt->qf_filter.qf_udp.u_sockfd;
633         } else {
634             ucp->spec.trans_type = USNIC_TRANSPORT_IPV4_UDP;
635             ucp->spec.ip.sock_fd = qfilt->qf_filter.qf_udp.u_sockfd;
636         }
637     } else {
638         ret = -EINVAL;
639         goto out;
640     }
641 
642     ucp->u.cur.resources_len = RES_TYPE_MAX * sizeof(*resources);
643     resources = calloc(RES_TYPE_MAX, sizeof(*resources));
644     if (resources == NULL) {
645         usd_err("unable to allocate resources array\n");
646         ret = -ENOMEM;
647         goto out;
648     }
649     ucp->u.cur.resources = (u64)(uintptr_t)resources;
650 
651     /* Issue command to IB driver */
652     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
653     if (n != sizeof(cmd)) {
654         ret = -errno;
655         goto out;
656     }
657 
658     /* process IB part of response */
659     irp = &resp->ibv_resp;
660     qp->uq_qp_handle = irp->qp_handle;
661     qp->uq_qp_num = irp->qpn;
662 
663     /* process usnic part response */
664     urp = &resp->usnic_resp;
665 
666     qp->uq_rq.urq_index = urp->rq_idx[0];
667     qp->uq_wq.uwq_index = urp->wq_idx[0];
668 
669     qp->uq_rq.urq_cq->ucq_index = urp->cq_idx[0];
670     if (qp->uq_rq.urq_cq != qp->uq_wq.uwq_cq) {
671         qp->uq_wq.uwq_cq->ucq_index = urp->cq_idx[1];
672     }
673 
674     /* Pull VF info */
675     vfip->vi_vfid = urp->vfid;
676     vfip->vi_bar_bus_addr = urp->bar_bus_addr;
677     vfip->vi_bar_len = urp->bar_len;
678 
679     if (urp->cmd_version == ucp->cmd_version) {
680         /* got expected version */
681         if (dev->ud_ctx->ucx_caps[USD_CAP_MAP_PER_RES] > 0) {
682             for (i = 0; i < MIN(RES_TYPE_MAX, urp->u.cur.num_barres); i++) {
683                 enum vnic_res_type type = resources[i].type;
684                 if (type < RES_TYPE_MAX) {
685                     vfip->barres[type].type = type;
686                     vfip->barres[type].bus_addr = resources[i].bus_addr;
687                     vfip->barres[type].len = resources[i].len;
688                 }
689             }
690             if (vfip->barres[RES_TYPE_WQ].bus_addr == 0) {
691                     usd_err("Failed to retrieve WQ res info\n");
692                     ret = -ENXIO;
693                     goto out;
694             }
695             if (vfip->barres[RES_TYPE_RQ].bus_addr == 0) {
696                     usd_err("Failed to retrieve RQ res info\n");
697                     ret = -ENXIO;
698                     goto out;
699             }
700             if (vfip->barres[RES_TYPE_CQ].bus_addr == 0) {
701                     usd_err("Failed to retrieve CQ res info\n");
702                     ret = -ENXIO;
703                     goto out;
704             }
705             if (vfip->barres[RES_TYPE_INTR_CTRL].bus_addr == 0) {
706                     usd_err("Failed to retrieve INTR res info\n");
707                     ret = -ENXIO;
708                     goto out;
709             }
710             if (vfip->barres[RES_TYPE_DEVCMD].bus_addr == 0) {
711                     usd_err("Failed to retrieve DEVCMD res info\n");
712                     ret = -ENXIO;
713                     goto out;
714             }
715         }
716     } else if (urp->cmd_version == 0) {
717         /* special case, old kernel that won't tell us about individual barres
718          * info but should otherwise work fine */
719 
720         if (dev->ud_ctx->ucx_caps[USD_CAP_MAP_PER_RES] != 0) {
721             /* should not happen, only the presence of never-released kernel
722              * code should cause this case */
723             usd_err("USD_CAP_MAP_PER_RES claimed but qp_create cmd_version == 0\n");
724             ret = -ENXIO;
725             goto out;
726         }
727     }  else {
728         usd_err("unexpected cmd_version (%u)\n", urp->cmd_version);
729         ret = -ENXIO;
730         goto out;
731     }
732 
733     /* version 2 and beyond has interrupt support */
734     if (urp->cmd_version > 1) {
735         qp->uq_rq.urq_cq->intr_offset = urp->u.cur.rcq_intr_offset;
736         if (qp->uq_rq.urq_cq != qp->uq_wq.uwq_cq) {
737             qp->uq_wq.uwq_cq->intr_offset = urp->u.cur.wcq_intr_offset;
738         }
739         vfip->vi_barhead_len = urp->u.cur.barhead_len;
740     }
741 
742     free(resources);
743     free(resp);
744     return 0;
745 
746   out:
747     if (irp != NULL)                   /* indicates successful IB create QP */
748         usd_ib_cmd_destroy_qp(dev, qp);
749     free(resources);
750     free(resp);
751     return ret;
752 }
753 
754 int
usd_ib_cmd_modify_qp(struct usd_device * dev,struct usd_qp_impl * qp,int state)755 usd_ib_cmd_modify_qp(
756     struct usd_device *dev,
757     struct usd_qp_impl *qp,
758     int state)
759 {
760     struct usnic_modify_qp cmd;
761     struct ib_uverbs_cmd_hdr *ich;
762     struct ib_uverbs_modify_qp *icp;
763     int n;
764 
765     memset(&cmd, 0, sizeof(cmd));
766 
767     ich = &cmd.ibv_cmd_hdr;
768     ich->command = IB_USER_VERBS_CMD_MODIFY_QP;
769     ich->in_words = sizeof(cmd) / 4;
770     ich->out_words = 0;
771 
772     icp = &cmd.ibv_cmd;
773     icp->qp_handle = qp->uq_qp_handle;
774     icp->attr_mask = IBV_QP_STATE;
775     icp->qp_state = state;
776 
777     /* Issue command to IB driver */
778     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
779     if (n != sizeof(cmd)) {
780         return -errno;
781     }
782 
783     return 0;
784 }
785 
786 int
usd_ib_cmd_destroy_qp(struct usd_device * dev,struct usd_qp_impl * qp)787 usd_ib_cmd_destroy_qp(
788     struct usd_device *dev,
789     struct usd_qp_impl *qp)
790 {
791     struct usnic_destroy_qp cmd;
792     struct ib_uverbs_destroy_qp_resp resp;
793     struct ib_uverbs_cmd_hdr *ich;
794     struct ib_uverbs_destroy_qp *icp;
795     int n;
796 
797     memset(&cmd, 0, sizeof(cmd));
798 
799     ich = &cmd.ibv_cmd_hdr;
800     ich->command = IB_USER_VERBS_CMD_DESTROY_QP;
801     ich->in_words = sizeof(cmd) / 4;
802     ich->out_words = sizeof(resp) / 4;
803 
804     icp = &cmd.ibv_cmd;
805     icp->response = (uintptr_t) & resp;
806     icp->qp_handle = qp->uq_qp_handle;
807 
808     /* Issue command to IB driver */
809     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
810     if (n != sizeof(cmd)) {
811         return -errno;
812     }
813 
814     return 0;
815 }
816 
817 static int
usd_ib_cmd_query_device(struct usd_device * dev,struct ib_uverbs_query_device_resp * irp)818 usd_ib_cmd_query_device(
819     struct usd_device *dev,
820     struct ib_uverbs_query_device_resp *irp)
821 {
822     struct usnic_query_device cmd;
823     struct ib_uverbs_cmd_hdr *ich;
824     struct ib_uverbs_query_device *icp;
825     int n;
826 
827     memset(&cmd, 0, sizeof(cmd));
828 
829     ich = &cmd.ibv_cmd_hdr;
830     ich->command = IB_USER_VERBS_CMD_QUERY_DEVICE;
831     ich->in_words = sizeof(cmd) / 4;
832     ich->out_words = sizeof(*irp) / 4;
833 
834     icp = &cmd.ibv_cmd;
835     icp->response = (uintptr_t) irp;
836 
837     /* keep Valgrind happy */
838     memset(irp, 0x00, sizeof(*irp));
839 
840     /* Issue command to IB driver */
841     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
842     if (n != sizeof(cmd)) {
843         return -errno;
844     }
845 
846     return 0;
847 }
848 
849 static int
usd_ib_cmd_query_port(struct usd_device * dev,struct ib_uverbs_query_port_resp * irp)850 usd_ib_cmd_query_port(
851     struct usd_device *dev,
852     struct ib_uverbs_query_port_resp *irp)
853 {
854     struct usnic_query_port cmd;
855     struct ib_uverbs_cmd_hdr *ich;
856     struct ib_uverbs_query_port *icp;
857     int n;
858 
859     memset(&cmd, 0, sizeof(cmd));
860 
861     ich = &cmd.ibv_cmd_hdr;
862     ich->command = IB_USER_VERBS_CMD_QUERY_PORT;
863     ich->in_words = sizeof(cmd) / 4;
864     ich->out_words = sizeof(*irp) / 4;
865 
866     icp = &cmd.ibv_cmd;
867     icp->response = (uintptr_t) irp;
868     icp->port_num = 1;
869 
870     /* keep Valgrind happy */
871     memset(irp, 0x00, sizeof(*irp));
872 
873     /* Issue command to IB driver */
874     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
875     if (n != sizeof(cmd)) {
876         return -errno;
877     }
878 
879     return 0;
880 }
881 
/*
 * For code readability, copy these two enums from kernel
 * /usr/include/rdma/ib_verbs.h (otherwise, we would have to
 * hard-code the integer values below).
 */
enum ib_port_width {
    IB_WIDTH_1X  = 1 << 0,
    IB_WIDTH_4X  = 1 << 1,
    IB_WIDTH_8X  = 1 << 2,
    IB_WIDTH_12X = 1 << 3
};
893 
/* Port speed codes; each value is a distinct single bit. */
enum ib_port_speed {
    IB_SPEED_SDR   = 1 << 0,  // 2.5 Gbps
    IB_SPEED_DDR   = 1 << 1,  // 5 Gbps
    IB_SPEED_QDR   = 1 << 2,  // 10 Gbps
    IB_SPEED_FDR10 = 1 << 3,  // 10.3125 Gbps
    IB_SPEED_FDR   = 1 << 4,  // 14.0625 Gbps
    IB_SPEED_EDR   = 1 << 5,  // 25.78125 Gbps
    IB_SPEED_HDR   = 1 << 6   // 50 Gbps
};
903 
904 
905 /*
906  * Issue query commands for device and port and interpret the resaults
907  */
908 int
usd_ib_query_dev(struct usd_device * dev)909 usd_ib_query_dev(
910     struct usd_device *dev)
911 {
912     struct ib_uverbs_query_device_resp dresp;
913     struct ib_uverbs_query_port_resp presp;
914     struct usd_device_attrs *dap;
915     unsigned speed;
916     int ret;
917 
918     ret = usd_ib_cmd_query_device(dev, &dresp);
919     if (ret != 0)
920         return ret;
921 
922     ret = usd_ib_cmd_query_port(dev, &presp);
923     if (ret != 0)
924         return ret;
925 
926     /* copy out the attributes we care about */
927     dap = &dev->ud_attrs;
928 
929     dap->uda_link_state =
930         (presp.state == 4) ? USD_LINK_UP : USD_LINK_DOWN;
931 
932     /*
933      * If link is up, derive bandwidth from speed and width.
934      * If link is down, driver reports bad speed, try to deduce from the
935      * NIC device ID.
936      */
937     if (dap->uda_link_state == USD_LINK_UP) {
938 #define MKSW(S,W) (((S)<<8)|(W))
939         speed = MKSW(presp.active_speed, presp.active_width);
940         switch (speed) {
941         case MKSW(IB_SPEED_FDR10, IB_WIDTH_1X):
942         case MKSW(IB_SPEED_DDR, IB_WIDTH_4X):
943             dap->uda_bandwidth = 10000;
944             break;
945         case MKSW(IB_SPEED_QDR, IB_WIDTH_4X):
946             dap->uda_bandwidth = 25000;
947             break;
948         case MKSW(IB_SPEED_FDR10, IB_WIDTH_4X):
949             dap->uda_bandwidth = 40000;
950             break;
951         case MKSW(IB_SPEED_HDR, IB_WIDTH_1X):
952             dap->uda_bandwidth = 50000;
953             break;
954         case MKSW(IB_SPEED_EDR, IB_WIDTH_4X):
955             dap->uda_bandwidth = 100000;
956             break;
957         case MKSW(IB_SPEED_HDR, IB_WIDTH_4X):
958             dap->uda_bandwidth = 200000;
959             break;
960         case MKSW(IB_SPEED_HDR, IB_WIDTH_8X):
961             dap->uda_bandwidth = 400000;
962             break;
963         default:
964             printf("Warning: unrecognized speed/width %d/%d, defaulting to 10G\n",
965                    presp.active_speed, presp.active_width);
966             dap->uda_bandwidth = 10000;
967             break;
968         }
969     } else {
970         /* from pci_ids.h */
971         switch (dap->uda_device_id) {
972         case 0x4f: /* Vasona */
973         case 0x84: /* Cotati */
974         case 0x85: /* Lexington */
975         case 0x12c: /* Calistoga */
976         case 0x137: /* Mountain View */
977         case 0x138: /* Walnut Creek */
978             dap->uda_bandwidth = 10000;
979             break;
980         case 0xcd:  /* icehouse */
981         case 0x14d: /* clearlake */
982             dap->uda_bandwidth = 40000;
983             break;
984         default:
985             dap->uda_bandwidth = 0;
986         }
987     }
988 
989     dap->uda_vendor_id = dresp.vendor_id;
990     dap->uda_vendor_part_id = dresp.vendor_part_id;
991     dap->uda_device_id = dresp.hw_ver;
992 
993     dap->uda_max_qp = dresp.max_qp;
994     dap->uda_max_cq = dresp.max_cq;
995 
996     return 0;
997 }
998 
999 
1000 int
usd_ib_cmd_create_comp_channel(struct usd_device * dev,int * comp_fd_o)1001 usd_ib_cmd_create_comp_channel(
1002     struct usd_device *dev,
1003     int *comp_fd_o)
1004 {
1005     int n;
1006     struct usnic_create_comp_channel cmd;
1007     struct ib_uverbs_create_comp_channel_resp resp;
1008     struct ib_uverbs_cmd_hdr *ich;
1009     struct ib_uverbs_create_comp_channel *icp;
1010     struct ib_uverbs_create_comp_channel_resp *irp;
1011 
1012     memset(&cmd, 0, sizeof(cmd));
1013 
1014     ich = &cmd.ibv_cmd_hdr;
1015     ich->command = IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL;
1016     ich->in_words = sizeof(cmd) / 4;
1017     ich->out_words = sizeof(resp) / 4;
1018 
1019     icp = &cmd.ibv_cmd;
1020     icp->response = (uintptr_t) & resp;
1021 
1022     /* Issue command to IB driver */
1023     n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
1024     if (n != sizeof(cmd)) {
1025         return -errno;
1026     }
1027 
1028     irp = &resp;
1029     *comp_fd_o = irp->fd;
1030 
1031     return 0;
1032 }
1033