1 /*
2 * Copyright (c) 2014-2017, Cisco Systems, Inc. All rights reserved.
3 *
4 * LICENSE_BEGIN
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
29 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 *
38 * LICENSE_END
39 *
40 *
41 */
42
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <unistd.h>
46 #include <string.h>
47 #include <fcntl.h>
48 #include <dirent.h>
49 #include <errno.h>
50 #include <sys/stat.h>
51 #include <inttypes.h>
52 #include <sys/mman.h>
53 #include <sched.h>
54
55 #include <infiniband/verbs.h>
56
57 #include "kcompat.h"
58 #include "usnic_ib_abi.h"
59
60 #include "usnic_direct.h"
61 #include "usd.h"
62 #include "usd_ib_cmd.h"
63
64 int
usd_ib_cmd_get_context(struct usd_context * uctx)65 usd_ib_cmd_get_context(struct usd_context *uctx)
66 {
67 struct usnic_get_context cmd;
68 struct usnic_get_context_resp resp;
69 struct ib_uverbs_cmd_hdr *ich;
70 struct ib_uverbs_get_context *icp;
71 struct ib_uverbs_get_context_resp *irp;
72 struct usnic_ib_get_context_cmd *ucp;
73 struct usnic_ib_get_context_resp *urp;
74 int n;
75
76 /* clear cmd and response */
77 memset(&cmd, 0, sizeof(cmd));
78 memset(&resp, 0, sizeof(resp));
79
80 /* fill in the command struct */
81 ich = &cmd.ibv_cmd_hdr;
82 ich->command = IB_USER_VERBS_CMD_GET_CONTEXT;
83 ich->in_words = sizeof(cmd) / 4;
84 ich->out_words = sizeof(resp) / 4;
85
86 icp = &cmd.ibv_cmd;
87 icp->response = (uintptr_t) & resp;
88
89 ucp = &cmd.usnic_cmd;
90
91 /*
92 * Because usnic_verbs kernel module with USNIC_CTX_RESP_VERSION as 1
93 * silently returns success even it receives resp_version larger than 1,
94 * without filling in capbility information, here we still fill in
95 * command with resp_version as 1 in order to retrive cababiltiy information.
96 * Later when we decide to drop support for this version of kernel
97 * module, we should replace the next two lines of code with commented-out
98 * code below.
99 ucp->resp_version = USNIC_CTX_RESP_VERSION;
100 ucp->v2.encap_subcmd = 0;
101 ucp->v2.num_caps = USNIC_CAP_CNT;
102 */
103 ucp->resp_version = 1;
104 ucp->v1.num_caps = USNIC_CAP_CNT;
105
106 n = write(uctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
107 if (n != sizeof(cmd)) {
108 return -errno;
109 }
110
111 irp = &resp.ibv_resp;
112 uctx->event_fd = irp->async_fd;
113 uctx->num_comp_vectors = irp->num_comp_vectors;
114
115 urp = &resp.usnic_resp;
116
117 /*
118 * Replace the code below with the commented-out line if dropping
119 * support for kernel module with resp_version support as 1
120 if (urp->resp_version == USNIC_CTX_RESP_VERSION) {
121 */
122 if (urp->resp_version == 1) {
123 if (urp->num_caps > USNIC_CAP_CQ_SHARING &&
124 urp->cap_info[USNIC_CAP_CQ_SHARING] > 0) {
125 uctx->ucx_caps[USD_CAP_CQ_SHARING] = 1;
126 }
127 if (urp->num_caps > USNIC_CAP_MAP_PER_RES &&
128 urp->cap_info[USNIC_CAP_MAP_PER_RES] > 0) {
129 uctx->ucx_caps[USD_CAP_MAP_PER_RES] = 1;
130 }
131 if (urp->num_caps > USNIC_CAP_PIO &&
132 urp->cap_info[USNIC_CAP_PIO] > 0) {
133 uctx->ucx_caps[USD_CAP_PIO] = 1;
134 }
135 if (urp->num_caps > USNIC_CAP_CQ_INTR &&
136 urp->cap_info[USNIC_CAP_CQ_INTR] > 0) {
137 uctx->ucx_caps[USD_CAP_CQ_INTR] = 1;
138 }
139 if (urp->num_caps > USNIC_CAP_GRP_INTR &&
140 urp->cap_info[USNIC_CAP_GRP_INTR] > 0) {
141 uctx->ucx_caps[USD_CAP_GRP_INTR] = 1;
142 }
143 }
144
145 return 0;
146 }
147
148 int
usd_ib_cmd_devcmd(struct usd_device * dev,enum vnic_devcmd_cmd devcmd,u64 * a0,u64 * a1,int wait)149 usd_ib_cmd_devcmd(
150 struct usd_device *dev,
151 enum vnic_devcmd_cmd devcmd,
152 u64 *a0, u64 *a1, int wait)
153 {
154 struct usnic_get_context cmd;
155 struct usnic_get_context_resp resp;
156 struct ib_uverbs_cmd_hdr *ich;
157 struct ib_uverbs_get_context *icp;
158 struct usnic_ib_get_context_cmd *ucp;
159 struct usnic_ib_get_context_resp *urp;
160 struct usnic_udevcmd_cmd udevcmd;
161 struct usnic_udevcmd_resp udevcmd_resp;
162 int n;
163
164 if (dev->ud_ctx->ucmd_ib_dev_fd < 0)
165 return -ENOENT;
166
167 /* clear cmd and response */
168 memset(&cmd, 0, sizeof(cmd));
169 memset(&resp, 0, sizeof(resp));
170 memset(&udevcmd, 0, sizeof(udevcmd));
171 memset(&udevcmd_resp, 0, sizeof(udevcmd_resp));
172
173 /* fill in the command struct */
174 ich = &cmd.ibv_cmd_hdr;
175 ich->command = IB_USER_VERBS_CMD_GET_CONTEXT;
176 ich->in_words = sizeof(cmd) / 4;
177 ich->out_words = sizeof(resp) / 4;
178
179 icp = &cmd.ibv_cmd;
180 icp->response = (uintptr_t) & resp;
181
182 /* fill in usnic devcmd struct */
183 udevcmd.vnic_idx = dev->ud_vf_list->vf_id;
184 udevcmd.devcmd = devcmd;
185 udevcmd.wait = wait;
186 udevcmd.num_args = 2;
187 udevcmd.args[0] = *a0;
188 udevcmd.args[1] = *a1;
189
190 ucp = &cmd.usnic_cmd;
191 ucp->resp_version = USNIC_CTX_RESP_VERSION;
192 ucp->v2.encap_subcmd = 1;
193 ucp->v2.usnic_ucmd.ucmd = USNIC_USER_CMD_DEVCMD;
194 ucp->v2.usnic_ucmd.inbuf = (uintptr_t) &udevcmd;
195 ucp->v2.usnic_ucmd.inlen = (u32)sizeof(udevcmd);
196 ucp->v2.usnic_ucmd.outbuf = (uintptr_t) &udevcmd_resp;
197 ucp->v2.usnic_ucmd.outlen = (u32)sizeof(udevcmd_resp);
198
199 n = write(dev->ud_ctx->ucmd_ib_dev_fd, &cmd, sizeof(cmd));
200 urp = &resp.usnic_resp;
201 /*
202 * If returns success, it's an old kernel who does not understand
203 * version 2 command, then we need to close the command FD to
204 * release the created ucontext object
205 */
206 if (n == sizeof(cmd)) {
207 usd_err(
208 "The running usnic_verbs kernel module does not support "
209 "encapsulating devcmd through IB GET_CONTEXT command\n");
210 close(dev->ud_ctx->ucmd_ib_dev_fd);
211 dev->ud_ctx->ucmd_ib_dev_fd = -1;
212 return -ENOTSUP;
213 } else if (errno != ECHILD) {
214 return -errno;
215 } else if (urp->resp_version != USNIC_CTX_RESP_VERSION) {
216 /* Kernel needs to make sure it returns response with a format
217 * understandable by the library. */
218 usd_err(
219 "The returned resp version does not match with requested\n");
220 return -ENOTSUP;
221 }
222
223 *a0 = udevcmd_resp.args[0];
224 *a1 = udevcmd_resp.args[1];
225
226 return 0;
227 }
228
229 /*
230 * Issue IB DEALLOC_PD command to alloc a PD in kernel
231 */
232 static int
_usd_ib_cmd_dealloc_pd(struct usd_device * dev,uint32_t pd_handle)233 _usd_ib_cmd_dealloc_pd(
234 struct usd_device *dev,
235 uint32_t pd_handle)
236 {
237 struct usnic_dealloc_pd cmd;
238 struct ib_uverbs_cmd_hdr *ich;
239 struct ib_uverbs_dealloc_pd *icp;
240 int n;
241
242 memset(&cmd, 0, sizeof(cmd));
243
244 ich = &cmd.ibv_cmd_hdr;
245 ich->command = IB_USER_VERBS_CMD_DEALLOC_PD;
246 ich->in_words = sizeof(cmd) / 4;
247 ich->out_words = 0;
248
249 icp = &cmd.ibv_cmd;
250 icp->pd_handle = pd_handle;
251
252 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
253 if (n != sizeof(cmd)) {
254 return -errno;
255 }
256
257 return 0;
258 }
259
260 /*
261 * Issue IB ALLOC_PD command to alloc a PD in kernel
262 */
263 static int
_usd_ib_cmd_alloc_pd(struct usd_device * dev,uint32_t * handle_o,uint32_t * vfid,uint32_t * grp_vect_buf_len)264 _usd_ib_cmd_alloc_pd(
265 struct usd_device *dev,
266 uint32_t *handle_o,
267 uint32_t *vfid,
268 uint32_t *grp_vect_buf_len)
269 {
270 struct usnic_alloc_pd cmd;
271 struct usnic_alloc_pd_resp resp;
272 struct ib_uverbs_cmd_hdr *ich;
273 struct ib_uverbs_alloc_pd *icp;
274 struct usnic_ib_alloc_pd_cmd *ucp;
275 struct ib_uverbs_alloc_pd_resp *irp;
276 struct usnic_ib_alloc_pd_resp *urp;
277 int n;
278
279 memset(&cmd, 0, sizeof(cmd));
280 memset(&resp, 0, sizeof(resp));
281
282 /* fill in command */
283 ich = &cmd.ibv_cmd_hdr;
284 ich->command = IB_USER_VERBS_CMD_ALLOC_PD;
285 ich->in_words = sizeof(cmd) / 4;
286 ich->out_words = sizeof(resp) / 4;
287
288 icp = &cmd.ibv_cmd;
289 icp->response = (uintptr_t) & resp;
290
291 /*
292 * Only need to get group vector size and vf information
293 * if group interrupt is enabled
294 */
295 if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] > 0) {
296 ucp = &cmd.usnic_cmd;
297 ucp->resp_version = USNIC_IB_ALLOC_PD_VERSION;
298 }
299
300 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
301 if (n != sizeof(cmd)) {
302 return -errno;
303 }
304
305 /* process response */
306 irp = &resp.ibv_resp;
307 *handle_o = irp->pd_handle;
308 urp = &resp.usnic_resp;
309 if (urp->resp_version >= 1) {
310 *vfid = urp->cur.vfid;
311 *grp_vect_buf_len = urp->cur.grp_vect_buf_len;
312 }
313
314 return 0;
315 }
316
317 /*
318 * Create a protection domain
319 */
320 int
usd_ib_cmd_alloc_pd(struct usd_device * dev,uint32_t * handle_o)321 usd_ib_cmd_alloc_pd(
322 struct usd_device *dev,
323 uint32_t *handle_o)
324 {
325 uint32_t vfid = 0;
326 uint32_t grp_vect_buf_len = 0;
327 int err;
328
329 /* Issue IB alloc_pd command, get assigned VF id and group vector size */
330 err = _usd_ib_cmd_alloc_pd(dev, handle_o, &vfid, &grp_vect_buf_len);
331 if (err) {
332 return err;
333 }
334
335 /* MAP group vector address to userspace
336 * Kernel module then maps group vector user address to IOMMU and
337 * program VIC HW register
338 */
339 if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] > 0) {
340 void *va;
341 off64_t offset;
342
343 offset = USNIC_ENCODE_PGOFF(vfid, USNIC_MMAP_GRPVECT, 0);
344 va = mmap64(NULL, grp_vect_buf_len, PROT_READ + PROT_WRITE,
345 MAP_SHARED, dev->ud_ctx->ucx_ib_dev_fd, offset);
346
347 if (va == MAP_FAILED) {
348 usd_err("Failed to map group vector for vf %u, grp_vect_size %u, "
349 "error %d\n",
350 vfid, grp_vect_buf_len, errno);
351 _usd_ib_cmd_dealloc_pd(dev, *handle_o);
352 return -errno;
353 }
354
355 dev->grp_vect_map.va = va;
356 dev->grp_vect_map.len = grp_vect_buf_len;
357 dev->grp_vect_map.vfid = vfid;
358 }
359
360 return 0;
361 }
362
363 int
usd_ib_cmd_reg_mr(struct usd_device * dev,void * vaddr,size_t length,struct usd_mr * mr)364 usd_ib_cmd_reg_mr(
365 struct usd_device *dev,
366 void *vaddr,
367 size_t length,
368 struct usd_mr *mr)
369 {
370 struct usnic_reg_mr cmd;
371 struct usnic_reg_mr_resp resp;
372 struct ib_uverbs_cmd_hdr *ich;
373 struct ib_uverbs_reg_mr *icp;
374 struct ib_uverbs_reg_mr_resp *irp;
375 int n;
376
377 memset(&cmd, 0, sizeof(cmd));
378 memset(&resp, 0, sizeof(resp));
379
380 ich = &cmd.ibv_cmd_hdr;
381 ich->command = IB_USER_VERBS_CMD_REG_MR;
382 ich->in_words = sizeof(cmd) / 4;
383 ich->out_words = sizeof(resp) / 4;
384
385 icp = &cmd.ibv_cmd;
386 icp->response = (uintptr_t) & resp;
387 icp->start = (uintptr_t) vaddr;
388 icp->length = length;
389 icp->hca_va = (uintptr_t) vaddr;
390 icp->pd_handle = dev->ud_pd_handle;
391 icp->access_flags = IBV_ACCESS_LOCAL_WRITE;
392
393 /* Issue command to IB driver */
394 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
395 if (n != sizeof(cmd)) {
396 return errno;
397 }
398
399 /* process response */
400 irp = &resp.ibv_resp;
401 mr->umr_handle = irp->mr_handle;
402 mr->umr_lkey = irp->lkey;
403 mr->umr_rkey = irp->rkey;
404
405 return 0;
406 }
407
408 int
usd_ib_cmd_dereg_mr(struct usd_device * dev,struct usd_mr * mr)409 usd_ib_cmd_dereg_mr(
410 struct usd_device *dev,
411 struct usd_mr *mr)
412 {
413 struct usnic_dereg_mr cmd;
414 struct ib_uverbs_cmd_hdr *ich;
415 struct ib_uverbs_dereg_mr *icp;
416 int n;
417
418 memset(&cmd, 0, sizeof(cmd));
419
420 ich = &cmd.ibv_cmd_hdr;
421 ich->command = IB_USER_VERBS_CMD_DEREG_MR;
422 ich->in_words = sizeof(cmd) / 4;
423 ich->out_words = 0;
424
425 icp = &cmd.ibv_cmd;
426 icp->mr_handle = mr->umr_handle;
427
428 /* Issue command to IB driver */
429 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
430 if (n != sizeof(cmd)) {
431 return -errno;
432 }
433
434 return 0;
435 }
436
437 /*
438 * Make the verbs call to create a CQ
439 */
440 int
usd_ib_cmd_create_cq(struct usd_device * dev,struct usd_cq_impl * cq,void * ibv_cq,int comp_channel,int comp_vector)441 usd_ib_cmd_create_cq(
442 struct usd_device *dev,
443 struct usd_cq_impl *cq,
444 void *ibv_cq,
445 int comp_channel,
446 int comp_vector)
447 {
448 struct usnic_create_cq cmd;
449 struct usnic_create_cq_resp resp;
450 struct ib_uverbs_cmd_hdr *ich;
451 struct ib_uverbs_create_cq *icp;
452 struct ib_uverbs_create_cq_resp *irp;
453 cpu_set_t *affinity_mask = NULL;
454 int flags = 0;
455 int n;
456
457 memset(&cmd, 0, sizeof(cmd));
458 memset(&resp, 0, sizeof(resp));
459
460 ich = &cmd.ibv_cmd_hdr;
461 ich->command = IB_USER_VERBS_CMD_CREATE_CQ;
462 ich->in_words = sizeof(cmd) / 4;
463 ich->out_words = sizeof(resp) / 4;
464
465 icp = &cmd.ibv_cmd;
466 icp->response = (uintptr_t) & resp;
467
468 if (ibv_cq == NULL) {
469 icp->user_handle = (uintptr_t) cq;
470 } else {
471 icp->user_handle = (uintptr_t) ibv_cq; /* Pass real verbs cq pointer to kernel
472 * to make ibv_get_cq_event happy */
473 flags |= USNIC_CQ_COMP_SIGNAL_VERBS;
474 }
475 icp->cqe = cq->ucq_num_entries;
476 icp->comp_channel = comp_channel;
477 icp->comp_vector = comp_vector;
478
479 if (comp_channel != -1) {
480 if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] != 1) {
481 usd_err("usd_create_cq failed. No interrupt support\n");
482 return -ENOTSUP;
483 }
484 cmd.usnic_cmd.resp_version = USNIC_IB_CREATE_CQ_VERSION;
485 cmd.usnic_cmd.cur.flags = flags;
486 cmd.usnic_cmd.cur.comp_event_fd = comp_channel;
487 if ((affinity_mask = CPU_ALLOC(sysconf(_SC_NPROCESSORS_ONLN)))
488 != NULL &&
489 sched_getaffinity(getpid(),
490 CPU_ALLOC_SIZE(sysconf(_SC_NPROCESSORS_ONLN)),
491 affinity_mask) == 0) {
492 cmd.usnic_cmd.cur.affinity_mask_ptr = (u64)affinity_mask;
493 cmd.usnic_cmd.cur.affinity_mask_len =
494 CPU_ALLOC_SIZE(sysconf(_SC_NPROCESSORS_ONLN));
495 } else {
496 cmd.usnic_cmd.cur.affinity_mask_ptr = (u64)NULL;
497 cmd.usnic_cmd.cur.affinity_mask_len = 0;
498 }
499 } else {
500 /*
501 * If appliation does not request cq completion event support,
502 * send command with version 0 to allow compatibility with
503 * old kernel library
504 */
505 cmd.usnic_cmd.resp_version = 0;
506 }
507
508 /* Issue command to IB driver */
509 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
510 if (n != sizeof(cmd)) {
511 return -errno;
512 }
513
514 /* process response */
515 irp = &resp.ibv_resp;
516 cq->ucq_handle = irp->cq_handle;
517
518 if (affinity_mask != NULL)
519 CPU_FREE(affinity_mask);
520
521 return 0;
522 }
523
524 /*
525 * Make the verbs call to destroy a CQ
526 */
527 int
usd_ib_cmd_destroy_cq(struct usd_device * dev,struct usd_cq_impl * cq)528 usd_ib_cmd_destroy_cq(
529 struct usd_device *dev,
530 struct usd_cq_impl *cq)
531 {
532 struct usnic_destroy_cq cmd;
533 struct ib_uverbs_cmd_hdr *ich;
534 struct ib_uverbs_destroy_cq *icp;
535 int n;
536
537 memset(&cmd, 0, sizeof(cmd));
538
539 ich = &cmd.ibv_cmd_hdr;
540 ich->command = IB_USER_VERBS_CMD_DESTROY_CQ;
541 ich->in_words = sizeof(cmd) / 4;
542 ich->out_words = 0;
543
544 icp = &cmd.ibv_cmd;
545 icp->cq_handle = cq->ucq_handle;
546
547 /* Issue command to IB driver */
548 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
549 if (n != sizeof(cmd)) {
550 return -errno;
551 }
552
553 return 0;
554 }
555
556 /*
557 * Create a verbs QP without attaching any real resources to it yet
558 */
559 int
usd_ib_cmd_create_qp(struct usd_device * dev,struct usd_qp_impl * qp,struct usd_vf_info * vfip)560 usd_ib_cmd_create_qp(
561 struct usd_device *dev,
562 struct usd_qp_impl *qp,
563 struct usd_vf_info *vfip)
564 {
565 struct usnic_create_qp cmd;
566 struct usnic_create_qp_resp *resp;
567 struct ib_uverbs_cmd_hdr *ich;
568 struct ib_uverbs_create_qp *icp;
569 struct ib_uverbs_create_qp_resp *irp = NULL;
570 struct usnic_ib_create_qp_cmd *ucp;
571 struct usnic_ib_create_qp_resp *urp;
572 struct usd_qp_filter *qfilt;
573 int ret;
574 int n;
575 uint32_t i;
576 struct usnic_vnic_barres_info *resources;
577
578 ucp = NULL;
579 resources = NULL;
580 irp = NULL;
581 memset(&cmd, 0, sizeof(cmd));
582
583 resp = calloc(1, sizeof(*resp));
584 if (resp == NULL) {
585 usd_err("Failed to allocate memory for create_qp_resp\n");
586 return -ENOMEM;
587 }
588
589 ich = &cmd.ibv_cmd_hdr;
590 ich->command = IB_USER_VERBS_CMD_CREATE_QP;
591 ich->in_words = sizeof(cmd) / 4;
592 ich->out_words = sizeof(*resp) / 4;
593
594 icp = &cmd.ibv_cmd;
595 icp->response = (uintptr_t) resp;
596 icp->user_handle = (uintptr_t) qp;
597 icp->pd_handle = dev->ud_pd_handle;
598 icp->send_cq_handle = qp->uq_wq.uwq_cq->ucq_handle;
599 icp->recv_cq_handle = qp->uq_rq.urq_cq->ucq_handle;
600 icp->srq_handle = 0;
601 icp->max_send_wr = qp->uq_wq.uwq_num_entries;
602 icp->max_recv_wr = qp->uq_rq.urq_num_entries;
603 icp->max_send_sge = 1;
604 icp->max_recv_sge = 1;
605 icp->max_inline_data = 1024;
606 icp->sq_sig_all = 0;
607 icp->qp_type = IBV_QPT_UD;
608 icp->is_srq = 0;
609 icp->reserved = 0;
610
611 ucp = &cmd.usnic_cmd;
612
613 if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR]) {
614 ucp->cmd_version = 2;
615 } else {
616 /*
617 * Allow compatibility with old kernel module when
618 * application does not require cq completion notification
619 */
620 ucp->cmd_version = 1;
621 }
622
623 qfilt = &qp->uq_filter;
624 if (qfilt->qf_type == USD_FTY_UDP ||
625 qfilt->qf_type == USD_FTY_UDP_SOCK) {
626 /*
627 * Command versions 0,1,2 need to fill in the spec_v2 struct.
628 * Newer versions need to fill in the spec struct.
629 */
630 if (ucp->cmd_version <= 2) {
631 ucp->spec_v2.trans_type = USNIC_TRANSPORT_IPV4_UDP;
632 ucp->spec_v2.ip.sock_fd = qfilt->qf_filter.qf_udp.u_sockfd;
633 } else {
634 ucp->spec.trans_type = USNIC_TRANSPORT_IPV4_UDP;
635 ucp->spec.ip.sock_fd = qfilt->qf_filter.qf_udp.u_sockfd;
636 }
637 } else {
638 ret = -EINVAL;
639 goto out;
640 }
641
642 ucp->u.cur.resources_len = RES_TYPE_MAX * sizeof(*resources);
643 resources = calloc(RES_TYPE_MAX, sizeof(*resources));
644 if (resources == NULL) {
645 usd_err("unable to allocate resources array\n");
646 ret = -ENOMEM;
647 goto out;
648 }
649 ucp->u.cur.resources = (u64)(uintptr_t)resources;
650
651 /* Issue command to IB driver */
652 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
653 if (n != sizeof(cmd)) {
654 ret = -errno;
655 goto out;
656 }
657
658 /* process IB part of response */
659 irp = &resp->ibv_resp;
660 qp->uq_qp_handle = irp->qp_handle;
661 qp->uq_qp_num = irp->qpn;
662
663 /* process usnic part response */
664 urp = &resp->usnic_resp;
665
666 qp->uq_rq.urq_index = urp->rq_idx[0];
667 qp->uq_wq.uwq_index = urp->wq_idx[0];
668
669 qp->uq_rq.urq_cq->ucq_index = urp->cq_idx[0];
670 if (qp->uq_rq.urq_cq != qp->uq_wq.uwq_cq) {
671 qp->uq_wq.uwq_cq->ucq_index = urp->cq_idx[1];
672 }
673
674 /* Pull VF info */
675 vfip->vi_vfid = urp->vfid;
676 vfip->vi_bar_bus_addr = urp->bar_bus_addr;
677 vfip->vi_bar_len = urp->bar_len;
678
679 if (urp->cmd_version == ucp->cmd_version) {
680 /* got expected version */
681 if (dev->ud_ctx->ucx_caps[USD_CAP_MAP_PER_RES] > 0) {
682 for (i = 0; i < MIN(RES_TYPE_MAX, urp->u.cur.num_barres); i++) {
683 enum vnic_res_type type = resources[i].type;
684 if (type < RES_TYPE_MAX) {
685 vfip->barres[type].type = type;
686 vfip->barres[type].bus_addr = resources[i].bus_addr;
687 vfip->barres[type].len = resources[i].len;
688 }
689 }
690 if (vfip->barres[RES_TYPE_WQ].bus_addr == 0) {
691 usd_err("Failed to retrieve WQ res info\n");
692 ret = -ENXIO;
693 goto out;
694 }
695 if (vfip->barres[RES_TYPE_RQ].bus_addr == 0) {
696 usd_err("Failed to retrieve RQ res info\n");
697 ret = -ENXIO;
698 goto out;
699 }
700 if (vfip->barres[RES_TYPE_CQ].bus_addr == 0) {
701 usd_err("Failed to retrieve CQ res info\n");
702 ret = -ENXIO;
703 goto out;
704 }
705 if (vfip->barres[RES_TYPE_INTR_CTRL].bus_addr == 0) {
706 usd_err("Failed to retrieve INTR res info\n");
707 ret = -ENXIO;
708 goto out;
709 }
710 if (vfip->barres[RES_TYPE_DEVCMD].bus_addr == 0) {
711 usd_err("Failed to retrieve DEVCMD res info\n");
712 ret = -ENXIO;
713 goto out;
714 }
715 }
716 } else if (urp->cmd_version == 0) {
717 /* special case, old kernel that won't tell us about individual barres
718 * info but should otherwise work fine */
719
720 if (dev->ud_ctx->ucx_caps[USD_CAP_MAP_PER_RES] != 0) {
721 /* should not happen, only the presence of never-released kernel
722 * code should cause this case */
723 usd_err("USD_CAP_MAP_PER_RES claimed but qp_create cmd_version == 0\n");
724 ret = -ENXIO;
725 goto out;
726 }
727 } else {
728 usd_err("unexpected cmd_version (%u)\n", urp->cmd_version);
729 ret = -ENXIO;
730 goto out;
731 }
732
733 /* version 2 and beyond has interrupt support */
734 if (urp->cmd_version > 1) {
735 qp->uq_rq.urq_cq->intr_offset = urp->u.cur.rcq_intr_offset;
736 if (qp->uq_rq.urq_cq != qp->uq_wq.uwq_cq) {
737 qp->uq_wq.uwq_cq->intr_offset = urp->u.cur.wcq_intr_offset;
738 }
739 vfip->vi_barhead_len = urp->u.cur.barhead_len;
740 }
741
742 free(resources);
743 free(resp);
744 return 0;
745
746 out:
747 if (irp != NULL) /* indicates successful IB create QP */
748 usd_ib_cmd_destroy_qp(dev, qp);
749 free(resources);
750 free(resp);
751 return ret;
752 }
753
754 int
usd_ib_cmd_modify_qp(struct usd_device * dev,struct usd_qp_impl * qp,int state)755 usd_ib_cmd_modify_qp(
756 struct usd_device *dev,
757 struct usd_qp_impl *qp,
758 int state)
759 {
760 struct usnic_modify_qp cmd;
761 struct ib_uverbs_cmd_hdr *ich;
762 struct ib_uverbs_modify_qp *icp;
763 int n;
764
765 memset(&cmd, 0, sizeof(cmd));
766
767 ich = &cmd.ibv_cmd_hdr;
768 ich->command = IB_USER_VERBS_CMD_MODIFY_QP;
769 ich->in_words = sizeof(cmd) / 4;
770 ich->out_words = 0;
771
772 icp = &cmd.ibv_cmd;
773 icp->qp_handle = qp->uq_qp_handle;
774 icp->attr_mask = IBV_QP_STATE;
775 icp->qp_state = state;
776
777 /* Issue command to IB driver */
778 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
779 if (n != sizeof(cmd)) {
780 return -errno;
781 }
782
783 return 0;
784 }
785
786 int
usd_ib_cmd_destroy_qp(struct usd_device * dev,struct usd_qp_impl * qp)787 usd_ib_cmd_destroy_qp(
788 struct usd_device *dev,
789 struct usd_qp_impl *qp)
790 {
791 struct usnic_destroy_qp cmd;
792 struct ib_uverbs_destroy_qp_resp resp;
793 struct ib_uverbs_cmd_hdr *ich;
794 struct ib_uverbs_destroy_qp *icp;
795 int n;
796
797 memset(&cmd, 0, sizeof(cmd));
798
799 ich = &cmd.ibv_cmd_hdr;
800 ich->command = IB_USER_VERBS_CMD_DESTROY_QP;
801 ich->in_words = sizeof(cmd) / 4;
802 ich->out_words = sizeof(resp) / 4;
803
804 icp = &cmd.ibv_cmd;
805 icp->response = (uintptr_t) & resp;
806 icp->qp_handle = qp->uq_qp_handle;
807
808 /* Issue command to IB driver */
809 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
810 if (n != sizeof(cmd)) {
811 return -errno;
812 }
813
814 return 0;
815 }
816
817 static int
usd_ib_cmd_query_device(struct usd_device * dev,struct ib_uverbs_query_device_resp * irp)818 usd_ib_cmd_query_device(
819 struct usd_device *dev,
820 struct ib_uverbs_query_device_resp *irp)
821 {
822 struct usnic_query_device cmd;
823 struct ib_uverbs_cmd_hdr *ich;
824 struct ib_uverbs_query_device *icp;
825 int n;
826
827 memset(&cmd, 0, sizeof(cmd));
828
829 ich = &cmd.ibv_cmd_hdr;
830 ich->command = IB_USER_VERBS_CMD_QUERY_DEVICE;
831 ich->in_words = sizeof(cmd) / 4;
832 ich->out_words = sizeof(*irp) / 4;
833
834 icp = &cmd.ibv_cmd;
835 icp->response = (uintptr_t) irp;
836
837 /* keep Valgrind happy */
838 memset(irp, 0x00, sizeof(*irp));
839
840 /* Issue command to IB driver */
841 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
842 if (n != sizeof(cmd)) {
843 return -errno;
844 }
845
846 return 0;
847 }
848
849 static int
usd_ib_cmd_query_port(struct usd_device * dev,struct ib_uverbs_query_port_resp * irp)850 usd_ib_cmd_query_port(
851 struct usd_device *dev,
852 struct ib_uverbs_query_port_resp *irp)
853 {
854 struct usnic_query_port cmd;
855 struct ib_uverbs_cmd_hdr *ich;
856 struct ib_uverbs_query_port *icp;
857 int n;
858
859 memset(&cmd, 0, sizeof(cmd));
860
861 ich = &cmd.ibv_cmd_hdr;
862 ich->command = IB_USER_VERBS_CMD_QUERY_PORT;
863 ich->in_words = sizeof(cmd) / 4;
864 ich->out_words = sizeof(*irp) / 4;
865
866 icp = &cmd.ibv_cmd;
867 icp->response = (uintptr_t) irp;
868 icp->port_num = 1;
869
870 /* keep Valgrind happy */
871 memset(irp, 0x00, sizeof(*irp));
872
873 /* Issue command to IB driver */
874 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
875 if (n != sizeof(cmd)) {
876 return -errno;
877 }
878
879 return 0;
880 }
881
/*
 * For code readability, these two enums are copied from the kernel's
 * /usr/include/rdma/ib_verbs.h (otherwise, we would have to hard-code
 * the integer values below).
 */
enum ib_port_width {
    IB_WIDTH_1X  = 1 << 0,
    IB_WIDTH_4X  = 1 << 1,
    IB_WIDTH_8X  = 1 << 2,
    IB_WIDTH_12X = 1 << 3
};

enum ib_port_speed {
    IB_SPEED_SDR   = 1 << 0,    /* 2.5 Gbps */
    IB_SPEED_DDR   = 1 << 1,    /* 5 Gbps */
    IB_SPEED_QDR   = 1 << 2,    /* 10 Gbps */
    IB_SPEED_FDR10 = 1 << 3,    /* 10.3125 Gbps */
    IB_SPEED_FDR   = 1 << 4,    /* 14.0625 Gbps */
    IB_SPEED_EDR   = 1 << 5,    /* 25.78125 Gbps */
    IB_SPEED_HDR   = 1 << 6     /* 50 Gbps */
};
903
904
905 /*
906 * Issue query commands for device and port and interpret the resaults
907 */
908 int
usd_ib_query_dev(struct usd_device * dev)909 usd_ib_query_dev(
910 struct usd_device *dev)
911 {
912 struct ib_uverbs_query_device_resp dresp;
913 struct ib_uverbs_query_port_resp presp;
914 struct usd_device_attrs *dap;
915 unsigned speed;
916 int ret;
917
918 ret = usd_ib_cmd_query_device(dev, &dresp);
919 if (ret != 0)
920 return ret;
921
922 ret = usd_ib_cmd_query_port(dev, &presp);
923 if (ret != 0)
924 return ret;
925
926 /* copy out the attributes we care about */
927 dap = &dev->ud_attrs;
928
929 dap->uda_link_state =
930 (presp.state == 4) ? USD_LINK_UP : USD_LINK_DOWN;
931
932 /*
933 * If link is up, derive bandwidth from speed and width.
934 * If link is down, driver reports bad speed, try to deduce from the
935 * NIC device ID.
936 */
937 if (dap->uda_link_state == USD_LINK_UP) {
938 #define MKSW(S,W) (((S)<<8)|(W))
939 speed = MKSW(presp.active_speed, presp.active_width);
940 switch (speed) {
941 case MKSW(IB_SPEED_FDR10, IB_WIDTH_1X):
942 case MKSW(IB_SPEED_DDR, IB_WIDTH_4X):
943 dap->uda_bandwidth = 10000;
944 break;
945 case MKSW(IB_SPEED_QDR, IB_WIDTH_4X):
946 dap->uda_bandwidth = 25000;
947 break;
948 case MKSW(IB_SPEED_FDR10, IB_WIDTH_4X):
949 dap->uda_bandwidth = 40000;
950 break;
951 case MKSW(IB_SPEED_HDR, IB_WIDTH_1X):
952 dap->uda_bandwidth = 50000;
953 break;
954 case MKSW(IB_SPEED_EDR, IB_WIDTH_4X):
955 dap->uda_bandwidth = 100000;
956 break;
957 case MKSW(IB_SPEED_HDR, IB_WIDTH_4X):
958 dap->uda_bandwidth = 200000;
959 break;
960 case MKSW(IB_SPEED_HDR, IB_WIDTH_8X):
961 dap->uda_bandwidth = 400000;
962 break;
963 default:
964 printf("Warning: unrecognized speed/width %d/%d, defaulting to 10G\n",
965 presp.active_speed, presp.active_width);
966 dap->uda_bandwidth = 10000;
967 break;
968 }
969 } else {
970 /* from pci_ids.h */
971 switch (dap->uda_device_id) {
972 case 0x4f: /* Vasona */
973 case 0x84: /* Cotati */
974 case 0x85: /* Lexington */
975 case 0x12c: /* Calistoga */
976 case 0x137: /* Mountain View */
977 case 0x138: /* Walnut Creek */
978 dap->uda_bandwidth = 10000;
979 break;
980 case 0xcd: /* icehouse */
981 case 0x14d: /* clearlake */
982 dap->uda_bandwidth = 40000;
983 break;
984 default:
985 dap->uda_bandwidth = 0;
986 }
987 }
988
989 dap->uda_vendor_id = dresp.vendor_id;
990 dap->uda_vendor_part_id = dresp.vendor_part_id;
991 dap->uda_device_id = dresp.hw_ver;
992
993 dap->uda_max_qp = dresp.max_qp;
994 dap->uda_max_cq = dresp.max_cq;
995
996 return 0;
997 }
998
999
1000 int
usd_ib_cmd_create_comp_channel(struct usd_device * dev,int * comp_fd_o)1001 usd_ib_cmd_create_comp_channel(
1002 struct usd_device *dev,
1003 int *comp_fd_o)
1004 {
1005 int n;
1006 struct usnic_create_comp_channel cmd;
1007 struct ib_uverbs_create_comp_channel_resp resp;
1008 struct ib_uverbs_cmd_hdr *ich;
1009 struct ib_uverbs_create_comp_channel *icp;
1010 struct ib_uverbs_create_comp_channel_resp *irp;
1011
1012 memset(&cmd, 0, sizeof(cmd));
1013
1014 ich = &cmd.ibv_cmd_hdr;
1015 ich->command = IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL;
1016 ich->in_words = sizeof(cmd) / 4;
1017 ich->out_words = sizeof(resp) / 4;
1018
1019 icp = &cmd.ibv_cmd;
1020 icp->response = (uintptr_t) & resp;
1021
1022 /* Issue command to IB driver */
1023 n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd));
1024 if (n != sizeof(cmd)) {
1025 return -errno;
1026 }
1027
1028 irp = &resp;
1029 *comp_fd_o = irp->fd;
1030
1031 return 0;
1032 }
1033