xref: /freebsd/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c (revision 19261079)
1 /*-
2  * Copyright (c) 2016-2020, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "mlx5_ib.h"
29 
/*
 * Bookkeeping for one posted GSI send work request.  The hardware
 * completion is stashed here until all older WRs have completed, so that
 * completions can be delivered to the user CQ in posting order (see
 * generate_completions()).
 */
struct mlx5_ib_gsi_wr {
	struct ib_cqe cqe;	/* completion hook for the internal TX CQ */
	struct ib_wc wc;	/* saved completion, replayed to the user CQ */
	int send_flags;		/* IB_SEND_* flags of the original WR */
	bool completed:1;	/* hardware completion has arrived */
};
36 
/*
 * Software GSI (QP1) state for one port.
 *
 * The ib_qp exposed to the core (ibqp) is a software construct: receives
 * always go through the hardware GSI QP (rx_qp), while sends may be
 * spread over one UD QP per P_Key (tx_qps) on devices that can set the
 * DETH source QPN.  Send completions are buffered in outstanding_wrs and
 * replayed in posting order to the user's send CQ.
 */
struct mlx5_ib_gsi_qp {
	struct ib_qp ibqp;	/* QP handed back to the IB core */
	struct ib_qp *rx_qp;	/* hardware GSI QP; handles all receives */
	u8 port_num;
	struct ib_qp_cap cap;	/* capabilities requested by the creator */
	enum ib_sig_type sq_sig_type;
	/* Serialize qp state modifications */
	struct mutex mutex;
	struct ib_cq *cq;	/* internal send CQ shared by the TX QPs */
	struct mlx5_ib_gsi_wr *outstanding_wrs;	/* ring of cap.max_send_wr slots */
	u32 outstanding_pi, outstanding_ci;	/* free-running producer/consumer */
	int num_qps;	/* entries in tx_qps; 0 without the DETH SQPN cap */
	/* Protects access to the tx_qps. Post send operations synchronize
	 * with tx_qp creation in setup_qp(). Also protects the
	 * outstanding_wrs array and indices.
	 */
	spinlock_t lock;
	struct ib_qp **tx_qps;	/* one UD QP per P_Key index, created lazily */
};
56 
/* Map an ib_qp embedded in a mlx5_ib_gsi_qp back to its container. */
static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
{
	return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
}
61 
/*
 * True when the device can override the DETH source QPN on UD QPs, which
 * allows emulating QP1 with one transmit QP per P_Key entry.
 */
static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
{
	return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}
66 
/* Call with gsi->lock locked */
/*
 * Flush completions to the user-visible GSI send CQ in posting order.
 *
 * Walks the outstanding_wrs ring from the consumer index towards the
 * producer index and stops at the first WR whose hardware completion has
 * not arrived yet.  A completion is surfaced only for signaled WRs, or
 * for every WR when the QP was created with IB_SIGNAL_ALL_WR.
 */
static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
{
	struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
	struct mlx5_ib_gsi_wr *wr;
	u32 index;

	/* pi/ci are free-running; the ring slot is index mod max_send_wr. */
	for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
	     index++) {
		wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];

		/* Preserve in-order delivery: stop at the first hole. */
		if (!wr->completed)
			break;

		if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
		    wr->send_flags & IB_SEND_SIGNALED)
			WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));

		/* Slot may now be reused by a later post. */
		wr->completed = false;
	}

	gsi->outstanding_ci = index;
}
90 
/*
 * Completion callback for WRs posted on the internal TX CQ.  Saves the
 * hardware completion into the matching outstanding_wrs slot — restoring
 * the caller's original wr_id and reporting the software GSI QP as the
 * source — and then tries to flush completions in order to the user CQ.
 */
static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
	struct mlx5_ib_gsi_wr *wr =
		container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
	u64 wr_id;
	unsigned long flags;

	spin_lock_irqsave(&gsi->lock, flags);
	wr->completed = true;
	/* The hardware wc carries the internal cqe as wr_id; keep the
	 * wr_id recorded at post time instead. */
	wr_id = wr->wc.wr_id;
	wr->wc = *wc;
	wr->wc.wr_id = wr_id;
	/* Make the completion appear to come from the software GSI QP. */
	wr->wc.qp = &gsi->ibqp;

	generate_completions(gsi);
	spin_unlock_irqrestore(&gsi->lock, flags);
}
109 
/*
 * Create the software GSI (QP1) wrapper for init_attr->port_num.
 *
 * A hardware GSI QP (rx_qp) is always created and handles all receives.
 * When the device can override the DETH source QPN, sends are instead
 * spread over one UD QP per P_Key entry so the right P_Key goes on the
 * wire; those TX QPs are created lazily by setup_qp().  Only one GSI QP
 * may exist per port.
 *
 * Returns the embedded ib_qp on success or an ERR_PTR() on failure.
 */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
				    struct ib_qp_init_attr *init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_gsi_qp *gsi;
	struct ib_qp_init_attr hw_init_attr = *init_attr;
	const u8 port_num = init_attr->port_num;
	const int num_pkeys = pd->device->attrs.max_pkeys;
	/* Without the DETH SQPN capability no TX QPs are used at all. */
	const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
	int ret;

	mlx5_ib_dbg(dev, "creating GSI QP\n");

	if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
		mlx5_ib_warn(dev,
			     "invalid port number %d during GSI QP creation\n",
			     port_num);
		return ERR_PTR(-EINVAL);
	}

	gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
	if (!gsi)
		return ERR_PTR(-ENOMEM);

	gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
	if (!gsi->tx_qps) {
		ret = -ENOMEM;
		goto err_free;
	}

	/* Completion ring sized to the caller's requested send queue depth. */
	gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
				       sizeof(*gsi->outstanding_wrs),
				       GFP_KERNEL);
	if (!gsi->outstanding_wrs) {
		ret = -ENOMEM;
		goto err_free_tx;
	}

	mutex_init(&gsi->mutex);

	/* devr.mutex guards the per-port GSI slot against concurrent creates. */
	mutex_lock(&dev->devr.mutex);

	if (dev->devr.ports[port_num - 1].gsi) {
		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
			     port_num);
		ret = -EBUSY;
		goto err_free_wrs;
	}
	gsi->num_qps = num_qps;
	spin_lock_init(&gsi->lock);

	gsi->cap = init_attr->cap;
	gsi->sq_sig_type = init_attr->sq_sig_type;
	gsi->ibqp.qp_num = 1;	/* the well-known GSI QP number */
	gsi->port_num = port_num;

	/* Internal CQ collecting send completions of all TX QPs. */
	gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
			      IB_POLL_SOFTIRQ);
	if (IS_ERR(gsi->cq)) {
		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
			     PTR_ERR(gsi->cq));
		ret = PTR_ERR(gsi->cq);
		goto err_free_wrs;
	}

	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
	hw_init_attr.send_cq = gsi->cq;
	if (num_qps) {
		/* Sends go through the TX QPs; the hardware GSI QP only
		 * needs its receive queue. */
		hw_init_attr.cap.max_send_wr = 0;
		hw_init_attr.cap.max_send_sge = 0;
		hw_init_attr.cap.max_inline_data = 0;
	}
	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
	if (IS_ERR(gsi->rx_qp)) {
		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
			     PTR_ERR(gsi->rx_qp));
		ret = PTR_ERR(gsi->rx_qp);
		goto err_destroy_cq;
	}

	dev->devr.ports[init_attr->port_num - 1].gsi = gsi;

	mutex_unlock(&dev->devr.mutex);

	return &gsi->ibqp;

err_destroy_cq:
	ib_free_cq(gsi->cq);
err_free_wrs:
	mutex_unlock(&dev->devr.mutex);
	kfree(gsi->outstanding_wrs);
err_free_tx:
	kfree(gsi->tx_qps);
err_free:
	kfree(gsi);
	return ERR_PTR(ret);
}
207 
/*
 * Tear down a software GSI QP: destroy the hardware RX QP and clear the
 * port's GSI slot, then destroy any per-P_Key TX QPs, the internal send
 * CQ and the bookkeeping arrays.
 *
 * Returns 0 on success.  If destroying the hardware GSI QP fails, the
 * error is returned and nothing is freed, leaving the GSI QP usable.
 */
int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	const int port_num = gsi->port_num;
	int qp_index;
	int ret;

	mlx5_ib_dbg(dev, "destroying GSI QP\n");

	/* devr.mutex serializes against mlx5_ib_gsi_create_qp(). */
	mutex_lock(&dev->devr.mutex);
	ret = ib_destroy_qp(gsi->rx_qp);
	if (ret) {
		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
			     ret);
		mutex_unlock(&dev->devr.mutex);
		return ret;
	}
	dev->devr.ports[port_num - 1].gsi = NULL;
	mutex_unlock(&dev->devr.mutex);
	gsi->rx_qp = NULL;

	/* TX QPs are created lazily, so some slots may still be empty. */
	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
		if (!gsi->tx_qps[qp_index])
			continue;
		WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
		gsi->tx_qps[qp_index] = NULL;
	}

	ib_free_cq(gsi->cq);

	kfree(gsi->outstanding_wrs);
	kfree(gsi->tx_qps);
	kfree(gsi);

	return 0;
}
245 
246 static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
247 {
248 	struct ib_pd *pd = gsi->rx_qp->pd;
249 	struct ib_qp_init_attr init_attr = {
250 		.event_handler = gsi->rx_qp->event_handler,
251 		.qp_context = gsi->rx_qp->qp_context,
252 		.send_cq = gsi->cq,
253 		.recv_cq = gsi->rx_qp->recv_cq,
254 		.cap = {
255 			.max_send_wr = gsi->cap.max_send_wr,
256 			.max_send_sge = gsi->cap.max_send_sge,
257 			.max_inline_data = gsi->cap.max_inline_data,
258 		},
259 		.sq_sig_type = gsi->sq_sig_type,
260 		.qp_type = IB_QPT_UD,
261 		.create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
262 	};
263 
264 	return ib_create_qp(pd, &init_attr);
265 }
266 
/*
 * Walk a freshly created TX QP through INIT -> RTR -> RTS.
 *
 * The QP's P_Key index equals its slot in gsi->tx_qps (qp_index), and
 * QP1's well-known QKey is installed.  Returns 0 on success or the
 * errno of the failing transition (logged); the caller is responsible
 * for destroying the QP on failure.
 */
static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
			 u16 qp_index)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct ib_qp_attr attr;
	int mask;
	int ret;

	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
	attr.qp_state = IB_QPS_INIT;
	attr.pkey_index = qp_index;
	attr.qkey = IB_QP1_QKEY;
	attr.port_num = gsi->port_num;
	ret = ib_modify_qp(qp, &attr, mask);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	/* Only IB_QP_STATE is masked in; the other attr fields are ignored. */
	attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	attr.qp_state = IB_QPS_RTS;
	attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	return 0;
}
306 
307 static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
308 {
309 	struct ib_device *device = gsi->rx_qp->device;
310 	struct mlx5_ib_dev *dev = to_mdev(device);
311 	struct ib_qp *qp;
312 	unsigned long flags;
313 	u16 pkey;
314 	int ret;
315 
316 	ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
317 	if (ret) {
318 		mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
319 			     gsi->port_num, qp_index);
320 		return;
321 	}
322 
323 	if (!pkey) {
324 		mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d.  Skipping.\n",
325 			    gsi->port_num, qp_index);
326 		return;
327 	}
328 
329 	spin_lock_irqsave(&gsi->lock, flags);
330 	qp = gsi->tx_qps[qp_index];
331 	spin_unlock_irqrestore(&gsi->lock, flags);
332 	if (qp) {
333 		mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
334 			    gsi->port_num, qp_index);
335 		return;
336 	}
337 
338 	qp = create_gsi_ud_qp(gsi);
339 	if (IS_ERR(qp)) {
340 		mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
341 			     PTR_ERR(qp));
342 		return;
343 	}
344 
345 	ret = modify_to_rts(gsi, qp, qp_index);
346 	if (ret)
347 		goto err_destroy_qp;
348 
349 	spin_lock_irqsave(&gsi->lock, flags);
350 	WARN_ON_ONCE(gsi->tx_qps[qp_index]);
351 	gsi->tx_qps[qp_index] = qp;
352 	spin_unlock_irqrestore(&gsi->lock, flags);
353 
354 	return;
355 
356 err_destroy_qp:
357 	WARN_ON_ONCE(qp);
358 }
359 
360 static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
361 {
362 	u16 qp_index;
363 
364 	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
365 		setup_qp(gsi, qp_index);
366 }
367 
/*
 * Modify the GSI QP state.  The request is applied to the hardware RX
 * QP; once that QP has reached RTS, the per-P_Key TX QPs are created
 * and brought to RTS as well.  State changes are serialized by
 * gsi->mutex.  Returns 0 or the errno from modifying the RX QP.
 */
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
			  int attr_mask)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	int ret;

	mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);

	mutex_lock(&gsi->mutex);
	ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
	if (ret) {
		mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
		goto unlock;
	}

	if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
		setup_qps(gsi);

unlock:
	mutex_unlock(&gsi->mutex);

	return ret;
}
392 
/*
 * Query the GSI QP.  Attributes are taken from the hardware RX QP,
 * except the capabilities, which are reported as originally requested
 * by the creator (the RX QP's send caps may have been zeroed in
 * mlx5_ib_gsi_create_qp() when TX QPs are in use).
 */
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
			 int qp_attr_mask,
			 struct ib_qp_init_attr *qp_init_attr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	int ret;

	mutex_lock(&gsi->mutex);
	ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
	qp_init_attr->cap = gsi->cap;
	mutex_unlock(&gsi->mutex);

	return ret;
}
407 
408 /* Call with gsi->lock locked */
409 static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
410 				      struct ib_ud_wr *wr, struct ib_wc *wc)
411 {
412 	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
413 	struct mlx5_ib_gsi_wr *gsi_wr;
414 
415 	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
416 		mlx5_ib_warn(dev, "no available GSI work request.\n");
417 		return -ENOMEM;
418 	}
419 
420 	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
421 				       gsi->cap.max_send_wr];
422 	gsi->outstanding_pi++;
423 
424 	if (!wc) {
425 		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
426 		gsi_wr->wc.pkey_index = wr->pkey_index;
427 		gsi_wr->wc.wr_id = wr->wr.wr_id;
428 	} else {
429 		gsi_wr->wc = *wc;
430 		gsi_wr->completed = true;
431 	}
432 
433 	gsi_wr->cqe.done = &handle_single_completion;
434 	wr->wr.wr_cqe = &gsi_wr->cqe;
435 
436 	return 0;
437 }
438 
/* Call with gsi->lock locked */
/*
 * Complete a WR without transmitting it: queue a synthetic
 * IB_WC_SUCCESS completion for it and flush completions to the user
 * CQ.  Used when no TX QP exists for the WR's P_Key index.
 */
static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
				    struct ib_ud_wr *wr)
{
	struct ib_wc wc = {
		/* braced sub-initializer targets ib_wc's leading anonymous union */
		{ .wr_id = wr->wr.wr_id },
		.status = IB_WC_SUCCESS,
		.opcode = IB_WC_SEND,
		.qp = &gsi->ibqp,
	};
	int ret;

	ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
	if (ret)
		return ret;

	generate_completions(gsi);

	return 0;
}
459 
460 /* Call with gsi->lock locked */
461 static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
462 {
463 	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
464 	int qp_index = wr->pkey_index;
465 
466 	if (!mlx5_ib_deth_sqpn_cap(dev))
467 		return gsi->rx_qp;
468 
469 	if (qp_index >= gsi->num_qps)
470 		return NULL;
471 
472 	return gsi->tx_qps[qp_index];
473 }
474 
475 int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
476 			  const struct ib_send_wr **bad_wr)
477 {
478 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
479 	struct ib_qp *tx_qp;
480 	unsigned long flags;
481 	int ret;
482 
483 	for (; wr; wr = wr->next) {
484 		struct ib_ud_wr cur_wr = *ud_wr(wr);
485 
486 		cur_wr.wr.next = NULL;
487 
488 		spin_lock_irqsave(&gsi->lock, flags);
489 		tx_qp = get_tx_qp(gsi, &cur_wr);
490 		if (!tx_qp) {
491 			ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
492 			if (ret)
493 				goto err;
494 			spin_unlock_irqrestore(&gsi->lock, flags);
495 			continue;
496 		}
497 
498 		ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
499 		if (ret)
500 			goto err;
501 
502 		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
503 		if (ret) {
504 			/* Undo the effect of adding the outstanding wr */
505 			gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
506 					      gsi->cap.max_send_wr;
507 			goto err;
508 		}
509 		spin_unlock_irqrestore(&gsi->lock, flags);
510 	}
511 
512 	return 0;
513 
514 err:
515 	spin_unlock_irqrestore(&gsi->lock, flags);
516 	*bad_wr = wr;
517 	return ret;
518 }
519 
520 int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
521 			  const struct ib_recv_wr **bad_wr)
522 {
523 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
524 
525 	return ib_post_recv(gsi->rx_qp, wr, bad_wr);
526 }
527 
528 void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
529 {
530 	if (!gsi)
531 		return;
532 
533 	mutex_lock(&gsi->mutex);
534 	setup_qps(gsi);
535 	mutex_unlock(&gsi->mutex);
536 }
537