xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_iq.c (revision d0b2dbfa)
1 /*-
2  * Copyright (c) 2021-2022 NVIDIA corporation & affiliates.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 /*
27  * The internal queue, IQ, code is more or less a stripped down copy
28  * of the existing SQ managing code with exception of:
29  *
30  * - an optional single segment memory buffer which can be read or
31  *   written as a whole by the hardware, may be provided.
32  *
33  * - an optional completion callback for all transmit operations, may
34  *   be provided.
35  *
36  * - does not support mbufs.
37  */
38 
39 #include <dev/mlx5/mlx5_en/en.h>
40 
41 static void
42 mlx5e_iq_poll(struct mlx5e_iq *iq, int budget)
43 {
44 	const struct mlx5_cqe64 *cqe;
45 	u16 ci;
46 	u16 iqcc;
47 
48 	/*
49 	 * iq->cc must be updated only after mlx5_cqwq_update_db_record(),
50 	 * otherwise a cq overrun may occur
51 	 */
52 	iqcc = iq->cc;
53 
54 	while (budget-- > 0) {
55 
56 		cqe = mlx5e_get_cqe(&iq->cq);
57 		if (!cqe)
58 			break;
59 
60 		mlx5_cqwq_pop(&iq->cq.wq);
61 
62 		ci = iqcc & iq->wq.sz_m1;
63 
64 		if (likely(iq->data[ci].dma_sync != 0)) {
65 			/* make sure data written by hardware is visible to CPU */
66 			bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map, iq->data[ci].dma_sync);
67 			bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map);
68 
69 			iq->data[ci].dma_sync = 0;
70 		}
71 
72 		if (likely(iq->data[ci].callback != NULL)) {
73 			iq->data[ci].callback(iq->data[ci].arg);
74 			iq->data[ci].callback = NULL;
75 		}
76 
77 		if (unlikely(iq->data[ci].p_refcount != NULL)) {
78 			atomic_add_int(iq->data[ci].p_refcount, -1);
79 			iq->data[ci].p_refcount = NULL;
80 		}
81 		iqcc += iq->data[ci].num_wqebbs;
82 	}
83 
84 	mlx5_cqwq_update_db_record(&iq->cq.wq);
85 
86 	/* Ensure cq space is freed before enabling more cqes */
87 	atomic_thread_fence_rel();
88 
89 	iq->cc = iqcc;
90 }
91 
92 static void
93 mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
94 {
95 	struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq);
96 
97 	mtx_lock(&iq->comp_lock);
98 	mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX);
99 	mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
100 	mtx_unlock(&iq->comp_lock);
101 }
102 
103 void
104 mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt)
105 {
106 	u16 pi = iq->pc & iq->wq.sz_m1;
107 	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
108 
109 	mtx_assert(&iq->lock, MA_OWNED);
110 
111 	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
112 
113 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP);
114 	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
115 	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
116 
117 	/* Copy data for doorbell */
118 	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
119 
120 	iq->data[pi].callback = NULL;
121 	iq->data[pi].arg = NULL;
122 	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
123 	iq->data[pi].dma_sync = 0;
124 	iq->pc += iq->data[pi].num_wqebbs;
125 }
126 
127 static void
128 mlx5e_iq_free_db(struct mlx5e_iq *iq)
129 {
130 	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
131 	int x;
132 
133 	for (x = 0; x != wq_sz; x++) {
134 		if (likely(iq->data[x].dma_sync != 0)) {
135 			bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map);
136 			iq->data[x].dma_sync = 0;
137 		}
138 		if (likely(iq->data[x].callback != NULL)) {
139 			iq->data[x].callback(iq->data[x].arg);
140 			iq->data[x].callback = NULL;
141 		}
142 		if (unlikely(iq->data[x].p_refcount != NULL)) {
143 			atomic_add_int(iq->data[x].p_refcount, -1);
144 			iq->data[x].p_refcount = NULL;
145 		}
146 		bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
147 	}
148 	free(iq->data, M_MLX5EN);
149 }
150 
151 static int
152 mlx5e_iq_alloc_db(struct mlx5e_iq *iq)
153 {
154 	int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
155 	int err;
156 	int x;
157 
158 	iq->data = malloc_domainset(wq_sz * sizeof(iq->data[0]), M_MLX5EN,
159 	    mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO);
160 
161 	/* Create DMA descriptor maps */
162 	for (x = 0; x != wq_sz; x++) {
163 		err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[x].dma_map);
164 		if (err != 0) {
165 			while (x--)
166 				bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
167 			free(iq->data, M_MLX5EN);
168 			return (err);
169 		}
170 	}
171 	return (0);
172 }
173 
174 static int
175 mlx5e_iq_create(struct mlx5e_channel *c,
176     struct mlx5e_sq_param *param,
177     struct mlx5e_iq *iq)
178 {
179 	struct mlx5e_priv *priv = c->priv;
180 	struct mlx5_core_dev *mdev = priv->mdev;
181 	void *sqc = param->sqc;
182 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
183 	int err;
184 
185 	/* Create DMA descriptor TAG */
186 	if ((err = -bus_dma_tag_create(
187 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
188 	    1,				/* any alignment */
189 	    0,				/* no boundary */
190 	    BUS_SPACE_MAXADDR,		/* lowaddr */
191 	    BUS_SPACE_MAXADDR,		/* highaddr */
192 	    NULL, NULL,			/* filter, filterarg */
193 	    PAGE_SIZE,			/* maxsize */
194 	    1,				/* nsegments */
195 	    PAGE_SIZE,			/* maxsegsize */
196 	    0,				/* flags */
197 	    NULL, NULL,			/* lockfunc, lockfuncarg */
198 	    &iq->dma_tag)))
199 		goto done;
200 
201 	iq->mkey_be = cpu_to_be32(priv->mr.key);
202 	iq->priv = priv;
203 
204 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq,
205 	    &iq->wq, &iq->wq_ctrl);
206 	if (err)
207 		goto err_free_dma_tag;
208 
209 	iq->wq.db = &iq->wq.db[MLX5_SND_DBR];
210 
211 	err = mlx5e_iq_alloc_db(iq);
212 	if (err)
213 		goto err_iq_wq_destroy;
214 
215 	return (0);
216 
217 err_iq_wq_destroy:
218 	mlx5_wq_destroy(&iq->wq_ctrl);
219 
220 err_free_dma_tag:
221 	bus_dma_tag_destroy(iq->dma_tag);
222 done:
223 	return (err);
224 }
225 
226 static void
227 mlx5e_iq_destroy(struct mlx5e_iq *iq)
228 {
229 	mlx5e_iq_free_db(iq);
230 	mlx5_wq_destroy(&iq->wq_ctrl);
231 	bus_dma_tag_destroy(iq->dma_tag);
232 }
233 
234 static int
235 mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
236     const struct mlx5_sq_bfreg *bfreg, int tis_num)
237 {
238 	void *in;
239 	void *sqc;
240 	void *wq;
241 	int inlen;
242 	int err;
243 	u8 ts_format;
244 
245 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
246 	    sizeof(u64) * iq->wq_ctrl.buf.npages;
247 	in = mlx5_vzalloc(inlen);
248 	if (in == NULL)
249 		return (-ENOMEM);
250 
251 	iq->uar_map = bfreg->map;
252 
253 	ts_format = mlx5_get_sq_default_ts(iq->priv->mdev);
254 	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
255 	wq = MLX5_ADDR_OF(sqc, sqc, wq);
256 
257 	memcpy(sqc, param->sqc, sizeof(param->sqc));
258 
259 	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
260 	MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn);
261 	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
262 	MLX5_SET(sqc, sqc, ts_format, ts_format);
263 	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
264 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
265 	MLX5_SET(sqc, sqc, allow_swp, 1);
266 
267 	/* SQ remap support requires reg_umr privileges level */
268 	if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) {
269 		MLX5_SET(sqc, sqc, qos_remap_en, 1);
270 		if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq))
271 			MLX5_SET(sqc, sqc, reg_umr, 1);
272 		 else
273 			mlx5_en_err(iq->priv->ifp,
274 			    "No reg umr SQ capability, SQ remap disabled\n");
275 	}
276 
277 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
278 	MLX5_SET(wq, wq, uar_page, bfreg->index);
279 	MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
280 	    MLX5_ADAPTER_PAGE_SHIFT);
281 	MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma);
282 
283 	mlx5_fill_page_array(&iq->wq_ctrl.buf,
284 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
285 
286 	err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn);
287 
288 	kvfree(in);
289 
290 	return (err);
291 }
292 
293 static int
294 mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state)
295 {
296 	void *in;
297 	void *sqc;
298 	int inlen;
299 	int err;
300 
301 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
302 	in = mlx5_vzalloc(inlen);
303 	if (in == NULL)
304 		return (-ENOMEM);
305 
306 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
307 
308 	MLX5_SET(modify_sq_in, in, sqn, iq->sqn);
309 	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
310 	MLX5_SET(sqc, sqc, state, next_state);
311 
312 	err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen);
313 
314 	kvfree(in);
315 
316 	return (err);
317 }
318 
319 static void
320 mlx5e_iq_disable(struct mlx5e_iq *iq)
321 {
322 	mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn);
323 }
324 
325 int
326 mlx5e_iq_open(struct mlx5e_channel *c,
327     struct mlx5e_sq_param *sq_param,
328     struct mlx5e_cq_param *cq_param,
329     struct mlx5e_iq *iq)
330 {
331 	int err;
332 
333 	err = mlx5e_open_cq(c->priv, cq_param, &iq->cq,
334 	    &mlx5e_iq_completion, c->ix);
335 	if (err)
336 		return (err);
337 
338 	err = mlx5e_iq_create(c, sq_param, iq);
339 	if (err)
340 		goto err_close_cq;
341 
342 	err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]);
343 	if (err)
344 		goto err_destroy_sq;
345 
346 	err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
347 	if (err)
348 		goto err_disable_sq;
349 
350 	WRITE_ONCE(iq->running, 1);
351 
352 	return (0);
353 
354 err_disable_sq:
355 	mlx5e_iq_disable(iq);
356 err_destroy_sq:
357 	mlx5e_iq_destroy(iq);
358 err_close_cq:
359 	mlx5e_close_cq(&iq->cq);
360 
361 	return (err);
362 }
363 
364 static void
365 mlx5e_iq_drain(struct mlx5e_iq *iq)
366 {
367 	struct mlx5_core_dev *mdev = iq->priv->mdev;
368 
369 	/*
370 	 * Check if already stopped.
371 	 *
372 	 * NOTE: Serialization of this function is managed by the
373 	 * caller ensuring the priv's state lock is locked or in case
374 	 * of rate limit support, a single thread manages drain and
375 	 * resume of SQs. The "running" variable can therefore safely
376 	 * be read without any locks.
377 	 */
378 	if (READ_ONCE(iq->running) == 0)
379 		return;
380 
381 	/* don't put more packets into the SQ */
382 	WRITE_ONCE(iq->running, 0);
383 
384 	/* wait till SQ is empty or link is down */
385 	mtx_lock(&iq->lock);
386 	while (iq->cc != iq->pc &&
387 	    (iq->priv->media_status_last & IFM_ACTIVE) != 0 &&
388 	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
389 	    pci_channel_offline(mdev->pdev) == 0) {
390 		mtx_unlock(&iq->lock);
391 		msleep(1);
392 		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
393 		mtx_lock(&iq->lock);
394 	}
395 	mtx_unlock(&iq->lock);
396 
397 	/* error out remaining requests */
398 	(void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
399 
400 	/* wait till SQ is empty */
401 	mtx_lock(&iq->lock);
402 	while (iq->cc != iq->pc &&
403 	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
404 	    pci_channel_offline(mdev->pdev) == 0) {
405 		mtx_unlock(&iq->lock);
406 		msleep(1);
407 		iq->cq.mcq.comp(&iq->cq.mcq, NULL);
408 		mtx_lock(&iq->lock);
409 	}
410 	mtx_unlock(&iq->lock);
411 }
412 
413 void
414 mlx5e_iq_close(struct mlx5e_iq *iq)
415 {
416 	mlx5e_iq_drain(iq);
417 	mlx5e_iq_disable(iq);
418 	mlx5e_iq_destroy(iq);
419 	mlx5e_close_cq(&iq->cq);
420 }
421 
422 void
423 mlx5e_iq_static_init(struct mlx5e_iq *iq)
424 {
425 	mtx_init(&iq->lock, "mlx5iq",
426 	    MTX_NETWORK_LOCK " IQ", MTX_DEF);
427 	mtx_init(&iq->comp_lock, "mlx5iq_comp",
428 	    MTX_NETWORK_LOCK " IQ COMP", MTX_DEF);
429 }
430 
431 void
432 mlx5e_iq_static_destroy(struct mlx5e_iq *iq)
433 {
434 	mtx_destroy(&iq->lock);
435 	mtx_destroy(&iq->comp_lock);
436 }
437 
438 void
439 mlx5e_iq_notify_hw(struct mlx5e_iq *iq)
440 {
441 	mtx_assert(&iq->lock, MA_OWNED);
442 
443 	/* Check if we need to write the doorbell */
444 	if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0))
445 		return;
446 
447 	/* Ensure wqe is visible to device before updating doorbell record */
448 	wmb();
449 
450 	*iq->wq.db = cpu_to_be32(iq->pc);
451 
452 	/*
453 	 * Ensure the doorbell record is visible to device before ringing
454 	 * the doorbell:
455 	 */
456 	wmb();
457 
458 	mlx5_write64(iq->doorbell.d32, iq->uar_map,
459 	    MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
460 
461 	iq->doorbell.d64 = 0;
462 }
463 
464 static inline bool
465 mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n)
466 {
467         u16 cc = iq->cc;
468         u16 pc = iq->pc;
469 
470         return ((iq->wq.sz_m1 & (cc - pc)) >= n || cc == pc);
471 }
472 
473 int
474 mlx5e_iq_get_producer_index(struct mlx5e_iq *iq)
475 {
476 	u16 pi;
477 
478 	mtx_assert(&iq->lock, MA_OWNED);
479 
480 	if (unlikely(iq->running == 0))
481 		return (-1);
482 	if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
483 		return (-1);
484 
485 	/* Align IQ edge with NOPs to avoid WQE wrap around */
486 	pi = ((~iq->pc) & iq->wq.sz_m1);
487 	if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) {
488 		/* Send one multi NOP message instead of many */
489 		mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
490 		pi = ((~iq->pc) & iq->wq.sz_m1);
491 		if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)))
492 			return (-1);
493 	}
494 	return (iq->pc & iq->wq.sz_m1);
495 }
496 
497 static void
498 mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs,
499     int nseg, int error)
500 {
501 	u64 *pdma_address = arg;
502 
503 	if (unlikely(error || nseg != 1))
504 		panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg);
505 
506 	*pdma_address = segs[0].ds_addr;
507 }
508 
509 CTASSERT(BUS_DMASYNC_POSTREAD != 0);
510 CTASSERT(BUS_DMASYNC_POSTWRITE != 0);
511 
512 void
513 mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size,
514     u64 *pdma_address, u32 dma_sync)
515 {
516 	int error;
517 
518 	error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size,
519 	    &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT);
520 	if (unlikely(error))
521 		panic("mlx5e_iq_load_memory: error=%d buffer=%p size=%zd", error, buffer, size);
522 
523 	switch (dma_sync) {
524 	case BUS_DMASYNC_PREREAD:
525 		iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD;
526 		break;
527 	case BUS_DMASYNC_PREWRITE:
528 		iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE;
529 		break;
530 	default:
531 		panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%d)", dma_sync);
532 	}
533 
534 	/* make sure data in buffer is visible to hardware */
535 	bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync);
536 }
537