/*-
 * Copyright (c) 2021-2022 NVIDIA corporation & affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_kern_tls.h"
#include "opt_rss.h"
#include "opt_ratelimit.h"

#include <dev/mlx5/mlx5_en/en.h>

#include <dev/mlx5/tls.h>

#include <dev/mlx5/fs.h>
#include <dev/mlx5/mlx5_core/fs_tcp.h>

#include <sys/ktls.h>
#include <opencrypto/cryptodev.h>

#ifdef KERN_TLS

static if_snd_tag_free_t mlx5e_tls_rx_snd_tag_free;
static if_snd_tag_modify_t mlx5e_tls_rx_snd_tag_modify;

static const struct if_snd_tag_sw mlx5e_tls_rx_snd_tag_sw = {
	.snd_tag_modify = mlx5e_tls_rx_snd_tag_modify,
	.snd_tag_free = mlx5e_tls_rx_snd_tag_free,
	.type = IF_SND_TAG_TYPE_TLS_RX
};

MALLOC_DEFINE(M_MLX5E_TLS_RX, "MLX5E_TLS_RX", "MLX5 ethernet HW TLS RX");

/* software TLS RX context */
struct mlx5_ifc_sw_tls_rx_cntx_bits {
	struct mlx5_ifc_tls_static_params_bits param;
	struct mlx5_ifc_tls_progress_params_bits progress;
	struct {
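		/*
		 * Field sizes below follow the bit-granular mlx5_ifc
		 * convention (an assumption stated for clarity): 8 rows
		 * of 0x20 bits give 256 bits of key material, enough
		 * for an AES-256 key.
		 */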
		uint8_t key_data[8][0x20];
		uint8_t key_len[0x20];
	} key;
};

CTASSERT(MLX5_ST_SZ_BYTES(sw_tls_rx_cntx) <= sizeof(((struct mlx5e_tls_rx_tag *)NULL)->crypto_params));
CTASSERT(MLX5_ST_SZ_BYTES(mkc) == sizeof(((struct mlx5e_tx_umr_wqe *)NULL)->mkc));

static const char *mlx5e_tls_rx_stats_desc[] = {
	MLX5E_TLS_RX_STATS(MLX5E_STATS_DESC)
};

static void mlx5e_tls_rx_work(struct work_struct *);
static bool mlx5e_tls_rx_snd_tag_find_tcp_sn_and_tls_rcd(struct mlx5e_tls_rx_tag *,
    uint32_t, uint32_t *, uint64_t *);

CTASSERT((MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param) % 16) == 0);

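/*
 * Illustrative example (not part of the original source): with
 * "num_channels" equal to 8 and RSS disabled, a flowid of 0x1234
 * maps to channel (0x1234 % 128) % 8 == 52 % 8 == 4.
 */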
static uint32_t
mlx5e_tls_rx_get_ch(struct mlx5e_priv *priv, uint32_t flowid, uint32_t flowtype)
{
	u32 ch;
#ifdef RSS
	u32 temp;
#endif

	/* keep this code synced with mlx5e_select_queue() */
	ch = priv->params.num_channels;
#ifdef RSS
	if (rss_hash2bucket(flowid, flowtype, &temp) == 0)
		ch = temp % ch;
	else
#endif
		ch = (flowid % 128) % ch;
	return (ch);
}

/*
 * This function gets a pointer to an internal queue, IQ, based on the
 * provided "flowid" and "flowtype". The IQ returned may in some rare
 * cases not be activated or running, but this is all handled by the
 * "mlx5e_iq_get_producer_index()" function.
 *
 * The idea behind this function is to spread the IQ traffic as much
 * as possible and to avoid congestion on the same IQ when processing
 * RX traffic.
 */
static struct mlx5e_iq *
mlx5e_tls_rx_get_iq(struct mlx5e_priv *priv, uint32_t flowid, uint32_t flowtype)
{
	/*
	 * NOTE: The channels array is only freed at detach
	 * and it is safe to return a pointer to the send tag
	 * inside the channels structure as long as we
	 * reference the priv.
	 */
	return (&priv->channel[mlx5e_tls_rx_get_ch(priv, flowid, flowtype)].iq);
}

static void
mlx5e_tls_rx_send_static_parameters_cb(void *arg)
{
	struct mlx5e_tls_rx_tag *ptag;

	ptag = (struct mlx5e_tls_rx_tag *)arg;

	m_snd_tag_rele(&ptag->tag);
}

/*
 * This function sends the so-called TLS RX static parameters to the
 * hardware. These parameters are temporarily stored in the
 * "crypto_params" field of the TLS RX tag. Most importantly, this
 * function sets the TCP sequence number (32-bit) and TLS record
 * number (64-bit) where the decryption can resume.
 *
 * Zero is returned upon success, else some error happened.
 */
static int
mlx5e_tls_rx_send_static_parameters(struct mlx5e_iq *iq, struct mlx5e_tls_rx_tag *ptag)
{
	const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_umr_wqe) +
	    MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param), MLX5_SEND_WQE_DS);
	struct mlx5e_tx_umr_wqe *wqe;
	int pi;

	mtx_lock(&iq->lock);
	pi = mlx5e_iq_get_producer_index(iq);
	if (pi < 0) {
		mtx_unlock(&iq->lock);
		return (-ENOMEM);
	}
	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);

	memset(wqe, 0, sizeof(*wqe));

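	/*
	 * Control segment encoding, stated here as a descriptive
	 * assumption: the opcode occupies bits 0..7, the WQE index
	 * bits 8..23 and the opcode modifier bits 24..31 of the
	 * 32-bit "opmod_idx_opcode" word.
	 */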
	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) |
	    MLX5_OPCODE_UMR | (MLX5_OPCODE_MOD_UMR_TLS_TIR_STATIC_PARAMS << 24));
	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
	wqe->ctrl.imm = cpu_to_be32(ptag->tirn << 8);
	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE | MLX5_FENCE_MODE_INITIATOR_SMALL;

	/* fill out UMR control segment */
	wqe->umr.flags = 0x80;	/* inline data */
	wqe->umr.bsf_octowords =
	    cpu_to_be16(MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param) / 16);

	/* copy in the static crypto parameters */
	memcpy(wqe + 1, MLX5_ADDR_OF(sw_tls_rx_cntx, ptag->crypto_params, param),
	    MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param));

	/* copy data for doorbell */
	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));

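	/*
	 * Each WQE basic block holds MLX5_SEND_WQEBB_NUM_DS 16-byte
	 * data segments; e.g., assuming four segments per block, a
	 * WQE of nine segments occupies three basic blocks.
	 */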
	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	iq->data[pi].callback = &mlx5e_tls_rx_send_static_parameters_cb;
	iq->data[pi].arg = ptag;

	m_snd_tag_ref(&ptag->tag);

	iq->pc += iq->data[pi].num_wqebbs;

	mlx5e_iq_notify_hw(iq);

	mtx_unlock(&iq->lock);

	return (0);	/* success */
}

static void
mlx5e_tls_rx_send_progress_parameters_cb(void *arg)
{
	struct mlx5e_tls_rx_tag *ptag;

	ptag = (struct mlx5e_tls_rx_tag *)arg;

	complete(&ptag->progress_complete);
}

CTASSERT(MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, progress) ==
    sizeof(((struct mlx5e_tx_psv_wqe *)NULL)->psv));

/*
 * This function resets the state of the TIR context to start
 * searching for a valid TLS header and is used only when allocating
 * the TLS RX tag.
 *
 * Zero is returned upon success, else some error happened.
 */
static int
mlx5e_tls_rx_send_progress_parameters_sync(struct mlx5e_iq *iq,
    struct mlx5e_tls_rx_tag *ptag)
{
	const u32 ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5e_tx_psv_wqe),
	    MLX5_SEND_WQE_DS);
	struct mlx5e_priv *priv;
	struct mlx5e_tx_psv_wqe *wqe;
	int pi;

	mtx_lock(&iq->lock);
	pi = mlx5e_iq_get_producer_index(iq);
	if (pi < 0) {
		mtx_unlock(&iq->lock);
		return (-ENOMEM);
	}
	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);

	memset(wqe, 0, sizeof(*wqe));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) |
	    MLX5_OPCODE_SET_PSV | (MLX5_OPCODE_MOD_PSV_TLS_TIR_PROGRESS_PARAMS << 24));
	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;

	/* copy in the PSV control segment */
	memcpy(&wqe->psv, MLX5_ADDR_OF(sw_tls_rx_cntx, ptag->crypto_params, progress),
	    sizeof(wqe->psv));

	/* copy data for doorbell */
	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));

	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	iq->data[pi].callback = &mlx5e_tls_rx_send_progress_parameters_cb;
	iq->data[pi].arg = ptag;

	iq->pc += iq->data[pi].num_wqebbs;

	init_completion(&ptag->progress_complete);

	mlx5e_iq_notify_hw(iq);

	mtx_unlock(&iq->lock);

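	/* wait for completion, polling once per second, unless the device failed */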
	while (1) {
		if (wait_for_completion_timeout(&ptag->progress_complete, hz) != 0)
			break;
		priv = container_of(iq, struct mlx5e_channel, iq)->priv;
		if (priv->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
		    pci_channel_offline(priv->mdev->pdev) != 0)
			return (-EWOULDBLOCK);
	}

	return (0);	/* success */
}

CTASSERT(MLX5E_TLS_RX_PROGRESS_BUFFER_SIZE >= MLX5_ST_SZ_BYTES(tls_progress_params));
CTASSERT(MLX5E_TLS_RX_PROGRESS_BUFFER_SIZE <= PAGE_SIZE);

struct mlx5e_get_tls_progress_params_wqe {
	struct mlx5_wqe_ctrl_seg ctrl;
	struct mlx5_seg_get_psv	 psv;
};

static void
mlx5e_tls_rx_receive_progress_parameters_cb(void *arg)
{
	struct mlx5e_tls_rx_tag *ptag;
	struct mlx5e_iq *iq;
	uint32_t tcp_curr_sn_he;
	uint32_t tcp_next_sn_he;
	uint64_t tls_rcd_num;
	void *buffer;

	ptag = (struct mlx5e_tls_rx_tag *)arg;
	buffer = mlx5e_tls_rx_get_progress_buffer(ptag);

	MLX5E_TLS_RX_TAG_LOCK(ptag);

	ptag->tcp_resync_pending = 0;

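	/*
	 * Only continue when the hardware is still tracking the record
	 * stream and decryption is currently not offloaded; any other
	 * combination of states leaves nothing to resynchronize.
	 */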
	switch (MLX5_GET(tls_progress_params, buffer, record_tracker_state)) {
	case MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING:
		break;
	default:
		goto done;
	}

	switch (MLX5_GET(tls_progress_params, buffer, auth_state)) {
	case MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD:
		break;
	default:
		goto done;
	}

	tcp_curr_sn_he = MLX5_GET(tls_progress_params, buffer, hw_resync_tcp_sn);

	if (mlx5e_tls_rx_snd_tag_find_tcp_sn_and_tls_rcd(ptag, tcp_curr_sn_he,
	    &tcp_next_sn_he, &tls_rcd_num)) {
		MLX5_SET64(sw_tls_rx_cntx, ptag->crypto_params,
		    param.initial_record_number, tls_rcd_num);
		MLX5_SET(sw_tls_rx_cntx, ptag->crypto_params,
		    param.resync_tcp_sn, tcp_curr_sn_he);

		iq = mlx5e_tls_rx_get_iq(
		    container_of(ptag->tls_rx, struct mlx5e_priv, tls_rx),
		    ptag->flowid, ptag->flowtype);

		if (mlx5e_tls_rx_send_static_parameters(iq, ptag) != 0)
			MLX5E_TLS_RX_STAT_INC(ptag, rx_error, 1);
	}
done:
	MLX5E_TLS_RX_TAG_UNLOCK(ptag);

	m_snd_tag_rele(&ptag->tag);
}

/*
 * This function queries the hardware for the current state of the TIR
 * in question. It is typically called when encrypted data is received
 * to re-establish hardware decryption of received TLS data.
 *
 * Zero is returned upon success, else some error happened.
 */
static int
mlx5e_tls_rx_receive_progress_parameters(struct mlx5e_iq *iq, struct mlx5e_tls_rx_tag *ptag)
{
	struct mlx5e_get_tls_progress_params_wqe *wqe;
	const u32 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
	u64 dma_address;
	int pi;

	mtx_lock(&iq->lock);
	pi = mlx5e_iq_get_producer_index(iq);
	if (pi < 0) {
		mtx_unlock(&iq->lock);
		return (-ENOMEM);
	}

	mlx5e_iq_load_memory_single(iq, pi,
	    mlx5e_tls_rx_get_progress_buffer(ptag),
	    MLX5E_TLS_RX_PROGRESS_BUFFER_SIZE,
	    &dma_address, BUS_DMASYNC_PREREAD);

	wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);

	memset(wqe, 0, sizeof(*wqe));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) |
	    MLX5_OPCODE_GET_PSV | (MLX5_OPCODE_MOD_PSV_TLS_TIR_PROGRESS_PARAMS << 24));
	wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	wqe->psv.num_psv = 1 << 4;
	wqe->psv.l_key = iq->mkey_be;
	wqe->psv.psv_index[0] = cpu_to_be32(ptag->tirn);
	wqe->psv.va = cpu_to_be64(dma_address);

	/* copy data for doorbell */
	memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));

	iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	iq->data[pi].callback = &mlx5e_tls_rx_receive_progress_parameters_cb;
	iq->data[pi].arg = ptag;

	m_snd_tag_ref(&ptag->tag);

	iq->pc += iq->data[pi].num_wqebbs;

	mlx5e_iq_notify_hw(iq);

	mtx_unlock(&iq->lock);

	return (0);	/* success */
}

/*
 * This is the import function for TLS RX tags.
 */
static int
mlx5e_tls_rx_tag_import(void *arg, void **store, int cnt, int domain, int flags)
{
	struct mlx5e_tls_rx_tag *ptag;
	int i;

	for (i = 0; i != cnt; i++) {
		ptag = malloc_domainset(sizeof(*ptag), M_MLX5E_TLS_RX,
		    mlx5_dev_domainset(arg), flags | M_ZERO);
		mtx_init(&ptag->mtx, "mlx5-tls-rx-tag-mtx", NULL, MTX_DEF);
		INIT_WORK(&ptag->work, mlx5e_tls_rx_work);
		store[i] = ptag;
	}
	return (i);
}

/*
 * This is the release function for TLS RX tags.
 */
static void
mlx5e_tls_rx_tag_release(void *arg, void **store, int cnt)
{
	struct mlx5e_tls_rx_tag *ptag;
	int i;

	for (i = 0; i != cnt; i++) {
		ptag = store[i];

		flush_work(&ptag->work);
		mtx_destroy(&ptag->mtx);
		free(ptag, M_MLX5E_TLS_RX);
	}
}

/*
 * This is a convenience function to free TLS RX tags. It resets some
 * selected fields, updates the number of resources and returns the
 * TLS RX tag to the UMA pool of free tags.
 */
static void
mlx5e_tls_rx_tag_zfree(struct mlx5e_tls_rx_tag *ptag)
{
	/* make sure any unhandled taskqueue events are ignored */
	ptag->state = MLX5E_TLS_RX_ST_FREED;

	/* reset some variables */
	ptag->dek_index = 0;
	ptag->dek_index_ok = 0;
	ptag->tirn = 0;
	ptag->flow_rule = NULL;
	ptag->tcp_resync_active = 0;
	ptag->tcp_resync_pending = 0;

	/* avoid leaking keys */
	memset(ptag->crypto_params, 0, sizeof(ptag->crypto_params));

	/* update number of resources in use */
	atomic_add_32(&ptag->tls_rx->num_resources, -1U);

	/* return tag to UMA */
	uma_zfree(ptag->tls_rx->zone, ptag);
}

/*
 * This function enables TLS RX support for the given NIC, if all
 * needed firmware capabilities are present.
 */
int
mlx5e_tls_rx_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tls_rx *ptls = &priv->tls_rx;
	struct sysctl_oid *node;
	uint32_t x;

	if (MLX5_CAP_GEN(priv->mdev, tls_rx) == 0 ||
	    MLX5_CAP_GEN(priv->mdev, log_max_dek) == 0 ||
	    MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version) == 0)
		return (0);

	ptls->wq = create_singlethread_workqueue("mlx5-tls-rx-wq");
	if (ptls->wq == NULL)
		return (ENOMEM);

	sysctl_ctx_init(&ptls->ctx);

	snprintf(ptls->zname, sizeof(ptls->zname),
	    "mlx5_%u_tls_rx", device_get_unit(priv->mdev->pdev->dev.bsddev));

	ptls->zone = uma_zcache_create(ptls->zname,
	    sizeof(struct mlx5e_tls_rx_tag), NULL, NULL, NULL, NULL,
	    mlx5e_tls_rx_tag_import, mlx5e_tls_rx_tag_release, priv->mdev,
	    UMA_ZONE_UNMANAGED);

	/* shared between RX and TX TLS */
	ptls->max_resources = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_dek) - 1);

	for (x = 0; x != MLX5E_TLS_RX_STATS_NUM; x++)
		ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK);

	ptls->init = 1;

	node = SYSCTL_ADD_NODE(&priv->sysctl_ctx,
	    SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
	    "tls_rx", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Hardware TLS receive offload");
	if (node == NULL)
		return (0);

	mlx5e_create_counter_stats(&ptls->ctx,
	    SYSCTL_CHILDREN(node), "stats",
	    mlx5e_tls_rx_stats_desc, MLX5E_TLS_RX_STATS_NUM,
	    ptls->stats.arg);

	return (0);
}

/*
 * This function disables TLS RX support for the given NIC.
 */
void
mlx5e_tls_rx_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tls_rx *ptls = &priv->tls_rx;
	uint32_t x;

	if (ptls->init == 0)
		return;

	ptls->init = 0;
	flush_workqueue(ptls->wq);
	sysctl_ctx_free(&ptls->ctx);
	uma_zdestroy(ptls->zone);
	destroy_workqueue(ptls->wq);

	/* check if all resources are freed */
	MPASS(priv->tls_rx.num_resources == 0);

	for (x = 0; x != MLX5E_TLS_RX_STATS_NUM; x++)
		counter_u64_free(ptls->stats.arg[x]);
}

/*
 * This function is used to serialize sleeping firmware operations
 * needed in order to establish and destroy a TLS RX tag.
 */
static void
mlx5e_tls_rx_work(struct work_struct *work)
{
	struct mlx5e_tls_rx_tag *ptag;
	struct mlx5e_priv *priv;
	int err;

	ptag = container_of(work, struct mlx5e_tls_rx_tag, work);
	priv = container_of(ptag->tls_rx, struct mlx5e_priv, tls_rx);

	switch (ptag->state) {
	case MLX5E_TLS_RX_ST_INIT:
		/* try to allocate new TIR context */
		err = mlx5_tls_open_tir(priv->mdev, priv->tdn,
		    priv->channel[mlx5e_tls_rx_get_ch(priv, ptag->flowid, ptag->flowtype)].rqtn,
		    &ptag->tirn);
		if (err) {
			MLX5E_TLS_RX_STAT_INC(ptag, rx_error, 1);
			break;
		}
		MLX5_SET(sw_tls_rx_cntx, ptag->crypto_params, progress.pd, ptag->tirn);

		/* try to allocate a DEK context ID */
		err = mlx5_encryption_key_create(priv->mdev, priv->pdn,
		    MLX5_ADDR_OF(sw_tls_rx_cntx, ptag->crypto_params, key.key_data),
		    MLX5_GET(sw_tls_rx_cntx, ptag->crypto_params, key.key_len),
		    &ptag->dek_index);
		if (err) {
			MLX5E_TLS_RX_STAT_INC(ptag, rx_error, 1);
			break;
		}

		MLX5_SET(sw_tls_rx_cntx, ptag->crypto_params, param.dek_index, ptag->dek_index);

		ptag->dek_index_ok = 1;

		MLX5E_TLS_RX_TAG_LOCK(ptag);
		if (ptag->state == MLX5E_TLS_RX_ST_INIT)
			ptag->state = MLX5E_TLS_RX_ST_SETUP;
		MLX5E_TLS_RX_TAG_UNLOCK(ptag);
		break;

	case MLX5E_TLS_RX_ST_RELEASE:
		/* remove flow rule for incoming traffic, if any */
		if (ptag->flow_rule != NULL)
			mlx5e_accel_fs_del_inpcb(ptag->flow_rule);

		/* try to destroy DEK context by ID */
		if (ptag->dek_index_ok)
			mlx5_encryption_key_destroy(priv->mdev, ptag->dek_index);

		/* try to destroy TIR context by ID */
		if (ptag->tirn != 0)
			mlx5_tls_close_tir(priv->mdev, ptag->tirn);

		/* free tag */
		mlx5e_tls_rx_tag_zfree(ptag);
		break;

	default:
		break;
	}
}

/*
 * This function translates the crypto parameters into the format used
 * by the firmware and hardware. Currently only AES-128 and AES-256
 * are supported for TLS v1.2 and TLS v1.3.
 *
 * Returns zero on success, else an error happened.
 */
static int
mlx5e_tls_rx_set_params(void *ctx, struct inpcb *inp, const struct tls_session_params *en)
{
	uint32_t tcp_sn_he;
	uint64_t tls_sn_he;

	MLX5_SET(sw_tls_rx_cntx, ctx, param.const_2, 2);
	if (en->tls_vminor == TLS_MINOR_VER_TWO)
		MLX5_SET(sw_tls_rx_cntx, ctx, param.tls_version, 2); /* v1.2 */
	else
		MLX5_SET(sw_tls_rx_cntx, ctx, param.tls_version, 3); /* v1.3 */
	MLX5_SET(sw_tls_rx_cntx, ctx, param.const_1, 1);
	MLX5_SET(sw_tls_rx_cntx, ctx, param.encryption_standard, 1); /* TLS */

	/* copy the initial vector in place */
	switch (en->iv_len) {
	case MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param.gcm_iv):
	case MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param.gcm_iv) +
	     MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, param.implicit_iv):
		memcpy(MLX5_ADDR_OF(sw_tls_rx_cntx, ctx, param.gcm_iv),
		    en->iv, en->iv_len);
		break;
	default:
		return (EINVAL);
	}

	if (en->cipher_key_len <= MLX5_FLD_SZ_BYTES(sw_tls_rx_cntx, key.key_data)) {
		memcpy(MLX5_ADDR_OF(sw_tls_rx_cntx, ctx, key.key_data),
		    en->cipher_key, en->cipher_key_len);
		MLX5_SET(sw_tls_rx_cntx, ctx, key.key_len, en->cipher_key_len);
	} else {
		return (EINVAL);
	}

	if (__predict_false(inp == NULL ||
	    ktls_get_rx_sequence(inp, &tcp_sn_he, &tls_sn_he) != 0))
		return (EINVAL);

	MLX5_SET64(sw_tls_rx_cntx, ctx, param.initial_record_number, tls_sn_he);
	MLX5_SET(sw_tls_rx_cntx, ctx, param.resync_tcp_sn, tcp_sn_he);

	return (0);
}

/* Verify zero default */
CTASSERT(MLX5E_TLS_RX_ST_INIT == 0);

/*
 * This function is responsible for allocating a TLS RX tag. It is a
 * callback function invoked by the network stack.
 *
 * Returns zero on success, else an error happened.
 */
int
mlx5e_tls_rx_snd_tag_alloc(if_t ifp,
    union if_snd_tag_alloc_params *params,
    struct m_snd_tag **ppmt)
{
	struct mlx5e_iq *iq;
	struct mlx5e_priv *priv;
	struct mlx5e_tls_rx_tag *ptag;
	struct mlx5_flow_rule *flow_rule;
	const struct tls_session_params *en;
	uint32_t value;
	int error;

	priv = if_getsoftc(ifp);

	if (unlikely(priv->gone != 0 || priv->tls_rx.init == 0 ||
	    params->hdr.flowtype == M_HASHTYPE_NONE))
		return (EOPNOTSUPP);

	/* allocate new tag from zone, if any */
	ptag = uma_zalloc(priv->tls_rx.zone, M_NOWAIT);
	if (ptag == NULL)
		return (ENOMEM);

	/* sanity check default values */
	MPASS(ptag->dek_index == 0);
	MPASS(ptag->dek_index_ok == 0);

	/* setup TLS RX tag */
	ptag->tls_rx = &priv->tls_rx;
	ptag->flowtype = params->hdr.flowtype;
	ptag->flowid = params->hdr.flowid;

	value = atomic_fetchadd_32(&priv->tls_rx.num_resources, 1U);

	/* check resource limits */
	if (value >= priv->tls_rx.max_resources) {
		error = ENOMEM;
		goto failure;
	}

	en = &params->tls_rx.tls->params;

	/* only TLS v1.2 and v1.3 are currently supported */
	if (en->tls_vmajor != TLS_MAJOR_VER_ONE ||
	    (en->tls_vminor != TLS_MINOR_VER_TWO
#ifdef TLS_MINOR_VER_THREE
	     && en->tls_vminor != TLS_MINOR_VER_THREE
#endif
	     )) {
		error = EPROTONOSUPPORT;
		goto failure;
	}

	switch (en->cipher_algorithm) {
	case CRYPTO_AES_NIST_GCM_16:
		switch (en->cipher_key_len) {
		case 128 / 8:
			if (en->tls_vminor == TLS_MINOR_VER_TWO) {
				if (MLX5_CAP_TLS(priv->mdev, tls_1_2_aes_gcm_128) == 0) {
					error = EPROTONOSUPPORT;
					goto failure;
				}
			} else {
				if (MLX5_CAP_TLS(priv->mdev, tls_1_3_aes_gcm_128) == 0) {
					error = EPROTONOSUPPORT;
					goto failure;
				}
			}
			error = mlx5e_tls_rx_set_params(
			    ptag->crypto_params, params->tls_rx.inp, en);
			if (error)
				goto failure;
			break;

		case 256 / 8:
			if (en->tls_vminor == TLS_MINOR_VER_TWO) {
				if (MLX5_CAP_TLS(priv->mdev, tls_1_2_aes_gcm_256) == 0) {
					error = EPROTONOSUPPORT;
					goto failure;
				}
			} else {
				if (MLX5_CAP_TLS(priv->mdev, tls_1_3_aes_gcm_256) == 0) {
					error = EPROTONOSUPPORT;
					goto failure;
				}
			}
			error = mlx5e_tls_rx_set_params(
			    ptag->crypto_params, params->tls_rx.inp, en);
			if (error)
				goto failure;
			break;

		default:
			error = EINVAL;
			goto failure;
		}
		break;
	default:
		error = EPROTONOSUPPORT;
		goto failure;
	}

	/* store pointer to mbuf tag */
	MPASS(ptag->tag.refcount == 0);
	m_snd_tag_init(&ptag->tag, ifp, &mlx5e_tls_rx_snd_tag_sw);
	*ppmt = &ptag->tag;

	/* reset state */
	ptag->state = MLX5E_TLS_RX_ST_INIT;

	queue_work(priv->tls_rx.wq, &ptag->work);
	flush_work(&ptag->work);

	/* check that worker task completed successfully */
	MLX5E_TLS_RX_TAG_LOCK(ptag);
	if (ptag->state == MLX5E_TLS_RX_ST_SETUP) {
		ptag->state = MLX5E_TLS_RX_ST_READY;
		error = 0;
	} else {
		error = ENOMEM;
	}
	MLX5E_TLS_RX_TAG_UNLOCK(ptag);

	if (unlikely(error))
		goto cleanup;

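	/*
	 * Program the hardware TIR context before installing the flow
	 * rule, so that, presumably, the context is fully initialised
	 * by the time the first packet is steered to it.
	 */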
	iq = mlx5e_tls_rx_get_iq(priv, ptag->flowid, ptag->flowtype);

	/* establish connection between DEK and TIR */
	if (mlx5e_tls_rx_send_static_parameters(iq, ptag) != 0) {
		MLX5E_TLS_RX_STAT_INC(ptag, rx_error, 1);
		error = ENOMEM;
		goto cleanup;
	}

	MLX5_SET(sw_tls_rx_cntx, ptag->crypto_params, progress.auth_state,
	    MLX5E_TLS_RX_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD);
	MLX5_SET(sw_tls_rx_cntx, ptag->crypto_params, progress.record_tracker_state,
	    MLX5E_TLS_RX_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START);

	/* reset state to all zeros */
	if (mlx5e_tls_rx_send_progress_parameters_sync(iq, ptag) != 0) {
		MLX5E_TLS_RX_STAT_INC(ptag, rx_error, 1);
		error = ENOMEM;
		goto cleanup;
	}

	if (if_getpcp(ifp) != IFNET_PCP_NONE || params->tls_rx.vlan_id != 0) {
		/* create flow rule for TLS RX traffic (tagged) */
		flow_rule = mlx5e_accel_fs_add_inpcb(priv, params->tls_rx.inp,
		    ptag->tirn, MLX5_FS_DEFAULT_FLOW_TAG, params->tls_rx.vlan_id);
	} else {
		/* create flow rule for TLS RX traffic (untagged) */
		flow_rule = mlx5e_accel_fs_add_inpcb(priv, params->tls_rx.inp,
		    ptag->tirn, MLX5_FS_DEFAULT_FLOW_TAG, MLX5E_ACCEL_FS_ADD_INPCB_NO_VLAN);
	}

	if (IS_ERR_OR_NULL(flow_rule)) {
		MLX5E_TLS_RX_STAT_INC(ptag, rx_error, 1);
		error = ENOMEM;
		goto cleanup;
	}

	ptag->flow_rule = flow_rule;

	return (0);

cleanup:
	m_snd_tag_rele(&ptag->tag);
	return (error);

failure:
	mlx5e_tls_rx_tag_zfree(ptag);
	return (error);
}

/*
 * This function adds the TCP sequence number and TLS record number, in
 * host endian format, to a small database. When consecutive TLS records
 * have the same length, they are accumulated by counting instead of
 * occupying separate entries in the database. The dimension of the
 * database is such that it cannot store more than 1 GByte of
 * continuous TCP data, which avoids issues with TCP sequence number
 * wraparound. A record length of zero bytes has a special meaning: it
 * indicates that resync completed and that all data in the database
 * can be discarded. This function is called after the TCP stack has
 * re-assembled any out-of-order TCP fragments, so the TCP sequence
 * numbers passed in are expected to be sequential.
 *
 * Returns true if a so-called TLS RX resync operation is in progress,
 * else false.
 */
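/*
 * Worked example (illustrative numbers, not from the original source):
 * three back-to-back TLS records of 1469 bytes each, starting at TCP
 * SN 1000, collapse into one database entry with len == 1469 and
 * num == 3, covering SNs [1000 .. 5407). A following record of a
 * different length then opens a new entry.
 */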
static bool
mlx5e_tls_rx_snd_tag_add_tcp_sequence(struct mlx5e_tls_rx_tag *ptag,
    uint32_t tcp_sn_he, uint32_t len, uint64_t tls_rcd)
{
	uint16_t i, j, n;

	if (ptag->tcp_resync_active == 0 ||
	    ptag->tcp_resync_next != tcp_sn_he ||
	    len == 0) {
		/* start over again or terminate */
		ptag->tcp_resync_active = (len != 0);
		ptag->tcp_resync_len[0] = len;
		ptag->tcp_resync_num[0] = 1;
		ptag->tcp_resync_pc = (len != 0);
		ptag->tcp_resync_cc = 0;
		ptag->tcp_resync_start = tcp_sn_he;
		ptag->rcd_resync_start = tls_rcd;
	} else {
		i = (ptag->tcp_resync_pc - 1) & (MLX5E_TLS_RX_RESYNC_MAX - 1);
		n = ptag->tcp_resync_pc - ptag->tcp_resync_cc;

		/* check if the length is the same as last time */
		if (ptag->tcp_resync_len[i] == len &&
		    ptag->tcp_resync_num[i] != MLX5E_TLS_RX_NUM_MAX) {
			/* use existing entry */
			ptag->tcp_resync_num[i]++;
		} else if (n == MLX5E_TLS_RX_RESYNC_MAX) {
			j = ptag->tcp_resync_cc++ & (MLX5E_TLS_RX_RESYNC_MAX - 1);
			/* adjust starting TCP sequence number */
			ptag->rcd_resync_start += ptag->tcp_resync_num[j];
			ptag->tcp_resync_start += ptag->tcp_resync_len[j] * ptag->tcp_resync_num[j];
			i = ptag->tcp_resync_pc++ & (MLX5E_TLS_RX_RESYNC_MAX - 1);
			/* store new entry */
			ptag->tcp_resync_len[i] = len;
			ptag->tcp_resync_num[i] = 1;
		} else {
			i = ptag->tcp_resync_pc++ & (MLX5E_TLS_RX_RESYNC_MAX - 1);
			/* add new entry */
			ptag->tcp_resync_len[i] = len;
			ptag->tcp_resync_num[i] = 1;
		}
	}

	/* store next TCP SN in host endian format */
	ptag->tcp_resync_next = tcp_sn_he + len;

	return (ptag->tcp_resync_active);
}

/*
 * This function checks if the given TCP sequence number points to the
 * beginning of a valid TLS header.
 *
 * Returns true if a match is found, else false.
 */
static bool
mlx5e_tls_rx_snd_tag_find_tcp_sn_and_tls_rcd(struct mlx5e_tls_rx_tag *ptag,
    uint32_t tcp_sn_he, uint32_t *p_next_tcp_sn_he, uint64_t *p_tls_rcd)
{
	uint16_t i, j;
	uint32_t off = 0;
	uint32_t rcd = 0;
	uint32_t delta;
	uint32_t leap;

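	/*
	 * Walk the database from the consumer towards the producer
	 * index, accumulating the byte offset ("off") and the record
	 * count ("rcd") of all fully skipped entries, until the entry
	 * containing "tcp_sn_he" is found.
	 */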
	for (i = ptag->tcp_resync_cc; i != ptag->tcp_resync_pc; i++) {
		delta = tcp_sn_he - off - ptag->tcp_resync_start;

		/* check if subtraction went negative */
		if ((int32_t)delta < 0)
			break;

		j = i & (MLX5E_TLS_RX_RESYNC_MAX - 1);
		leap = ptag->tcp_resync_len[j] * ptag->tcp_resync_num[j];
		if (delta < leap) {
			if ((delta % ptag->tcp_resync_len[j]) == 0) {
				*p_next_tcp_sn_he = tcp_sn_he +
				    ptag->tcp_resync_len[j];
				*p_tls_rcd = ptag->rcd_resync_start +
				    (uint64_t)rcd +
				    (uint64_t)(delta / ptag->tcp_resync_len[j]);
				return (true);		/* success */
			}
			break;	/* invalid offset */
		}
		rcd += ptag->tcp_resync_num[j];
		off += leap;
	}
	return (false);	/* not found */
}

/*
 * This is a callback function from the network stack to keep track of
 * TLS RX TCP sequence numbers.
 *
 * Returns zero on success, else an error happened.
 */
static int
mlx5e_tls_rx_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
{
	struct mlx5e_tls_rx_tag *ptag;
	struct mlx5e_priv *priv;
	struct mlx5e_iq *iq;
	int err;

	ptag = container_of(pmt, struct mlx5e_tls_rx_tag, tag);
	priv = container_of(ptag->tls_rx, struct mlx5e_priv, tls_rx);

	if (unlikely(priv->gone != 0))
		return (ENXIO);

	iq = mlx5e_tls_rx_get_iq(priv, ptag->flowid, ptag->flowtype);

	MLX5E_TLS_RX_TAG_LOCK(ptag);

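	/*
	 * Feed the position of the new TLS record header into the
	 * resync database; if a resync is in progress and no query is
	 * already pending, ask the hardware for its current progress
	 * state.
	 */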
	if (mlx5e_tls_rx_snd_tag_add_tcp_sequence(ptag,
	    params->tls_rx.tls_hdr_tcp_sn,
	    params->tls_rx.tls_rec_length,
	    params->tls_rx.tls_seq_number) &&
	    ptag->tcp_resync_pending == 0) {
		err = mlx5e_tls_rx_receive_progress_parameters(iq, ptag);
		if (err != 0) {
			MLX5E_TLS_RX_STAT_INC(ptag, rx_resync_err, 1);
		} else {
			ptag->tcp_resync_pending = 1;
			MLX5E_TLS_RX_STAT_INC(ptag, rx_resync_ok, 1);
		}
	} else {
		err = 0;
	}
	MLX5E_TLS_RX_TAG_UNLOCK(ptag);

	return (-err);
}

/*
 * This function frees a TLS RX tag in a non-blocking way.
 */
static void
mlx5e_tls_rx_snd_tag_free(struct m_snd_tag *pmt)
{
	struct mlx5e_tls_rx_tag *ptag =
	    container_of(pmt, struct mlx5e_tls_rx_tag, tag);
	struct mlx5e_priv *priv;

	MLX5E_TLS_RX_TAG_LOCK(ptag);
	ptag->state = MLX5E_TLS_RX_ST_RELEASE;
	MLX5E_TLS_RX_TAG_UNLOCK(ptag);

	priv = if_getsoftc(ptag->tag.ifp);
	queue_work(priv->tls_rx.wq, &ptag->work);
}

#else

int
mlx5e_tls_rx_init(struct mlx5e_priv *priv)
{

	return (0);
}

void
mlx5e_tls_rx_cleanup(struct mlx5e_priv *priv)
{
	/* NOP */
}

#endif