// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <net/dst_metadata.h>
#include <linux/netdevice.h>
#include <linux/if_macvlan.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include "tc.h"
#include "neigh.h"
#include "en_rep.h"
#include "eswitch.h"
#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mapping.h"
#include "en/tc_tun.h"
#include "lib/port_tun.h"
#include "en/tc/sample.h"
#include "en_accel/ipsec_rxtx.h"
#include "en/tc/int_port.h"

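/* Per-device registration of an indirectly offloaded block (e.g. a tunnel,
 * VLAN/macvlan or OVS internal port netdev), tracked on the uplink's
 * tc_indr_block_priv_list.
 */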
struct mlx5e_rep_indr_block_priv {
	struct net_device *netdev;
	struct mlx5e_rep_priv *rpriv;
	enum flow_block_binder_type binder_type;

	struct list_head list;
};

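/* Attach an encap entry to the neigh hash entry matching @m_neigh, creating
 * the neigh entry if it does not exist yet. Also takes a tunnel entropy
 * reference for the encap reformat type.
 */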
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct mlx5e_neigh *m_neigh,
				 struct net_device *neigh_dev)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
	struct mlx5e_neigh_hash_entry *nhe;
	int err;

	err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
	if (err)
		return err;

	mutex_lock(&rpriv->neigh_update.encap_lock);
	nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh);
	if (!nhe) {
		err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe);
		if (err) {
			mutex_unlock(&rpriv->neigh_update.encap_lock);
			mlx5_tun_entropy_refcount_dec(tun_entropy,
						      e->reformat_type);
			return err;
		}
	}

	e->nhe = nhe;
	spin_lock(&nhe->encap_list_lock);
	list_add_rcu(&e->encap_list, &nhe->encap_list);
	spin_unlock(&nhe->encap_list_lock);

	mutex_unlock(&rpriv->neigh_update.encap_lock);

	return 0;
}

void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
				  struct mlx5e_encap_entry *e)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;

	if (!e->nhe)
		return;

	spin_lock(&e->nhe->encap_list_lock);
	list_del_rcu(&e->encap_list);
	spin_unlock(&e->nhe->encap_list_lock);

	mlx5e_rep_neigh_entry_release(e->nhe);
	e->nhe = NULL;
	mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
}

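/* Called (under RTNL) when the neighbour used by an encap entry changes
 * state: offload or un-offload the encap flows depending on whether the
 * neighbour became connected and whether its hardware address changed.
 */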
void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
			    struct mlx5e_encap_entry *e,
			    bool neigh_connected,
			    unsigned char ha[ETH_ALEN])
{
	struct ethhdr *eth = (struct ethhdr *)e->encap_header;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	bool encap_connected;
	LIST_HEAD(flow_list);

	ASSERT_RTNL();

	mutex_lock(&esw->offloads.encap_tbl_lock);
	encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
	if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
		goto unlock;

	mlx5e_take_all_encap_flows(e, &flow_list);

	if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
	    (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
		mlx5e_tc_encap_flows_del(priv, e, &flow_list);

	if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
		struct net_device *route_dev;

		ether_addr_copy(e->h_dest, ha);
		ether_addr_copy(eth->h_dest, ha);
		/* Update the encap source mac in case the flows were deleted
		 * because the encap source mac changed.
		 */
		route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex);
		if (route_dev)
			ether_addr_copy(eth->h_source, route_dev->dev_addr);

		mlx5e_tc_encap_flows_add(priv, e, &flow_list);
	}
unlock:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	mlx5e_put_flow_list(priv, &flow_list);
}

static int
mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
			      struct flow_cls_offload *cls_flower, int flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}

static
int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
				    struct tc_cls_matchall_offload *ma)
{
	switch (ma->command) {
	case TC_CLSMATCHALL_REPLACE:
		return mlx5e_tc_configure_matchall(priv, ma);
	case TC_CLSMATCHALL_DESTROY:
		return mlx5e_tc_delete_matchall(priv, ma);
	case TC_CLSMATCHALL_STATS:
		mlx5e_tc_stats_matchall(priv, ma);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

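/* Block callback for tc rules installed directly on the representor netdev
 * (ingress, eswitch offload).
 */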
static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
	struct mlx5e_priv *priv = cb_priv;

	if (!priv->netdev || !netif_device_present(priv->netdev))
		return -EOPNOTSUPP;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
	case TC_SETUP_CLSMATCHALL:
		return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}

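/* Block callback for flow table (FT) offload rules; such rules are remapped
 * onto the reserved FT chain below so the tc offload path can be reused.
 */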
static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
				 void *cb_priv)
{
	struct flow_cls_offload tmp, *f = type_data;
	struct mlx5e_priv *priv = cb_priv;
	struct mlx5_eswitch *esw;
	unsigned long flags;
	int err;

	flags = MLX5_TC_FLAG(INGRESS) |
		MLX5_TC_FLAG(ESW_OFFLOAD) |
		MLX5_TC_FLAG(FT_OFFLOAD);
	esw = priv->mdev->priv.eswitch;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		memcpy(&tmp, f, sizeof(*f));

		if (!mlx5_chains_prios_supported(esw_chains(esw)))
			return -EOPNOTSUPP;

		/* Re-use the tc offload path by moving the ft flow to the
		 * reserved ft chain.
		 *
		 * FT offload can use the prio range [0, INT_MAX], so we
		 * normalize it to the range [1, mlx5_chains_get_prio_range()],
		 * as with tc, where prio 0 isn't supported.
		 *
		 * Only chain 0 of FT offload is supported.
		 */
		if (tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)))
			return -EOPNOTSUPP;
		if (tmp.common.chain_index != 0)
			return -EOPNOTSUPP;

		tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
		tmp.common.prio++;
		err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
		return err;
	default:
		return -EOPNOTSUPP;
	}
}

static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
		       void *type_data)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct flow_block_offload *f = type_data;

	f->unlocked_driver_cb = true;

	switch (type) {
	case TC_SETUP_BLOCK:
		return flow_block_cb_setup_simple(type_data,
						  &mlx5e_rep_block_tc_cb_list,
						  mlx5e_rep_setup_tc_cb,
						  priv, priv, true);
	case TC_SETUP_FT:
		return flow_block_cb_setup_simple(type_data,
						  &mlx5e_rep_block_ft_cb_list,
						  mlx5e_rep_setup_ft_cb,
						  priv, priv, true);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	int err;

	mutex_init(&uplink_priv->unready_flows_lock);
	INIT_LIST_HEAD(&uplink_priv->unready_flows);

	/* init shared tc flow table */
	err = mlx5e_tc_esw_init(uplink_priv);
	return err;
}

void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
{
	/* delete shared tc flow table */
	mlx5e_tc_esw_cleanup(&rpriv->uplink_priv);
	mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
}

void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
		  mlx5e_tc_reoffload_flows_work);
}

void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
}

int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

	queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);

	return NOTIFY_OK;
}

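/* Find the indirect block registration for @netdev and @binder_type, if any. */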
static struct mlx5e_rep_indr_block_priv *
mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
				 struct net_device *netdev,
				 enum flow_block_binder_type binder_type)
{
	struct mlx5e_rep_indr_block_priv *cb_priv;

	list_for_each_entry(cb_priv,
			    &rpriv->uplink_priv.tc_indr_block_priv_list,
			    list)
		if (cb_priv->netdev == netdev &&
		    cb_priv->binder_type == binder_type)
			return cb_priv;

	return NULL;
}

static int
mlx5e_rep_indr_offload(struct net_device *netdev,
		       struct flow_cls_offload *flower,
		       struct mlx5e_rep_indr_block_priv *indr_priv,
		       unsigned long flags)
{
	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
	int err = 0;

	if (!netif_device_present(indr_priv->rpriv->netdev))
		return -EOPNOTSUPP;

	switch (flower->command) {
	case FLOW_CLS_REPLACE:
		err = mlx5e_configure_flower(netdev, priv, flower, flags);
		break;
	case FLOW_CLS_DESTROY:
		err = mlx5e_delete_flower(netdev, priv, flower, flags);
		break;
	case FLOW_CLS_STATS:
		err = mlx5e_stats_flower(netdev, priv, flower, flags);
		break;
	default:
		err = -EOPNOTSUPP;
	}

	return err;
}

static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
				      void *type_data, void *indr_priv)
{
	unsigned long flags = MLX5_TC_FLAG(ESW_OFFLOAD);
	struct mlx5e_rep_indr_block_priv *priv = indr_priv;

	flags |= (priv->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) ?
		MLX5_TC_FLAG(EGRESS) :
		MLX5_TC_FLAG(INGRESS);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
					      flags);
	default:
		return -EOPNOTSUPP;
	}
}

static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
				      void *type_data, void *indr_priv)
{
	struct mlx5e_rep_indr_block_priv *priv = indr_priv;
	struct flow_cls_offload *f = type_data;
	struct flow_cls_offload tmp;
	struct mlx5e_priv *mpriv;
	struct mlx5_eswitch *esw;
	unsigned long flags;
	int err;

	mpriv = netdev_priv(priv->rpriv->netdev);
	esw = mpriv->mdev->priv.eswitch;

	flags = MLX5_TC_FLAG(EGRESS) |
		MLX5_TC_FLAG(ESW_OFFLOAD) |
		MLX5_TC_FLAG(FT_OFFLOAD);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		memcpy(&tmp, f, sizeof(*f));

		/* Re-use the tc offload path by moving the ft flow to the
		 * reserved ft chain.
		 *
		 * FT offload can use the prio range [0, INT_MAX], so we
		 * normalize it to the range [1, mlx5_chains_get_prio_range()],
		 * as with tc, where prio 0 isn't supported.
		 *
		 * Only chain 0 of FT offload is supported.
		 */
		if (!mlx5_chains_prios_supported(esw_chains(esw)) ||
		    tmp.common.prio >= mlx5_chains_get_prio_range(esw_chains(esw)) ||
		    tmp.common.chain_index)
			return -EOPNOTSUPP;

		tmp.common.chain_index = mlx5_chains_get_nf_ft_chain(esw_chains(esw));
		tmp.common.prio++;
		err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
		return err;
	default:
		return -EOPNOTSUPP;
	}
}

static void mlx5e_rep_indr_block_unbind(void *cb_priv)
{
	struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;

	list_del(&indr_priv->list);
	kfree(indr_priv);
}

static LIST_HEAD(mlx5e_block_cb_list);

static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev)
{
	struct macvlan_dev *macvlan = netdev_priv(dev);

	return macvlan->mode == MACVLAN_MODE_PASSTHRU;
}

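/* Bind or unbind an indirect block for a supported non-representor device
 * (tunnel device, VLAN/macvlan on top of the uplink, or OVS internal port).
 */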
static int
mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
			   struct mlx5e_rep_priv *rpriv,
			   struct flow_block_offload *f,
			   flow_setup_cb_t *setup_cb,
			   void *data,
			   void (*cleanup)(struct flow_block_cb *block_cb))
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	bool is_ovs_int_port = netif_is_ovs_master(netdev);
	struct mlx5e_rep_indr_block_priv *indr_priv;
	struct flow_block_cb *block_cb;

	if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
	    !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) &&
	    !is_ovs_int_port) {
		if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev))
			return -EOPNOTSUPP;
		if (!mlx5e_rep_macvlan_mode_supported(netdev)) {
			netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode\n");
			return -EOPNOTSUPP;
		}
	}

	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		return -EOPNOTSUPP;

	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port)
		return -EOPNOTSUPP;

	if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw))
		return -EOPNOTSUPP;

	f->unlocked_driver_cb = true;
	f->driver_block_list = &mlx5e_block_cb_list;

	switch (f->command) {
	case FLOW_BLOCK_BIND:
		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
		if (indr_priv)
			return -EEXIST;

		indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
		if (!indr_priv)
			return -ENOMEM;

		indr_priv->netdev = netdev;
		indr_priv->rpriv = rpriv;
		indr_priv->binder_type = f->binder_type;
		list_add(&indr_priv->list,
			 &rpriv->uplink_priv.tc_indr_block_priv_list);

		block_cb = flow_indr_block_cb_alloc(setup_cb, indr_priv, indr_priv,
						    mlx5e_rep_indr_block_unbind,
						    f, netdev, sch, data, rpriv,
						    cleanup);
		if (IS_ERR(block_cb)) {
			list_del(&indr_priv->list);
			kfree(indr_priv);
			return PTR_ERR(block_cb);
		}
		flow_block_cb_add(block_cb, f);
		list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);

		return 0;
	case FLOW_BLOCK_UNBIND:
		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev, f->binder_type);
		if (!indr_priv)
			return -ENOENT;

		block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
		if (!block_cb)
			return -ENOENT;

		flow_indr_block_cb_remove(block_cb, f);
		list_del(&block_cb->driver_list);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
	return 0;
}

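/* Driver callback registered with flow_indr_dev_register(); dispatches
 * indirect block setup requests (TC and FT) coming from other devices.
 */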
static
int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
			    enum tc_setup_type type, void *type_data,
			    void *data,
			    void (*cleanup)(struct flow_block_cb *block_cb))
{
	if (!netdev)
		return -EOPNOTSUPP;

	switch (type) {
	case TC_SETUP_BLOCK:
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_tc_cb,
						  data, cleanup);
	case TC_SETUP_FT:
		return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
						  mlx5e_rep_indr_setup_ft_cb,
						  data, cleanup);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;

	/* init indirect block notifications */
	INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);

	return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv);
}

void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
{
	flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv,
				 mlx5e_rep_indr_block_unbind);
}

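/* Rebuild tunnel metadata (dst and encap options) on the skb from the given
 * tunnel mapping id, looked up in the uplink's tunnel mapping tables.
 */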
static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
				 struct mlx5e_tc_update_priv *tc_priv,
				 u32 tunnel_id)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct tunnel_match_enc_opts enc_opts = {};
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct metadata_dst *tun_dst;
	struct tunnel_match_key key;
	u32 tun_id, enc_opts_id;
	struct net_device *dev;
	int err;

	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
	tun_id = tunnel_id >> ENC_OPTS_BITS;

	if (!tun_id)
		return true;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
	if (err) {
		WARN_ON_ONCE(true);
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
			   tun_id, err);
		return false;
	}

	if (enc_opts_id) {
		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
				   enc_opts_id, &enc_opts);
		if (err) {
			netdev_dbg(priv->netdev,
				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
				   enc_opts_id, err);
			return false;
		}
	}

	if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
					   key.enc_ip.tos, key.enc_ip.ttl,
					   key.enc_tp.dst, TUNNEL_KEY,
					   key32_to_tunnel_id(key.enc_key_id.keyid),
					   enc_opts.key.len);
	} else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
					     key.enc_ip.tos, key.enc_ip.ttl,
					     key.enc_tp.dst, 0, TUNNEL_KEY,
					     key32_to_tunnel_id(key.enc_key_id.keyid),
					     enc_opts.key.len);
	} else {
		netdev_dbg(priv->netdev,
			   "Couldn't restore tunnel, unsupported addr_type: %d\n",
			   key.enc_control.addr_type);
		return false;
	}

	if (!tun_dst) {
		netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
		return false;
	}

	tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;

	if (enc_opts.key.len)
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					enc_opts.key.data,
					enc_opts.key.len,
					enc_opts.key.dst_opt_type);

	skb_dst_set(skb, (struct dst_entry *)tun_dst);
	dev = dev_get_by_index(&init_net, key.filter_ifindex);
	if (!dev) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel device with ifindex: %d\n",
			   key.filter_ifindex);
		return false;
	}

	/* Set fwd_dev so we do dev_put() after datapath */
	tc_priv->fwd_dev = dev;

	skb->dev = dev;

	return true;
}

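/* Restore the tc chain and conntrack state of a packet that missed in
 * hardware (via a tc skb extension, when enabled), then restore its tunnel
 * metadata.
 */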
static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
				    struct mlx5e_tc_update_priv *tc_priv)
{
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;

#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	if (chain) {
		struct mlx5_rep_uplink_priv *uplink_priv;
		struct mlx5e_rep_priv *uplink_rpriv;
		struct tc_skb_ext *tc_skb_ext;
		struct mlx5_eswitch *esw;
		u32 zone_restore_id;

		tc_skb_ext = tc_skb_ext_alloc(skb);
		if (!tc_skb_ext) {
			WARN_ON(1);
			return false;
		}
		tc_skb_ext->chain = chain;
		zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
		esw = priv->mdev->priv.eswitch;
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;
		if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
					      zone_restore_id))
			return false;
	}
#endif /* CONFIG_NET_TC_SKB_EXT */

	return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
}

static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
{
	if (tc_priv->fwd_dev)
		dev_put(tc_priv->fwd_dev);
}

static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
				     struct mlx5_mapped_obj *mapped_obj,
				     struct mlx5e_tc_update_priv *tc_priv)
{
	if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
		netdev_dbg(priv->netdev,
			   "Failed to restore tunnel info for sampled packet\n");
		return;
	}
	mlx5e_tc_sample_skb(skb, mapped_obj);
	mlx5_rep_tc_post_napi_receive(tc_priv);
}

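/* Restore a packet matched against OVS internal port metadata: restore its
 * tunnel info if present, otherwise redirect the skb to the internal port
 * device and report via *forward_tx whether it should be transmitted or
 * received.
 */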
static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
				       struct mlx5_mapped_obj *mapped_obj,
				       struct mlx5e_tc_update_priv *tc_priv,
				       bool *forward_tx,
				       u32 reg_c1)
{
	u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	/* Tunnel restore takes precedence over int port restore */
	if (tunnel_id)
		return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
				      mapped_obj->int_port_metadata, forward_tx)) {
		/* Set fwd_dev for future dev_put */
		tc_priv->fwd_dev = skb->dev;

		return true;
	}

	return false;
}

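/* RX handler for representor traffic that was marked by hardware: decode
 * reg_c0/reg_c1 from the CQE and restore chain, tunnel, sample or internal
 * port state before handing the skb to the stack.
 */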
void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
			  struct sk_buff *skb)
{
	u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
	struct mlx5e_tc_update_priv tc_priv = {};
	struct mlx5_mapped_obj mapped_obj;
	struct mlx5_eswitch *esw;
	bool forward_tx = false;
	struct mlx5e_priv *priv;
	u32 reg_c0;
	int err;

	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
	if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
		goto forward;

	/* If reg_c0 is not equal to the default flow tag then skb->mark
	 * is not supported and must be reset back to 0.
	 */
	skb->mark = 0;

	priv = netdev_priv(skb->dev);
	esw = priv->mdev->priv.eswitch;
	err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find mapped object for reg_c0: %d, err: %d\n",
			   reg_c0, err);
		goto free_skb;
	}

	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
		if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) &&
		    !mlx5_ipsec_is_rx_flow(cqe))
			goto free_skb;
	} else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
		mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv);
		goto free_skb;
	} else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) {
		if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv,
						&forward_tx, reg_c1))
			goto free_skb;
	} else {
		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
		goto free_skb;
	}

forward:
	if (forward_tx)
		dev_queue_xmit(skb);
	else
		napi_gro_receive(rq->cq.napi, skb);

	mlx5_rep_tc_post_napi_receive(&tc_priv);

	return;

free_skb:
	dev_kfree_skb_any(skb);
}