1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "en_tc.h"
9 #include "tc_tun.h"
10 #include "rep/tc.h"
11 #include "diag/en_tc_tracepoint.h"
12 
13 enum {
14 	MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
15 };
16 
17 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
18 				     struct mlx5_flow_attr *attr,
19 				     struct mlx5e_encap_entry *e,
20 				     int out_index)
21 {
22 	struct net_device *route_dev;
23 	int err = 0;
24 
25 	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
26 
27 	if (!route_dev || !netif_is_ovs_master(route_dev) ||
28 	    attr->parse_attr->filter_dev == e->out_dev)
29 		goto out;
30 
31 	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
32 						MLX5E_TC_INT_PORT_EGRESS,
33 						&attr->action, out_index);
34 
35 out:
36 	if (route_dev)
37 		dev_put(route_dev);
38 
39 	return err;
40 }
41 
42 struct mlx5e_route_key {
43 	int ip_version;
44 	union {
45 		__be32 v4;
46 		struct in6_addr v6;
47 	} endpoint_ip;
48 };
49 
50 struct mlx5e_route_entry {
51 	struct mlx5e_route_key key;
52 	struct list_head encap_entries;
53 	struct list_head decap_flows;
54 	u32 flags;
55 	struct hlist_node hlist;
56 	refcount_t refcnt;
57 	int tunnel_dev_index;
58 	struct rcu_head rcu;
59 };
60 
61 struct mlx5e_tc_tun_encap {
62 	struct mlx5e_priv *priv;
63 	struct notifier_block fib_nb;
64 	spinlock_t route_lock; /* protects route_tbl */
65 	unsigned long route_tbl_last_update;
66 	DECLARE_HASHTABLE(route_tbl, 8);
67 };
68 
69 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
70 {
71 	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
72 }
73 
74 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
75 			     struct mlx5_flow_spec *spec)
76 {
77 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
78 	struct mlx5_rx_tun_attr *tun_attr;
79 	void *daddr, *saddr;
80 	u8 ip_version;
81 
82 	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
83 	if (!tun_attr)
84 		return -ENOMEM;
85 
86 	esw_attr->rx_tun_attr = tun_attr;
87 	ip_version = mlx5e_tc_get_ip_version(spec, true);
88 
89 	if (ip_version == 4) {
90 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
91 				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
92 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
93 				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
94 		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
95 		tun_attr->src_ip.v4 = *(__be32 *)saddr;
96 		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
97 			return 0;
98 	}
99 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
100 	else if (ip_version == 6) {
101 		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
102 
103 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
104 				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
105 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
106 				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
107 		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
108 		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
109 		if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
110 		    ipv6_addr_any(&tun_attr->src_ip.v6))
111 			return 0;
112 	}
113 #endif
114 	/* Only set the flag if both src and dst ip addresses exist. They are
115 	 * required to establish routing.
116 	 */
117 	flow_flag_set(flow, TUN_RX);
118 	flow->attr->tun_ip_version = ip_version;
119 	return 0;
120 }
121 
122 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
123 {
124 	bool all_flow_encaps_valid = true;
125 	int i;
126 
127 	/* Flow can be associated with multiple encap entries.
128 	 * Before offloading the flow verify that all of them have
129 	 * a valid neighbour.
130 	 */
131 	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
132 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
133 			continue;
134 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
135 			all_flow_encaps_valid = false;
136 			break;
137 		}
138 	}
139 
140 	return all_flow_encaps_valid;
141 }
142 
143 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
144 			      struct mlx5e_encap_entry *e,
145 			      struct list_head *flow_list)
146 {
147 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
148 	struct mlx5_pkt_reformat_params reformat_params;
149 	struct mlx5_esw_flow_attr *esw_attr;
150 	struct mlx5_flow_handle *rule;
151 	struct mlx5_flow_attr *attr;
152 	struct mlx5_flow_spec *spec;
153 	struct mlx5e_tc_flow *flow;
154 	int err;
155 
156 	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
157 		return;
158 
159 	memset(&reformat_params, 0, sizeof(reformat_params));
160 	reformat_params.type = e->reformat_type;
161 	reformat_params.size = e->encap_size;
162 	reformat_params.data = e->encap_header;
163 	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
164 						     &reformat_params,
165 						     MLX5_FLOW_NAMESPACE_FDB);
166 	if (IS_ERR(e->pkt_reformat)) {
167 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
168 			       PTR_ERR(e->pkt_reformat));
169 		return;
170 	}
171 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
172 	mlx5e_rep_queue_neigh_stats_work(priv);
173 
174 	list_for_each_entry(flow, flow_list, tmp_list) {
175 		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
176 			continue;
177 
178 		spec = &flow->attr->parse_attr->spec;
179 
180 		attr = mlx5e_tc_get_encap_attr(flow);
181 		esw_attr = attr->esw_attr;
182 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
183 		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
184 
185 		/* Do not offload flows with unresolved neighbors */
186 		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
187 			continue;
188 
189 		err = mlx5e_tc_offload_flow_post_acts(flow);
190 		if (err) {
191 			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
192 				       err);
193 			continue;
194 		}
195 
196 		/* update from slow path rule to encap rule */
197 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
198 		if (IS_ERR(rule)) {
199 			mlx5e_tc_unoffload_flow_post_acts(flow);
200 			err = PTR_ERR(rule);
201 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
202 				       err);
203 			continue;
204 		}
205 
206 		mlx5e_tc_unoffload_from_slow_path(esw, flow);
207 		flow->rule[0] = rule;
208 		/* was unset when slow path rule removed */
209 		flow_flag_set(flow, OFFLOADED);
210 	}
211 }
212 
213 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
214 			      struct mlx5e_encap_entry *e,
215 			      struct list_head *flow_list)
216 {
217 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
218 	struct mlx5_esw_flow_attr *esw_attr;
219 	struct mlx5_flow_handle *rule;
220 	struct mlx5_flow_attr *attr;
221 	struct mlx5_flow_spec *spec;
222 	struct mlx5e_tc_flow *flow;
223 	int err;
224 
225 	list_for_each_entry(flow, flow_list, tmp_list) {
226 		if (!mlx5e_is_offloaded_flow(flow))
227 			continue;
228 
229 		attr = mlx5e_tc_get_encap_attr(flow);
230 		esw_attr = attr->esw_attr;
231 		/* mark the flow's encap dest as non-valid */
232 		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
233 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
234 
235 		/* Clear pkt_reformat before checking slow path flag. Because
236 		 * in next iteration, the same flow is already set slow path
237 		 * flag, but still need to clear the pkt_reformat.
238 		 */
239 		if (flow_flag_test(flow, SLOW))
240 			continue;
241 
242 		/* update from encap rule to slow path rule */
243 		spec = &flow->attr->parse_attr->spec;
244 		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
245 
246 		if (IS_ERR(rule)) {
247 			err = PTR_ERR(rule);
248 			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
249 				       err);
250 			continue;
251 		}
252 
253 		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
254 		mlx5e_tc_unoffload_flow_post_acts(flow);
255 		flow->rule[0] = rule;
256 		/* was unset when fast path rule removed */
257 		flow_flag_set(flow, OFFLOADED);
258 	}
259 
260 	/* we know that the encap is valid */
261 	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
262 	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
263 	e->pkt_reformat = NULL;
264 }
265 
266 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
267 				struct list_head *flow_list,
268 				int index)
269 {
270 	if (IS_ERR(mlx5e_flow_get(flow))) {
271 		/* Flow is being deleted concurrently. Wait for it to be
272 		 * unoffloaded from hardware, otherwise deleting encap will
273 		 * fail.
274 		 */
275 		wait_for_completion(&flow->del_hw_done);
276 		return;
277 	}
278 	wait_for_completion(&flow->init_done);
279 
280 	flow->tmp_entry_index = index;
281 	list_add(&flow->tmp_list, flow_list);
282 }
283 
284 /* Takes reference to all flows attached to encap and adds the flows to
285  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
286  */
287 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
288 {
289 	struct encap_flow_item *efi;
290 	struct mlx5e_tc_flow *flow;
291 
292 	list_for_each_entry(efi, &e->flows, list) {
293 		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
294 		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
295 	}
296 }
297 
298 /* Takes reference to all flows attached to route and adds the flows to
299  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
300  */
301 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
302 					     struct list_head *flow_list)
303 {
304 	struct mlx5e_tc_flow *flow;
305 
306 	list_for_each_entry(flow, &r->decap_flows, decap_routes)
307 		mlx5e_take_tmp_flow(flow, flow_list, 0);
308 }
309 
310 typedef bool (match_cb)(struct mlx5e_encap_entry *);
311 
312 static struct mlx5e_encap_entry *
313 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
314 			      struct mlx5e_encap_entry *e,
315 			      match_cb match)
316 {
317 	struct mlx5e_encap_entry *next = NULL;
318 
319 retry:
320 	rcu_read_lock();
321 
322 	/* find encap with non-zero reference counter value */
323 	for (next = e ?
324 		     list_next_or_null_rcu(&nhe->encap_list,
325 					   &e->encap_list,
326 					   struct mlx5e_encap_entry,
327 					   encap_list) :
328 		     list_first_or_null_rcu(&nhe->encap_list,
329 					    struct mlx5e_encap_entry,
330 					    encap_list);
331 	     next;
332 	     next = list_next_or_null_rcu(&nhe->encap_list,
333 					  &next->encap_list,
334 					  struct mlx5e_encap_entry,
335 					  encap_list))
336 		if (mlx5e_encap_take(next))
337 			break;
338 
339 	rcu_read_unlock();
340 
341 	/* release starting encap */
342 	if (e)
343 		mlx5e_encap_put(netdev_priv(e->out_dev), e);
344 	if (!next)
345 		return next;
346 
347 	/* wait for encap to be fully initialized */
348 	wait_for_completion(&next->res_ready);
349 	/* continue searching if encap entry is not in valid state after completion */
350 	if (!match(next)) {
351 		e = next;
352 		goto retry;
353 	}
354 
355 	return next;
356 }
357 
358 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
359 {
360 	return e->flags & MLX5_ENCAP_ENTRY_VALID;
361 }
362 
363 static struct mlx5e_encap_entry *
364 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
365 			   struct mlx5e_encap_entry *e)
366 {
367 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
368 }
369 
370 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
371 {
372 	return e->compl_result >= 0;
373 }
374 
375 struct mlx5e_encap_entry *
376 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
377 			  struct mlx5e_encap_entry *e)
378 {
379 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
380 }
381 
382 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
383 {
384 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
385 	struct mlx5e_encap_entry *e = NULL;
386 	struct mlx5e_tc_flow *flow;
387 	struct mlx5_fc *counter;
388 	struct neigh_table *tbl;
389 	bool neigh_used = false;
390 	struct neighbour *n;
391 	u64 lastuse;
392 
393 	if (m_neigh->family == AF_INET)
394 		tbl = &arp_tbl;
395 #if IS_ENABLED(CONFIG_IPV6)
396 	else if (m_neigh->family == AF_INET6)
397 		tbl = ipv6_stub->nd_tbl;
398 #endif
399 	else
400 		return;
401 
402 	/* mlx5e_get_next_valid_encap() releases previous encap before returning
403 	 * next one.
404 	 */
405 	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
406 		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
407 		struct encap_flow_item *efi, *tmp;
408 		struct mlx5_eswitch *esw;
409 		LIST_HEAD(flow_list);
410 
411 		esw = priv->mdev->priv.eswitch;
412 		mutex_lock(&esw->offloads.encap_tbl_lock);
413 		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
414 			flow = container_of(efi, struct mlx5e_tc_flow,
415 					    encaps[efi->index]);
416 			if (IS_ERR(mlx5e_flow_get(flow)))
417 				continue;
418 			list_add(&flow->tmp_list, &flow_list);
419 
420 			if (mlx5e_is_offloaded_flow(flow)) {
421 				counter = mlx5e_tc_get_counter(flow);
422 				lastuse = mlx5_fc_query_lastuse(counter);
423 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
424 					neigh_used = true;
425 					break;
426 				}
427 			}
428 		}
429 		mutex_unlock(&esw->offloads.encap_tbl_lock);
430 
431 		mlx5e_put_flow_list(priv, &flow_list);
432 		if (neigh_used) {
433 			/* release current encap before breaking the loop */
434 			mlx5e_encap_put(priv, e);
435 			break;
436 		}
437 	}
438 
439 	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
440 
441 	if (neigh_used) {
442 		nhe->reported_lastuse = jiffies;
443 
444 		/* find the relevant neigh according to the cached device and
445 		 * dst ip pair
446 		 */
447 		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
448 		if (!n)
449 			return;
450 
451 		neigh_event_send(n, NULL);
452 		neigh_release(n);
453 	}
454 }
455 
456 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
457 {
458 	WARN_ON(!list_empty(&e->flows));
459 
460 	if (e->compl_result > 0) {
461 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
462 
463 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
464 			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
465 	}
466 
467 	kfree(e->tun_info);
468 	kfree(e->encap_header);
469 	kfree_rcu(e, rcu);
470 }
471 
472 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
473 				struct mlx5e_decap_entry *d)
474 {
475 	WARN_ON(!list_empty(&d->flows));
476 
477 	if (!d->compl_result)
478 		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
479 
480 	kfree_rcu(d, rcu);
481 }
482 
483 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
484 {
485 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
486 
487 	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
488 		return;
489 	list_del(&e->route_list);
490 	hash_del_rcu(&e->encap_hlist);
491 	mutex_unlock(&esw->offloads.encap_tbl_lock);
492 
493 	mlx5e_encap_dealloc(priv, e);
494 }
495 
496 static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
497 {
498 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
499 
500 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
501 
502 	if (!refcount_dec_and_test(&e->refcnt))
503 		return;
504 	list_del(&e->route_list);
505 	hash_del_rcu(&e->encap_hlist);
506 	mlx5e_encap_dealloc(priv, e);
507 }
508 
509 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
510 {
511 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
512 
513 	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
514 		return;
515 	hash_del_rcu(&d->hlist);
516 	mutex_unlock(&esw->offloads.decap_tbl_lock);
517 
518 	mlx5e_decap_dealloc(priv, d);
519 }
520 
521 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
522 				     struct mlx5e_tc_flow *flow,
523 				     int out_index);
524 
525 void mlx5e_detach_encap(struct mlx5e_priv *priv,
526 			struct mlx5e_tc_flow *flow,
527 			struct mlx5_flow_attr *attr,
528 			int out_index)
529 {
530 	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
531 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
532 
533 	if (!mlx5e_is_eswitch_flow(flow))
534 		return;
535 
536 	if (attr->esw_attr->dests[out_index].flags &
537 	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
538 		mlx5e_detach_encap_route(priv, flow, out_index);
539 
540 	/* flow wasn't fully initialized */
541 	if (!e)
542 		return;
543 
544 	mutex_lock(&esw->offloads.encap_tbl_lock);
545 	list_del(&flow->encaps[out_index].list);
546 	flow->encaps[out_index].e = NULL;
547 	if (!refcount_dec_and_test(&e->refcnt)) {
548 		mutex_unlock(&esw->offloads.encap_tbl_lock);
549 		return;
550 	}
551 	list_del(&e->route_list);
552 	hash_del_rcu(&e->encap_hlist);
553 	mutex_unlock(&esw->offloads.encap_tbl_lock);
554 
555 	mlx5e_encap_dealloc(priv, e);
556 }
557 
558 void mlx5e_detach_decap(struct mlx5e_priv *priv,
559 			struct mlx5e_tc_flow *flow)
560 {
561 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
562 	struct mlx5e_decap_entry *d = flow->decap_reformat;
563 
564 	if (!d)
565 		return;
566 
567 	mutex_lock(&esw->offloads.decap_tbl_lock);
568 	list_del(&flow->l3_to_l2_reformat);
569 	flow->decap_reformat = NULL;
570 
571 	if (!refcount_dec_and_test(&d->refcnt)) {
572 		mutex_unlock(&esw->offloads.decap_tbl_lock);
573 		return;
574 	}
575 	hash_del_rcu(&d->hlist);
576 	mutex_unlock(&esw->offloads.decap_tbl_lock);
577 
578 	mlx5e_decap_dealloc(priv, d);
579 }
580 
581 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
582 					   struct mlx5e_encap_key *b)
583 {
584 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
585 		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
586 }
587 
588 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
589 					   struct mlx5e_encap_key *b,
590 					   __be16 tun_flags)
591 {
592 	struct ip_tunnel_info *a_info;
593 	struct ip_tunnel_info *b_info;
594 	bool a_has_opts, b_has_opts;
595 
596 	if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
597 		return false;
598 
599 	a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
600 	b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
601 
602 	/* keys are equal when both don't have any options attached */
603 	if (!a_has_opts && !b_has_opts)
604 		return true;
605 
606 	if (a_has_opts != b_has_opts)
607 		return false;
608 
609 	/* options stored in memory next to ip_tunnel_info struct */
610 	a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
611 	b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
612 
613 	return a_info->options_len == b_info->options_len &&
614 	       !memcmp(ip_tunnel_info_opts(a_info),
615 		       ip_tunnel_info_opts(b_info),
616 		       a_info->options_len);
617 }
618 
619 static int cmp_decap_info(struct mlx5e_decap_key *a,
620 			  struct mlx5e_decap_key *b)
621 {
622 	return memcmp(&a->key, &b->key, sizeof(b->key));
623 }
624 
625 static int hash_encap_info(struct mlx5e_encap_key *key)
626 {
627 	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
628 		     key->tc_tunnel->tunnel_type);
629 }
630 
631 static int hash_decap_info(struct mlx5e_decap_key *key)
632 {
633 	return jhash(&key->key, sizeof(key->key), 0);
634 }
635 
636 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
637 {
638 	return refcount_inc_not_zero(&e->refcnt);
639 }
640 
641 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
642 {
643 	return refcount_inc_not_zero(&e->refcnt);
644 }
645 
646 static struct mlx5e_encap_entry *
647 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
648 		uintptr_t hash_key)
649 {
650 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
651 	struct mlx5e_encap_key e_key;
652 	struct mlx5e_encap_entry *e;
653 
654 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
655 				   encap_hlist, hash_key) {
656 		e_key.ip_tun_key = &e->tun_info->key;
657 		e_key.tc_tunnel = e->tunnel;
658 		if (e->tunnel->encap_info_equal(&e_key, key) &&
659 		    mlx5e_encap_take(e))
660 			return e;
661 	}
662 
663 	return NULL;
664 }
665 
666 static struct mlx5e_decap_entry *
667 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
668 		uintptr_t hash_key)
669 {
670 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
671 	struct mlx5e_decap_key r_key;
672 	struct mlx5e_decap_entry *e;
673 
674 	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
675 				   hlist, hash_key) {
676 		r_key = e->key;
677 		if (!cmp_decap_info(&r_key, key) &&
678 		    mlx5e_decap_take(e))
679 			return e;
680 	}
681 	return NULL;
682 }
683 
684 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
685 {
686 	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
687 
688 	return kmemdup(tun_info, tun_size, GFP_KERNEL);
689 }
690 
691 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
692 				      struct mlx5e_tc_flow *flow,
693 				      int out_index,
694 				      struct mlx5e_encap_entry *e,
695 				      struct netlink_ext_ack *extack)
696 {
697 	int i;
698 
699 	for (i = 0; i < out_index; i++) {
700 		if (flow->encaps[i].e != e)
701 			continue;
702 		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
703 		netdev_err(priv->netdev, "can't duplicate encap action\n");
704 		return true;
705 	}
706 
707 	return false;
708 }
709 
710 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
711 			       struct mlx5_flow_attr *attr,
712 			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
713 			       struct net_device *out_dev,
714 			       int route_dev_ifindex,
715 			       int out_index)
716 {
717 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
718 	struct net_device *route_dev;
719 	u16 vport_num;
720 	int err = 0;
721 	u32 data;
722 
723 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
724 
725 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
726 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
727 		goto out;
728 
729 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
730 	if (err)
731 		goto out;
732 
733 	attr->dest_chain = 0;
734 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
735 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
736 	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
737 						       vport_num);
738 	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
739 						   MLX5_FLOW_NAMESPACE_FDB,
740 						   VPORT_TO_REG, data);
741 	if (err >= 0) {
742 		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
743 		err = 0;
744 	}
745 
746 out:
747 	if (route_dev)
748 		dev_put(route_dev);
749 	return err;
750 }
751 
752 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
753 				  struct mlx5_esw_flow_attr *attr,
754 				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
755 				  struct net_device *out_dev,
756 				  int route_dev_ifindex,
757 				  int out_index)
758 {
759 	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
760 	struct net_device *route_dev;
761 	u16 vport_num;
762 	int err = 0;
763 	u32 data;
764 
765 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
766 
767 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
768 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
769 		err = -ENODEV;
770 		goto out;
771 	}
772 
773 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
774 	if (err)
775 		goto out;
776 
777 	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
778 						       vport_num);
779 	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
780 
781 out:
782 	if (route_dev)
783 		dev_put(route_dev);
784 	return err;
785 }
786 
787 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
788 {
789 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
790 	struct mlx5_rep_uplink_priv *uplink_priv;
791 	struct mlx5e_rep_priv *uplink_rpriv;
792 	struct mlx5e_tc_tun_encap *encap;
793 	unsigned int ret;
794 
795 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
796 	uplink_priv = &uplink_rpriv->uplink_priv;
797 	encap = uplink_priv->encap;
798 
799 	spin_lock_bh(&encap->route_lock);
800 	ret = encap->route_tbl_last_update;
801 	spin_unlock_bh(&encap->route_lock);
802 	return ret;
803 }
804 
805 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
806 				    struct mlx5e_tc_flow *flow,
807 				    struct mlx5_flow_attr *attr,
808 				    struct mlx5e_encap_entry *e,
809 				    bool new_encap_entry,
810 				    unsigned long tbl_time_before,
811 				    int out_index);
812 
813 int mlx5e_attach_encap(struct mlx5e_priv *priv,
814 		       struct mlx5e_tc_flow *flow,
815 		       struct mlx5_flow_attr *attr,
816 		       struct net_device *mirred_dev,
817 		       int out_index,
818 		       struct netlink_ext_ack *extack,
819 		       struct net_device **encap_dev)
820 {
821 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
822 	struct mlx5e_tc_flow_parse_attr *parse_attr;
823 	const struct ip_tunnel_info *tun_info;
824 	const struct mlx5e_mpls_info *mpls_info;
825 	unsigned long tbl_time_before = 0;
826 	struct mlx5e_encap_entry *e;
827 	struct mlx5e_encap_key key;
828 	bool entry_created = false;
829 	unsigned short family;
830 	uintptr_t hash_key;
831 	int err = 0;
832 
833 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
834 
835 	parse_attr = attr->parse_attr;
836 	tun_info = parse_attr->tun_info[out_index];
837 	mpls_info = &parse_attr->mpls_info[out_index];
838 	family = ip_tunnel_info_af(tun_info);
839 	key.ip_tun_key = &tun_info->key;
840 	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
841 	if (!key.tc_tunnel) {
842 		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
843 		return -EOPNOTSUPP;
844 	}
845 
846 	hash_key = hash_encap_info(&key);
847 
848 	e = mlx5e_encap_get(priv, &key, hash_key);
849 
850 	/* must verify if encap is valid or not */
851 	if (e) {
852 		/* Check that entry was not already attached to this flow */
853 		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
854 			err = -EOPNOTSUPP;
855 			goto out_err;
856 		}
857 
858 		goto attach_flow;
859 	}
860 
861 	e = kzalloc(sizeof(*e), GFP_KERNEL);
862 	if (!e) {
863 		err = -ENOMEM;
864 		goto out_err;
865 	}
866 
867 	refcount_set(&e->refcnt, 1);
868 	init_completion(&e->res_ready);
869 	entry_created = true;
870 	INIT_LIST_HEAD(&e->route_list);
871 
872 	tun_info = mlx5e_dup_tun_info(tun_info);
873 	if (!tun_info) {
874 		err = -ENOMEM;
875 		goto out_err_init;
876 	}
877 	e->tun_info = tun_info;
878 	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
879 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
880 	if (err)
881 		goto out_err_init;
882 
883 	INIT_LIST_HEAD(&e->flows);
884 	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
885 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
886 
887 	if (family == AF_INET)
888 		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
889 	else if (family == AF_INET6)
890 		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
891 
892 	complete_all(&e->res_ready);
893 	if (err) {
894 		e->compl_result = err;
895 		goto out_err;
896 	}
897 	e->compl_result = 1;
898 
899 attach_flow:
900 	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
901 				       tbl_time_before, out_index);
902 	if (err)
903 		goto out_err;
904 
905 	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
906 	if (err == -EOPNOTSUPP) {
907 		/* If device doesn't support int port offload,
908 		 * redirect to uplink vport.
909 		 */
910 		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
911 		err = 0;
912 	} else if (err) {
913 		goto out_err;
914 	}
915 
916 	flow->encaps[out_index].e = e;
917 	list_add(&flow->encaps[out_index].list, &e->flows);
918 	flow->encaps[out_index].index = out_index;
919 	*encap_dev = e->out_dev;
920 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
921 		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
922 		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
923 	} else {
924 		flow_flag_set(flow, SLOW);
925 	}
926 
927 	return err;
928 
929 out_err:
930 	if (e)
931 		mlx5e_encap_put_locked(priv, e);
932 	return err;
933 
934 out_err_init:
935 	kfree(tun_info);
936 	kfree(e);
937 	return err;
938 }
939 
940 int mlx5e_attach_decap(struct mlx5e_priv *priv,
941 		       struct mlx5e_tc_flow *flow,
942 		       struct netlink_ext_ack *extack)
943 {
944 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
945 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
946 	struct mlx5_pkt_reformat_params reformat_params;
947 	struct mlx5e_decap_entry *d;
948 	struct mlx5e_decap_key key;
949 	uintptr_t hash_key;
950 	int err = 0;
951 
952 	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
953 		NL_SET_ERR_MSG_MOD(extack,
954 				   "encap header larger than max supported");
955 		return -EOPNOTSUPP;
956 	}
957 
958 	key.key = attr->eth;
959 	hash_key = hash_decap_info(&key);
960 	mutex_lock(&esw->offloads.decap_tbl_lock);
961 	d = mlx5e_decap_get(priv, &key, hash_key);
962 	if (d) {
963 		mutex_unlock(&esw->offloads.decap_tbl_lock);
964 		wait_for_completion(&d->res_ready);
965 		mutex_lock(&esw->offloads.decap_tbl_lock);
966 		if (d->compl_result) {
967 			err = -EREMOTEIO;
968 			goto out_free;
969 		}
970 		goto found;
971 	}
972 
973 	d = kzalloc(sizeof(*d), GFP_KERNEL);
974 	if (!d) {
975 		err = -ENOMEM;
976 		goto out_err;
977 	}
978 
979 	d->key = key;
980 	refcount_set(&d->refcnt, 1);
981 	init_completion(&d->res_ready);
982 	INIT_LIST_HEAD(&d->flows);
983 	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
984 	mutex_unlock(&esw->offloads.decap_tbl_lock);
985 
986 	memset(&reformat_params, 0, sizeof(reformat_params));
987 	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
988 	reformat_params.size = sizeof(attr->eth);
989 	reformat_params.data = &attr->eth;
990 	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
991 						     &reformat_params,
992 						     MLX5_FLOW_NAMESPACE_FDB);
993 	if (IS_ERR(d->pkt_reformat)) {
994 		err = PTR_ERR(d->pkt_reformat);
995 		d->compl_result = err;
996 	}
997 	mutex_lock(&esw->offloads.decap_tbl_lock);
998 	complete_all(&d->res_ready);
999 	if (err)
1000 		goto out_free;
1001 
1002 found:
1003 	flow->decap_reformat = d;
1004 	attr->decap_pkt_reformat = d->pkt_reformat;
1005 	list_add(&flow->l3_to_l2_reformat, &d->flows);
1006 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1007 	return 0;
1008 
1009 out_free:
1010 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1011 	mlx5e_decap_put(priv, d);
1012 	return err;
1013 
1014 out_err:
1015 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1016 	return err;
1017 }
1018 
1019 int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
1020 				 struct mlx5e_tc_flow *flow,
1021 				 struct mlx5_flow_attr *attr,
1022 				 struct netlink_ext_ack *extack,
1023 				 bool *vf_tun)
1024 {
1025 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1026 	struct mlx5_esw_flow_attr *esw_attr;
1027 	struct net_device *encap_dev = NULL;
1028 	struct mlx5e_rep_priv *rpriv;
1029 	struct mlx5e_priv *out_priv;
1030 	struct mlx5_eswitch *esw;
1031 	int out_index;
1032 	int err = 0;
1033 
1034 	parse_attr = attr->parse_attr;
1035 	esw_attr = attr->esw_attr;
1036 	*vf_tun = false;
1037 
1038 	esw = priv->mdev->priv.eswitch;
1039 	mutex_lock(&esw->offloads.encap_tbl_lock);
1040 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1041 		struct net_device *out_dev;
1042 		int mirred_ifindex;
1043 
1044 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1045 			continue;
1046 
1047 		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1048 		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1049 		if (!out_dev) {
1050 			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1051 			err = -ENODEV;
1052 			goto out;
1053 		}
1054 		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1055 					 extack, &encap_dev);
1056 		dev_put(out_dev);
1057 		if (err)
1058 			goto out;
1059 
1060 		if (esw_attr->dests[out_index].flags &
1061 		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1062 		    !esw_attr->dest_int_port)
1063 			*vf_tun = true;
1064 
1065 		out_priv = netdev_priv(encap_dev);
1066 		rpriv = out_priv->ppriv;
1067 		esw_attr->dests[out_index].vport_valid = true;
1068 		esw_attr->dests[out_index].vport = rpriv->rep->vport;
1069 		esw_attr->dests[out_index].mdev = out_priv->mdev;
1070 	}
1071 
1072 	if (*vf_tun && esw_attr->out_count > 1) {
1073 		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1074 		err = -EOPNOTSUPP;
1075 		goto out;
1076 	}
1077 
1078 out:
1079 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1080 	return err;
1081 }
1082 
1083 void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
1084 				    struct mlx5e_tc_flow *flow,
1085 				    struct mlx5_flow_attr *attr)
1086 {
1087 	struct mlx5_esw_flow_attr *esw_attr;
1088 	int out_index;
1089 
1090 	if (!mlx5e_is_eswitch_flow(flow))
1091 		return;
1092 
1093 	esw_attr = attr->esw_attr;
1094 
1095 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1096 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1097 			continue;
1098 
1099 		mlx5e_detach_encap(flow->priv, flow, attr, out_index);
1100 		kfree(attr->parse_attr->tun_info[out_index]);
1101 	}
1102 }
1103 
1104 static int cmp_route_info(struct mlx5e_route_key *a,
1105 			  struct mlx5e_route_key *b)
1106 {
1107 	if (a->ip_version == 4 && b->ip_version == 4)
1108 		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1109 			      sizeof(a->endpoint_ip.v4));
1110 	else if (a->ip_version == 6 && b->ip_version == 6)
1111 		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1112 			      sizeof(a->endpoint_ip.v6));
1113 	return 1;
1114 }
1115 
1116 static u32 hash_route_info(struct mlx5e_route_key *key)
1117 {
1118 	if (key->ip_version == 4)
1119 		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1120 	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1121 }
1122 
/* Free route entry @r.
 *
 * Warns if decap flows or encap entries are still linked to the entry, then
 * hands the memory to kfree_rcu() using the entry's rcu head. @priv is not
 * used.
 */
static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}
1131 
1132 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1133 {
1134 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1135 
1136 	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1137 		return;
1138 
1139 	hash_del_rcu(&r->hlist);
1140 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1141 
1142 	mlx5e_route_dealloc(priv, r);
1143 }
1144 
1145 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1146 {
1147 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1148 
1149 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1150 
1151 	if (!refcount_dec_and_test(&r->refcnt))
1152 		return;
1153 	hash_del_rcu(&r->hlist);
1154 	mlx5e_route_dealloc(priv, r);
1155 }
1156 
1157 static struct mlx5e_route_entry *
1158 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1159 		u32 hash_key)
1160 {
1161 	struct mlx5e_route_key r_key;
1162 	struct mlx5e_route_entry *r;
1163 
1164 	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1165 		r_key = r->key;
1166 		if (!cmp_route_info(&r_key, key) &&
1167 		    refcount_inc_not_zero(&r->refcnt))
1168 			return r;
1169 	}
1170 	return NULL;
1171 }
1172 
1173 static struct mlx5e_route_entry *
1174 mlx5e_route_get_create(struct mlx5e_priv *priv,
1175 		       struct mlx5e_route_key *key,
1176 		       int tunnel_dev_index,
1177 		       unsigned long *route_tbl_change_time)
1178 {
1179 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1180 	struct mlx5_rep_uplink_priv *uplink_priv;
1181 	struct mlx5e_rep_priv *uplink_rpriv;
1182 	struct mlx5e_tc_tun_encap *encap;
1183 	struct mlx5e_route_entry *r;
1184 	u32 hash_key;
1185 
1186 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1187 	uplink_priv = &uplink_rpriv->uplink_priv;
1188 	encap = uplink_priv->encap;
1189 
1190 	hash_key = hash_route_info(key);
1191 	spin_lock_bh(&encap->route_lock);
1192 	r = mlx5e_route_get(encap, key, hash_key);
1193 	spin_unlock_bh(&encap->route_lock);
1194 	if (r) {
1195 		if (!mlx5e_route_entry_valid(r)) {
1196 			mlx5e_route_put_locked(priv, r);
1197 			return ERR_PTR(-EINVAL);
1198 		}
1199 		return r;
1200 	}
1201 
1202 	r = kzalloc(sizeof(*r), GFP_KERNEL);
1203 	if (!r)
1204 		return ERR_PTR(-ENOMEM);
1205 
1206 	r->key = *key;
1207 	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1208 	r->tunnel_dev_index = tunnel_dev_index;
1209 	refcount_set(&r->refcnt, 1);
1210 	INIT_LIST_HEAD(&r->decap_flows);
1211 	INIT_LIST_HEAD(&r->encap_entries);
1212 
1213 	spin_lock_bh(&encap->route_lock);
1214 	*route_tbl_change_time = encap->route_tbl_last_update;
1215 	hash_add(encap->route_tbl, &r->hlist, hash_key);
1216 	spin_unlock_bh(&encap->route_lock);
1217 
1218 	return r;
1219 }
1220 
1221 static struct mlx5e_route_entry *
1222 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1223 {
1224 	u32 hash_key = hash_route_info(key);
1225 	struct mlx5e_route_entry *r;
1226 
1227 	spin_lock_bh(&encap->route_lock);
1228 	encap->route_tbl_last_update = jiffies;
1229 	r = mlx5e_route_get(encap, key, hash_key);
1230 	spin_unlock_bh(&encap->route_lock);
1231 
1232 	return r;
1233 }
1234 
/* Context of one deferred FIB notification; allocated by
 * mlx5e_tc_init_fib_work() and consumed and freed by
 * mlx5e_tc_fib_event_work().
 */
struct mlx5e_tc_fib_event_data {
	/* work item, initialised to run mlx5e_tc_fib_event_work() */
	struct work_struct work;
	/* FIB event code; the handler distinguishes FIB_EVENT_ENTRY_REPLACE */
	unsigned long event;
	/* route entry to update; its reference is dropped by the handler */
	struct mlx5e_route_entry *r;
	/* uplink netdev; its reference is dropped by the handler */
	struct net_device *ul_dev;
};
1241 
1242 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1243 static struct mlx5e_tc_fib_event_data *
1244 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1245 {
1246 	struct mlx5e_tc_fib_event_data *fib_work;
1247 
1248 	fib_work = kzalloc(sizeof(*fib_work), flags);
1249 	if (WARN_ON(!fib_work))
1250 		return NULL;
1251 
1252 	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1253 	fib_work->event = event;
1254 	fib_work->ul_dev = ul_dev;
1255 
1256 	return fib_work;
1257 }
1258 
1259 static int
1260 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1261 			   struct mlx5e_route_entry *r,
1262 			   unsigned long event)
1263 {
1264 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1265 	struct mlx5e_tc_fib_event_data *fib_work;
1266 	struct mlx5e_rep_priv *uplink_rpriv;
1267 	struct net_device *ul_dev;
1268 
1269 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1270 	ul_dev = uplink_rpriv->netdev;
1271 
1272 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1273 	if (!fib_work)
1274 		return -ENOMEM;
1275 
1276 	dev_hold(ul_dev);
1277 	refcount_inc(&r->refcnt);
1278 	fib_work->r = r;
1279 	queue_work(priv->wq, &fib_work->work);
1280 
1281 	return 0;
1282 }
1283 
1284 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1285 			     struct mlx5e_tc_flow *flow)
1286 {
1287 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1288 	unsigned long tbl_time_before, tbl_time_after;
1289 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1290 	struct mlx5_flow_attr *attr = flow->attr;
1291 	struct mlx5_esw_flow_attr *esw_attr;
1292 	struct mlx5e_route_entry *r;
1293 	struct mlx5e_route_key key;
1294 	int err = 0;
1295 
1296 	esw_attr = attr->esw_attr;
1297 	parse_attr = attr->parse_attr;
1298 	mutex_lock(&esw->offloads.encap_tbl_lock);
1299 	if (!esw_attr->rx_tun_attr)
1300 		goto out;
1301 
1302 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1303 	tbl_time_after = tbl_time_before;
1304 	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1305 	if (err || !esw_attr->rx_tun_attr->decap_vport)
1306 		goto out;
1307 
1308 	key.ip_version = attr->tun_ip_version;
1309 	if (key.ip_version == 4)
1310 		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1311 	else
1312 		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1313 
1314 	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1315 				   &tbl_time_after);
1316 	if (IS_ERR(r)) {
1317 		err = PTR_ERR(r);
1318 		goto out;
1319 	}
1320 	/* Routing changed concurrently. FIB event handler might have missed new
1321 	 * entry, schedule update.
1322 	 */
1323 	if (tbl_time_before != tbl_time_after) {
1324 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1325 		if (err) {
1326 			mlx5e_route_put_locked(priv, r);
1327 			goto out;
1328 		}
1329 	}
1330 
1331 	flow->decap_route = r;
1332 	list_add(&flow->decap_routes, &r->decap_flows);
1333 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1334 	return 0;
1335 
1336 out:
1337 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1338 	return err;
1339 }
1340 
1341 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1342 				    struct mlx5e_tc_flow *flow,
1343 				    struct mlx5_flow_attr *attr,
1344 				    struct mlx5e_encap_entry *e,
1345 				    bool new_encap_entry,
1346 				    unsigned long tbl_time_before,
1347 				    int out_index)
1348 {
1349 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1350 	unsigned long tbl_time_after = tbl_time_before;
1351 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1352 	const struct ip_tunnel_info *tun_info;
1353 	struct mlx5_esw_flow_attr *esw_attr;
1354 	struct mlx5e_route_entry *r;
1355 	struct mlx5e_route_key key;
1356 	unsigned short family;
1357 	int err = 0;
1358 
1359 	esw_attr = attr->esw_attr;
1360 	parse_attr = attr->parse_attr;
1361 	tun_info = parse_attr->tun_info[out_index];
1362 	family = ip_tunnel_info_af(tun_info);
1363 
1364 	if (family == AF_INET) {
1365 		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1366 		key.ip_version = 4;
1367 	} else if (family == AF_INET6) {
1368 		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1369 		key.ip_version = 6;
1370 	}
1371 
1372 	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1373 				  e->route_dev_ifindex, out_index);
1374 	if (err || !(esw_attr->dests[out_index].flags &
1375 		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1376 		return err;
1377 
1378 	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1379 				   &tbl_time_after);
1380 	if (IS_ERR(r))
1381 		return PTR_ERR(r);
1382 	/* Routing changed concurrently. FIB event handler might have missed new
1383 	 * entry, schedule update.
1384 	 */
1385 	if (tbl_time_before != tbl_time_after) {
1386 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1387 		if (err) {
1388 			mlx5e_route_put_locked(priv, r);
1389 			return err;
1390 		}
1391 	}
1392 
1393 	flow->encap_routes[out_index].r = r;
1394 	if (new_encap_entry)
1395 		list_add(&e->route_list, &r->encap_entries);
1396 	flow->encap_routes[out_index].index = out_index;
1397 	return 0;
1398 }
1399 
1400 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1401 			      struct mlx5e_tc_flow *flow)
1402 {
1403 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1404 	struct mlx5e_route_entry *r = flow->decap_route;
1405 
1406 	if (!r)
1407 		return;
1408 
1409 	mutex_lock(&esw->offloads.encap_tbl_lock);
1410 	list_del(&flow->decap_routes);
1411 	flow->decap_route = NULL;
1412 
1413 	if (!refcount_dec_and_test(&r->refcnt)) {
1414 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1415 		return;
1416 	}
1417 	hash_del_rcu(&r->hlist);
1418 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1419 
1420 	mlx5e_route_dealloc(priv, r);
1421 }
1422 
1423 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1424 				     struct mlx5e_tc_flow *flow,
1425 				     int out_index)
1426 {
1427 	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1428 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1429 	struct mlx5e_encap_entry *e, *tmp;
1430 
1431 	if (!r)
1432 		return;
1433 
1434 	mutex_lock(&esw->offloads.encap_tbl_lock);
1435 	flow->encap_routes[out_index].r = NULL;
1436 
1437 	if (!refcount_dec_and_test(&r->refcnt)) {
1438 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1439 		return;
1440 	}
1441 	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1442 		list_del_init(&e->route_list);
1443 	hash_del_rcu(&r->hlist);
1444 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1445 
1446 	mlx5e_route_dealloc(priv, r);
1447 }
1448 
1449 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1450 				   struct mlx5e_encap_entry *e,
1451 				   struct list_head *encap_flows)
1452 {
1453 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1454 	struct mlx5e_tc_flow *flow;
1455 
1456 	list_for_each_entry(flow, encap_flows, tmp_list) {
1457 		struct mlx5_esw_flow_attr *esw_attr;
1458 		struct mlx5_flow_attr *attr;
1459 
1460 		if (!mlx5e_is_offloaded_flow(flow))
1461 			continue;
1462 
1463 		attr = mlx5e_tc_get_encap_attr(flow);
1464 		esw_attr = attr->esw_attr;
1465 
1466 		if (flow_flag_test(flow, SLOW)) {
1467 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1468 		} else {
1469 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1470 			mlx5e_tc_unoffload_flow_post_acts(flow);
1471 		}
1472 
1473 		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1474 		attr->modify_hdr = NULL;
1475 
1476 		esw_attr->dests[flow->tmp_entry_index].flags &=
1477 			~MLX5_ESW_DEST_ENCAP_VALID;
1478 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1479 	}
1480 
1481 	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1482 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1483 		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1484 		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1485 		e->pkt_reformat = NULL;
1486 	}
1487 }
1488 
1489 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1490 				  struct net_device *tunnel_dev,
1491 				  struct mlx5e_encap_entry *e,
1492 				  struct list_head *encap_flows)
1493 {
1494 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1495 	struct mlx5e_tc_flow *flow;
1496 	int err;
1497 
1498 	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1499 		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1500 		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1501 	if (err)
1502 		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1503 	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1504 
1505 	list_for_each_entry(flow, encap_flows, tmp_list) {
1506 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1507 		struct mlx5_esw_flow_attr *esw_attr;
1508 		struct mlx5_flow_handle *rule;
1509 		struct mlx5_flow_attr *attr;
1510 		struct mlx5_flow_spec *spec;
1511 
1512 		if (flow_flag_test(flow, FAILED))
1513 			continue;
1514 
1515 		spec = &flow->attr->parse_attr->spec;
1516 
1517 		attr = mlx5e_tc_get_encap_attr(flow);
1518 		esw_attr = attr->esw_attr;
1519 		parse_attr = attr->parse_attr;
1520 
1521 		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1522 					     e->out_dev, e->route_dev_ifindex,
1523 					     flow->tmp_entry_index);
1524 		if (err) {
1525 			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1526 			continue;
1527 		}
1528 
1529 		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1530 		if (err) {
1531 			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1532 				       err);
1533 			continue;
1534 		}
1535 
1536 		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1537 			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1538 			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1539 			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1540 				goto offload_to_slow_path;
1541 
1542 			err = mlx5e_tc_offload_flow_post_acts(flow);
1543 			if (err) {
1544 				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1545 					       err);
1546 				goto offload_to_slow_path;
1547 			}
1548 
1549 			/* update from slow path rule to encap rule */
1550 			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1551 			if (IS_ERR(rule)) {
1552 				mlx5e_tc_unoffload_flow_post_acts(flow);
1553 				err = PTR_ERR(rule);
1554 				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1555 					       err);
1556 			} else {
1557 				flow->rule[0] = rule;
1558 			}
1559 		} else {
1560 offload_to_slow_path:
1561 			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1562 			/* mark the flow's encap dest as non-valid */
1563 			esw_attr->dests[flow->tmp_entry_index].flags &=
1564 				~MLX5_ESW_DEST_ENCAP_VALID;
1565 
1566 			if (IS_ERR(rule)) {
1567 				err = PTR_ERR(rule);
1568 				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1569 					       err);
1570 			} else {
1571 				flow->rule[0] = rule;
1572 			}
1573 		}
1574 		flow_flag_set(flow, OFFLOADED);
1575 	}
1576 }
1577 
1578 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1579 				     struct mlx5e_route_entry *r,
1580 				     struct list_head *flow_list,
1581 				     bool replace)
1582 {
1583 	struct net_device *tunnel_dev;
1584 	struct mlx5e_encap_entry *e;
1585 
1586 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1587 	if (!tunnel_dev)
1588 		return -ENODEV;
1589 
1590 	list_for_each_entry(e, &r->encap_entries, route_list) {
1591 		LIST_HEAD(encap_flows);
1592 
1593 		mlx5e_take_all_encap_flows(e, &encap_flows);
1594 		if (list_empty(&encap_flows))
1595 			continue;
1596 
1597 		if (mlx5e_route_entry_valid(r))
1598 			mlx5e_invalidate_encap(priv, e, &encap_flows);
1599 
1600 		if (!replace) {
1601 			list_splice(&encap_flows, flow_list);
1602 			continue;
1603 		}
1604 
1605 		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1606 		list_splice(&encap_flows, flow_list);
1607 	}
1608 
1609 	return 0;
1610 }
1611 
1612 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1613 				      struct list_head *flow_list)
1614 {
1615 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1616 	struct mlx5e_tc_flow *flow;
1617 
1618 	list_for_each_entry(flow, flow_list, tmp_list)
1619 		if (mlx5e_is_offloaded_flow(flow))
1620 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1621 }
1622 
1623 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1624 				  struct list_head *decap_flows)
1625 {
1626 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1627 	struct mlx5e_tc_flow *flow;
1628 
1629 	list_for_each_entry(flow, decap_flows, tmp_list) {
1630 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1631 		struct mlx5_flow_attr *attr = flow->attr;
1632 		struct mlx5_flow_handle *rule;
1633 		struct mlx5_flow_spec *spec;
1634 		int err;
1635 
1636 		if (flow_flag_test(flow, FAILED))
1637 			continue;
1638 
1639 		parse_attr = attr->parse_attr;
1640 		spec = &parse_attr->spec;
1641 		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1642 		if (err) {
1643 			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1644 				       err);
1645 			continue;
1646 		}
1647 
1648 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1649 		if (IS_ERR(rule)) {
1650 			err = PTR_ERR(rule);
1651 			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1652 				       err);
1653 		} else {
1654 			flow->rule[0] = rule;
1655 			flow_flag_set(flow, OFFLOADED);
1656 		}
1657 	}
1658 }
1659 
1660 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1661 					  struct mlx5e_route_entry *r,
1662 					  struct list_head *flow_list,
1663 					  bool replace)
1664 {
1665 	struct net_device *tunnel_dev;
1666 	LIST_HEAD(decap_flows);
1667 
1668 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1669 	if (!tunnel_dev)
1670 		return -ENODEV;
1671 
1672 	mlx5e_take_all_route_decap_flows(r, &decap_flows);
1673 	if (mlx5e_route_entry_valid(r))
1674 		mlx5e_unoffload_flow_list(priv, &decap_flows);
1675 	if (replace)
1676 		mlx5e_reoffload_decap(priv, &decap_flows);
1677 
1678 	list_splice(&decap_flows, flow_list);
1679 
1680 	return 0;
1681 }
1682 
1683 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1684 {
1685 	struct mlx5e_tc_fib_event_data *event_data =
1686 		container_of(work, struct mlx5e_tc_fib_event_data, work);
1687 	struct net_device *ul_dev = event_data->ul_dev;
1688 	struct mlx5e_priv *priv = netdev_priv(ul_dev);
1689 	struct mlx5e_route_entry *r = event_data->r;
1690 	struct mlx5_eswitch *esw;
1691 	LIST_HEAD(flow_list);
1692 	bool replace;
1693 	int err;
1694 
1695 	/* sync with concurrent neigh updates */
1696 	rtnl_lock();
1697 	esw = priv->mdev->priv.eswitch;
1698 	mutex_lock(&esw->offloads.encap_tbl_lock);
1699 	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1700 
1701 	if (!mlx5e_route_entry_valid(r) && !replace)
1702 		goto out;
1703 
1704 	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1705 	if (err)
1706 		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1707 			       err);
1708 
1709 	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1710 	if (err)
1711 		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1712 			       err);
1713 
1714 	if (replace)
1715 		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1716 out:
1717 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1718 	rtnl_unlock();
1719 
1720 	mlx5e_put_flow_list(priv, &flow_list);
1721 	mlx5e_route_put(priv, event_data->r);
1722 	dev_put(event_data->ul_dev);
1723 	kfree(event_data);
1724 }
1725 
1726 static struct mlx5e_tc_fib_event_data *
1727 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1728 			 struct net_device *ul_dev,
1729 			 struct mlx5e_tc_tun_encap *encap,
1730 			 unsigned long event,
1731 			 struct fib_notifier_info *info)
1732 {
1733 	struct fib_entry_notifier_info *fen_info;
1734 	struct mlx5e_tc_fib_event_data *fib_work;
1735 	struct mlx5e_route_entry *r;
1736 	struct mlx5e_route_key key;
1737 	struct net_device *fib_dev;
1738 
1739 	fen_info = container_of(info, struct fib_entry_notifier_info, info);
1740 	if (fen_info->fi->nh)
1741 		return NULL;
1742 	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1743 	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1744 	    fen_info->dst_len != 32)
1745 		return NULL;
1746 
1747 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1748 	if (!fib_work)
1749 		return ERR_PTR(-ENOMEM);
1750 
1751 	key.endpoint_ip.v4 = htonl(fen_info->dst);
1752 	key.ip_version = 4;
1753 
1754 	/* Can't fail after this point because releasing reference to r
1755 	 * requires obtaining sleeping mutex which we can't do in atomic
1756 	 * context.
1757 	 */
1758 	r = mlx5e_route_lookup_for_update(encap, &key);
1759 	if (!r)
1760 		goto out;
1761 	fib_work->r = r;
1762 	dev_hold(ul_dev);
1763 
1764 	return fib_work;
1765 
1766 out:
1767 	kfree(fib_work);
1768 	return NULL;
1769 }
1770 
1771 static struct mlx5e_tc_fib_event_data *
1772 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1773 			 struct net_device *ul_dev,
1774 			 struct mlx5e_tc_tun_encap *encap,
1775 			 unsigned long event,
1776 			 struct fib_notifier_info *info)
1777 {
1778 	struct fib6_entry_notifier_info *fen_info;
1779 	struct mlx5e_tc_fib_event_data *fib_work;
1780 	struct mlx5e_route_entry *r;
1781 	struct mlx5e_route_key key;
1782 	struct net_device *fib_dev;
1783 
1784 	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1785 	fib_dev = fib6_info_nh_dev(fen_info->rt);
1786 	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1787 	    fen_info->rt->fib6_dst.plen != 128)
1788 		return NULL;
1789 
1790 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1791 	if (!fib_work)
1792 		return ERR_PTR(-ENOMEM);
1793 
1794 	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1795 	       sizeof(fen_info->rt->fib6_dst.addr));
1796 	key.ip_version = 6;
1797 
1798 	/* Can't fail after this point because releasing reference to r
1799 	 * requires obtaining sleeping mutex which we can't do in atomic
1800 	 * context.
1801 	 */
1802 	r = mlx5e_route_lookup_for_update(encap, &key);
1803 	if (!r)
1804 		goto out;
1805 	fib_work->r = r;
1806 	dev_hold(ul_dev);
1807 
1808 	return fib_work;
1809 
1810 out:
1811 	kfree(fib_work);
1812 	return NULL;
1813 }
1814 
1815 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1816 {
1817 	struct mlx5e_tc_fib_event_data *fib_work;
1818 	struct fib_notifier_info *info = ptr;
1819 	struct mlx5e_tc_tun_encap *encap;
1820 	struct net_device *ul_dev;
1821 	struct mlx5e_priv *priv;
1822 
1823 	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1824 	priv = encap->priv;
1825 	ul_dev = priv->netdev;
1826 	priv = netdev_priv(ul_dev);
1827 
1828 	switch (event) {
1829 	case FIB_EVENT_ENTRY_REPLACE:
1830 	case FIB_EVENT_ENTRY_DEL:
1831 		if (info->family == AF_INET)
1832 			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1833 		else if (info->family == AF_INET6)
1834 			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1835 		else
1836 			return NOTIFY_DONE;
1837 
1838 		if (!IS_ERR_OR_NULL(fib_work)) {
1839 			queue_work(priv->wq, &fib_work->work);
1840 		} else if (IS_ERR(fib_work)) {
1841 			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1842 			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1843 				       PTR_ERR(fib_work));
1844 		}
1845 
1846 		break;
1847 	default:
1848 		return NOTIFY_DONE;
1849 	}
1850 
1851 	return NOTIFY_DONE;
1852 }
1853 
1854 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1855 {
1856 	struct mlx5e_tc_tun_encap *encap;
1857 	int err;
1858 
1859 	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1860 	if (!encap)
1861 		return ERR_PTR(-ENOMEM);
1862 
1863 	encap->priv = priv;
1864 	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1865 	spin_lock_init(&encap->route_lock);
1866 	hash_init(encap->route_tbl);
1867 	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1868 				    NULL, NULL);
1869 	if (err) {
1870 		kvfree(encap);
1871 		return ERR_PTR(err);
1872 	}
1873 
1874 	return encap;
1875 }
1876 
1877 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1878 {
1879 	if (!encap)
1880 		return;
1881 
1882 	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1883 	flush_workqueue(encap->priv->wq); /* flush fib event works */
1884 	kvfree(encap);
1885 }
1886