1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "fs_core.h"
9 #include "en_tc.h"
10 #include "tc_tun.h"
11 #include "rep/tc.h"
12 #include "diag/en_tc_tracepoint.h"
13 
14 enum {
15 	MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
16 };
17 
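/* If the encap route device is an OVS internal port (ovs master), set up
 * forward-to-internal-port actions for the egress direction. Firmware (DMFS)
 * steering cannot combine matching on the external port with encap and
 * forward-to-table actions, so that combination is rejected.
 */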
18 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
19 				     struct mlx5_flow_attr *attr,
20 				     struct mlx5e_encap_entry *e,
21 				     int out_index)
22 {
23 	struct net_device *route_dev;
24 	int err = 0;
25 
26 	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
27 
28 	if (!route_dev || !netif_is_ovs_master(route_dev))
29 		goto out;
30 
31 	if (priv->mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS &&
32 	    mlx5e_eswitch_uplink_rep(attr->parse_attr->filter_dev) &&
33 	    (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) {
34 		mlx5_core_warn(priv->mdev,
35 			       "Matching on external port with encap + fwd to table actions is not allowed for firmware steering\n");
36 		err = -EINVAL;
37 		goto out;
38 	}
39 
40 	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
41 						MLX5E_TC_INT_PORT_EGRESS,
42 						&attr->action, out_index);
43 
44 out:
45 	dev_put(route_dev);
46 
47 	return err;
48 }
49 
50 struct mlx5e_route_key {
51 	int ip_version;
52 	union {
53 		__be32 v4;
54 		struct in6_addr v6;
55 	} endpoint_ip;
56 };
57 
58 struct mlx5e_route_entry {
59 	struct mlx5e_route_key key;
60 	struct list_head encap_entries;
61 	struct list_head decap_flows;
62 	u32 flags;
63 	struct hlist_node hlist;
64 	refcount_t refcnt;
65 	int tunnel_dev_index;
66 	struct rcu_head rcu;
67 };
68 
69 struct mlx5e_tc_tun_encap {
70 	struct mlx5e_priv *priv;
71 	struct notifier_block fib_nb;
72 	spinlock_t route_lock; /* protects route_tbl */
73 	unsigned long route_tbl_last_update;
74 	DECLARE_HASHTABLE(route_tbl, 8);
75 };
76 
77 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
78 {
79 	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
80 }
81 
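/* Allocate esw_attr->rx_tun_attr for a tunnel decap flow and populate it with
 * the outer source and destination IP addresses from the flow's match spec.
 */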
82 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
83 			     struct mlx5_flow_spec *spec)
84 {
85 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
86 	struct mlx5_rx_tun_attr *tun_attr;
87 	void *daddr, *saddr;
88 	u8 ip_version;
89 
90 	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
91 	if (!tun_attr)
92 		return -ENOMEM;
93 
94 	esw_attr->rx_tun_attr = tun_attr;
95 	ip_version = mlx5e_tc_get_ip_version(spec, true);
96 
97 	if (ip_version == 4) {
98 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
99 				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
100 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
101 				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
102 		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
103 		tun_attr->src_ip.v4 = *(__be32 *)saddr;
104 		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
105 			return 0;
106 	}
107 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
108 	else if (ip_version == 6) {
109 		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
110 
111 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
112 				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
113 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
114 				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
115 		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
116 		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
117 		if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
118 		    ipv6_addr_any(&tun_attr->src_ip.v6))
119 			return 0;
120 	}
121 #endif
122 	/* Only set the flag if both src and dst ip addresses exist. They are
123 	 * required to establish routing.
124 	 */
125 	flow_flag_set(flow, TUN_RX);
126 	flow->attr->tun_ip_version = ip_version;
127 	return 0;
128 }
129 
130 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
131 {
132 	bool all_flow_encaps_valid = true;
133 	int i;
134 
135 	/* Flow can be associated with multiple encap entries.
136 	 * Before offloading the flow verify that all of them have
137 	 * a valid neighbour.
138 	 */
139 	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
140 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
141 			continue;
142 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
143 			all_flow_encaps_valid = false;
144 			break;
145 		}
146 	}
147 
148 	return all_flow_encaps_valid;
149 }
150 
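/* Called when the encap entry's neighbour becomes valid: allocate the packet
 * reformat object for the cached encap header and move the flows on flow_list
 * from the slow path rule to the offloaded encap rule.
 */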
151 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
152 			      struct mlx5e_encap_entry *e,
153 			      struct list_head *flow_list)
154 {
155 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
156 	struct mlx5_pkt_reformat_params reformat_params;
157 	struct mlx5_esw_flow_attr *esw_attr;
158 	struct mlx5_flow_handle *rule;
159 	struct mlx5_flow_attr *attr;
160 	struct mlx5_flow_spec *spec;
161 	struct mlx5e_tc_flow *flow;
162 	int err;
163 
164 	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
165 		return;
166 
167 	memset(&reformat_params, 0, sizeof(reformat_params));
168 	reformat_params.type = e->reformat_type;
169 	reformat_params.size = e->encap_size;
170 	reformat_params.data = e->encap_header;
171 	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
172 						     &reformat_params,
173 						     MLX5_FLOW_NAMESPACE_FDB);
174 	if (IS_ERR(e->pkt_reformat)) {
175 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
176 			       PTR_ERR(e->pkt_reformat));
177 		return;
178 	}
179 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
180 	mlx5e_rep_queue_neigh_stats_work(priv);
181 
182 	list_for_each_entry(flow, flow_list, tmp_list) {
183 		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
184 			continue;
185 
186 		spec = &flow->attr->parse_attr->spec;
187 
188 		attr = mlx5e_tc_get_encap_attr(flow);
189 		esw_attr = attr->esw_attr;
190 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
191 		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
192 
193 		/* Do not offload flows with unresolved neighbors */
194 		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
195 			continue;
196 
197 		err = mlx5e_tc_offload_flow_post_acts(flow);
198 		if (err) {
199 			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
200 				       err);
201 			continue;
202 		}
203 
204 		/* update from slow path rule to encap rule */
205 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
206 		if (IS_ERR(rule)) {
207 			mlx5e_tc_unoffload_flow_post_acts(flow);
208 			err = PTR_ERR(rule);
209 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
210 				       err);
211 			continue;
212 		}
213 
214 		mlx5e_tc_unoffload_from_slow_path(esw, flow);
215 		flow->rule[0] = rule;
216 		/* was unset when slow path rule removed */
217 		flow_flag_set(flow, OFFLOADED);
218 	}
219 }
220 
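/* Called when the encap entry's neighbour becomes invalid: move offloaded
 * flows back to the slow path rule and release the packet reformat object.
 */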
221 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
222 			      struct mlx5e_encap_entry *e,
223 			      struct list_head *flow_list)
224 {
225 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
226 	struct mlx5_esw_flow_attr *esw_attr;
227 	struct mlx5_flow_handle *rule;
228 	struct mlx5_flow_attr *attr;
229 	struct mlx5_flow_spec *spec;
230 	struct mlx5e_tc_flow *flow;
231 	int err;
232 
233 	list_for_each_entry(flow, flow_list, tmp_list) {
234 		if (!mlx5e_is_offloaded_flow(flow))
235 			continue;
236 
237 		attr = mlx5e_tc_get_encap_attr(flow);
238 		esw_attr = attr->esw_attr;
239 		/* mark the flow's encap dest as non-valid */
240 		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
241 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
242 
243 		/* Clear pkt_reformat before checking the slow path flag: the
244 		 * same flow may appear in a later iteration with the slow path
245 		 * flag already set, but its pkt_reformat still needs clearing.
246 		 */
247 		if (flow_flag_test(flow, SLOW))
248 			continue;
249 
250 		/* update from encap rule to slow path rule */
251 		spec = &flow->attr->parse_attr->spec;
252 		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
253 
254 		if (IS_ERR(rule)) {
255 			err = PTR_ERR(rule);
256 			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
257 				       err);
258 			continue;
259 		}
260 
261 		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
262 		mlx5e_tc_unoffload_flow_post_acts(flow);
263 		flow->rule[0] = rule;
264 		/* was unset when fast path rule removed */
265 		flow_flag_set(flow, OFFLOADED);
266 	}
267 
268 	/* we know that the encap is valid */
269 	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
270 	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
271 	e->pkt_reformat = NULL;
272 }
273 
274 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
275 				struct list_head *flow_list,
276 				int index)
277 {
278 	if (IS_ERR(mlx5e_flow_get(flow))) {
279 		/* Flow is being deleted concurrently. Wait for it to be
280 		 * unoffloaded from hardware, otherwise deleting encap will
281 		 * fail.
282 		 */
283 		wait_for_completion(&flow->del_hw_done);
284 		return;
285 	}
286 	wait_for_completion(&flow->init_done);
287 
288 	flow->tmp_entry_index = index;
289 	list_add(&flow->tmp_list, flow_list);
290 }
291 
292 /* Takes reference to all flows attached to encap and adds the flows to
293  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
294  */
295 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
296 {
297 	struct encap_flow_item *efi;
298 	struct mlx5e_tc_flow *flow;
299 
300 	list_for_each_entry(efi, &e->flows, list) {
301 		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
302 		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
303 	}
304 }
305 
306 /* Takes reference to all flows attached to route and adds the flows to
307  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
308  */
309 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
310 					     struct list_head *flow_list)
311 {
312 	struct mlx5e_tc_flow *flow;
313 
314 	list_for_each_entry(flow, &r->decap_flows, decap_routes)
315 		mlx5e_take_tmp_flow(flow, flow_list, 0);
316 }
317 
318 typedef bool (match_cb)(struct mlx5e_encap_entry *);
319 
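/* Iterate nhe->encap_list under RCU and return the next encap entry (after
 * 'e', or the first entry when 'e' is NULL) that can be referenced and
 * satisfies the match callback. The starting entry is released before return.
 */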
320 static struct mlx5e_encap_entry *
321 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
322 			      struct mlx5e_encap_entry *e,
323 			      match_cb match)
324 {
325 	struct mlx5e_encap_entry *next = NULL;
326 
327 retry:
328 	rcu_read_lock();
329 
330 	/* find encap with non-zero reference counter value */
331 	for (next = e ?
332 		     list_next_or_null_rcu(&nhe->encap_list,
333 					   &e->encap_list,
334 					   struct mlx5e_encap_entry,
335 					   encap_list) :
336 		     list_first_or_null_rcu(&nhe->encap_list,
337 					    struct mlx5e_encap_entry,
338 					    encap_list);
339 	     next;
340 	     next = list_next_or_null_rcu(&nhe->encap_list,
341 					  &next->encap_list,
342 					  struct mlx5e_encap_entry,
343 					  encap_list))
344 		if (mlx5e_encap_take(next))
345 			break;
346 
347 	rcu_read_unlock();
348 
349 	/* release starting encap */
350 	if (e)
351 		mlx5e_encap_put(netdev_priv(e->out_dev), e);
352 	if (!next)
353 		return next;
354 
355 	/* wait for encap to be fully initialized */
356 	wait_for_completion(&next->res_ready);
357 	/* continue searching if encap entry is not in valid state after completion */
358 	if (!match(next)) {
359 		e = next;
360 		goto retry;
361 	}
362 
363 	return next;
364 }
365 
366 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
367 {
368 	return e->flags & MLX5_ENCAP_ENTRY_VALID;
369 }
370 
371 static struct mlx5e_encap_entry *
372 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
373 			   struct mlx5e_encap_entry *e)
374 {
375 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
376 }
377 
378 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
379 {
380 	return e->compl_result >= 0;
381 }
382 
383 struct mlx5e_encap_entry *
384 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
385 			  struct mlx5e_encap_entry *e)
386 {
387 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
388 }
389 
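/* Walk all valid encap entries of the neigh hash entry and query the counters
 * of their offloaded flows. If any flow was used since the last report, update
 * reported_lastuse and send an event on the corresponding neighbour.
 */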
390 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
391 {
392 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
393 	struct mlx5e_encap_entry *e = NULL;
394 	struct mlx5e_tc_flow *flow;
395 	struct mlx5_fc *counter;
396 	struct neigh_table *tbl;
397 	bool neigh_used = false;
398 	struct neighbour *n;
399 	u64 lastuse;
400 
401 	if (m_neigh->family == AF_INET)
402 		tbl = &arp_tbl;
403 #if IS_ENABLED(CONFIG_IPV6)
404 	else if (m_neigh->family == AF_INET6)
405 		tbl = ipv6_stub->nd_tbl;
406 #endif
407 	else
408 		return;
409 
410 	/* mlx5e_get_next_valid_encap() releases previous encap before returning
411 	 * next one.
412 	 */
413 	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
414 		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
415 		struct encap_flow_item *efi, *tmp;
416 		struct mlx5_eswitch *esw;
417 		LIST_HEAD(flow_list);
418 
419 		esw = priv->mdev->priv.eswitch;
420 		mutex_lock(&esw->offloads.encap_tbl_lock);
421 		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
422 			flow = container_of(efi, struct mlx5e_tc_flow,
423 					    encaps[efi->index]);
424 			if (IS_ERR(mlx5e_flow_get(flow)))
425 				continue;
426 			list_add(&flow->tmp_list, &flow_list);
427 
428 			if (mlx5e_is_offloaded_flow(flow)) {
429 				counter = mlx5e_tc_get_counter(flow);
430 				lastuse = mlx5_fc_query_lastuse(counter);
431 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
432 					neigh_used = true;
433 					break;
434 				}
435 			}
436 		}
437 		mutex_unlock(&esw->offloads.encap_tbl_lock);
438 
439 		mlx5e_put_flow_list(priv, &flow_list);
440 		if (neigh_used) {
441 			/* release current encap before breaking the loop */
442 			mlx5e_encap_put(priv, e);
443 			break;
444 		}
445 	}
446 
447 	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
448 
449 	if (neigh_used) {
450 		nhe->reported_lastuse = jiffies;
451 
452 		/* find the relevant neigh according to the cached device and
453 		 * dst ip pair
454 		 */
455 		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
456 		if (!n)
457 			return;
458 
459 		neigh_event_send(n, NULL);
460 		neigh_release(n);
461 	}
462 }
463 
464 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
465 {
466 	WARN_ON(!list_empty(&e->flows));
467 
468 	if (e->compl_result > 0) {
469 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
470 
471 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
472 			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
473 	}
474 
475 	kfree(e->tun_info);
476 	kfree(e->encap_header);
477 	kfree_rcu(e, rcu);
478 }
479 
480 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
481 				struct mlx5e_decap_entry *d)
482 {
483 	WARN_ON(!list_empty(&d->flows));
484 
485 	if (!d->compl_result)
486 		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
487 
488 	kfree_rcu(d, rcu);
489 }
490 
491 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
492 {
493 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
494 
495 	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
496 		return;
497 	list_del(&e->route_list);
498 	hash_del_rcu(&e->encap_hlist);
499 	mutex_unlock(&esw->offloads.encap_tbl_lock);
500 
501 	mlx5e_encap_dealloc(priv, e);
502 }
503 
504 static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
505 {
506 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
507 
508 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
509 
510 	if (!refcount_dec_and_test(&e->refcnt))
511 		return;
512 	list_del(&e->route_list);
513 	hash_del_rcu(&e->encap_hlist);
514 	mlx5e_encap_dealloc(priv, e);
515 }
516 
517 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
518 {
519 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
520 
521 	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
522 		return;
523 	hash_del_rcu(&d->hlist);
524 	mutex_unlock(&esw->offloads.decap_tbl_lock);
525 
526 	mlx5e_decap_dealloc(priv, d);
527 }
528 
529 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
530 				     struct mlx5e_tc_flow *flow,
531 				     int out_index);
532 
533 void mlx5e_detach_encap(struct mlx5e_priv *priv,
534 			struct mlx5e_tc_flow *flow,
535 			struct mlx5_flow_attr *attr,
536 			int out_index)
537 {
538 	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
539 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
540 
541 	if (!mlx5e_is_eswitch_flow(flow))
542 		return;
543 
544 	if (attr->esw_attr->dests[out_index].flags &
545 	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
546 		mlx5e_detach_encap_route(priv, flow, out_index);
547 
548 	/* flow wasn't fully initialized */
549 	if (!e)
550 		return;
551 
552 	mutex_lock(&esw->offloads.encap_tbl_lock);
553 	list_del(&flow->encaps[out_index].list);
554 	flow->encaps[out_index].e = NULL;
555 	if (!refcount_dec_and_test(&e->refcnt)) {
556 		mutex_unlock(&esw->offloads.encap_tbl_lock);
557 		return;
558 	}
559 	list_del(&e->route_list);
560 	hash_del_rcu(&e->encap_hlist);
561 	mutex_unlock(&esw->offloads.encap_tbl_lock);
562 
563 	mlx5e_encap_dealloc(priv, e);
564 }
565 
566 void mlx5e_detach_decap(struct mlx5e_priv *priv,
567 			struct mlx5e_tc_flow *flow)
568 {
569 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
570 	struct mlx5e_decap_entry *d = flow->decap_reformat;
571 
572 	if (!d)
573 		return;
574 
575 	mutex_lock(&esw->offloads.decap_tbl_lock);
576 	list_del(&flow->l3_to_l2_reformat);
577 	flow->decap_reformat = NULL;
578 
579 	if (!refcount_dec_and_test(&d->refcnt)) {
580 		mutex_unlock(&esw->offloads.decap_tbl_lock);
581 		return;
582 	}
583 	hash_del_rcu(&d->hlist);
584 	mutex_unlock(&esw->offloads.decap_tbl_lock);
585 
586 	mlx5e_decap_dealloc(priv, d);
587 }
588 
589 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
590 					   struct mlx5e_encap_key *b)
591 {
592 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
593 		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
594 }
595 
596 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
597 					   struct mlx5e_encap_key *b,
598 					   u32 tun_type)
599 {
600 	struct ip_tunnel_info *a_info;
601 	struct ip_tunnel_info *b_info;
602 	bool a_has_opts, b_has_opts;
603 
604 	if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
605 		return false;
606 
607 	a_has_opts = test_bit(tun_type, a->ip_tun_key->tun_flags);
608 	b_has_opts = test_bit(tun_type, b->ip_tun_key->tun_flags);
609 
610 	/* keys are equal when both don't have any options attached */
611 	if (!a_has_opts && !b_has_opts)
612 		return true;
613 
614 	if (a_has_opts != b_has_opts)
615 		return false;
616 
617 	/* options stored in memory next to ip_tunnel_info struct */
618 	a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
619 	b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
620 
621 	return a_info->options_len == b_info->options_len &&
622 	       !memcmp(ip_tunnel_info_opts(a_info),
623 		       ip_tunnel_info_opts(b_info),
624 		       a_info->options_len);
625 }
626 
627 static int cmp_decap_info(struct mlx5e_decap_key *a,
628 			  struct mlx5e_decap_key *b)
629 {
630 	return memcmp(&a->key, &b->key, sizeof(b->key));
631 }
632 
633 static int hash_encap_info(struct mlx5e_encap_key *key)
634 {
635 	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
636 		     key->tc_tunnel->tunnel_type);
637 }
638 
639 static int hash_decap_info(struct mlx5e_decap_key *key)
640 {
641 	return jhash(&key->key, sizeof(key->key), 0);
642 }
643 
644 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
645 {
646 	return refcount_inc_not_zero(&e->refcnt);
647 }
648 
649 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
650 {
651 	return refcount_inc_not_zero(&e->refcnt);
652 }
653 
654 static struct mlx5e_encap_entry *
655 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
656 		uintptr_t hash_key)
657 {
658 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
659 	struct mlx5e_encap_key e_key;
660 	struct mlx5e_encap_entry *e;
661 
662 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
663 				   encap_hlist, hash_key) {
664 		e_key.ip_tun_key = &e->tun_info->key;
665 		e_key.tc_tunnel = e->tunnel;
666 		if (e->tunnel->encap_info_equal(&e_key, key) &&
667 		    mlx5e_encap_take(e))
668 			return e;
669 	}
670 
671 	return NULL;
672 }
673 
674 static struct mlx5e_decap_entry *
675 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
676 		uintptr_t hash_key)
677 {
678 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
679 	struct mlx5e_decap_key r_key;
680 	struct mlx5e_decap_entry *e;
681 
682 	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
683 				   hlist, hash_key) {
684 		r_key = e->key;
685 		if (!cmp_decap_info(&r_key, key) &&
686 		    mlx5e_decap_take(e))
687 			return e;
688 	}
689 	return NULL;
690 }
691 
692 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
693 {
694 	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
695 
696 	return kmemdup(tun_info, tun_size, GFP_KERNEL);
697 }
698 
699 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
700 				      struct mlx5e_tc_flow *flow,
701 				      int out_index,
702 				      struct mlx5e_encap_entry *e,
703 				      struct netlink_ext_ack *extack)
704 {
705 	int i;
706 
707 	for (i = 0; i < out_index; i++) {
708 		if (flow->encaps[i].e != e)
709 			continue;
710 		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
711 		netdev_err(priv->netdev, "can't duplicate encap action\n");
712 		return true;
713 	}
714 
715 	return false;
716 }
717 
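/* When the tunnel route device is another mlx5 netdev (VF tunnel), clear the
 * destination chain, add a header rewrite that stores the route vport
 * metadata in VPORT_TO_REG and mark the destination for source port rewrite.
 */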
718 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
719 			       struct mlx5_flow_attr *attr,
720 			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
721 			       struct net_device *out_dev,
722 			       int route_dev_ifindex,
723 			       int out_index)
724 {
725 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
726 	struct net_device *route_dev;
727 	u16 vport_num;
728 	int err = 0;
729 	u32 data;
730 
731 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
732 
733 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
734 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
735 		goto out;
736 
737 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
738 	if (err)
739 		goto out;
740 
741 	attr->dest_chain = 0;
742 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
743 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
744 	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
745 						       vport_num);
746 	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
747 						   MLX5_FLOW_NAMESPACE_FDB,
748 						   VPORT_TO_REG, data);
749 	if (err >= 0) {
750 		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
751 		err = 0;
752 	}
753 
754 out:
755 	dev_put(route_dev);
756 	return err;
757 }
758 
759 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
760 				  struct mlx5_esw_flow_attr *attr,
761 				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
762 				  struct net_device *out_dev,
763 				  int route_dev_ifindex,
764 				  int out_index)
765 {
766 	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
767 	struct net_device *route_dev;
768 	u16 vport_num;
769 	int err = 0;
770 	u32 data;
771 
772 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
773 
774 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
775 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
776 		err = -ENODEV;
777 		goto out;
778 	}
779 
780 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
781 	if (err)
782 		goto out;
783 
784 	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
785 						       vport_num);
786 	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
787 
788 out:
789 	dev_put(route_dev);
790 	return err;
791 }
792 
793 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
794 {
795 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
796 	struct mlx5_rep_uplink_priv *uplink_priv;
797 	struct mlx5e_rep_priv *uplink_rpriv;
798 	struct mlx5e_tc_tun_encap *encap;
799 	unsigned int ret;
800 
801 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
802 	uplink_priv = &uplink_rpriv->uplink_priv;
803 	encap = uplink_priv->encap;
804 
805 	spin_lock_bh(&encap->route_lock);
806 	ret = encap->route_tbl_last_update;
807 	spin_unlock_bh(&encap->route_lock);
808 	return ret;
809 }
810 
811 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
812 				    struct mlx5e_tc_flow *flow,
813 				    struct mlx5_flow_attr *attr,
814 				    struct mlx5e_encap_entry *e,
815 				    bool new_encap_entry,
816 				    unsigned long tbl_time_before,
817 				    int out_index);
818 
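/* Look up an existing encap entry for the tunnel key or create a new one,
 * build the encapsulation header (IPv4 or IPv6), attach the flow to the entry
 * and record the encap route. Must be called with encap_tbl_lock held.
 */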
819 int mlx5e_attach_encap(struct mlx5e_priv *priv,
820 		       struct mlx5e_tc_flow *flow,
821 		       struct mlx5_flow_attr *attr,
822 		       struct net_device *mirred_dev,
823 		       int out_index,
824 		       struct netlink_ext_ack *extack,
825 		       struct net_device **encap_dev)
826 {
827 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
828 	struct mlx5e_tc_flow_parse_attr *parse_attr;
829 	const struct ip_tunnel_info *tun_info;
830 	const struct mlx5e_mpls_info *mpls_info;
831 	unsigned long tbl_time_before = 0;
832 	struct mlx5e_encap_entry *e;
833 	struct mlx5e_encap_key key;
834 	bool entry_created = false;
835 	unsigned short family;
836 	uintptr_t hash_key;
837 	int err = 0;
838 
839 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
840 
841 	parse_attr = attr->parse_attr;
842 	tun_info = parse_attr->tun_info[out_index];
843 	mpls_info = &parse_attr->mpls_info[out_index];
844 	family = ip_tunnel_info_af(tun_info);
845 	key.ip_tun_key = &tun_info->key;
846 	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
847 	if (!key.tc_tunnel) {
848 		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
849 		return -EOPNOTSUPP;
850 	}
851 
852 	hash_key = hash_encap_info(&key);
853 
854 	e = mlx5e_encap_get(priv, &key, hash_key);
855 
856 	/* must verify if encap is valid or not */
857 	if (e) {
858 		/* Check that entry was not already attached to this flow */
859 		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
860 			err = -EOPNOTSUPP;
861 			goto out_err;
862 		}
863 
864 		goto attach_flow;
865 	}
866 
867 	e = kzalloc(sizeof(*e), GFP_KERNEL);
868 	if (!e) {
869 		err = -ENOMEM;
870 		goto out_err;
871 	}
872 
873 	refcount_set(&e->refcnt, 1);
874 	init_completion(&e->res_ready);
875 	entry_created = true;
876 	INIT_LIST_HEAD(&e->route_list);
877 
878 	tun_info = mlx5e_dup_tun_info(tun_info);
879 	if (!tun_info) {
880 		err = -ENOMEM;
881 		goto out_err_init;
882 	}
883 	e->tun_info = tun_info;
884 	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
885 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
886 	if (err)
887 		goto out_err_init;
888 
889 	INIT_LIST_HEAD(&e->flows);
890 	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
891 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
892 
893 	if (family == AF_INET)
894 		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
895 	else if (family == AF_INET6)
896 		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
897 
898 	complete_all(&e->res_ready);
899 	if (err) {
900 		e->compl_result = err;
901 		goto out_err;
902 	}
903 	e->compl_result = 1;
904 
905 attach_flow:
906 	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
907 				       tbl_time_before, out_index);
908 	if (err)
909 		goto out_err;
910 
911 	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
912 	if (err == -EOPNOTSUPP) {
913 		/* If device doesn't support int port offload,
914 		 * redirect to uplink vport.
915 		 */
916 		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
917 		err = 0;
918 	} else if (err) {
919 		goto out_err;
920 	}
921 
922 	flow->encaps[out_index].e = e;
923 	list_add(&flow->encaps[out_index].list, &e->flows);
924 	flow->encaps[out_index].index = out_index;
925 	*encap_dev = e->out_dev;
926 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
927 		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
928 		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
929 	} else {
930 		flow_flag_set(flow, SLOW);
931 	}
932 
933 	return err;
934 
935 out_err:
936 	if (e)
937 		mlx5e_encap_put_locked(priv, e);
938 	return err;
939 
940 out_err_init:
941 	kfree(tun_info);
942 	kfree(e);
943 	return err;
944 }
945 
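/* Look up or create a decap (L3 tunnel to L2 reformat) entry keyed on the
 * flow's ethernet header and attach the flow to it.
 */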
946 int mlx5e_attach_decap(struct mlx5e_priv *priv,
947 		       struct mlx5e_tc_flow *flow,
948 		       struct netlink_ext_ack *extack)
949 {
950 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
951 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
952 	struct mlx5_pkt_reformat_params reformat_params;
953 	struct mlx5e_decap_entry *d;
954 	struct mlx5e_decap_key key;
955 	uintptr_t hash_key;
956 	int err = 0;
957 
958 	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
959 		NL_SET_ERR_MSG_MOD(extack,
960 				   "encap header larger than max supported");
961 		return -EOPNOTSUPP;
962 	}
963 
964 	key.key = attr->eth;
965 	hash_key = hash_decap_info(&key);
966 	mutex_lock(&esw->offloads.decap_tbl_lock);
967 	d = mlx5e_decap_get(priv, &key, hash_key);
968 	if (d) {
969 		mutex_unlock(&esw->offloads.decap_tbl_lock);
970 		wait_for_completion(&d->res_ready);
971 		mutex_lock(&esw->offloads.decap_tbl_lock);
972 		if (d->compl_result) {
973 			err = -EREMOTEIO;
974 			goto out_free;
975 		}
976 		goto found;
977 	}
978 
979 	d = kzalloc(sizeof(*d), GFP_KERNEL);
980 	if (!d) {
981 		err = -ENOMEM;
982 		goto out_err;
983 	}
984 
985 	d->key = key;
986 	refcount_set(&d->refcnt, 1);
987 	init_completion(&d->res_ready);
988 	INIT_LIST_HEAD(&d->flows);
989 	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
990 	mutex_unlock(&esw->offloads.decap_tbl_lock);
991 
992 	memset(&reformat_params, 0, sizeof(reformat_params));
993 	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
994 	reformat_params.size = sizeof(attr->eth);
995 	reformat_params.data = &attr->eth;
996 	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
997 						     &reformat_params,
998 						     MLX5_FLOW_NAMESPACE_FDB);
999 	if (IS_ERR(d->pkt_reformat)) {
1000 		err = PTR_ERR(d->pkt_reformat);
1001 		d->compl_result = err;
1002 	}
1003 	mutex_lock(&esw->offloads.decap_tbl_lock);
1004 	complete_all(&d->res_ready);
1005 	if (err)
1006 		goto out_free;
1007 
1008 found:
1009 	flow->decap_reformat = d;
1010 	attr->decap_pkt_reformat = d->pkt_reformat;
1011 	list_add(&flow->l3_to_l2_reformat, &d->flows);
1012 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1013 	return 0;
1014 
1015 out_free:
1016 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1017 	mlx5e_decap_put(priv, d);
1018 	return err;
1019 
1020 out_err:
1021 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1022 	return err;
1023 }
1024 
1025 int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
1026 				 struct mlx5e_tc_flow *flow,
1027 				 struct mlx5_flow_attr *attr,
1028 				 struct netlink_ext_ack *extack,
1029 				 bool *vf_tun)
1030 {
1031 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1032 	struct mlx5_esw_flow_attr *esw_attr;
1033 	struct net_device *encap_dev = NULL;
1034 	struct mlx5e_rep_priv *rpriv;
1035 	struct mlx5e_priv *out_priv;
1036 	struct mlx5_eswitch *esw;
1037 	int out_index;
1038 	int err = 0;
1039 
1040 	parse_attr = attr->parse_attr;
1041 	esw_attr = attr->esw_attr;
1042 	*vf_tun = false;
1043 
1044 	esw = priv->mdev->priv.eswitch;
1045 	mutex_lock(&esw->offloads.encap_tbl_lock);
1046 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1047 		struct net_device *out_dev;
1048 		int mirred_ifindex;
1049 
1050 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1051 			continue;
1052 
1053 		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1054 		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1055 		if (!out_dev) {
1056 			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1057 			err = -ENODEV;
1058 			goto out;
1059 		}
1060 		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1061 					 extack, &encap_dev);
1062 		dev_put(out_dev);
1063 		if (err)
1064 			goto out;
1065 
1066 		if (esw_attr->dests[out_index].flags &
1067 		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1068 		    !esw_attr->dest_int_port)
1069 			*vf_tun = true;
1070 
1071 		out_priv = netdev_priv(encap_dev);
1072 		rpriv = out_priv->ppriv;
1073 		esw_attr->dests[out_index].vport_valid = true;
1074 		esw_attr->dests[out_index].vport = rpriv->rep->vport;
1075 		esw_attr->dests[out_index].mdev = out_priv->mdev;
1076 	}
1077 
1078 	if (*vf_tun && esw_attr->out_count > 1) {
1079 		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1080 		err = -EOPNOTSUPP;
1081 		goto out;
1082 	}
1083 
1084 out:
1085 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1086 	return err;
1087 }
1088 
1089 void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
1090 				    struct mlx5e_tc_flow *flow,
1091 				    struct mlx5_flow_attr *attr)
1092 {
1093 	struct mlx5_esw_flow_attr *esw_attr;
1094 	int out_index;
1095 
1096 	if (!mlx5e_is_eswitch_flow(flow))
1097 		return;
1098 
1099 	esw_attr = attr->esw_attr;
1100 
1101 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1102 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1103 			continue;
1104 
1105 		mlx5e_detach_encap(flow->priv, flow, attr, out_index);
1106 		kfree(attr->parse_attr->tun_info[out_index]);
1107 	}
1108 }
1109 
1110 static int cmp_route_info(struct mlx5e_route_key *a,
1111 			  struct mlx5e_route_key *b)
1112 {
1113 	if (a->ip_version == 4 && b->ip_version == 4)
1114 		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1115 			      sizeof(a->endpoint_ip.v4));
1116 	else if (a->ip_version == 6 && b->ip_version == 6)
1117 		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1118 			      sizeof(a->endpoint_ip.v6));
1119 	return 1;
1120 }
1121 
1122 static u32 hash_route_info(struct mlx5e_route_key *key)
1123 {
1124 	if (key->ip_version == 4)
1125 		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1126 	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1127 }
1128 
1129 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1130 				struct mlx5e_route_entry *r)
1131 {
1132 	WARN_ON(!list_empty(&r->decap_flows));
1133 	WARN_ON(!list_empty(&r->encap_entries));
1134 
1135 	kfree_rcu(r, rcu);
1136 }
1137 
1138 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1139 {
1140 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1141 
1142 	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1143 		return;
1144 
1145 	hash_del_rcu(&r->hlist);
1146 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1147 
1148 	mlx5e_route_dealloc(priv, r);
1149 }
1150 
1151 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1152 {
1153 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1154 
1155 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1156 
1157 	if (!refcount_dec_and_test(&r->refcnt))
1158 		return;
1159 	hash_del_rcu(&r->hlist);
1160 	mlx5e_route_dealloc(priv, r);
1161 }
1162 
1163 static struct mlx5e_route_entry *
1164 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1165 		u32 hash_key)
1166 {
1167 	struct mlx5e_route_key r_key;
1168 	struct mlx5e_route_entry *r;
1169 
1170 	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1171 		r_key = r->key;
1172 		if (!cmp_route_info(&r_key, key) &&
1173 		    refcount_inc_not_zero(&r->refcnt))
1174 			return r;
1175 	}
1176 	return NULL;
1177 }
1178 
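/* Find a route entry for the tunnel endpoint IP or allocate a new one and add
 * it to the route table. Returns ERR_PTR(-EINVAL) if a matching entry exists
 * but is no longer valid.
 */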
1179 static struct mlx5e_route_entry *
1180 mlx5e_route_get_create(struct mlx5e_priv *priv,
1181 		       struct mlx5e_route_key *key,
1182 		       int tunnel_dev_index,
1183 		       unsigned long *route_tbl_change_time)
1184 {
1185 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1186 	struct mlx5_rep_uplink_priv *uplink_priv;
1187 	struct mlx5e_rep_priv *uplink_rpriv;
1188 	struct mlx5e_tc_tun_encap *encap;
1189 	struct mlx5e_route_entry *r;
1190 	u32 hash_key;
1191 
1192 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1193 	uplink_priv = &uplink_rpriv->uplink_priv;
1194 	encap = uplink_priv->encap;
1195 
1196 	hash_key = hash_route_info(key);
1197 	spin_lock_bh(&encap->route_lock);
1198 	r = mlx5e_route_get(encap, key, hash_key);
1199 	spin_unlock_bh(&encap->route_lock);
1200 	if (r) {
1201 		if (!mlx5e_route_entry_valid(r)) {
1202 			mlx5e_route_put_locked(priv, r);
1203 			return ERR_PTR(-EINVAL);
1204 		}
1205 		return r;
1206 	}
1207 
1208 	r = kzalloc(sizeof(*r), GFP_KERNEL);
1209 	if (!r)
1210 		return ERR_PTR(-ENOMEM);
1211 
1212 	r->key = *key;
1213 	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1214 	r->tunnel_dev_index = tunnel_dev_index;
1215 	refcount_set(&r->refcnt, 1);
1216 	INIT_LIST_HEAD(&r->decap_flows);
1217 	INIT_LIST_HEAD(&r->encap_entries);
1218 
1219 	spin_lock_bh(&encap->route_lock);
1220 	*route_tbl_change_time = encap->route_tbl_last_update;
1221 	hash_add(encap->route_tbl, &r->hlist, hash_key);
1222 	spin_unlock_bh(&encap->route_lock);
1223 
1224 	return r;
1225 }
1226 
1227 static struct mlx5e_route_entry *
1228 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1229 {
1230 	u32 hash_key = hash_route_info(key);
1231 	struct mlx5e_route_entry *r;
1232 
1233 	spin_lock_bh(&encap->route_lock);
1234 	encap->route_tbl_last_update = jiffies;
1235 	r = mlx5e_route_get(encap, key, hash_key);
1236 	spin_unlock_bh(&encap->route_lock);
1237 
1238 	return r;
1239 }
1240 
1241 struct mlx5e_tc_fib_event_data {
1242 	struct work_struct work;
1243 	unsigned long event;
1244 	struct mlx5e_route_entry *r;
1245 	struct net_device *ul_dev;
1246 };
1247 
1248 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1249 static struct mlx5e_tc_fib_event_data *
1250 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1251 {
1252 	struct mlx5e_tc_fib_event_data *fib_work;
1253 
1254 	fib_work = kzalloc(sizeof(*fib_work), flags);
1255 	if (WARN_ON(!fib_work))
1256 		return NULL;
1257 
1258 	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1259 	fib_work->event = event;
1260 	fib_work->ul_dev = ul_dev;
1261 
1262 	return fib_work;
1263 }
1264 
1265 static int
1266 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1267 			   struct mlx5e_route_entry *r,
1268 			   unsigned long event)
1269 {
1270 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1271 	struct mlx5e_tc_fib_event_data *fib_work;
1272 	struct mlx5e_rep_priv *uplink_rpriv;
1273 	struct net_device *ul_dev;
1274 
1275 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1276 	ul_dev = uplink_rpriv->netdev;
1277 
1278 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1279 	if (!fib_work)
1280 		return -ENOMEM;
1281 
1282 	dev_hold(ul_dev);
1283 	refcount_inc(&r->refcnt);
1284 	fib_work->r = r;
1285 	queue_work(priv->wq, &fib_work->work);
1286 
1287 	return 0;
1288 }
1289 
1290 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1291 			     struct mlx5e_tc_flow *flow)
1292 {
1293 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1294 	unsigned long tbl_time_before, tbl_time_after;
1295 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1296 	struct mlx5_flow_attr *attr = flow->attr;
1297 	struct mlx5_esw_flow_attr *esw_attr;
1298 	struct mlx5e_route_entry *r;
1299 	struct mlx5e_route_key key;
1300 	int err = 0;
1301 
1302 	esw_attr = attr->esw_attr;
1303 	parse_attr = attr->parse_attr;
1304 	mutex_lock(&esw->offloads.encap_tbl_lock);
1305 	if (!esw_attr->rx_tun_attr)
1306 		goto out;
1307 
1308 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1309 	tbl_time_after = tbl_time_before;
1310 	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1311 	if (err || !esw_attr->rx_tun_attr->decap_vport)
1312 		goto out;
1313 
1314 	key.ip_version = attr->tun_ip_version;
1315 	if (key.ip_version == 4)
1316 		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1317 	else
1318 		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1319 
1320 	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1321 				   &tbl_time_after);
1322 	if (IS_ERR(r)) {
1323 		err = PTR_ERR(r);
1324 		goto out;
1325 	}
1326 	/* Routing changed concurrently. FIB event handler might have missed new
1327 	 * entry, schedule update.
1328 	 */
1329 	if (tbl_time_before != tbl_time_after) {
1330 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1331 		if (err) {
1332 			mlx5e_route_put_locked(priv, r);
1333 			goto out;
1334 		}
1335 	}
1336 
1337 	flow->decap_route = r;
1338 	list_add(&flow->decap_routes, &r->decap_flows);
1339 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1340 	return 0;
1341 
1342 out:
1343 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1344 	return err;
1345 }
1346 
1347 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1348 				    struct mlx5e_tc_flow *flow,
1349 				    struct mlx5_flow_attr *attr,
1350 				    struct mlx5e_encap_entry *e,
1351 				    bool new_encap_entry,
1352 				    unsigned long tbl_time_before,
1353 				    int out_index)
1354 {
1355 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1356 	unsigned long tbl_time_after = tbl_time_before;
1357 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1358 	const struct ip_tunnel_info *tun_info;
1359 	struct mlx5_esw_flow_attr *esw_attr;
1360 	struct mlx5e_route_entry *r;
1361 	struct mlx5e_route_key key;
1362 	unsigned short family;
1363 	int err = 0;
1364 
1365 	esw_attr = attr->esw_attr;
1366 	parse_attr = attr->parse_attr;
1367 	tun_info = parse_attr->tun_info[out_index];
1368 	family = ip_tunnel_info_af(tun_info);
1369 
1370 	if (family == AF_INET) {
1371 		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1372 		key.ip_version = 4;
1373 	} else if (family == AF_INET6) {
1374 		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1375 		key.ip_version = 6;
1376 	}
1377 
1378 	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1379 				  e->route_dev_ifindex, out_index);
1380 	if (err || !(esw_attr->dests[out_index].flags &
1381 		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1382 		return err;
1383 
1384 	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1385 				   &tbl_time_after);
1386 	if (IS_ERR(r))
1387 		return PTR_ERR(r);
1388 	/* Routing changed concurrently. FIB event handler might have missed new
1389 	 * entry, schedule update.
1390 	 */
1391 	if (tbl_time_before != tbl_time_after) {
1392 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1393 		if (err) {
1394 			mlx5e_route_put_locked(priv, r);
1395 			return err;
1396 		}
1397 	}
1398 
1399 	flow->encap_routes[out_index].r = r;
1400 	if (new_encap_entry)
1401 		list_add(&e->route_list, &r->encap_entries);
1402 	flow->encap_routes[out_index].index = out_index;
1403 	return 0;
1404 }
1405 
1406 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1407 			      struct mlx5e_tc_flow *flow)
1408 {
1409 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1410 	struct mlx5e_route_entry *r = flow->decap_route;
1411 
1412 	if (!r)
1413 		return;
1414 
1415 	mutex_lock(&esw->offloads.encap_tbl_lock);
1416 	list_del(&flow->decap_routes);
1417 	flow->decap_route = NULL;
1418 
1419 	if (!refcount_dec_and_test(&r->refcnt)) {
1420 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1421 		return;
1422 	}
1423 	hash_del_rcu(&r->hlist);
1424 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1425 
1426 	mlx5e_route_dealloc(priv, r);
1427 }
1428 
1429 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1430 				     struct mlx5e_tc_flow *flow,
1431 				     int out_index)
1432 {
1433 	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1434 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1435 	struct mlx5e_encap_entry *e, *tmp;
1436 
1437 	if (!r)
1438 		return;
1439 
1440 	mutex_lock(&esw->offloads.encap_tbl_lock);
1441 	flow->encap_routes[out_index].r = NULL;
1442 
1443 	if (!refcount_dec_and_test(&r->refcnt)) {
1444 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1445 		return;
1446 	}
1447 	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1448 		list_del_init(&e->route_list);
1449 	hash_del_rcu(&r->hlist);
1450 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1451 
1452 	mlx5e_route_dealloc(priv, r);
1453 }
1454 
1455 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1456 				   struct mlx5e_encap_entry *e,
1457 				   struct list_head *encap_flows)
1458 {
1459 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1460 	struct mlx5e_tc_flow *flow;
1461 
1462 	list_for_each_entry(flow, encap_flows, tmp_list) {
1463 		struct mlx5_esw_flow_attr *esw_attr;
1464 		struct mlx5_flow_attr *attr;
1465 
1466 		if (!mlx5e_is_offloaded_flow(flow))
1467 			continue;
1468 
1469 		attr = mlx5e_tc_get_encap_attr(flow);
1470 		esw_attr = attr->esw_attr;
1471 
1472 		if (flow_flag_test(flow, SLOW)) {
1473 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1474 		} else {
1475 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1476 			mlx5e_tc_unoffload_flow_post_acts(flow);
1477 		}
1478 
1479 		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1480 		attr->modify_hdr = NULL;
1481 
1482 		esw_attr->dests[flow->tmp_entry_index].flags &=
1483 			~MLX5_ESW_DEST_ENCAP_VALID;
1484 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1485 	}
1486 
1487 	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1488 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1489 		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1490 		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1491 		e->pkt_reformat = NULL;
1492 	}
1493 }
1494 
1495 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1496 				  struct net_device *tunnel_dev,
1497 				  struct mlx5e_encap_entry *e,
1498 				  struct list_head *encap_flows)
1499 {
1500 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1501 	struct mlx5e_tc_flow *flow;
1502 	int err;
1503 
1504 	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1505 		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1506 		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1507 	if (err)
1508 		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1509 	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1510 
1511 	list_for_each_entry(flow, encap_flows, tmp_list) {
1512 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1513 		struct mlx5_esw_flow_attr *esw_attr;
1514 		struct mlx5_flow_handle *rule;
1515 		struct mlx5_flow_attr *attr;
1516 		struct mlx5_flow_spec *spec;
1517 
1518 		if (flow_flag_test(flow, FAILED))
1519 			continue;
1520 
1521 		spec = &flow->attr->parse_attr->spec;
1522 
1523 		attr = mlx5e_tc_get_encap_attr(flow);
1524 		esw_attr = attr->esw_attr;
1525 		parse_attr = attr->parse_attr;
1526 
1527 		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1528 					     e->out_dev, e->route_dev_ifindex,
1529 					     flow->tmp_entry_index);
1530 		if (err) {
1531 			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1532 			continue;
1533 		}
1534 
1535 		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1536 		if (err) {
1537 			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1538 				       err);
1539 			continue;
1540 		}
1541 
1542 		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1543 			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1544 			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1545 			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1546 				goto offload_to_slow_path;
1547 
1548 			err = mlx5e_tc_offload_flow_post_acts(flow);
1549 			if (err) {
1550 				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1551 					       err);
1552 				goto offload_to_slow_path;
1553 			}
1554 
1555 			/* update from slow path rule to encap rule */
1556 			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1557 			if (IS_ERR(rule)) {
1558 				mlx5e_tc_unoffload_flow_post_acts(flow);
1559 				err = PTR_ERR(rule);
1560 				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1561 					       err);
1562 			} else {
1563 				flow->rule[0] = rule;
1564 			}
1565 		} else {
1566 offload_to_slow_path:
1567 			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1568 			/* mark the flow's encap dest as non-valid */
1569 			esw_attr->dests[flow->tmp_entry_index].flags &=
1570 				~MLX5_ESW_DEST_ENCAP_VALID;
1571 
1572 			if (IS_ERR(rule)) {
1573 				err = PTR_ERR(rule);
1574 				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1575 					       err);
1576 			} else {
1577 				flow->rule[0] = rule;
1578 			}
1579 		}
1580 		flow_flag_set(flow, OFFLOADED);
1581 	}
1582 }
1583 
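/* For each encap entry attached to the route, take its flows, invalidate the
 * current encap offload and, when the route was replaced, rebuild the encap
 * header and re-offload the flows through the new tunnel route device.
 */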
1584 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1585 				     struct mlx5e_route_entry *r,
1586 				     struct list_head *flow_list,
1587 				     bool replace)
1588 {
1589 	struct net_device *tunnel_dev;
1590 	struct mlx5e_encap_entry *e;
1591 
1592 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1593 	if (!tunnel_dev)
1594 		return -ENODEV;
1595 
1596 	list_for_each_entry(e, &r->encap_entries, route_list) {
1597 		LIST_HEAD(encap_flows);
1598 
1599 		mlx5e_take_all_encap_flows(e, &encap_flows);
1600 		if (list_empty(&encap_flows))
1601 			continue;
1602 
1603 		if (mlx5e_route_entry_valid(r))
1604 			mlx5e_invalidate_encap(priv, e, &encap_flows);
1605 
1606 		if (!replace) {
1607 			list_splice(&encap_flows, flow_list);
1608 			continue;
1609 		}
1610 
1611 		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1612 		list_splice(&encap_flows, flow_list);
1613 	}
1614 
1615 	return 0;
1616 }
1617 
1618 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1619 				      struct list_head *flow_list)
1620 {
1621 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1622 	struct mlx5e_tc_flow *flow;
1623 
1624 	list_for_each_entry(flow, flow_list, tmp_list)
1625 		if (mlx5e_is_offloaded_flow(flow))
1626 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1627 }
1628 
1629 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1630 				  struct list_head *decap_flows)
1631 {
1632 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1633 	struct mlx5e_tc_flow *flow;
1634 
1635 	list_for_each_entry(flow, decap_flows, tmp_list) {
1636 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1637 		struct mlx5_flow_attr *attr = flow->attr;
1638 		struct mlx5_flow_handle *rule;
1639 		struct mlx5_flow_spec *spec;
1640 		int err;
1641 
1642 		if (flow_flag_test(flow, FAILED))
1643 			continue;
1644 
1645 		parse_attr = attr->parse_attr;
1646 		spec = &parse_attr->spec;
1647 		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1648 		if (err) {
1649 			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1650 				       err);
1651 			continue;
1652 		}
1653 
1654 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1655 		if (IS_ERR(rule)) {
1656 			err = PTR_ERR(rule);
1657 			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1658 				       err);
1659 		} else {
1660 			flow->rule[0] = rule;
1661 			flow_flag_set(flow, OFFLOADED);
1662 		}
1663 	}
1664 }
1665 
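/* Take all decap flows attached to route entry 'r', unoffload them if
 * the route was valid and, on a route replace event, offload them
 * again. The flows are spliced onto 'flow_list' for the caller to
 * release.
 */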
static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

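/* Work item scheduled by the FIB notifier. Updates all encap and decap
 * flows that depend on the modified route and marks the route entry
 * valid on a replace event.
 */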
static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}

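/* Allocate and initialize a FIB event work item for an IPv4 route
 * change. Only host (/32) routes whose nexthop device is an mlx5e
 * netdev are of interest; returns NULL when the event can be ignored
 * and ERR_PTR(-ENOMEM) on allocation failure.
 */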
static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	if (fen_info->fi->nh)
		return NULL;
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

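/* IPv6 counterpart of the above: only host (/128) routes on mlx5e
 * netdevs are handled.
 */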
static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

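/* FIB notifier callback. Called in atomic context, so it only prepares
 * the work item and defers the actual flow updates to the driver
 * workqueue.
 */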
static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

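/* Allocate the tunnel encap context and register the FIB notifier used
 * to track route changes relevant to offloaded tunnels.
 */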
struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

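/* Unregister the FIB notifier, flush any pending FIB event work and
 * free the tunnel encap context.
 */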
void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}