1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "fs_core.h"
9 #include "en_tc.h"
10 #include "tc_tun.h"
11 #include "rep/tc.h"
12 #include "diag/en_tc_tracepoint.h"
13
14 enum {
15 MLX5E_ROUTE_ENTRY_VALID = BIT(0),
16 };
17
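/* If the encap's route device is an OVS internal port (ovs master), program
 * forwarding to the internal port on egress. Matching on an external port
 * combined with encap + forward-to-table actions is rejected when firmware
 * (DMFS) steering is used.
 */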
18 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
19 struct mlx5_flow_attr *attr,
20 struct mlx5e_encap_entry *e,
21 int out_index)
22 {
23 struct net_device *route_dev;
24 int err = 0;
25
26 route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
27
28 if (!route_dev || !netif_is_ovs_master(route_dev))
29 goto out;
30
31 if (priv->mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS &&
32 mlx5e_eswitch_uplink_rep(attr->parse_attr->filter_dev) &&
33 (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) {
34 mlx5_core_warn(priv->mdev,
35 "Matching on external port with encap + fwd to table actions is not allowed for firmware steering\n");
36 err = -EINVAL;
37 goto out;
38 }
39
40 err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
41 MLX5E_TC_INT_PORT_EGRESS,
42 &attr->action, out_index);
43
44 out:
45 dev_put(route_dev);
46
47 return err;
48 }
49
50 struct mlx5e_route_key {
51 int ip_version;
52 union {
53 __be32 v4;
54 struct in6_addr v6;
55 } endpoint_ip;
56 };
57
58 struct mlx5e_route_entry {
59 struct mlx5e_route_key key;
60 struct list_head encap_entries;
61 struct list_head decap_flows;
62 u32 flags;
63 struct hlist_node hlist;
64 refcount_t refcnt;
65 int tunnel_dev_index;
66 struct rcu_head rcu;
67 };
68
69 struct mlx5e_tc_tun_encap {
70 struct mlx5e_priv *priv;
71 struct notifier_block fib_nb;
72 spinlock_t route_lock; /* protects route_tbl */
73 unsigned long route_tbl_last_update;
74 DECLARE_HASHTABLE(route_tbl, 8);
75 };
76
77 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
78 {
79 return r->flags & MLX5E_ROUTE_ENTRY_VALID;
80 }
81
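/* Allocate rx_tun_attr for the flow and copy the outer source/destination
 * addresses from the match spec. The TUN_RX flag is only set when both
 * addresses are present, since they are required for route lookup.
 */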
82 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
83 struct mlx5_flow_spec *spec)
84 {
85 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
86 struct mlx5_rx_tun_attr *tun_attr;
87 void *daddr, *saddr;
88 u8 ip_version;
89
90 tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
91 if (!tun_attr)
92 return -ENOMEM;
93
94 esw_attr->rx_tun_attr = tun_attr;
95 ip_version = mlx5e_tc_get_ip_version(spec, true);
96
97 if (ip_version == 4) {
98 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
99 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
100 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
101 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
102 tun_attr->dst_ip.v4 = *(__be32 *)daddr;
103 tun_attr->src_ip.v4 = *(__be32 *)saddr;
104 if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
105 return 0;
106 }
107 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
108 else if (ip_version == 6) {
109 int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
110
111 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
112 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
113 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
114 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
115 memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
116 memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
117 if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
118 ipv6_addr_any(&tun_attr->src_ip.v6))
119 return 0;
120 }
121 #endif
122 /* Only set the flag if both src and dst ip addresses exist. They are
123 * required to establish routing.
124 */
125 flow_flag_set(flow, TUN_RX);
126 flow->attr->tun_ip_version = ip_version;
127 return 0;
128 }
129
130 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
131 {
132 bool all_flow_encaps_valid = true;
133 int i;
134
135 /* Flow can be associated with multiple encap entries.
136 * Before offloading the flow verify that all of them have
137 * a valid neighbour.
138 */
139 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
140 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
141 continue;
142 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
143 all_flow_encaps_valid = false;
144 break;
145 }
146 }
147
148 return all_flow_encaps_valid;
149 }
150
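/* Called when the neighbour of encap entry 'e' becomes valid: allocate the
 * packet reformat for the cached encap header and move each flow in
 * flow_list from its slow path rule to the offloaded encap rule.
 */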
151 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
152 struct mlx5e_encap_entry *e,
153 struct list_head *flow_list)
154 {
155 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
156 struct mlx5_pkt_reformat_params reformat_params;
157 struct mlx5_esw_flow_attr *esw_attr;
158 struct mlx5_flow_handle *rule;
159 struct mlx5_flow_attr *attr;
160 struct mlx5_flow_spec *spec;
161 struct mlx5e_tc_flow *flow;
162 int err;
163
164 if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
165 return;
166
167 memset(&reformat_params, 0, sizeof(reformat_params));
168 reformat_params.type = e->reformat_type;
169 reformat_params.size = e->encap_size;
170 reformat_params.data = e->encap_header;
171 e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
172 &reformat_params,
173 MLX5_FLOW_NAMESPACE_FDB);
174 if (IS_ERR(e->pkt_reformat)) {
175 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
176 PTR_ERR(e->pkt_reformat));
177 return;
178 }
179 e->flags |= MLX5_ENCAP_ENTRY_VALID;
180 mlx5e_rep_queue_neigh_stats_work(priv);
181
182 list_for_each_entry(flow, flow_list, tmp_list) {
183 if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
184 continue;
185
186 spec = &flow->attr->parse_attr->spec;
187
188 attr = mlx5e_tc_get_encap_attr(flow);
189 esw_attr = attr->esw_attr;
190 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
191 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
192
193 /* Do not offload flows with unresolved neighbors */
194 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
195 continue;
196
197 err = mlx5e_tc_offload_flow_post_acts(flow);
198 if (err) {
199 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
200 err);
201 continue;
202 }
203
204 /* update from slow path rule to encap rule */
205 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
206 if (IS_ERR(rule)) {
207 mlx5e_tc_unoffload_flow_post_acts(flow);
208 err = PTR_ERR(rule);
209 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
210 err);
211 continue;
212 }
213
214 mlx5e_tc_unoffload_from_slow_path(esw, flow);
215 flow->rule[0] = rule;
216 /* was unset when slow path rule removed */
217 flow_flag_set(flow, OFFLOADED);
218 }
219 }
220
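/* Called when the neighbour of encap entry 'e' becomes invalid: mark the
 * encap destination of each flow as invalid, move offloaded flows back to
 * the slow path, and release the packet reformat of the encap entry.
 */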
221 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
222 struct mlx5e_encap_entry *e,
223 struct list_head *flow_list)
224 {
225 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
226 struct mlx5_esw_flow_attr *esw_attr;
227 struct mlx5_flow_handle *rule;
228 struct mlx5_flow_attr *attr;
229 struct mlx5_flow_spec *spec;
230 struct mlx5e_tc_flow *flow;
231 int err;
232
233 list_for_each_entry(flow, flow_list, tmp_list) {
234 if (!mlx5e_is_offloaded_flow(flow))
235 continue;
236
237 attr = mlx5e_tc_get_encap_attr(flow);
238 esw_attr = attr->esw_attr;
239 /* mark the flow's encap dest as non-valid */
240 esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
241 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
242
243 /* Clear pkt_reformat before checking the slow path flag: the same
244 * flow may appear again later in this list with the slow path flag
245 * already set, but its pkt_reformat still needs to be cleared.
246 */
247 if (flow_flag_test(flow, SLOW))
248 continue;
249
250 /* update from encap rule to slow path rule */
251 spec = &flow->attr->parse_attr->spec;
252 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
253
254 if (IS_ERR(rule)) {
255 err = PTR_ERR(rule);
256 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
257 err);
258 continue;
259 }
260
261 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
262 mlx5e_tc_unoffload_flow_post_acts(flow);
263 flow->rule[0] = rule;
264 /* was unset when fast path rule removed */
265 flow_flag_set(flow, OFFLOADED);
266 }
267
268 /* the encap entry was valid, so a packet reformat was allocated and must be released */
269 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
270 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
271 e->pkt_reformat = NULL;
272 }
273
274 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
275 struct list_head *flow_list,
276 int index)
277 {
278 if (IS_ERR(mlx5e_flow_get(flow))) {
279 /* Flow is being deleted concurrently. Wait for it to be
280 * unoffloaded from hardware, otherwise deleting encap will
281 * fail.
282 */
283 wait_for_completion(&flow->del_hw_done);
284 return;
285 }
286 wait_for_completion(&flow->init_done);
287
288 flow->tmp_entry_index = index;
289 list_add(&flow->tmp_list, flow_list);
290 }
291
292 /* Takes reference to all flows attached to encap and adds the flows to
293 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
294 */
295 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
296 {
297 struct encap_flow_item *efi;
298 struct mlx5e_tc_flow *flow;
299
300 list_for_each_entry(efi, &e->flows, list) {
301 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
302 mlx5e_take_tmp_flow(flow, flow_list, efi->index);
303 }
304 }
305
306 /* Takes reference to all flows attached to route and adds the flows to
307 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
308 */
309 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
310 struct list_head *flow_list)
311 {
312 struct mlx5e_tc_flow *flow;
313
314 list_for_each_entry(flow, &r->decap_flows, decap_routes)
315 mlx5e_take_tmp_flow(flow, flow_list, 0);
316 }
317
318 typedef bool (match_cb)(struct mlx5e_encap_entry *);
319
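/* Iterate the encap list of 'nhe' under RCU, starting after 'e' (or from the
 * head when 'e' is NULL), and return the next entry on which a reference
 * could be taken and that satisfies 'match'. The previously returned entry
 * is released before the search continues.
 */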
320 static struct mlx5e_encap_entry *
321 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
322 struct mlx5e_encap_entry *e,
323 match_cb match)
324 {
325 struct mlx5e_encap_entry *next = NULL;
326
327 retry:
328 rcu_read_lock();
329
330 /* find encap with non-zero reference counter value */
331 for (next = e ?
332 list_next_or_null_rcu(&nhe->encap_list,
333 &e->encap_list,
334 struct mlx5e_encap_entry,
335 encap_list) :
336 list_first_or_null_rcu(&nhe->encap_list,
337 struct mlx5e_encap_entry,
338 encap_list);
339 next;
340 next = list_next_or_null_rcu(&nhe->encap_list,
341 &next->encap_list,
342 struct mlx5e_encap_entry,
343 encap_list))
344 if (mlx5e_encap_take(next))
345 break;
346
347 rcu_read_unlock();
348
349 /* release starting encap */
350 if (e)
351 mlx5e_encap_put(netdev_priv(e->out_dev), e);
352 if (!next)
353 return next;
354
355 /* wait for encap to be fully initialized */
356 wait_for_completion(&next->res_ready);
357 /* continue searching if encap entry is not in valid state after completion */
358 if (!match(next)) {
359 e = next;
360 goto retry;
361 }
362
363 return next;
364 }
365
366 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
367 {
368 return e->flags & MLX5_ENCAP_ENTRY_VALID;
369 }
370
371 static struct mlx5e_encap_entry *
372 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
373 struct mlx5e_encap_entry *e)
374 {
375 return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
376 }
377
378 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
379 {
380 return e->compl_result >= 0;
381 }
382
383 struct mlx5e_encap_entry *
384 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
385 struct mlx5e_encap_entry *e)
386 {
387 return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
388 }
389
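/* Query the counters of all offloaded flows attached to the valid encap
 * entries of this neigh hash entry; if any flow saw traffic since the last
 * report, trigger an event on the corresponding neighbour so its state is
 * refreshed.
 */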
390 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
391 {
392 struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
393 struct mlx5e_encap_entry *e = NULL;
394 struct mlx5e_tc_flow *flow;
395 struct mlx5_fc *counter;
396 struct neigh_table *tbl;
397 bool neigh_used = false;
398 struct neighbour *n;
399 u64 lastuse;
400
401 if (m_neigh->family == AF_INET)
402 tbl = &arp_tbl;
403 #if IS_ENABLED(CONFIG_IPV6)
404 else if (m_neigh->family == AF_INET6)
405 tbl = ipv6_stub->nd_tbl;
406 #endif
407 else
408 return;
409
410 /* mlx5e_get_next_valid_encap() releases previous encap before returning
411 * next one.
412 */
413 while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
414 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
415 struct encap_flow_item *efi, *tmp;
416 struct mlx5_eswitch *esw;
417 LIST_HEAD(flow_list);
418
419 esw = priv->mdev->priv.eswitch;
420 mutex_lock(&esw->offloads.encap_tbl_lock);
421 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
422 flow = container_of(efi, struct mlx5e_tc_flow,
423 encaps[efi->index]);
424 if (IS_ERR(mlx5e_flow_get(flow)))
425 continue;
426 list_add(&flow->tmp_list, &flow_list);
427
428 if (mlx5e_is_offloaded_flow(flow)) {
429 counter = mlx5e_tc_get_counter(flow);
430 lastuse = mlx5_fc_query_lastuse(counter);
431 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
432 neigh_used = true;
433 break;
434 }
435 }
436 }
437 mutex_unlock(&esw->offloads.encap_tbl_lock);
438
439 mlx5e_put_flow_list(priv, &flow_list);
440 if (neigh_used) {
441 /* release current encap before breaking the loop */
442 mlx5e_encap_put(priv, e);
443 break;
444 }
445 }
446
447 trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
448
449 if (neigh_used) {
450 nhe->reported_lastuse = jiffies;
451
452 /* find the relevant neigh according to the cached device and
453 * dst ip pair
454 */
455 n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
456 if (!n)
457 return;
458
459 neigh_event_send(n, NULL);
460 neigh_release(n);
461 }
462 }
463
464 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
465 {
466 WARN_ON(!list_empty(&e->flows));
467
468 if (e->compl_result > 0) {
469 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
470
471 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
472 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
473 }
474
475 kfree(e->tun_info);
476 kfree(e->encap_header);
477 kfree_rcu(e, rcu);
478 }
479
480 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
481 struct mlx5e_decap_entry *d)
482 {
483 WARN_ON(!list_empty(&d->flows));
484
485 if (!d->compl_result)
486 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
487
488 kfree_rcu(d, rcu);
489 }
490
491 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
492 {
493 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
494
495 if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
496 return;
497 list_del(&e->route_list);
498 hash_del_rcu(&e->encap_hlist);
499 mutex_unlock(&esw->offloads.encap_tbl_lock);
500
501 mlx5e_encap_dealloc(priv, e);
502 }
503
504 static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
505 {
506 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
507
508 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
509
510 if (!refcount_dec_and_test(&e->refcnt))
511 return;
512 list_del(&e->route_list);
513 hash_del_rcu(&e->encap_hlist);
514 mlx5e_encap_dealloc(priv, e);
515 }
516
517 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
518 {
519 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
520
521 if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
522 return;
523 hash_del_rcu(&d->hlist);
524 mutex_unlock(&esw->offloads.decap_tbl_lock);
525
526 mlx5e_decap_dealloc(priv, d);
527 }
528
529 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
530 struct mlx5e_tc_flow *flow,
531 int out_index);
532
533 void mlx5e_detach_encap(struct mlx5e_priv *priv,
534 struct mlx5e_tc_flow *flow,
535 struct mlx5_flow_attr *attr,
536 int out_index)
537 {
538 struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
539 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
540
541 if (!mlx5e_is_eswitch_flow(flow))
542 return;
543
544 if (attr->esw_attr->dests[out_index].flags &
545 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
546 mlx5e_detach_encap_route(priv, flow, out_index);
547
548 /* flow wasn't fully initialized */
549 if (!e)
550 return;
551
552 mutex_lock(&esw->offloads.encap_tbl_lock);
553 list_del(&flow->encaps[out_index].list);
554 flow->encaps[out_index].e = NULL;
555 if (!refcount_dec_and_test(&e->refcnt)) {
556 mutex_unlock(&esw->offloads.encap_tbl_lock);
557 return;
558 }
559 list_del(&e->route_list);
560 hash_del_rcu(&e->encap_hlist);
561 mutex_unlock(&esw->offloads.encap_tbl_lock);
562
563 mlx5e_encap_dealloc(priv, e);
564 }
565
566 void mlx5e_detach_decap(struct mlx5e_priv *priv,
567 struct mlx5e_tc_flow *flow)
568 {
569 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
570 struct mlx5e_decap_entry *d = flow->decap_reformat;
571
572 if (!d)
573 return;
574
575 mutex_lock(&esw->offloads.decap_tbl_lock);
576 list_del(&flow->l3_to_l2_reformat);
577 flow->decap_reformat = NULL;
578
579 if (!refcount_dec_and_test(&d->refcnt)) {
580 mutex_unlock(&esw->offloads.decap_tbl_lock);
581 return;
582 }
583 hash_del_rcu(&d->hlist);
584 mutex_unlock(&esw->offloads.decap_tbl_lock);
585
586 mlx5e_decap_dealloc(priv, d);
587 }
588
589 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
590 struct mlx5e_encap_key *b)
591 {
592 return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
593 a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
594 }
595
596 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
597 struct mlx5e_encap_key *b,
598 u32 tun_type)
599 {
600 struct ip_tunnel_info *a_info;
601 struct ip_tunnel_info *b_info;
602 bool a_has_opts, b_has_opts;
603
604 if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
605 return false;
606
607 a_has_opts = test_bit(tun_type, a->ip_tun_key->tun_flags);
608 b_has_opts = test_bit(tun_type, b->ip_tun_key->tun_flags);
609
610 /* keys are equal when both don't have any options attached */
611 if (!a_has_opts && !b_has_opts)
612 return true;
613
614 if (a_has_opts != b_has_opts)
615 return false;
616
617 /* options stored in memory next to ip_tunnel_info struct */
618 a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
619 b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
620
621 return a_info->options_len == b_info->options_len &&
622 !memcmp(ip_tunnel_info_opts(a_info),
623 ip_tunnel_info_opts(b_info),
624 a_info->options_len);
625 }
626
627 static int cmp_decap_info(struct mlx5e_decap_key *a,
628 struct mlx5e_decap_key *b)
629 {
630 return memcmp(&a->key, &b->key, sizeof(b->key));
631 }
632
633 static int hash_encap_info(struct mlx5e_encap_key *key)
634 {
635 return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
636 key->tc_tunnel->tunnel_type);
637 }
638
639 static int hash_decap_info(struct mlx5e_decap_key *key)
640 {
641 return jhash(&key->key, sizeof(key->key), 0);
642 }
643
644 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
645 {
646 return refcount_inc_not_zero(&e->refcnt);
647 }
648
649 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
650 {
651 return refcount_inc_not_zero(&e->refcnt);
652 }
653
654 static struct mlx5e_encap_entry *
655 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
656 uintptr_t hash_key)
657 {
658 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
659 struct mlx5e_encap_key e_key;
660 struct mlx5e_encap_entry *e;
661
662 hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
663 encap_hlist, hash_key) {
664 e_key.ip_tun_key = &e->tun_info->key;
665 e_key.tc_tunnel = e->tunnel;
666 if (e->tunnel->encap_info_equal(&e_key, key) &&
667 mlx5e_encap_take(e))
668 return e;
669 }
670
671 return NULL;
672 }
673
674 static struct mlx5e_decap_entry *
675 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
676 uintptr_t hash_key)
677 {
678 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
679 struct mlx5e_decap_key r_key;
680 struct mlx5e_decap_entry *e;
681
682 hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
683 hlist, hash_key) {
684 r_key = e->key;
685 if (!cmp_decap_info(&r_key, key) &&
686 mlx5e_decap_take(e))
687 return e;
688 }
689 return NULL;
690 }
691
692 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
693 {
694 size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
695
696 return kmemdup(tun_info, tun_size, GFP_KERNEL);
697 }
698
699 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
700 struct mlx5e_tc_flow *flow,
701 int out_index,
702 struct mlx5e_encap_entry *e,
703 struct netlink_ext_ack *extack)
704 {
705 int i;
706
707 for (i = 0; i < out_index; i++) {
708 if (flow->encaps[i].e != e)
709 continue;
710 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
711 netdev_err(priv->netdev, "can't duplicate encap action\n");
712 return true;
713 }
714
715 return false;
716 }
717
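/* When the tunnel's route device is a mlx5e VF netdev, add a modify-header
 * action that rewrites the source vport metadata register to that VF's vport
 * and flag the destination with MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE so
 * the route can be tracked.
 */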
718 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
719 struct mlx5_flow_attr *attr,
720 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
721 struct net_device *out_dev,
722 int route_dev_ifindex,
723 int out_index)
724 {
725 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
726 struct net_device *route_dev;
727 u16 vport_num;
728 int err = 0;
729 u32 data;
730
731 route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
732
733 if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
734 !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
735 goto out;
736
737 err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
738 if (err)
739 goto out;
740
741 attr->dest_chain = 0;
742 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
743 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
744 data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
745 vport_num);
746 err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
747 MLX5_FLOW_NAMESPACE_FDB,
748 VPORT_TO_REG, data);
749 if (err >= 0) {
750 esw_attr->dests[out_index].src_port_rewrite_act_id = err;
751 err = 0;
752 }
753
754 out:
755 dev_put(route_dev);
756 return err;
757 }
758
759 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
760 struct mlx5_esw_flow_attr *attr,
761 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
762 struct net_device *out_dev,
763 int route_dev_ifindex,
764 int out_index)
765 {
766 int act_id = attr->dests[out_index].src_port_rewrite_act_id;
767 struct net_device *route_dev;
768 u16 vport_num;
769 int err = 0;
770 u32 data;
771
772 route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
773
774 if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
775 !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
776 err = -ENODEV;
777 goto out;
778 }
779
780 err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
781 if (err)
782 goto out;
783
784 data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
785 vport_num);
786 mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
787
788 out:
789 dev_put(route_dev);
790 return err;
791 }
792
793 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
794 {
795 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
796 struct mlx5_rep_uplink_priv *uplink_priv;
797 struct mlx5e_rep_priv *uplink_rpriv;
798 struct mlx5e_tc_tun_encap *encap;
799 unsigned int ret;
800
801 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
802 uplink_priv = &uplink_rpriv->uplink_priv;
803 encap = uplink_priv->encap;
804
805 spin_lock_bh(&encap->route_lock);
806 ret = encap->route_tbl_last_update;
807 spin_unlock_bh(&encap->route_lock);
808 return ret;
809 }
810
811 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
812 struct mlx5e_tc_flow *flow,
813 struct mlx5_flow_attr *attr,
814 struct mlx5e_encap_entry *e,
815 bool new_encap_entry,
816 unsigned long tbl_time_before,
817 int out_index);
818
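/* Look up (or create) the encap entry for the flow's tunnel destination,
 * build the encapsulation header for it, and attach the flow. Flows attached
 * to an entry whose neighbour is not yet resolved are marked SLOW and
 * offloaded via the slow path until the neighbour becomes valid. Must be
 * called with encap_tbl_lock held.
 */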
819 int mlx5e_attach_encap(struct mlx5e_priv *priv,
820 struct mlx5e_tc_flow *flow,
821 struct mlx5_flow_attr *attr,
822 struct net_device *mirred_dev,
823 int out_index,
824 struct netlink_ext_ack *extack,
825 struct net_device **encap_dev)
826 {
827 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
828 struct mlx5e_tc_flow_parse_attr *parse_attr;
829 const struct ip_tunnel_info *tun_info;
830 const struct mlx5e_mpls_info *mpls_info;
831 unsigned long tbl_time_before = 0;
832 struct mlx5e_encap_entry *e;
833 struct mlx5e_encap_key key;
834 bool entry_created = false;
835 unsigned short family;
836 uintptr_t hash_key;
837 int err = 0;
838
839 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
840
841 parse_attr = attr->parse_attr;
842 tun_info = parse_attr->tun_info[out_index];
843 mpls_info = &parse_attr->mpls_info[out_index];
844 family = ip_tunnel_info_af(tun_info);
845 key.ip_tun_key = &tun_info->key;
846 key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
847 if (!key.tc_tunnel) {
848 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
849 return -EOPNOTSUPP;
850 }
851
852 hash_key = hash_encap_info(&key);
853
854 e = mlx5e_encap_get(priv, &key, hash_key);
855
856 /* must verify if encap is valid or not */
857 if (e) {
858 /* Check that entry was not already attached to this flow */
859 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
860 err = -EOPNOTSUPP;
861 goto out_err;
862 }
863
864 goto attach_flow;
865 }
866
867 e = kzalloc(sizeof(*e), GFP_KERNEL);
868 if (!e) {
869 err = -ENOMEM;
870 goto out_err;
871 }
872
873 refcount_set(&e->refcnt, 1);
874 init_completion(&e->res_ready);
875 entry_created = true;
876 INIT_LIST_HEAD(&e->route_list);
877
878 tun_info = mlx5e_dup_tun_info(tun_info);
879 if (!tun_info) {
880 err = -ENOMEM;
881 goto out_err_init;
882 }
883 e->tun_info = tun_info;
884 memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
885 err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
886 if (err)
887 goto out_err_init;
888
889 INIT_LIST_HEAD(&e->flows);
890 hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
891 tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
892
893 if (family == AF_INET)
894 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
895 else if (family == AF_INET6)
896 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
897
898 complete_all(&e->res_ready);
899 if (err) {
900 e->compl_result = err;
901 goto out_err;
902 }
903 e->compl_result = 1;
904
905 attach_flow:
906 err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
907 tbl_time_before, out_index);
908 if (err)
909 goto out_err;
910
911 err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
912 if (err == -EOPNOTSUPP) {
913 /* If device doesn't support int port offload,
914 * redirect to uplink vport.
915 */
916 mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
917 err = 0;
918 } else if (err) {
919 goto out_err;
920 }
921
922 flow->encaps[out_index].e = e;
923 list_add(&flow->encaps[out_index].list, &e->flows);
924 flow->encaps[out_index].index = out_index;
925 *encap_dev = e->out_dev;
926 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
927 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
928 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
929 } else {
930 flow_flag_set(flow, SLOW);
931 }
932
933 return err;
934
935 out_err:
936 if (e)
937 mlx5e_encap_put_locked(priv, e);
938 return err;
939
940 out_err_init:
941 kfree(tun_info);
942 kfree(e);
943 return err;
944 }
945
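/* Look up (or create) the L3-to-L2 reformat entry matching the flow's inner
 * ethernet header and attach the flow to it.
 */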
946 int mlx5e_attach_decap(struct mlx5e_priv *priv,
947 struct mlx5e_tc_flow *flow,
948 struct netlink_ext_ack *extack)
949 {
950 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
951 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
952 struct mlx5_pkt_reformat_params reformat_params;
953 struct mlx5e_decap_entry *d;
954 struct mlx5e_decap_key key;
955 uintptr_t hash_key;
956 int err = 0;
957
958 if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
959 NL_SET_ERR_MSG_MOD(extack,
960 "encap header larger than max supported");
961 return -EOPNOTSUPP;
962 }
963
964 key.key = attr->eth;
965 hash_key = hash_decap_info(&key);
966 mutex_lock(&esw->offloads.decap_tbl_lock);
967 d = mlx5e_decap_get(priv, &key, hash_key);
968 if (d) {
969 mutex_unlock(&esw->offloads.decap_tbl_lock);
970 wait_for_completion(&d->res_ready);
971 mutex_lock(&esw->offloads.decap_tbl_lock);
972 if (d->compl_result) {
973 err = -EREMOTEIO;
974 goto out_free;
975 }
976 goto found;
977 }
978
979 d = kzalloc(sizeof(*d), GFP_KERNEL);
980 if (!d) {
981 err = -ENOMEM;
982 goto out_err;
983 }
984
985 d->key = key;
986 refcount_set(&d->refcnt, 1);
987 init_completion(&d->res_ready);
988 INIT_LIST_HEAD(&d->flows);
989 hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
990 mutex_unlock(&esw->offloads.decap_tbl_lock);
991
992 memset(&reformat_params, 0, sizeof(reformat_params));
993 reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
994 reformat_params.size = sizeof(attr->eth);
995 reformat_params.data = &attr->eth;
996 d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
997 &reformat_params,
998 MLX5_FLOW_NAMESPACE_FDB);
999 if (IS_ERR(d->pkt_reformat)) {
1000 err = PTR_ERR(d->pkt_reformat);
1001 d->compl_result = err;
1002 }
1003 mutex_lock(&esw->offloads.decap_tbl_lock);
1004 complete_all(&d->res_ready);
1005 if (err)
1006 goto out_free;
1007
1008 found:
1009 flow->decap_reformat = d;
1010 attr->decap_pkt_reformat = d->pkt_reformat;
1011 list_add(&flow->l3_to_l2_reformat, &d->flows);
1012 mutex_unlock(&esw->offloads.decap_tbl_lock);
1013 return 0;
1014
1015 out_free:
1016 mutex_unlock(&esw->offloads.decap_tbl_lock);
1017 mlx5e_decap_put(priv, d);
1018 return err;
1019
1020 out_err:
1021 mutex_unlock(&esw->offloads.decap_tbl_lock);
1022 return err;
1023 }
1024
1025 int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
1026 struct mlx5e_tc_flow *flow,
1027 struct mlx5_flow_attr *attr,
1028 struct netlink_ext_ack *extack,
1029 bool *vf_tun)
1030 {
1031 struct mlx5e_tc_flow_parse_attr *parse_attr;
1032 struct mlx5_esw_flow_attr *esw_attr;
1033 struct net_device *encap_dev = NULL;
1034 struct mlx5e_rep_priv *rpriv;
1035 struct mlx5e_priv *out_priv;
1036 struct mlx5_eswitch *esw;
1037 int out_index;
1038 int err = 0;
1039
1040 parse_attr = attr->parse_attr;
1041 esw_attr = attr->esw_attr;
1042 *vf_tun = false;
1043
1044 esw = priv->mdev->priv.eswitch;
1045 mutex_lock(&esw->offloads.encap_tbl_lock);
1046 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1047 struct net_device *out_dev;
1048 int mirred_ifindex;
1049
1050 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1051 continue;
1052
1053 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1054 out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1055 if (!out_dev) {
1056 NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1057 err = -ENODEV;
1058 goto out;
1059 }
1060 err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1061 extack, &encap_dev);
1062 dev_put(out_dev);
1063 if (err)
1064 goto out;
1065
1066 if (esw_attr->dests[out_index].flags &
1067 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1068 !esw_attr->dest_int_port)
1069 *vf_tun = true;
1070
1071 out_priv = netdev_priv(encap_dev);
1072 rpriv = out_priv->ppriv;
1073 esw_attr->dests[out_index].vport_valid = true;
1074 esw_attr->dests[out_index].vport = rpriv->rep->vport;
1075 esw_attr->dests[out_index].mdev = out_priv->mdev;
1076 }
1077
1078 if (*vf_tun && esw_attr->out_count > 1) {
1079 NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1080 err = -EOPNOTSUPP;
1081 goto out;
1082 }
1083
1084 out:
1085 mutex_unlock(&esw->offloads.encap_tbl_lock);
1086 return err;
1087 }
1088
1089 void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
1090 struct mlx5e_tc_flow *flow,
1091 struct mlx5_flow_attr *attr)
1092 {
1093 struct mlx5_esw_flow_attr *esw_attr;
1094 int out_index;
1095
1096 if (!mlx5e_is_eswitch_flow(flow))
1097 return;
1098
1099 esw_attr = attr->esw_attr;
1100
1101 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1102 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1103 continue;
1104
1105 mlx5e_detach_encap(flow->priv, flow, attr, out_index);
1106 kfree(attr->parse_attr->tun_info[out_index]);
1107 }
1108 }
1109
1110 static int cmp_route_info(struct mlx5e_route_key *a,
1111 struct mlx5e_route_key *b)
1112 {
1113 if (a->ip_version == 4 && b->ip_version == 4)
1114 return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1115 sizeof(a->endpoint_ip.v4));
1116 else if (a->ip_version == 6 && b->ip_version == 6)
1117 return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1118 sizeof(a->endpoint_ip.v6));
1119 return 1;
1120 }
1121
1122 static u32 hash_route_info(struct mlx5e_route_key *key)
1123 {
1124 if (key->ip_version == 4)
1125 return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1126 return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1127 }
1128
1129 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1130 struct mlx5e_route_entry *r)
1131 {
1132 WARN_ON(!list_empty(&r->decap_flows));
1133 WARN_ON(!list_empty(&r->encap_entries));
1134
1135 kfree_rcu(r, rcu);
1136 }
1137
1138 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1139 {
1140 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1141
1142 if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1143 return;
1144
1145 hash_del_rcu(&r->hlist);
1146 mutex_unlock(&esw->offloads.encap_tbl_lock);
1147
1148 mlx5e_route_dealloc(priv, r);
1149 }
1150
1151 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1152 {
1153 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1154
1155 lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1156
1157 if (!refcount_dec_and_test(&r->refcnt))
1158 return;
1159 hash_del_rcu(&r->hlist);
1160 mlx5e_route_dealloc(priv, r);
1161 }
1162
1163 static struct mlx5e_route_entry *
1164 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1165 u32 hash_key)
1166 {
1167 struct mlx5e_route_key r_key;
1168 struct mlx5e_route_entry *r;
1169
1170 hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1171 r_key = r->key;
1172 if (!cmp_route_info(&r_key, key) &&
1173 refcount_inc_not_zero(&r->refcnt))
1174 return r;
1175 }
1176 return NULL;
1177 }
1178
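/* Find an existing route entry for 'key' or allocate a new one. When a new
 * entry is created, the current route table timestamp is returned through
 * 'route_tbl_change_time' so the caller can detect FIB updates that raced
 * with the creation.
 */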
1179 static struct mlx5e_route_entry *
1180 mlx5e_route_get_create(struct mlx5e_priv *priv,
1181 struct mlx5e_route_key *key,
1182 int tunnel_dev_index,
1183 unsigned long *route_tbl_change_time)
1184 {
1185 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1186 struct mlx5_rep_uplink_priv *uplink_priv;
1187 struct mlx5e_rep_priv *uplink_rpriv;
1188 struct mlx5e_tc_tun_encap *encap;
1189 struct mlx5e_route_entry *r;
1190 u32 hash_key;
1191
1192 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1193 uplink_priv = &uplink_rpriv->uplink_priv;
1194 encap = uplink_priv->encap;
1195
1196 hash_key = hash_route_info(key);
1197 spin_lock_bh(&encap->route_lock);
1198 r = mlx5e_route_get(encap, key, hash_key);
1199 spin_unlock_bh(&encap->route_lock);
1200 if (r) {
1201 if (!mlx5e_route_entry_valid(r)) {
1202 mlx5e_route_put_locked(priv, r);
1203 return ERR_PTR(-EINVAL);
1204 }
1205 return r;
1206 }
1207
1208 r = kzalloc(sizeof(*r), GFP_KERNEL);
1209 if (!r)
1210 return ERR_PTR(-ENOMEM);
1211
1212 r->key = *key;
1213 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1214 r->tunnel_dev_index = tunnel_dev_index;
1215 refcount_set(&r->refcnt, 1);
1216 INIT_LIST_HEAD(&r->decap_flows);
1217 INIT_LIST_HEAD(&r->encap_entries);
1218
1219 spin_lock_bh(&encap->route_lock);
1220 *route_tbl_change_time = encap->route_tbl_last_update;
1221 hash_add(encap->route_tbl, &r->hlist, hash_key);
1222 spin_unlock_bh(&encap->route_lock);
1223
1224 return r;
1225 }
1226
1227 static struct mlx5e_route_entry *
1228 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1229 {
1230 u32 hash_key = hash_route_info(key);
1231 struct mlx5e_route_entry *r;
1232
1233 spin_lock_bh(&encap->route_lock);
1234 encap->route_tbl_last_update = jiffies;
1235 r = mlx5e_route_get(encap, key, hash_key);
1236 spin_unlock_bh(&encap->route_lock);
1237
1238 return r;
1239 }
1240
1241 struct mlx5e_tc_fib_event_data {
1242 struct work_struct work;
1243 unsigned long event;
1244 struct mlx5e_route_entry *r;
1245 struct net_device *ul_dev;
1246 };
1247
1248 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1249 static struct mlx5e_tc_fib_event_data *
1250 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1251 {
1252 struct mlx5e_tc_fib_event_data *fib_work;
1253
1254 fib_work = kzalloc(sizeof(*fib_work), flags);
1255 if (WARN_ON(!fib_work))
1256 return NULL;
1257
1258 INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1259 fib_work->event = event;
1260 fib_work->ul_dev = ul_dev;
1261
1262 return fib_work;
1263 }
1264
1265 static int
1266 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1267 struct mlx5e_route_entry *r,
1268 unsigned long event)
1269 {
1270 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1271 struct mlx5e_tc_fib_event_data *fib_work;
1272 struct mlx5e_rep_priv *uplink_rpriv;
1273 struct net_device *ul_dev;
1274
1275 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1276 ul_dev = uplink_rpriv->netdev;
1277
1278 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1279 if (!fib_work)
1280 return -ENOMEM;
1281
1282 dev_hold(ul_dev);
1283 refcount_inc(&r->refcnt);
1284 fib_work->r = r;
1285 queue_work(priv->wq, &fib_work->work);
1286
1287 return 0;
1288 }
1289
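/* Attach a decap flow to the route entry of its tunnel endpoint address so
 * the flow can be updated when the FIB changes. If the route table was
 * modified while the route was being resolved, schedule an update work item.
 */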
1290 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1291 struct mlx5e_tc_flow *flow)
1292 {
1293 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1294 unsigned long tbl_time_before, tbl_time_after;
1295 struct mlx5e_tc_flow_parse_attr *parse_attr;
1296 struct mlx5_flow_attr *attr = flow->attr;
1297 struct mlx5_esw_flow_attr *esw_attr;
1298 struct mlx5e_route_entry *r;
1299 struct mlx5e_route_key key;
1300 int err = 0;
1301
1302 esw_attr = attr->esw_attr;
1303 parse_attr = attr->parse_attr;
1304 mutex_lock(&esw->offloads.encap_tbl_lock);
1305 if (!esw_attr->rx_tun_attr)
1306 goto out;
1307
1308 tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1309 tbl_time_after = tbl_time_before;
1310 err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1311 if (err || !esw_attr->rx_tun_attr->decap_vport)
1312 goto out;
1313
1314 key.ip_version = attr->tun_ip_version;
1315 if (key.ip_version == 4)
1316 key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1317 else
1318 key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1319
1320 r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1321 &tbl_time_after);
1322 if (IS_ERR(r)) {
1323 err = PTR_ERR(r);
1324 goto out;
1325 }
1326 /* Routing changed concurrently. FIB event handler might have missed new
1327 * entry, schedule update.
1328 */
1329 if (tbl_time_before != tbl_time_after) {
1330 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1331 if (err) {
1332 mlx5e_route_put_locked(priv, r);
1333 goto out;
1334 }
1335 }
1336
1337 flow->decap_route = r;
1338 list_add(&flow->decap_routes, &r->decap_flows);
1339 mutex_unlock(&esw->offloads.encap_tbl_lock);
1340 return 0;
1341
1342 out:
1343 mutex_unlock(&esw->offloads.encap_tbl_lock);
1344 return err;
1345 }
1346
1347 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1348 struct mlx5e_tc_flow *flow,
1349 struct mlx5_flow_attr *attr,
1350 struct mlx5e_encap_entry *e,
1351 bool new_encap_entry,
1352 unsigned long tbl_time_before,
1353 int out_index)
1354 {
1355 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1356 unsigned long tbl_time_after = tbl_time_before;
1357 struct mlx5e_tc_flow_parse_attr *parse_attr;
1358 const struct ip_tunnel_info *tun_info;
1359 struct mlx5_esw_flow_attr *esw_attr;
1360 struct mlx5e_route_entry *r;
1361 struct mlx5e_route_key key;
1362 unsigned short family;
1363 int err = 0;
1364
1365 esw_attr = attr->esw_attr;
1366 parse_attr = attr->parse_attr;
1367 tun_info = parse_attr->tun_info[out_index];
1368 family = ip_tunnel_info_af(tun_info);
1369
1370 if (family == AF_INET) {
1371 key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1372 key.ip_version = 4;
1373 } else if (family == AF_INET6) {
1374 key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1375 key.ip_version = 6;
1376 }
1377
1378 err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1379 e->route_dev_ifindex, out_index);
1380 if (err || !(esw_attr->dests[out_index].flags &
1381 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1382 return err;
1383
1384 r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1385 &tbl_time_after);
1386 if (IS_ERR(r))
1387 return PTR_ERR(r);
1388 /* Routing changed concurrently. FIB event handler might have missed new
1389 * entry, schedule update.
1390 */
1391 if (tbl_time_before != tbl_time_after) {
1392 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1393 if (err) {
1394 mlx5e_route_put_locked(priv, r);
1395 return err;
1396 }
1397 }
1398
1399 flow->encap_routes[out_index].r = r;
1400 if (new_encap_entry)
1401 list_add(&e->route_list, &r->encap_entries);
1402 flow->encap_routes[out_index].index = out_index;
1403 return 0;
1404 }
1405
1406 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1407 struct mlx5e_tc_flow *flow)
1408 {
1409 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1410 struct mlx5e_route_entry *r = flow->decap_route;
1411
1412 if (!r)
1413 return;
1414
1415 mutex_lock(&esw->offloads.encap_tbl_lock);
1416 list_del(&flow->decap_routes);
1417 flow->decap_route = NULL;
1418
1419 if (!refcount_dec_and_test(&r->refcnt)) {
1420 mutex_unlock(&esw->offloads.encap_tbl_lock);
1421 return;
1422 }
1423 hash_del_rcu(&r->hlist);
1424 mutex_unlock(&esw->offloads.encap_tbl_lock);
1425
1426 mlx5e_route_dealloc(priv, r);
1427 }
1428
1429 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1430 struct mlx5e_tc_flow *flow,
1431 int out_index)
1432 {
1433 struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1434 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1435 struct mlx5e_encap_entry *e, *tmp;
1436
1437 if (!r)
1438 return;
1439
1440 mutex_lock(&esw->offloads.encap_tbl_lock);
1441 flow->encap_routes[out_index].r = NULL;
1442
1443 if (!refcount_dec_and_test(&r->refcnt)) {
1444 mutex_unlock(&esw->offloads.encap_tbl_lock);
1445 return;
1446 }
1447 list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1448 list_del_init(&e->route_list);
1449 hash_del_rcu(&r->hlist);
1450 mutex_unlock(&esw->offloads.encap_tbl_lock);
1451
1452 mlx5e_route_dealloc(priv, r);
1453 }
1454
1455 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1456 struct mlx5e_encap_entry *e,
1457 struct list_head *encap_flows)
1458 {
1459 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1460 struct mlx5e_tc_flow *flow;
1461
1462 list_for_each_entry(flow, encap_flows, tmp_list) {
1463 struct mlx5_esw_flow_attr *esw_attr;
1464 struct mlx5_flow_attr *attr;
1465
1466 if (!mlx5e_is_offloaded_flow(flow))
1467 continue;
1468
1469 attr = mlx5e_tc_get_encap_attr(flow);
1470 esw_attr = attr->esw_attr;
1471
1472 if (flow_flag_test(flow, SLOW)) {
1473 mlx5e_tc_unoffload_from_slow_path(esw, flow);
1474 } else {
1475 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1476 mlx5e_tc_unoffload_flow_post_acts(flow);
1477 }
1478
1479 mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1480 attr->modify_hdr = NULL;
1481
1482 esw_attr->dests[flow->tmp_entry_index].flags &=
1483 ~MLX5_ESW_DEST_ENCAP_VALID;
1484 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1485 }
1486
1487 e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1488 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1489 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1490 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1491 e->pkt_reformat = NULL;
1492 }
1493 }
1494
1495 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1496 struct net_device *tunnel_dev,
1497 struct mlx5e_encap_entry *e,
1498 struct list_head *encap_flows)
1499 {
1500 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1501 struct mlx5e_tc_flow *flow;
1502 int err;
1503
1504 err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1505 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1506 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1507 if (err)
1508 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1509 e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1510
1511 list_for_each_entry(flow, encap_flows, tmp_list) {
1512 struct mlx5e_tc_flow_parse_attr *parse_attr;
1513 struct mlx5_esw_flow_attr *esw_attr;
1514 struct mlx5_flow_handle *rule;
1515 struct mlx5_flow_attr *attr;
1516 struct mlx5_flow_spec *spec;
1517
1518 if (flow_flag_test(flow, FAILED))
1519 continue;
1520
1521 spec = &flow->attr->parse_attr->spec;
1522
1523 attr = mlx5e_tc_get_encap_attr(flow);
1524 esw_attr = attr->esw_attr;
1525 parse_attr = attr->parse_attr;
1526
1527 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1528 e->out_dev, e->route_dev_ifindex,
1529 flow->tmp_entry_index);
1530 if (err) {
1531 mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1532 continue;
1533 }
1534
1535 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1536 if (err) {
1537 mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1538 err);
1539 continue;
1540 }
1541
1542 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1543 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1544 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1545 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1546 goto offload_to_slow_path;
1547
1548 err = mlx5e_tc_offload_flow_post_acts(flow);
1549 if (err) {
1550 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1551 err);
1552 goto offload_to_slow_path;
1553 }
1554
1555 /* update from slow path rule to encap rule */
1556 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1557 if (IS_ERR(rule)) {
1558 mlx5e_tc_unoffload_flow_post_acts(flow);
1559 err = PTR_ERR(rule);
1560 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1561 err);
1562 } else {
1563 flow->rule[0] = rule;
1564 }
1565 } else {
1566 offload_to_slow_path:
1567 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1568 /* mark the flow's encap dest as non-valid */
1569 esw_attr->dests[flow->tmp_entry_index].flags &=
1570 ~MLX5_ESW_DEST_ENCAP_VALID;
1571
1572 if (IS_ERR(rule)) {
1573 err = PTR_ERR(rule);
1574 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1575 err);
1576 } else {
1577 flow->rule[0] = rule;
1578 }
1579 }
1580 flow_flag_set(flow, OFFLOADED);
1581 }
1582 }
1583
1584 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1585 struct mlx5e_route_entry *r,
1586 struct list_head *flow_list,
1587 bool replace)
1588 {
1589 struct net_device *tunnel_dev;
1590 struct mlx5e_encap_entry *e;
1591
1592 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1593 if (!tunnel_dev)
1594 return -ENODEV;
1595
1596 list_for_each_entry(e, &r->encap_entries, route_list) {
1597 LIST_HEAD(encap_flows);
1598
1599 mlx5e_take_all_encap_flows(e, &encap_flows);
1600 if (list_empty(&encap_flows))
1601 continue;
1602
1603 if (mlx5e_route_entry_valid(r))
1604 mlx5e_invalidate_encap(priv, e, &encap_flows);
1605
1606 if (!replace) {
1607 list_splice(&encap_flows, flow_list);
1608 continue;
1609 }
1610
1611 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1612 list_splice(&encap_flows, flow_list);
1613 }
1614
1615 return 0;
1616 }
1617
1618 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1619 struct list_head *flow_list)
1620 {
1621 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1622 struct mlx5e_tc_flow *flow;
1623
1624 list_for_each_entry(flow, flow_list, tmp_list)
1625 if (mlx5e_is_offloaded_flow(flow))
1626 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1627 }
1628
1629 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1630 struct list_head *decap_flows)
1631 {
1632 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1633 struct mlx5e_tc_flow *flow;
1634
1635 list_for_each_entry(flow, decap_flows, tmp_list) {
1636 struct mlx5e_tc_flow_parse_attr *parse_attr;
1637 struct mlx5_flow_attr *attr = flow->attr;
1638 struct mlx5_flow_handle *rule;
1639 struct mlx5_flow_spec *spec;
1640 int err;
1641
1642 if (flow_flag_test(flow, FAILED))
1643 continue;
1644
1645 parse_attr = attr->parse_attr;
1646 spec = &parse_attr->spec;
1647 err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1648 if (err) {
1649 mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1650 err);
1651 continue;
1652 }
1653
1654 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1655 if (IS_ERR(rule)) {
1656 err = PTR_ERR(rule);
1657 mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1658 err);
1659 } else {
1660 flow->rule[0] = rule;
1661 flow_flag_set(flow, OFFLOADED);
1662 }
1663 }
1664 }
1665
1666 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1667 struct mlx5e_route_entry *r,
1668 struct list_head *flow_list,
1669 bool replace)
1670 {
1671 struct net_device *tunnel_dev;
1672 LIST_HEAD(decap_flows);
1673
1674 tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1675 if (!tunnel_dev)
1676 return -ENODEV;
1677
1678 mlx5e_take_all_route_decap_flows(r, &decap_flows);
1679 if (mlx5e_route_entry_valid(r))
1680 mlx5e_unoffload_flow_list(priv, &decap_flows);
1681 if (replace)
1682 mlx5e_reoffload_decap(priv, &decap_flows);
1683
1684 list_splice(&decap_flows, flow_list);
1685
1686 return 0;
1687 }
1688
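/* Work item that handles a FIB replace/del event for a cached route entry:
 * re-offload (on replace) or unoffload (on del) all encap and decap flows
 * that depend on the route.
 */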
1689 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1690 {
1691 struct mlx5e_tc_fib_event_data *event_data =
1692 container_of(work, struct mlx5e_tc_fib_event_data, work);
1693 struct net_device *ul_dev = event_data->ul_dev;
1694 struct mlx5e_priv *priv = netdev_priv(ul_dev);
1695 struct mlx5e_route_entry *r = event_data->r;
1696 struct mlx5_eswitch *esw;
1697 LIST_HEAD(flow_list);
1698 bool replace;
1699 int err;
1700
1701 /* sync with concurrent neigh updates */
1702 rtnl_lock();
1703 esw = priv->mdev->priv.eswitch;
1704 mutex_lock(&esw->offloads.encap_tbl_lock);
1705 replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1706
1707 if (!mlx5e_route_entry_valid(r) && !replace)
1708 goto out;
1709
1710 err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1711 if (err)
1712 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1713 err);
1714
1715 err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1716 if (err)
1717 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1718 err);
1719
1720 if (replace)
1721 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1722 out:
1723 mutex_unlock(&esw->offloads.encap_tbl_lock);
1724 rtnl_unlock();
1725
1726 mlx5e_put_flow_list(priv, &flow_list);
1727 mlx5e_route_put(priv, event_data->r);
1728 dev_put(event_data->ul_dev);
1729 kfree(event_data);
1730 }
1731
1732 static struct mlx5e_tc_fib_event_data *
1733 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1734 struct net_device *ul_dev,
1735 struct mlx5e_tc_tun_encap *encap,
1736 unsigned long event,
1737 struct fib_notifier_info *info)
1738 {
1739 struct fib_entry_notifier_info *fen_info;
1740 struct mlx5e_tc_fib_event_data *fib_work;
1741 struct mlx5e_route_entry *r;
1742 struct mlx5e_route_key key;
1743 struct net_device *fib_dev;
1744
1745 fen_info = container_of(info, struct fib_entry_notifier_info, info);
1746 if (fen_info->fi->nh)
1747 return NULL;
1748 fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1749 if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1750 fen_info->dst_len != 32)
1751 return NULL;
1752
1753 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1754 if (!fib_work)
1755 return ERR_PTR(-ENOMEM);
1756
1757 key.endpoint_ip.v4 = htonl(fen_info->dst);
1758 key.ip_version = 4;
1759
1760 /* Can't fail after this point because releasing reference to r
1761 * requires obtaining sleeping mutex which we can't do in atomic
1762 * context.
1763 */
1764 r = mlx5e_route_lookup_for_update(encap, &key);
1765 if (!r)
1766 goto out;
1767 fib_work->r = r;
1768 dev_hold(ul_dev);
1769
1770 return fib_work;
1771
1772 out:
1773 kfree(fib_work);
1774 return NULL;
1775 }
1776
1777 static struct mlx5e_tc_fib_event_data *
1778 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1779 struct net_device *ul_dev,
1780 struct mlx5e_tc_tun_encap *encap,
1781 unsigned long event,
1782 struct fib_notifier_info *info)
1783 {
1784 struct fib6_entry_notifier_info *fen_info;
1785 struct mlx5e_tc_fib_event_data *fib_work;
1786 struct mlx5e_route_entry *r;
1787 struct mlx5e_route_key key;
1788 struct net_device *fib_dev;
1789
1790 fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1791 fib_dev = fib6_info_nh_dev(fen_info->rt);
1792 if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1793 fen_info->rt->fib6_dst.plen != 128)
1794 return NULL;
1795
1796 fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1797 if (!fib_work)
1798 return ERR_PTR(-ENOMEM);
1799
1800 memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1801 sizeof(fen_info->rt->fib6_dst.addr));
1802 key.ip_version = 6;
1803
1804 /* Can't fail after this point because releasing reference to r
1805 * requires obtaining sleeping mutex which we can't do in atomic
1806 * context.
1807 */
1808 r = mlx5e_route_lookup_for_update(encap, &key);
1809 if (!r)
1810 goto out;
1811 fib_work->r = r;
1812 dev_hold(ul_dev);
1813
1814 return fib_work;
1815
1816 out:
1817 kfree(fib_work);
1818 return NULL;
1819 }
1820
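/* FIB notifier callback, called in atomic context. Only host routes (/32 or
 * /128) whose nexthop device is a mlx5e netdev are of interest; for those, a
 * work item is queued to update the affected flows.
 */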
1821 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1822 {
1823 struct mlx5e_tc_fib_event_data *fib_work;
1824 struct fib_notifier_info *info = ptr;
1825 struct mlx5e_tc_tun_encap *encap;
1826 struct net_device *ul_dev;
1827 struct mlx5e_priv *priv;
1828
1829 encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1830 priv = encap->priv;
1831 ul_dev = priv->netdev;
1832 priv = netdev_priv(ul_dev);
1833
1834 switch (event) {
1835 case FIB_EVENT_ENTRY_REPLACE:
1836 case FIB_EVENT_ENTRY_DEL:
1837 if (info->family == AF_INET)
1838 fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1839 else if (info->family == AF_INET6)
1840 fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1841 else
1842 return NOTIFY_DONE;
1843
1844 if (!IS_ERR_OR_NULL(fib_work)) {
1845 queue_work(priv->wq, &fib_work->work);
1846 } else if (IS_ERR(fib_work)) {
1847 NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1848 mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1849 PTR_ERR(fib_work));
1850 }
1851
1852 break;
1853 default:
1854 return NOTIFY_DONE;
1855 }
1856
1857 return NOTIFY_DONE;
1858 }
1859
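/* Allocate the tunnel encap context for the uplink representor and register
 * the FIB notifier used to track route changes for offloaded tunnels.
 */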
1860 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1861 {
1862 struct mlx5e_tc_tun_encap *encap;
1863 int err;
1864
1865 encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1866 if (!encap)
1867 return ERR_PTR(-ENOMEM);
1868
1869 encap->priv = priv;
1870 encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1871 spin_lock_init(&encap->route_lock);
1872 hash_init(encap->route_tbl);
1873 err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1874 NULL, NULL);
1875 if (err) {
1876 kvfree(encap);
1877 return ERR_PTR(err);
1878 }
1879
1880 return encap;
1881 }
1882
1883 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1884 {
1885 if (!encap)
1886 return;
1887
1888 unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1889 flush_workqueue(encap->priv->wq); /* flush fib event works */
1890 kvfree(encap);
1891 }
1892