1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/xarray.h>
16 
17 #include "lib/fs_chains.h"
18 #include "en/tc_ct.h"
19 #include "en/mod_hdr.h"
20 #include "en/mapping.h"
21 #include "en.h"
22 #include "en_tc.h"
23 #include "en_rep.h"
24 
25 #define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
26 #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
27 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
28 #define MLX5_CT_STATE_TRK_BIT BIT(2)
29 #define MLX5_CT_STATE_NAT_BIT BIT(3)
30 
31 #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
32 #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
33 #define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
34 
35 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
36 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
37 
38 #define ct_dbg(fmt, args...)\
39 	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
40 
41 struct mlx5_tc_ct_priv {
42 	struct mlx5_core_dev *dev;
43 	const struct net_device *netdev;
44 	struct mod_hdr_tbl *mod_hdr_tbl;
45 	struct idr fte_ids;
46 	struct xarray tuple_ids;
47 	struct rhashtable zone_ht;
48 	struct rhashtable ct_tuples_ht;
49 	struct rhashtable ct_tuples_nat_ht;
50 	struct mlx5_flow_table *ct;
51 	struct mlx5_flow_table *ct_nat;
52 	struct mlx5_flow_table *post_ct;
53 	struct mutex control_lock; /* guards parallel adds/dels */
54 	struct mutex shared_counter_lock; /* serializes reverse-tuple lookups vs. entry removal */
55 	struct mapping_ctx *zone_mapping;
56 	struct mapping_ctx *labels_mapping;
57 	enum mlx5_flow_namespace_type ns_type;
58 	struct mlx5_fs_chains *chains;
59 };
60 
61 struct mlx5_ct_flow {
62 	struct mlx5_flow_attr *pre_ct_attr;
63 	struct mlx5_flow_attr *post_ct_attr;
64 	struct mlx5_flow_handle *pre_ct_rule;
65 	struct mlx5_flow_handle *post_ct_rule;
66 	struct mlx5_ct_ft *ft;
67 	u32 fte_id;
68 	u32 chain_mapping;
69 };
70 
71 struct mlx5_ct_zone_rule {
72 	struct mlx5_flow_handle *rule;
73 	struct mlx5e_mod_hdr_handle *mh;
74 	struct mlx5_flow_attr *attr;
75 	bool nat;
76 };
77 
78 struct mlx5_tc_ct_pre {
79 	struct mlx5_flow_table *ft;
80 	struct mlx5_flow_group *flow_grp;
81 	struct mlx5_flow_group *miss_grp;
82 	struct mlx5_flow_handle *flow_rule;
83 	struct mlx5_flow_handle *miss_rule;
84 	struct mlx5_modify_hdr *modify_hdr;
85 };
86 
87 struct mlx5_ct_ft {
88 	struct rhash_head node;
89 	u16 zone;
90 	u32 zone_restore_id;
91 	refcount_t refcount;
92 	struct nf_flowtable *nf_ft;
93 	struct mlx5_tc_ct_priv *ct_priv;
94 	struct rhashtable ct_entries_ht;
95 	struct mlx5_tc_ct_pre pre_ct;
96 	struct mlx5_tc_ct_pre pre_ct_nat;
97 };
98 
99 struct mlx5_ct_tuple {
100 	u16 addr_type;
101 	__be16 n_proto;
102 	u8 ip_proto;
103 	struct {
104 		union {
105 			__be32 src_v4;
106 			struct in6_addr src_v6;
107 		};
108 		union {
109 			__be32 dst_v4;
110 			struct in6_addr dst_v6;
111 		};
112 	} ip;
113 	struct {
114 		__be16 src;
115 		__be16 dst;
116 	} port;
117 
118 	u16 zone;
119 };
120 
121 struct mlx5_ct_shared_counter {
122 	struct mlx5_fc *counter;
123 	refcount_t refcount;
124 };
125 
126 struct mlx5_ct_entry {
127 	struct rhash_head node;
128 	struct rhash_head tuple_node;
129 	struct rhash_head tuple_nat_node;
130 	struct mlx5_ct_shared_counter *shared_counter;
131 	unsigned long cookie;
132 	unsigned long restore_cookie;
133 	struct mlx5_ct_tuple tuple;
134 	struct mlx5_ct_tuple tuple_nat;
135 	struct mlx5_ct_zone_rule zone_rules[2];
136 };
137 
138 static const struct rhashtable_params cts_ht_params = {
139 	.head_offset = offsetof(struct mlx5_ct_entry, node),
140 	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
141 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
142 	.automatic_shrinking = true,
143 	.min_size = 16 * 1024,
144 };
145 
146 static const struct rhashtable_params zone_params = {
147 	.head_offset = offsetof(struct mlx5_ct_ft, node),
148 	.key_offset = offsetof(struct mlx5_ct_ft, zone),
149 	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
150 	.automatic_shrinking = true,
151 };
152 
153 static const struct rhashtable_params tuples_ht_params = {
154 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
155 	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
156 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
157 	.automatic_shrinking = true,
158 	.min_size = 16 * 1024,
159 };
160 
161 static const struct rhashtable_params tuples_nat_ht_params = {
162 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
163 	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
164 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
165 	.automatic_shrinking = true,
166 	.min_size = 16 * 1024,
167 };
168 
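/* Extract the connection 5-tuple (IPv4/IPv6 addresses, L4 ports, ip_proto)
 * from the flower rule's match keys. Anything other than TCP/UDP over
 * IPv4/IPv6 is rejected with -EOPNOTSUPP.
 */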
169 static int
170 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
171 {
172 	struct flow_match_control control;
173 	struct flow_match_basic basic;
174 
175 	flow_rule_match_basic(rule, &basic);
176 	flow_rule_match_control(rule, &control);
177 
178 	tuple->n_proto = basic.key->n_proto;
179 	tuple->ip_proto = basic.key->ip_proto;
180 	tuple->addr_type = control.key->addr_type;
181 
182 	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
183 		struct flow_match_ipv4_addrs match;
184 
185 		flow_rule_match_ipv4_addrs(rule, &match);
186 		tuple->ip.src_v4 = match.key->src;
187 		tuple->ip.dst_v4 = match.key->dst;
188 	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
189 		struct flow_match_ipv6_addrs match;
190 
191 		flow_rule_match_ipv6_addrs(rule, &match);
192 		tuple->ip.src_v6 = match.key->src;
193 		tuple->ip.dst_v6 = match.key->dst;
194 	} else {
195 		return -EOPNOTSUPP;
196 	}
197 
198 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
199 		struct flow_match_ports match;
200 
201 		flow_rule_match_ports(rule, &match);
202 		switch (tuple->ip_proto) {
203 		case IPPROTO_TCP:
204 		case IPPROTO_UDP:
205 			tuple->port.src = match.key->src;
206 			tuple->port.dst = match.key->dst;
207 			break;
208 		default:
209 			return -EOPNOTSUPP;
210 		}
211 	} else {
212 		return -EOPNOTSUPP;
213 	}
214 
215 	return 0;
216 }
217 
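/* Derive the post-NAT tuple by applying the rule's mangle (header rewrite)
 * actions on top of the original tuple the caller copied in.
 */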
218 static int
219 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
220 			     struct flow_rule *rule)
221 {
222 	struct flow_action *flow_action = &rule->action;
223 	struct flow_action_entry *act;
224 	u32 offset, val, ip6_offset;
225 	int i;
226 
227 	flow_action_for_each(i, act, flow_action) {
228 		if (act->id != FLOW_ACTION_MANGLE)
229 			continue;
230 
231 		offset = act->mangle.offset;
232 		val = act->mangle.val;
233 		switch (act->mangle.htype) {
234 		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
235 			if (offset == offsetof(struct iphdr, saddr))
236 				tuple->ip.src_v4 = cpu_to_be32(val);
237 			else if (offset == offsetof(struct iphdr, daddr))
238 				tuple->ip.dst_v4 = cpu_to_be32(val);
239 			else
240 				return -EOPNOTSUPP;
241 			break;
242 
243 		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
244 			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
245 			ip6_offset /= 4;
246 			if (ip6_offset < 4)
247 				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
248 			else if (ip6_offset < 8)
249 				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
250 			else
251 				return -EOPNOTSUPP;
252 			break;
253 
254 		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
255 			if (offset == offsetof(struct tcphdr, source))
256 				tuple->port.src = cpu_to_be16(val);
257 			else if (offset == offsetof(struct tcphdr, dest))
258 				tuple->port.dst = cpu_to_be16(val);
259 			else
260 				return -EOPNOTSUPP;
261 			break;
262 
263 		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
264 			if (offset == offsetof(struct udphdr, source))
265 				tuple->port.src = cpu_to_be16(val);
266 			else if (offset == offsetof(struct udphdr, dest))
267 				tuple->port.dst = cpu_to_be16(val);
268 			else
269 				return -EOPNOTSUPP;
270 			break;
271 
272 		default:
273 			return -EOPNOTSUPP;
274 		}
275 	}
276 
277 	return 0;
278 }
279 
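/* Translate the flower tuple match (ethertype, ip_proto, addresses, ports,
 * tcp flags) into the outer header fields of the mlx5 flow spec.
 */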
280 static int
281 mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
282 			   struct flow_rule *rule)
283 {
284 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
285 				       outer_headers);
286 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
287 				       outer_headers);
288 	u16 addr_type = 0;
289 	u8 ip_proto = 0;
290 
291 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
292 		struct flow_match_basic match;
293 
294 		flow_rule_match_basic(rule, &match);
295 
296 		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
297 				       headers_v);
298 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
299 			 match.mask->ip_proto);
300 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
301 			 match.key->ip_proto);
302 
303 		ip_proto = match.key->ip_proto;
304 	}
305 
306 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
307 		struct flow_match_control match;
308 
309 		flow_rule_match_control(rule, &match);
310 		addr_type = match.key->addr_type;
311 	}
312 
313 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
314 		struct flow_match_ipv4_addrs match;
315 
316 		flow_rule_match_ipv4_addrs(rule, &match);
317 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
318 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
319 		       &match.mask->src, sizeof(match.mask->src));
320 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
321 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
322 		       &match.key->src, sizeof(match.key->src));
323 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
324 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
325 		       &match.mask->dst, sizeof(match.mask->dst));
326 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
327 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
328 		       &match.key->dst, sizeof(match.key->dst));
329 	}
330 
331 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
332 		struct flow_match_ipv6_addrs match;
333 
334 		flow_rule_match_ipv6_addrs(rule, &match);
335 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
336 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
337 		       &match.mask->src, sizeof(match.mask->src));
338 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
339 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
340 		       &match.key->src, sizeof(match.key->src));
341 
342 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
343 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
344 		       &match.mask->dst, sizeof(match.mask->dst));
345 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
346 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
347 		       &match.key->dst, sizeof(match.key->dst));
348 	}
349 
350 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
351 		struct flow_match_ports match;
352 
353 		flow_rule_match_ports(rule, &match);
354 		switch (ip_proto) {
355 		case IPPROTO_TCP:
356 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
357 				 tcp_sport, ntohs(match.mask->src));
358 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
359 				 tcp_sport, ntohs(match.key->src));
360 
361 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
362 				 tcp_dport, ntohs(match.mask->dst));
363 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
364 				 tcp_dport, ntohs(match.key->dst));
365 			break;
366 
367 		case IPPROTO_UDP:
368 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
369 				 udp_sport, ntohs(match.mask->src));
370 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
371 				 udp_sport, ntohs(match.key->src));
372 
373 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
374 				 udp_dport, ntohs(match.mask->dst));
375 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
376 				 udp_dport, ntohs(match.key->dst));
377 			break;
378 		default:
379 			break;
380 		}
381 	}
382 
383 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
384 		struct flow_match_tcp match;
385 
386 		flow_rule_match_tcp(rule, &match);
387 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
388 			 ntohs(match.mask->flags));
389 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
390 			 ntohs(match.key->flags));
391 	}
392 
393 	return 0;
394 }
395 
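/* Drop a reference on the entry's shared counter and release the hardware
 * counter once the last entry using it is gone.
 */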
396 static void
397 mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
398 {
399 	if (!refcount_dec_and_test(&entry->shared_counter->refcount))
400 		return;
401 
402 	mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter);
403 	kfree(entry->shared_counter);
404 }
405 
406 static void
407 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
408 			  struct mlx5_ct_entry *entry,
409 			  bool nat)
410 {
411 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
412 	struct mlx5_flow_attr *attr = zone_rule->attr;
413 
414 	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
415 
416 	mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
417 	mlx5e_mod_hdr_detach(ct_priv->dev,
418 			     ct_priv->mod_hdr_tbl, zone_rule->mh);
419 	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
420 	kfree(attr);
421 }
422 
423 static void
424 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
425 			   struct mlx5_ct_entry *entry)
426 {
427 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
428 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
429 }
430 
431 static struct flow_action_entry *
432 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
433 {
434 	struct flow_action *flow_action = &flow_rule->action;
435 	struct flow_action_entry *act;
436 	int i;
437 
438 	flow_action_for_each(i, act, flow_action) {
439 		if (act->id == FLOW_ACTION_CT_METADATA)
440 			return act;
441 	}
442 
443 	return NULL;
444 }
445 
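/* Write the conntrack metadata (ct_state, mark, labels id and zone restore
 * id) into their registers using modify header actions.
 */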
446 static int
447 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
448 			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
449 			       u8 ct_state,
450 			       u32 mark,
451 			       u32 labels_id,
452 			       u8 zone_restore_id)
453 {
454 	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
455 	struct mlx5_core_dev *dev = ct_priv->dev;
456 	int err;
457 
458 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
459 					CTSTATE_TO_REG, ct_state);
460 	if (err)
461 		return err;
462 
463 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
464 					MARK_TO_REG, mark);
465 	if (err)
466 		return err;
467 
468 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
469 					LABELS_TO_REG, labels_id);
470 	if (err)
471 		return err;
472 
473 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
474 					ZONE_RESTORE_TO_REG, zone_restore_id);
475 	if (err)
476 		return err;
477 
478 	/* Make another copy of zone id in reg_b for
479 	 * NIC rx flows since we don't copy reg_c1 to
480 	 * reg_b upon miss.
481 	 */
482 	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
483 		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
484 						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
485 		if (err)
486 			return err;
487 	}
488 	return 0;
489 }
490 
491 static int
492 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
493 				   char *modact)
494 {
495 	u32 offset = act->mangle.offset, field;
496 
497 	switch (act->mangle.htype) {
498 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
499 		MLX5_SET(set_action_in, modact, length, 0);
500 		if (offset == offsetof(struct iphdr, saddr))
501 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
502 		else if (offset == offsetof(struct iphdr, daddr))
503 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
504 		else
505 			return -EOPNOTSUPP;
506 		break;
507 
508 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
509 		MLX5_SET(set_action_in, modact, length, 0);
510 		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
511 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
512 		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
513 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
514 		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
515 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
516 		else if (offset == offsetof(struct ipv6hdr, saddr))
517 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
518 		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
519 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
520 		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
521 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
522 		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
523 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
524 		else if (offset == offsetof(struct ipv6hdr, daddr))
525 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
526 		else
527 			return -EOPNOTSUPP;
528 		break;
529 
530 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
531 		MLX5_SET(set_action_in, modact, length, 16);
532 		if (offset == offsetof(struct tcphdr, source))
533 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
534 		else if (offset == offsetof(struct tcphdr, dest))
535 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
536 		else
537 			return -EOPNOTSUPP;
538 		break;
539 
540 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
541 		MLX5_SET(set_action_in, modact, length, 16);
542 		if (offset == offsetof(struct udphdr, source))
543 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
544 		else if (offset == offsetof(struct udphdr, dest))
545 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
546 		else
547 			return -EOPNOTSUPP;
548 		break;
549 
550 	default:
551 		return -EOPNOTSUPP;
552 	}
553 
554 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
555 	MLX5_SET(set_action_in, modact, offset, 0);
556 	MLX5_SET(set_action_in, modact, field, field);
557 	MLX5_SET(set_action_in, modact, data, act->mangle.val);
558 
559 	return 0;
560 }
561 
562 static int
563 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
564 			    struct flow_rule *flow_rule,
565 			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
566 {
567 	struct flow_action *flow_action = &flow_rule->action;
568 	struct mlx5_core_dev *mdev = ct_priv->dev;
569 	struct flow_action_entry *act;
570 	size_t action_size;
571 	char *modact;
572 	int err, i;
573 
574 	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
575 
576 	flow_action_for_each(i, act, flow_action) {
577 		switch (act->id) {
578 		case FLOW_ACTION_MANGLE: {
579 			err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
580 						    mod_acts);
581 			if (err)
582 				return err;
583 
584 			modact = mod_acts->actions +
585 				 mod_acts->num_actions * action_size;
586 
587 			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
588 			if (err)
589 				return err;
590 
591 			mod_acts->num_actions++;
592 		}
593 		break;
594 
595 		case FLOW_ACTION_CT_METADATA:
596 			/* Handled earlier */
597 			continue;
598 		default:
599 			return -EOPNOTSUPP;
600 		}
601 	}
602 
603 	return 0;
604 }
605 
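/* Build the modify header used by a ct entry rule: map the ct labels to an
 * id, add the NAT rewrite actions when requested, and set the metadata
 * registers (+trk+est ct_state, mark, labels id, zone restore id).
 */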
606 static int
607 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
608 				struct mlx5_flow_attr *attr,
609 				struct flow_rule *flow_rule,
610 				struct mlx5e_mod_hdr_handle **mh,
611 				u8 zone_restore_id, bool nat)
612 {
613 	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
614 	struct flow_action_entry *meta;
615 	u16 ct_state = 0;
616 	int err;
617 
618 	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
619 	if (!meta)
620 		return -EOPNOTSUPP;
621 
622 	err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
623 			  &attr->ct_attr.ct_labels_id);
624 	if (err)
625 		return -EOPNOTSUPP;
626 	if (nat) {
627 		err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
628 						  &mod_acts);
629 		if (err)
630 			goto err_mapping;
631 
632 		ct_state |= MLX5_CT_STATE_NAT_BIT;
633 	}
634 
635 	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
636 	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
637 					     ct_state,
638 					     meta->ct_metadata.mark,
639 					     attr->ct_attr.ct_labels_id,
640 					     zone_restore_id);
641 	if (err)
642 		goto err_mapping;
643 
644 	*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
645 				   ct_priv->mod_hdr_tbl,
646 				   ct_priv->ns_type,
647 				   &mod_acts);
648 	if (IS_ERR(*mh)) {
649 		err = PTR_ERR(*mh);
650 		goto err_mapping;
651 	}
652 	attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
653 
654 	dealloc_mod_hdr_actions(&mod_acts);
655 	return 0;
656 
657 err_mapping:
658 	dealloc_mod_hdr_actions(&mod_acts);
659 	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
660 	return err;
661 }
662 
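/* Offload one direction of a ct entry: insert a rule into the CT (or CT NAT)
 * table matching the tuple and zone, applying the modify header and shared
 * counter, and forwarding to post_ct.
 */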
663 static int
664 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
665 			  struct flow_rule *flow_rule,
666 			  struct mlx5_ct_entry *entry,
667 			  bool nat, u8 zone_restore_id)
668 {
669 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
670 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
671 	struct mlx5_flow_spec *spec = NULL;
672 	struct mlx5_flow_attr *attr;
673 	int err;
674 
675 	zone_rule->nat = nat;
676 
677 	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
678 	if (!spec)
679 		return -ENOMEM;
680 
681 	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
682 	if (!attr) {
683 		err = -ENOMEM;
684 		goto err_attr;
685 	}
686 
687 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
688 					      &zone_rule->mh,
689 					      zone_restore_id, nat);
690 	if (err) {
691 		ct_dbg("Failed to create ct entry mod hdr");
692 		goto err_mod_hdr;
693 	}
694 
695 	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
696 		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
697 		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
698 	attr->dest_chain = 0;
699 	attr->dest_ft = ct_priv->post_ct;
700 	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
701 	attr->outer_match_level = MLX5_MATCH_L4;
702 	attr->counter = entry->shared_counter->counter;
703 	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
704 
705 	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
706 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
707 				    entry->tuple.zone & MLX5_CT_ZONE_MASK,
708 				    MLX5_CT_ZONE_MASK);
709 
710 	zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
711 	if (IS_ERR(zone_rule->rule)) {
712 		err = PTR_ERR(zone_rule->rule);
713 		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
714 		goto err_rule;
715 	}
716 
717 	zone_rule->attr = attr;
718 
719 	kfree(spec);
720 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
721 
722 	return 0;
723 
724 err_rule:
725 	mlx5e_mod_hdr_detach(ct_priv->dev,
726 			     ct_priv->mod_hdr_tbl, zone_rule->mh);
727 	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
728 err_mod_hdr:
729 	kfree(attr);
730 err_attr:
731 	kfree(spec);
732 	return err;
733 }
734 
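/* Look up the entry of the reverse direction and share its counter so both
 * directions of a connection are accounted on one hardware counter; allocate
 * a new one if no reverse entry exists yet.
 */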
735 static struct mlx5_ct_shared_counter *
736 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
737 			      struct mlx5_ct_entry *entry)
738 {
739 	struct mlx5_ct_tuple rev_tuple = entry->tuple;
740 	struct mlx5_ct_shared_counter *shared_counter;
741 	struct mlx5_core_dev *dev = ct_priv->dev;
742 	struct mlx5_ct_entry *rev_entry;
743 	__be16 tmp_port;
744 	int ret;
745 
746 	/* get the reversed tuple */
747 	tmp_port = rev_tuple.port.src;
748 	rev_tuple.port.src = rev_tuple.port.dst;
749 	rev_tuple.port.dst = tmp_port;
750 
751 	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
752 		__be32 tmp_addr = rev_tuple.ip.src_v4;
753 
754 		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
755 		rev_tuple.ip.dst_v4 = tmp_addr;
756 	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
757 		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
758 
759 		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
760 		rev_tuple.ip.dst_v6 = tmp_addr;
761 	} else {
762 		return ERR_PTR(-EOPNOTSUPP);
763 	}
764 
765 	/* Use the same counter as the reverse direction */
766 	mutex_lock(&ct_priv->shared_counter_lock);
767 	rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
768 					   tuples_ht_params);
769 	if (rev_entry) {
770 		if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) {
771 			mutex_unlock(&ct_priv->shared_counter_lock);
772 			return rev_entry->shared_counter;
773 		}
774 	}
775 	mutex_unlock(&ct_priv->shared_counter_lock);
776 
777 	shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL);
778 	if (!shared_counter)
779 		return ERR_PTR(-ENOMEM);
780 
781 	shared_counter->counter = mlx5_fc_create(dev, true);
782 	if (IS_ERR(shared_counter->counter)) {
783 		ct_dbg("Failed to create counter for ct entry");
784 		ret = PTR_ERR(shared_counter->counter);
785 		kfree(shared_counter);
786 		return ERR_PTR(ret);
787 	}
788 
789 	refcount_set(&shared_counter->refcount, 1);
790 	return shared_counter;
791 }
792 
793 static int
794 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
795 			   struct flow_rule *flow_rule,
796 			   struct mlx5_ct_entry *entry,
797 			   u8 zone_restore_id)
798 {
799 	int err;
800 
801 	entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
802 	if (IS_ERR(entry->shared_counter)) {
803 		err = PTR_ERR(entry->shared_counter);
804 		ct_dbg("Failed to create counter for ct entry");
805 		return err;
806 	}
807 
808 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
809 					zone_restore_id);
810 	if (err)
811 		goto err_orig;
812 
813 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
814 					zone_restore_id);
815 	if (err)
816 		goto err_nat;
817 
818 	return 0;
819 
820 err_nat:
821 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
822 err_orig:
823 	mlx5_tc_ct_shared_counter_put(ct_priv, entry);
824 	return err;
825 }
826 
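/* FLOW_CLS_REPLACE callback from the nf flowtable: allocate a ct entry for
 * the new offloaded connection, insert it into the tuple hashtables and add
 * its original and NAT rules to hardware.
 */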
827 static int
828 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
829 				  struct flow_cls_offload *flow)
830 {
831 	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
832 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
833 	struct flow_action_entry *meta_action;
834 	unsigned long cookie = flow->cookie;
835 	struct mlx5_ct_entry *entry;
836 	int err;
837 
838 	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
839 	if (!meta_action)
840 		return -EOPNOTSUPP;
841 
842 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
843 				       cts_ht_params);
844 	if (entry)
845 		return 0;
846 
847 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
848 	if (!entry)
849 		return -ENOMEM;
850 
851 	entry->tuple.zone = ft->zone;
852 	entry->cookie = flow->cookie;
853 	entry->restore_cookie = meta_action->ct_metadata.cookie;
854 
855 	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
856 	if (err)
857 		goto err_set;
858 
859 	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
860 	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
861 	if (err)
862 		goto err_set;
863 
864 	err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
865 				     &entry->tuple_node,
866 				     tuples_ht_params);
867 	if (err)
868 		goto err_tuple;
869 
870 	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
871 		err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
872 					     &entry->tuple_nat_node,
873 					     tuples_nat_ht_params);
874 		if (err)
875 			goto err_tuple_nat;
876 	}
877 
878 	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
879 					 ft->zone_restore_id);
880 	if (err)
881 		goto err_rules;
882 
883 	err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
884 				     cts_ht_params);
885 	if (err)
886 		goto err_insert;
887 
888 	return 0;
889 
890 err_insert:
891 	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
892 err_rules:
893 	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
894 			       &entry->tuple_nat_node, tuples_nat_ht_params);
895 err_tuple_nat:
896 	if (entry->tuple_node.next)
897 		rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
898 				       &entry->tuple_node,
899 				       tuples_ht_params);
900 err_tuple:
901 err_set:
902 	kfree(entry);
903 	netdev_warn(ct_priv->netdev,
904 		    "Failed to offload ct entry, err: %d\n", err);
905 	return err;
906 }
907 
908 static void
909 mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
910 			struct mlx5_ct_entry *entry)
911 {
912 	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
913 	mutex_lock(&ct_priv->shared_counter_lock);
914 	if (entry->tuple_node.next)
915 		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
916 				       &entry->tuple_nat_node,
917 				       tuples_nat_ht_params);
918 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
919 			       tuples_ht_params);
920 	mutex_unlock(&ct_priv->shared_counter_lock);
921 	mlx5_tc_ct_shared_counter_put(ct_priv, entry);
922 
923 }
924 
925 static int
926 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
927 				  struct flow_cls_offload *flow)
928 {
929 	unsigned long cookie = flow->cookie;
930 	struct mlx5_ct_entry *entry;
931 
932 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
933 				       cts_ht_params);
934 	if (!entry)
935 		return -ENOENT;
936 
937 	mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
938 	WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
939 				       &entry->node,
940 				       cts_ht_params));
941 	kfree(entry);
942 
943 	return 0;
944 }
945 
946 static int
947 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
948 				    struct flow_cls_offload *f)
949 {
950 	unsigned long cookie = f->cookie;
951 	struct mlx5_ct_entry *entry;
952 	u64 lastuse, packets, bytes;
953 
954 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
955 				       cts_ht_params);
956 	if (!entry)
957 		return -ENOENT;
958 
959 	mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse);
960 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
961 			  FLOW_ACTION_HW_STATS_DELAYED);
962 
963 	return 0;
964 }
965 
966 static int
967 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
968 			      void *cb_priv)
969 {
970 	struct flow_cls_offload *f = type_data;
971 	struct mlx5_ct_ft *ft = cb_priv;
972 
973 	if (type != TC_SETUP_CLSFLOWER)
974 		return -EOPNOTSUPP;
975 
976 	switch (f->command) {
977 	case FLOW_CLS_REPLACE:
978 		return mlx5_tc_ct_block_flow_offload_add(ft, f);
979 	case FLOW_CLS_DESTROY:
980 		return mlx5_tc_ct_block_flow_offload_del(ft, f);
981 	case FLOW_CLS_STATS:
982 		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
983 	default:
984 		break;
985 	}
986 
987 	return -EOPNOTSUPP;
988 }
989 
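/* Build a ct tuple from an skb using the flow dissector. Only TCP and UDP
 * over IPv4/IPv6 are handled.
 */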
990 static bool
991 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
992 			u16 zone)
993 {
994 	struct flow_keys flow_keys;
995 
996 	skb_reset_network_header(skb);
997 	skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
998 
999 	tuple->zone = zone;
1000 
1001 	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1002 	    flow_keys.basic.ip_proto != IPPROTO_UDP)
1003 		return false;
1004 
1005 	tuple->port.src = flow_keys.ports.src;
1006 	tuple->port.dst = flow_keys.ports.dst;
1007 	tuple->n_proto = flow_keys.basic.n_proto;
1008 	tuple->ip_proto = flow_keys.basic.ip_proto;
1009 
1010 	switch (flow_keys.basic.n_proto) {
1011 	case htons(ETH_P_IP):
1012 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1013 		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1014 		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1015 		break;
1016 
1017 	case htons(ETH_P_IPV6):
1018 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1019 		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1020 		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1021 		break;
1022 	default:
1023 		goto out;
1024 	}
1025 
1026 	return true;
1027 
1028 out:
1029 	return false;
1030 }
1031 
1032 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1033 {
1034 	u32 ctstate = 0, ctstate_mask = 0;
1035 
1036 	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1037 					&ctstate, &ctstate_mask);
1038 	if (ctstate_mask)
1039 		return -EOPNOTSUPP;
1040 
1041 	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1042 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1043 				    ctstate, ctstate_mask);
1044 
1045 	return 0;
1046 }
1047 
1048 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1049 {
1050 	if (!priv || !ct_attr->ct_labels_id)
1051 		return;
1052 
1053 	mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
1054 }
1055 
1056 int
1057 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1058 		     struct mlx5_flow_spec *spec,
1059 		     struct flow_cls_offload *f,
1060 		     struct mlx5_ct_attr *ct_attr,
1061 		     struct netlink_ext_ack *extack)
1062 {
1063 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1064 	struct flow_dissector_key_ct *mask, *key;
1065 	bool trk, est, untrk, unest, new;
1066 	u32 ctstate = 0, ctstate_mask = 0;
1067 	u16 ct_state_on, ct_state_off;
1068 	u16 ct_state, ct_state_mask;
1069 	struct flow_match_ct match;
1070 	u32 ct_labels[4];
1071 
1072 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1073 		return 0;
1074 
1075 	if (!priv) {
1076 		NL_SET_ERR_MSG_MOD(extack,
1077 				   "offload of ct matching isn't available");
1078 		return -EOPNOTSUPP;
1079 	}
1080 
1081 	flow_rule_match_ct(rule, &match);
1082 
1083 	key = match.key;
1084 	mask = match.mask;
1085 
1086 	ct_state = key->ct_state;
1087 	ct_state_mask = mask->ct_state;
1088 
1089 	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1090 			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1091 			      TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
1092 		NL_SET_ERR_MSG_MOD(extack,
1093 				   "only ct_state trk, est and new are supported for offload");
1094 		return -EOPNOTSUPP;
1095 	}
1096 
1097 	ct_state_on = ct_state & ct_state_mask;
1098 	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1099 	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1100 	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1101 	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1102 	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1103 	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1104 
1105 	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1106 	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1107 	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1108 	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1109 
1110 	if (new) {
1111 		NL_SET_ERR_MSG_MOD(extack,
1112 				   "matching on ct_state +new isn't supported");
1113 		return -EOPNOTSUPP;
1114 	}
1115 
1116 	if (mask->ct_zone)
1117 		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1118 					    key->ct_zone, MLX5_CT_ZONE_MASK);
1119 	if (ctstate_mask)
1120 		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1121 					    ctstate, ctstate_mask);
1122 	if (mask->ct_mark)
1123 		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1124 					    key->ct_mark, mask->ct_mark);
1125 	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1126 	    mask->ct_labels[3]) {
1127 		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1128 		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1129 		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1130 		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1131 		if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
1132 			return -EOPNOTSUPP;
1133 		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1134 					    MLX5_CT_LABELS_MASK);
1135 	}
1136 
1137 	return 0;
1138 }
1139 
1140 int
1141 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1142 			struct mlx5_flow_attr *attr,
1143 			const struct flow_action_entry *act,
1144 			struct netlink_ext_ack *extack)
1145 {
1146 	if (!priv) {
1147 		NL_SET_ERR_MSG_MOD(extack,
1148 				   "offload of ct action isn't available");
1149 		return -EOPNOTSUPP;
1150 	}
1151 
1152 	attr->ct_attr.zone = act->ct.zone;
1153 	attr->ct_attr.ct_action = act->ct.action;
1154 	attr->ct_attr.nf_ft = act->ct.flow_table;
1155 
1156 	return 0;
1157 }
1158 
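/* Populate a pre_ct table with its two rules: packets already +trk (and +nat
 * for the NAT table) in the zone skip conntrack and go straight to post_ct,
 * everything else misses to the CT (or CT NAT) table.
 */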
1159 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1160 				  struct mlx5_tc_ct_pre *pre_ct,
1161 				  bool nat)
1162 {
1163 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1164 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1165 	struct mlx5_core_dev *dev = ct_priv->dev;
1166 	struct mlx5_flow_table *ft = pre_ct->ft;
1167 	struct mlx5_flow_destination dest = {};
1168 	struct mlx5_flow_act flow_act = {};
1169 	struct mlx5_modify_hdr *mod_hdr;
1170 	struct mlx5_flow_handle *rule;
1171 	struct mlx5_flow_spec *spec;
1172 	u32 ctstate;
1173 	u16 zone;
1174 	int err;
1175 
1176 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1177 	if (!spec)
1178 		return -ENOMEM;
1179 
1180 	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1181 	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1182 					ZONE_TO_REG, zone);
1183 	if (err) {
1184 		ct_dbg("Failed to set zone register mapping");
1185 		goto err_mapping;
1186 	}
1187 
1188 	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1189 					   pre_mod_acts.num_actions,
1190 					   pre_mod_acts.actions);
1191 
1192 	if (IS_ERR(mod_hdr)) {
1193 		err = PTR_ERR(mod_hdr);
1194 		ct_dbg("Failed to create pre ct mod hdr");
1195 		goto err_mapping;
1196 	}
1197 	pre_ct->modify_hdr = mod_hdr;
1198 
1199 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1200 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1201 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1202 	flow_act.modify_hdr = mod_hdr;
1203 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1204 
1205 	/* add flow rule */
1206 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1207 				    zone, MLX5_CT_ZONE_MASK);
1208 	ctstate = MLX5_CT_STATE_TRK_BIT;
1209 	if (nat)
1210 		ctstate |= MLX5_CT_STATE_NAT_BIT;
1211 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1212 
1213 	dest.ft = ct_priv->post_ct;
1214 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1215 	if (IS_ERR(rule)) {
1216 		err = PTR_ERR(rule);
1217 		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1218 		goto err_flow_rule;
1219 	}
1220 	pre_ct->flow_rule = rule;
1221 
1222 	/* add miss rule */
1223 	memset(spec, 0, sizeof(*spec));
1224 	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1225 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1226 	if (IS_ERR(rule)) {
1227 		err = PTR_ERR(rule);
1228 		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1229 		goto err_miss_rule;
1230 	}
1231 	pre_ct->miss_rule = rule;
1232 
1233 	dealloc_mod_hdr_actions(&pre_mod_acts);
1234 	kvfree(spec);
1235 	return 0;
1236 
1237 err_miss_rule:
1238 	mlx5_del_flow_rules(pre_ct->flow_rule);
1239 err_flow_rule:
1240 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1241 err_mapping:
1242 	dealloc_mod_hdr_actions(&pre_mod_acts);
1243 	kvfree(spec);
1244 	return err;
1245 }
1246 
1247 static void
1248 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1249 		       struct mlx5_tc_ct_pre *pre_ct)
1250 {
1251 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1252 	struct mlx5_core_dev *dev = ct_priv->dev;
1253 
1254 	mlx5_del_flow_rules(pre_ct->flow_rule);
1255 	mlx5_del_flow_rules(pre_ct->miss_rule);
1256 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1257 }
1258 
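/* Create the unmanaged two-entry pre_ct flow table for a zone: one group
 * matching zone and ct_state in reg_c_2, one catch-all miss group, then
 * install the rules above.
 */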
1259 static int
1260 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1261 			struct mlx5_tc_ct_pre *pre_ct,
1262 			bool nat)
1263 {
1264 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1265 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1266 	struct mlx5_core_dev *dev = ct_priv->dev;
1267 	struct mlx5_flow_table_attr ft_attr = {};
1268 	struct mlx5_flow_namespace *ns;
1269 	struct mlx5_flow_table *ft;
1270 	struct mlx5_flow_group *g;
1271 	u32 metadata_reg_c_2_mask;
1272 	u32 *flow_group_in;
1273 	void *misc;
1274 	int err;
1275 
1276 	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1277 	if (!ns) {
1278 		err = -EOPNOTSUPP;
1279 		ct_dbg("Failed to get flow namespace");
1280 		return err;
1281 	}
1282 
1283 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1284 	if (!flow_group_in)
1285 		return -ENOMEM;
1286 
1287 	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1288 	ft_attr.prio =  ct_priv->ns_type ==  MLX5_FLOW_NAMESPACE_FDB ?
1289 			FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1290 	ft_attr.max_fte = 2;
1291 	ft_attr.level = 1;
1292 	ft = mlx5_create_flow_table(ns, &ft_attr);
1293 	if (IS_ERR(ft)) {
1294 		err = PTR_ERR(ft);
1295 		ct_dbg("Failed to create pre ct table");
1296 		goto out_free;
1297 	}
1298 	pre_ct->ft = ft;
1299 
1300 	/* create flow group */
1301 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1302 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1303 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1304 		 MLX5_MATCH_MISC_PARAMETERS_2);
1305 
1306 	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1307 			    match_criteria.misc_parameters_2);
1308 
1309 	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1310 	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1311 	if (nat)
1312 		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1313 
1314 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1315 		 metadata_reg_c_2_mask);
1316 
1317 	g = mlx5_create_flow_group(ft, flow_group_in);
1318 	if (IS_ERR(g)) {
1319 		err = PTR_ERR(g);
1320 		ct_dbg("Failed to create pre ct group");
1321 		goto err_flow_grp;
1322 	}
1323 	pre_ct->flow_grp = g;
1324 
1325 	/* create miss group */
1326 	memset(flow_group_in, 0, inlen);
1327 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1328 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1329 	g = mlx5_create_flow_group(ft, flow_group_in);
1330 	if (IS_ERR(g)) {
1331 		err = PTR_ERR(g);
1332 		ct_dbg("Failed to create pre ct miss group");
1333 		goto err_miss_grp;
1334 	}
1335 	pre_ct->miss_grp = g;
1336 
1337 	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1338 	if (err)
1339 		goto err_add_rules;
1340 
1341 	kvfree(flow_group_in);
1342 	return 0;
1343 
1344 err_add_rules:
1345 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1346 err_miss_grp:
1347 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1348 err_flow_grp:
1349 	mlx5_destroy_flow_table(ft);
1350 out_free:
1351 	kvfree(flow_group_in);
1352 	return err;
1353 }
1354 
1355 static void
1356 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1357 		       struct mlx5_tc_ct_pre *pre_ct)
1358 {
1359 	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1360 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1361 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1362 	mlx5_destroy_flow_table(pre_ct->ft);
1363 }
1364 
1365 static int
1366 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1367 {
1368 	int err;
1369 
1370 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1371 	if (err)
1372 		return err;
1373 
1374 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1375 	if (err)
1376 		goto err_pre_ct_nat;
1377 
1378 	return 0;
1379 
1380 err_pre_ct_nat:
1381 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1382 	return err;
1383 }
1384 
1385 static void
1386 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1387 {
1388 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1389 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1390 }
1391 
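/* Get or create the per-zone ct flow table context: allocate a zone restore
 * id, build the pre_ct/pre_ct_nat tables and register for flow offload
 * events on the nf flowtable.
 */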
1392 static struct mlx5_ct_ft *
1393 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1394 		     struct nf_flowtable *nf_ft)
1395 {
1396 	struct mlx5_ct_ft *ft;
1397 	int err;
1398 
1399 	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1400 	if (ft) {
1401 		refcount_inc(&ft->refcount);
1402 		return ft;
1403 	}
1404 
1405 	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1406 	if (!ft)
1407 		return ERR_PTR(-ENOMEM);
1408 
1409 	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1410 	if (err)
1411 		goto err_mapping;
1412 
1413 	ft->zone = zone;
1414 	ft->nf_ft = nf_ft;
1415 	ft->ct_priv = ct_priv;
1416 	refcount_set(&ft->refcount, 1);
1417 
1418 	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1419 	if (err)
1420 		goto err_alloc_pre_ct;
1421 
1422 	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1423 	if (err)
1424 		goto err_init;
1425 
1426 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1427 				     zone_params);
1428 	if (err)
1429 		goto err_insert;
1430 
1431 	err = nf_flow_table_offload_add_cb(ft->nf_ft,
1432 					   mlx5_tc_ct_block_flow_offload, ft);
1433 	if (err)
1434 		goto err_add_cb;
1435 
1436 	return ft;
1437 
1438 err_add_cb:
1439 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1440 err_insert:
1441 	rhashtable_destroy(&ft->ct_entries_ht);
1442 err_init:
1443 	mlx5_tc_ct_free_pre_ct_tables(ft);
1444 err_alloc_pre_ct:
1445 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1446 err_mapping:
1447 	kfree(ft);
1448 	return ERR_PTR(err);
1449 }
1450 
1451 static void
1452 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1453 {
1454 	struct mlx5_tc_ct_priv *ct_priv = arg;
1455 	struct mlx5_ct_entry *entry = ptr;
1456 
1457 	mlx5_tc_ct_del_ft_entry(ct_priv, entry);
1458 	kfree(entry);
1459 }
1460 
1461 static void
1462 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1463 {
1464 	if (!refcount_dec_and_test(&ft->refcount))
1465 		return;
1466 
1467 	nf_flow_table_offload_del_cb(ft->nf_ft,
1468 				     mlx5_tc_ct_block_flow_offload, ft);
1469 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1470 	rhashtable_free_and_destroy(&ft->ct_entries_ht,
1471 				    mlx5_tc_ct_flush_ft_entry,
1472 				    ct_priv);
1473 	mlx5_tc_ct_free_pre_ct_tables(ft);
1474 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1475 	kfree(ft);
1476 }
1477 
1478 /* We translate the tc filter with CT action to the following HW model:
1479  *
1480  * +---------------------+
1481  * + ft prio (tc chain) +
1482  * + original match      +
1483  * +---------------------+
1484  *      | set chain miss mapping
1485  *      | set fte_id
1486  *      | set tunnel_id
1487  *      | do decap
1488  *      v
1489  * +---------------------+
1490  * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
1491  * + zone+nat match      +---------------->+ post_ct (see below) +
1492  * +---------------------+  set zone       +---------------------+
1493  *      | set zone
1494  *      v
1495  * +--------------------+
1496  * + CT (nat or no nat) +
1497  * + tuple + zone match +
1498  * +--------------------+
1499  *      | set mark
1500  *      | set labels_id
1501  *      | set established
1502  *      | set zone_restore
1503  *      | do nat (if needed)
1504  *      v
1505  * +--------------+
1506  * + post_ct      + original filter actions
1507  * + fte_id match +------------------------>
1508  * +--------------+
1509  */
1510 static struct mlx5_flow_handle *
1511 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1512 			  struct mlx5e_tc_flow *flow,
1513 			  struct mlx5_flow_spec *orig_spec,
1514 			  struct mlx5_flow_attr *attr)
1515 {
1516 	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1517 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1518 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1519 	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1520 	struct mlx5_flow_spec *post_ct_spec = NULL;
1521 	struct mlx5_flow_attr *pre_ct_attr;
1522 	struct mlx5_modify_hdr *mod_hdr;
1523 	struct mlx5_flow_handle *rule;
1524 	struct mlx5_ct_flow *ct_flow;
1525 	int chain_mapping = 0, err;
1526 	struct mlx5_ct_ft *ft;
1527 	u32 fte_id = 1;
1528 
1529 	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
1530 	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1531 	if (!post_ct_spec || !ct_flow) {
1532 		kfree(post_ct_spec);
1533 		kfree(ct_flow);
1534 		return ERR_PTR(-ENOMEM);
1535 	}
1536 
1537 	/* Register for CT established events */
1538 	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1539 				  attr->ct_attr.nf_ft);
1540 	if (IS_ERR(ft)) {
1541 		err = PTR_ERR(ft);
1542 		ct_dbg("Failed to register to ft callback");
1543 		goto err_ft;
1544 	}
1545 	ct_flow->ft = ft;
1546 
1547 	err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
1548 			    MLX5_FTE_ID_MAX, GFP_KERNEL);
1549 	if (err) {
1550 		netdev_warn(priv->netdev,
1551 			    "Failed to allocate fte id, err: %d\n", err);
1552 		goto err_idr;
1553 	}
1554 	ct_flow->fte_id = fte_id;
1555 
1556 	/* Base flow attributes of both rules on original rule attribute */
1557 	ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1558 	if (!ct_flow->pre_ct_attr) {
1559 		err = -ENOMEM;
1560 		goto err_alloc_pre;
1561 	}
1562 
1563 	ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1564 	if (!ct_flow->post_ct_attr) {
1565 		err = -ENOMEM;
1566 		goto err_alloc_post;
1567 	}
1568 
1569 	pre_ct_attr = ct_flow->pre_ct_attr;
1570 	memcpy(pre_ct_attr, attr, attr_sz);
1571 	memcpy(ct_flow->post_ct_attr, attr, attr_sz);
1572 
1573 	/* Modify the original rule's action to fwd and modify, leave decap */
1574 	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1575 	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1576 			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1577 
1578 	/* Write chain miss tag for miss in ct table as we
1579 	 * don't go through all prios of this chain as normal tc rules
1580 	 * miss.
1581 	 */
1582 	err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1583 					    &chain_mapping);
1584 	if (err) {
1585 		ct_dbg("Failed to get chain register mapping for chain");
1586 		goto err_get_chain;
1587 	}
1588 	ct_flow->chain_mapping = chain_mapping;
1589 
1590 	err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1591 					CHAIN_TO_REG, chain_mapping);
1592 	if (err) {
1593 		ct_dbg("Failed to set chain register mapping");
1594 		goto err_mapping;
1595 	}
1596 
1597 	err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1598 					FTEID_TO_REG, fte_id);
1599 	if (err) {
1600 		ct_dbg("Failed to set fte_id register mapping");
1601 		goto err_mapping;
1602 	}
1603 
1604 	/* If original flow is decap, we do it before going into ct table
1605 	 * so add a rewrite for the tunnel match_id.
1606 	 */
1607 	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1608 	    attr->chain == 0) {
1609 		u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
1610 
1611 		err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
1612 						ct_priv->ns_type,
1613 						TUNNEL_TO_REG,
1614 						tun_id);
1615 		if (err) {
1616 			ct_dbg("Failed to set tunnel register mapping");
1617 			goto err_mapping;
1618 		}
1619 	}
1620 
1621 	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1622 					   pre_mod_acts.num_actions,
1623 					   pre_mod_acts.actions);
1624 	if (IS_ERR(mod_hdr)) {
1625 		err = PTR_ERR(mod_hdr);
1626 		ct_dbg("Failed to create pre ct mod hdr");
1627 		goto err_mapping;
1628 	}
1629 	pre_ct_attr->modify_hdr = mod_hdr;
1630 
1631 	/* Post ct rule matches on fte_id and executes the original rule's
1632 	 * tc actions
1633 	 */
1634 	mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
1635 				    fte_id, MLX5_FTE_ID_MASK);
1636 
1637 	/* Put post_ct rule on post_ct flow table */
1638 	ct_flow->post_ct_attr->chain = 0;
1639 	ct_flow->post_ct_attr->prio = 0;
1640 	ct_flow->post_ct_attr->ft = ct_priv->post_ct;
1641 
1642 	ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
1643 	ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
1644 	ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
1645 	rule = mlx5_tc_rule_insert(priv, post_ct_spec,
1646 				   ct_flow->post_ct_attr);
1647 	ct_flow->post_ct_rule = rule;
1648 	if (IS_ERR(ct_flow->post_ct_rule)) {
1649 		err = PTR_ERR(ct_flow->post_ct_rule);
1650 		ct_dbg("Failed to add post ct rule");
1651 		goto err_insert_post_ct;
1652 	}
1653 
1654 	/* Change the original rule to point to the pre ct table */
1655 	pre_ct_attr->dest_chain = 0;
1656 	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1657 	ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1658 						   pre_ct_attr);
1659 	if (IS_ERR(ct_flow->pre_ct_rule)) {
1660 		err = PTR_ERR(ct_flow->pre_ct_rule);
1661 		ct_dbg("Failed to add pre ct rule");
1662 		goto err_insert_orig;
1663 	}
1664 
1665 	attr->ct_attr.ct_flow = ct_flow;
1666 	dealloc_mod_hdr_actions(&pre_mod_acts);
1667 	kfree(post_ct_spec);
1668 
1669 	return rule;
1670 
1671 err_insert_orig:
1672 	mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1673 			    ct_flow->post_ct_attr);
1674 err_insert_post_ct:
1675 	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1676 err_mapping:
1677 	dealloc_mod_hdr_actions(&pre_mod_acts);
1678 	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1679 err_get_chain:
1680 	kfree(ct_flow->post_ct_attr);
1681 err_alloc_post:
1682 	kfree(ct_flow->pre_ct_attr);
1683 err_alloc_pre:
1684 	idr_remove(&ct_priv->fte_ids, fte_id);
1685 err_idr:
1686 	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1687 err_ft:
1688 	kfree(post_ct_spec);
1689 	kfree(ct_flow);
1690 	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1691 	return ERR_PTR(err);
1692 }
1693 
1694 static struct mlx5_flow_handle *
1695 __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
1696 				struct mlx5_flow_spec *orig_spec,
1697 				struct mlx5_flow_attr *attr,
1698 				struct mlx5e_tc_mod_hdr_acts *mod_acts)
1699 {
1700 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1701 	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1702 	struct mlx5_flow_attr *pre_ct_attr;
1703 	struct mlx5_modify_hdr *mod_hdr;
1704 	struct mlx5_flow_handle *rule;
1705 	struct mlx5_ct_flow *ct_flow;
1706 	int err;
1707 
1708 	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1709 	if (!ct_flow)
1710 		return ERR_PTR(-ENOMEM);
1711 
1712 	/* Base the flow attributes on the original rule's attributes */
1713 	pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1714 	if (!pre_ct_attr) {
1715 		err = -ENOMEM;
1716 		goto err_attr;
1717 	}
1718 
1719 	memcpy(pre_ct_attr, attr, attr_sz);
1720 
1721 	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
1722 	if (err) {
1723 		ct_dbg("Failed to set register for ct clear");
1724 		goto err_set_registers;
1725 	}
1726 
1727 	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1728 					   mod_acts->num_actions,
1729 					   mod_acts->actions);
1730 	if (IS_ERR(mod_hdr)) {
1731 		err = PTR_ERR(mod_hdr);
1732 		ct_dbg("Failed to add create ct clear mod hdr");
1733 		goto err_set_registers;
1734 	}
1735 
1736 	dealloc_mod_hdr_actions(mod_acts);
1737 	pre_ct_attr->modify_hdr = mod_hdr;
1738 	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1739 
1740 	rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
1741 	if (IS_ERR(rule)) {
1742 		err = PTR_ERR(rule);
1743 		ct_dbg("Failed to add ct clear rule");
1744 		goto err_insert;
1745 	}
1746 
1747 	attr->ct_attr.ct_flow = ct_flow;
1748 	ct_flow->pre_ct_attr = pre_ct_attr;
1749 	ct_flow->pre_ct_rule = rule;
1750 	return rule;
1751 
1752 err_insert:
1753 	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
1754 err_set_registers:
1755 	netdev_warn(priv->netdev,
1756 		    "Failed to offload ct clear flow, err %d\n", err);
1757 	kfree(pre_ct_attr);
1758 err_attr:
1759 	kfree(ct_flow);
1760 
1761 	return ERR_PTR(err);
1762 }
1763 
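/* Entry point for offloading a tc flow that carries a ct action. ct clear
 * flows bypass the ct tables; all other ct flows are split into the
 * pre_ct -> ct/ct_nat -> post_ct pipeline built above. A rough usage
 * sketch (illustrative only, assuming the caller has already parsed the
 * flow into spec/attr):
 *
 *	rule = mlx5_tc_ct_flow_offload(ct_priv, flow, spec, attr, mod_acts);
 *	if (IS_ERR(rule))
 *		return PTR_ERR(rule);
 *	...
 *	mlx5_tc_ct_delete_flow(ct_priv, flow, attr);
 */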
1764 struct mlx5_flow_handle *
1765 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
1766 			struct mlx5e_tc_flow *flow,
1767 			struct mlx5_flow_spec *spec,
1768 			struct mlx5_flow_attr *attr,
1769 			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1770 {
1771 	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
1772 	struct mlx5_flow_handle *rule;
1773 
1774 	if (!priv)
1775 		return ERR_PTR(-EOPNOTSUPP);
1776 
1777 	mutex_lock(&priv->control_lock);
1778 
1779 	if (clear_action)
1780 		rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
1781 	else
1782 		rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
1783 	mutex_unlock(&priv->control_lock);
1784 
1785 	return rule;
1786 }
1787 
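/* Tear down an offloaded ct flow: remove the pre ct rule and its modify
 * header and, for flows that went through the ct tables, also the post ct
 * rule, the chain mapping, the fte_id and the per-zone ft reference.
 */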
1788 static void
1789 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
1790 			 struct mlx5e_tc_flow *flow,
1791 			 struct mlx5_ct_flow *ct_flow)
1792 {
1793 	struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
1794 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1795 
1796 	mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
1797 			    pre_ct_attr);
1798 	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1799 
1800 	if (ct_flow->post_ct_rule) {
1801 		mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1802 				    ct_flow->post_ct_attr);
1803 		mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1804 		idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
1805 		mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
1806 	}
1807 
1808 	kfree(ct_flow->pre_ct_attr);
1809 	kfree(ct_flow->post_ct_attr);
1810 	kfree(ct_flow);
1811 }
1812 
1813 void
1814 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
1815 		       struct mlx5e_tc_flow *flow,
1816 		       struct mlx5_flow_attr *attr)
1817 {
1818 	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1819 
1820 	/* We may also be called on a parse error to clean up, in which
1821 	 * case there is nothing to release yet.
1822 	 */
1823 	if (!ct_flow)
1824 		return;
1825 
1826 	mutex_lock(&priv->control_lock);
1827 	__mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
1828 	mutex_unlock(&priv->control_lock);
1829 }
1830 
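/* CT offload in the FDB depends on the firmware capabilities probed below;
 * if any of them is missing, ct offload is disabled altogether.
 */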
1831 static int
1832 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
1833 				  const char **err_msg)
1834 {
1835 	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
1836 		*err_msg = "firmware level support is missing";
1837 		return -EOPNOTSUPP;
1838 	}
1839 
1840 	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
1841 		/* The vlan workaround should be avoided for multi-chain rules.
1842 		 * This is just a sanity check, as the pop vlan action should
1843 		 * be supported by any FW that supports ignore_flow_level.
1844 		 */
1845 
1846 		*err_msg = "firmware vlan actions support is missing";
1847 		return -EOPNOTSUPP;
1848 	}
1849 
1850 	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
1851 				    fdb_modify_header_fwd_to_table)) {
1852 		/* CT always writes to registers, which is done with mod header
1853 		 * actions. Therefore, mod header followed by goto is required.
1854 		 */
1855 
1856 		*err_msg = "firmware fwd and modify support is missing";
1857 		return -EOPNOTSUPP;
1858 	}
1859 
1860 	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
1861 		*err_msg = "register loopback isn't supported";
1862 		return -EOPNOTSUPP;
1863 	}
1864 
1865 	return 0;
1866 }
1867 
1868 static int
1869 mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
1870 				  const char **err_msg)
1871 {
1872 	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
1873 		*err_msg = "firmware level support is missing";
1874 		return -EOPNOTSUPP;
1875 	}
1876 
1877 	return 0;
1878 }
1879 
1880 static int
1881 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
1882 			      enum mlx5_flow_namespace_type ns_type,
1883 			      const char **err_msg)
1884 {
1885 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1886 
1887 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1888 	/* cannot restore chain ID on HW miss */
1889 
1890 	*err_msg = "tc skb extension missing";
1891 	return -EOPNOTSUPP;
1892 #endif
1893 	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
1894 		return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
1895 	else
1896 		return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
1897 }
1898 
1899 #define INIT_ERR_PREFIX "tc ct offload init failed"
1900 
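/* Create the ct offload context: the zone and labels mapping contexts, the
 * shared ct, ct nat and post ct global tables, and the zone/tuple
 * hashtables.
 */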
1901 struct mlx5_tc_ct_priv *
1902 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
1903 		struct mod_hdr_tbl *mod_hdr,
1904 		enum mlx5_flow_namespace_type ns_type)
1905 {
1906 	struct mlx5_tc_ct_priv *ct_priv;
1907 	struct mlx5_core_dev *dev;
1908 	const char *msg;
1909 	int err;
1910 
1911 	dev = priv->mdev;
1912 	err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
1913 	if (err) {
1914 		mlx5_core_warn(dev,
1915 			       "tc ct offload not supported, %s\n",
1916 			       msg);
1917 		goto err_support;
1918 	}
1919 
1920 	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
1921 	if (!ct_priv)
1922 		goto err_alloc;
1923 
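	/* Zone values and 128-bit conntrack labels are translated to compact
	 * ids through mapping contexts so they fit in the available register
	 * space.
	 */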
1924 	ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
1925 	if (IS_ERR(ct_priv->zone_mapping)) {
1926 		err = PTR_ERR(ct_priv->zone_mapping);
1927 		goto err_mapping_zone;
1928 	}
1929 
1930 	ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
1931 	if (IS_ERR(ct_priv->labels_mapping)) {
1932 		err = PTR_ERR(ct_priv->labels_mapping);
1933 		goto err_mapping_labels;
1934 	}
1935 
1936 	ct_priv->ns_type = ns_type;
1937 	ct_priv->chains = chains;
1938 	ct_priv->netdev = priv->netdev;
1939 	ct_priv->dev = priv->mdev;
1940 	ct_priv->mod_hdr_tbl = mod_hdr;
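	/* ct, ct_nat and post_ct are global tables shared by all zones; the
	 * per-zone pre ct tables are created when a zone is first offloaded.
	 */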
1941 	ct_priv->ct = mlx5_chains_create_global_table(chains);
1942 	if (IS_ERR(ct_priv->ct)) {
1943 		err = PTR_ERR(ct_priv->ct);
1944 		mlx5_core_warn(dev,
1945 			       "%s, failed to create ct table err: %d\n",
1946 			       INIT_ERR_PREFIX, err);
1947 		goto err_ct_tbl;
1948 	}
1949 
1950 	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
1951 	if (IS_ERR(ct_priv->ct_nat)) {
1952 		err = PTR_ERR(ct_priv->ct_nat);
1953 		mlx5_core_warn(dev,
1954 			       "%s, failed to create ct nat table err: %d\n",
1955 			       INIT_ERR_PREFIX, err);
1956 		goto err_ct_nat_tbl;
1957 	}
1958 
1959 	ct_priv->post_ct = mlx5_chains_create_global_table(chains);
1960 	if (IS_ERR(ct_priv->post_ct)) {
1961 		err = PTR_ERR(ct_priv->post_ct);
1962 		mlx5_core_warn(dev,
1963 			       "%s, failed to create post ct table err: %d\n",
1964 			       INIT_ERR_PREFIX, err);
1965 		goto err_post_ct_tbl;
1966 	}
1967 
1968 	idr_init(&ct_priv->fte_ids);
1969 	mutex_init(&ct_priv->control_lock);
1970 	mutex_init(&ct_priv->shared_counter_lock);
1971 	rhashtable_init(&ct_priv->zone_ht, &zone_params);
1972 	rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
1973 	rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
1974 
1975 	return ct_priv;
1976 
1977 err_post_ct_tbl:
1978 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
1979 err_ct_nat_tbl:
1980 	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
1981 err_ct_tbl:
1982 	mapping_destroy(ct_priv->labels_mapping);
1983 err_mapping_labels:
1984 	mapping_destroy(ct_priv->zone_mapping);
1985 err_mapping_zone:
1986 	kfree(ct_priv);
1987 err_alloc:
1988 err_support:
1989 
1990 	return NULL;
1991 }
1992 
1993 void
1994 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
1995 {
1996 	struct mlx5_fs_chains *chains;
1997 
1998 	if (!ct_priv)
1999 		return;
2000 
2001 	chains = ct_priv->chains;
2002 
2003 	mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
2004 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2005 	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2006 	mapping_destroy(ct_priv->zone_mapping);
2007 	mapping_destroy(ct_priv->labels_mapping);
2008 
2009 	rhashtable_destroy(&ct_priv->ct_tuples_ht);
2010 	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2011 	rhashtable_destroy(&ct_priv->zone_ht);
2012 	mutex_destroy(&ct_priv->control_lock);
2013 	mutex_destroy(&ct_priv->shared_counter_lock);
2014 	idr_destroy(&ct_priv->fte_ids);
2015 	kfree(ct_priv);
2016 }
2017 
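/* Called on the RX path after a hardware miss: recover the zone from the
 * zone restore id, rebuild the tuple from the skb, look it up in the tuple
 * hashtables and, if found, restore the conntrack state to the skb.
 */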
2018 bool
2019 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2020 			 struct sk_buff *skb, u8 zone_restore_id)
2021 {
2022 	struct mlx5_ct_tuple tuple = {};
2023 	struct mlx5_ct_entry *entry;
2024 	u16 zone;
2025 
2026 	if (!ct_priv || !zone_restore_id)
2027 		return true;
2028 
2029 	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2030 		return false;
2031 
2032 	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2033 		return false;
2034 
2035 	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
2036 				       tuples_ht_params);
2037 	if (!entry)
2038 		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
2039 					       &tuple, tuples_nat_ht_params);
2040 	if (!entry)
2041 		return false;
2042 
2043 	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2044 	return true;
2045 }
2046