1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  */
33 
34 #include <crypto/internal/geniv.h>
35 #include <crypto/aead.h>
36 #include <linux/inetdevice.h>
37 #include <linux/netdevice.h>
38 #include <net/netevent.h>
39 
40 #include "en.h"
41 #include "eswitch.h"
42 #include "ipsec.h"
43 #include "ipsec_rxtx.h"
44 #include "en_rep.h"
45 
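/* Interval at which the software-limits worker re-checks whether an offloaded
 * state has hit its soft/hard lifetime (see mlx5e_ipsec_handle_sw_limits()).
 */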
46 #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
47 #define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
48 
49 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
50 {
51 	return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
52 }
53 
54 static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
55 {
56 	return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
57 }
58 
59 static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work)
60 {
61 	struct mlx5e_ipsec_dwork *dwork =
62 		container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
63 	struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
64 	struct xfrm_state *x = sa_entry->x;
65 
66 	if (sa_entry->attrs.drop)
67 		return;
68 
69 	spin_lock_bh(&x->lock);
70 	if (x->km.state == XFRM_STATE_EXPIRED) {
71 		sa_entry->attrs.drop = true;
72 		spin_unlock_bh(&x->lock);
73 
74 		mlx5e_accel_ipsec_fs_modify(sa_entry);
75 		return;
76 	}
77 
78 	if (x->km.state != XFRM_STATE_VALID) {
79 		spin_unlock_bh(&x->lock);
80 		return;
81 	}
82 
83 	xfrm_state_check_expire(x);
84 	spin_unlock_bh(&x->lock);
85 
86 	queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
87 			   MLX5_IPSEC_RESCHED);
88 }
89 
90 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
91 {
92 	struct xfrm_state *x = sa_entry->x;
93 	u32 seq_bottom = 0;
94 	u32 esn, esn_msb;
95 	u8 overlap;
96 
97 	switch (x->xso.dir) {
98 	case XFRM_DEV_OFFLOAD_IN:
99 		esn = x->replay_esn->seq;
100 		esn_msb = x->replay_esn->seq_hi;
101 		break;
102 	case XFRM_DEV_OFFLOAD_OUT:
103 		esn = x->replay_esn->oseq;
104 		esn_msb = x->replay_esn->oseq_hi;
105 		break;
106 	default:
107 		WARN_ON(true);
108 		return false;
109 	}
110 
111 	overlap = sa_entry->esn_state.overlap;
112 
113 	if (!x->replay_esn->replay_window) {
114 		seq_bottom = esn;
115 	} else {
116 		if (esn >= x->replay_esn->replay_window)
117 			seq_bottom = esn - x->replay_esn->replay_window + 1;
118 
119 		if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
120 			esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
121 	}
122 
123 	if (sa_entry->esn_state.esn_msb)
124 		sa_entry->esn_state.esn = esn;
125 	else
126 		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
127 		 * the first packet sent using a given SA will contain a sequence
128 		 * number of 1.
129 		 */
130 		sa_entry->esn_state.esn = max_t(u32, esn, 1);
131 	sa_entry->esn_state.esn_msb = esn_msb;
132 
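	/* Toggle the overlap flag whenever the bottom of the replay window
	 * crosses the middle of the 32-bit sequence space; returning true
	 * tells the caller that the hardware ESN state needs to be updated.
	 */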
133 	if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
134 		sa_entry->esn_state.overlap = 0;
135 		return true;
136 	} else if (unlikely(!overlap &&
137 			    (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
138 		sa_entry->esn_state.overlap = 1;
139 		return true;
140 	}
141 
142 	return false;
143 }
144 
145 static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
146 				    struct mlx5_accel_esp_xfrm_attrs *attrs)
147 {
148 	struct xfrm_state *x = sa_entry->x;
149 	s64 start_value, n;
150 
151 	attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
152 	attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
153 	if (x->lft.soft_packet_limit == XFRM_INF)
154 		return;
155 
156 	/* Compute hard limit initial value and number of rounds.
157 	 *
158 	 * The counting pattern of hardware counter goes:
159 	 *                value  -> 2^31-1
160 	 *      2^31  | (2^31-1) -> 2^31-1
161 	 *      2^31  | (2^31-1) -> 2^31-1
162 	 *      [..]
163 	 *      2^31  | (2^31-1) -> 0
164 	 *
165 	 * The pattern is created by using an ASO operation to atomically set
166 	 * bit 31 after the down counter clears bit 31. This is effectively an
167 	 * atomic addition of 2**31 to the counter.
168 	 *
169 	 * We wish to configure the counter, within the above pattern, so that
170 	 * when it reaches 0, it has hit the hard limit. This is defined by this
171 	 * system of equations:
172 	 *
173 	 *      hard_limit == start_value + n * 2^31
174 	 *      n >= 0
175 	 *      start_value < 2^32, start_value >= 0
176 	 *
177 	 * These equations do not have a single solution; there are often two choices:
178 	 *      hard_limit == start_value + n * 2^31
179 	 *      hard_limit == (start_value+2^31) + (n-1) * 2^31
180 	 *
181 	 * The algorithm selects the solution that keeps the counter value
182 	 * above 2^31 until the final iteration.
183 	 */
184 
185 	/* Start by estimating n and compute start_value */
186 	n = attrs->lft.hard_packet_limit / BIT_ULL(31);
187 	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
188 
189 	/* Choose the best of the two solutions: */
190 	if (n >= 1)
191 		n -= 1;
192 
193 	/* Computed values solve the system of equations: */
194 	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
195 
196 	/* The best solution means: when there are multiple iterations we must
197 	 * start above 2^31 and count down to 2**31 to get the interrupt.
198 	 */
199 	attrs->lft.hard_packet_limit = lower_32_bits(start_value);
200 	attrs->lft.numb_rounds_hard = (u64)n;
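	/* Illustrative example: a hard limit of 5 * 2^31 + 7 yields n = 5,
	 * then n = 4 and start_value = 2^31 + 7, so the counter counts
	 * start_value packets plus 4 rounds of 2^31 - exactly the requested
	 * limit.
	 */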
201 
202 	/* Compute soft limit initial value and number of rounds.
203 	 *
204 	 * The soft_limit is achieved by adjusting the counter's
205 	 * interrupt_value. This is embedded in the counting pattern created by
206 	 * hard packet calculations above.
207 	 *
208 	 * We wish to compute the interrupt_value for the soft_limit. This is
209 	 * defined by this system of equations:
210 	 *
211 	 *      soft_limit == start_value - soft_value + n * 2^31
212 	 *      n >= 0
213 	 *      soft_value < 2^32, soft_value >= 0
214 	 *      for n == 0 start_value > soft_value
215 	 *
216 	 * As with the hard limit computation above, the equations do not have a single solution.
217 	 * The algorithm selects the solution that has:
218 	 *      2^30 <= soft_limit < 2^31 + 2^30
219 	 * for the interior iterations, which guarantees a large guard band
220 	 * around the counter hard limit and next interrupt.
221 	 */
222 
223 	/* Start by estimating n and compute soft_value */
224 	n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
225 	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
226 		      x->lft.soft_packet_limit;
227 
228 	/* Compare against constraints and adjust n */
229 	if (n < 0)
230 		n = 0;
231 	else if (start_value >= BIT_ULL(32))
232 		n -= 1;
233 	else if (start_value < 0)
234 		n += 1;
235 
236 	/* Choose the best of the two solutions: */
237 	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
238 	if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
239 		n += 1;
240 
241 	/* Note that the upper limit of soft_value happens naturally because we
242 	 * always select the lowest soft_value.
243 	 */
244 
245 	/* Computed values solve the system of equations: */
246 	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
247 
248 	/* The best solution means: with multiple iterations we must not fall
249 	 * below 2^30, as that would get too close to a false hard_limit, and
250 	 * when we reach an interior iteration for soft_limit it has to stay
251 	 * far away from 2^32-1, which is the counter reset point after the
252 	 * +2^31, to accommodate latency.
253 	 */
254 	attrs->lft.soft_packet_limit = lower_32_bits(start_value);
255 	attrs->lft.numb_rounds_soft = (u64)n;
256 }
257 
258 static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
259 				  struct mlx5_accel_esp_xfrm_attrs *attrs)
260 {
261 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
262 	struct xfrm_state *x = sa_entry->x;
263 	struct net_device *netdev;
264 	struct neighbour *n;
265 	u8 addr[ETH_ALEN];
266 	const void *pkey;
267 	u8 *dst, *src;
268 
269 	if (attrs->mode != XFRM_MODE_TUNNEL ||
270 	    attrs->type != XFRM_DEV_OFFLOAD_PACKET)
271 		return;
272 
273 	netdev = x->xso.real_dev;
274 
275 	mlx5_query_mac_address(mdev, addr);
276 	switch (attrs->dir) {
277 	case XFRM_DEV_OFFLOAD_IN:
278 		src = attrs->dmac;
279 		dst = attrs->smac;
280 		pkey = &attrs->addrs.saddr.a4;
281 		break;
282 	case XFRM_DEV_OFFLOAD_OUT:
283 		src = attrs->smac;
284 		dst = attrs->dmac;
285 		pkey = &attrs->addrs.daddr.a4;
286 		break;
287 	default:
288 		return;
289 	}
290 
291 	ether_addr_copy(src, addr);
292 	n = neigh_lookup(&arp_tbl, pkey, netdev);
293 	if (!n) {
294 		n = neigh_create(&arp_tbl, pkey, netdev);
295 		if (IS_ERR(n))
296 			return;
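		/* No resolved neighbour yet: trigger resolution and drop
		 * traffic on this SA until mlx5e_ipsec_handle_netdev_event()
		 * learns the MAC address from a netevent notification.
		 */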
297 		neigh_event_send(n, NULL);
298 		attrs->drop = true;
299 	} else {
300 		neigh_ha_snapshot(addr, n, netdev);
301 		ether_addr_copy(dst, addr);
302 	}
303 	neigh_release(n);
304 }
305 
306 static void mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr *addrs)
307 {
308 	/*
309 	 * A state doesn't have subnet prefixes in its outer headers.
310 	 * The match is performed on exact source/destination addresses.
311 	 */
312 	memset(addrs->smask.m6, 0xFF, sizeof(__be32) * 4);
313 	memset(addrs->dmask.m6, 0xFF, sizeof(__be32) * 4);
314 }
315 
316 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
317 					struct mlx5_accel_esp_xfrm_attrs *attrs)
318 {
319 	struct xfrm_state *x = sa_entry->x;
320 	struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
321 	struct aead_geniv_ctx *geniv_ctx;
322 	struct crypto_aead *aead;
323 	unsigned int crypto_data_len, key_len;
324 	int ivsize;
325 
326 	memset(attrs, 0, sizeof(*attrs));
327 
328 	/* key */
329 	crypto_data_len = (x->aead->alg_key_len + 7) / 8;
330 	key_len = crypto_data_len - 4; /* 4 bytes salt at end */
331 
332 	memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
333 	aes_gcm->key_len = key_len * 8;
334 
335 	/* salt and seq_iv */
336 	aead = x->data;
337 	geniv_ctx = crypto_aead_ctx(aead);
338 	ivsize = crypto_aead_ivsize(aead);
339 	memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
340 	memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
341 	       sizeof(aes_gcm->salt));
342 
343 	attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */
344 
345 	/* iv len */
346 	aes_gcm->icv_len = x->aead->alg_icv_len;
347 
348 	attrs->dir = x->xso.dir;
349 
350 	/* esn */
351 	if (x->props.flags & XFRM_STATE_ESN) {
352 		attrs->replay_esn.trigger = true;
353 		attrs->replay_esn.esn = sa_entry->esn_state.esn;
354 		attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
355 		attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
356 		if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
357 			goto skip_replay_window;
358 
359 		switch (x->replay_esn->replay_window) {
360 		case 32:
361 			attrs->replay_esn.replay_window =
362 				MLX5_IPSEC_ASO_REPLAY_WIN_32BIT;
363 			break;
364 		case 64:
365 			attrs->replay_esn.replay_window =
366 				MLX5_IPSEC_ASO_REPLAY_WIN_64BIT;
367 			break;
368 		case 128:
369 			attrs->replay_esn.replay_window =
370 				MLX5_IPSEC_ASO_REPLAY_WIN_128BIT;
371 			break;
372 		case 256:
373 			attrs->replay_esn.replay_window =
374 				MLX5_IPSEC_ASO_REPLAY_WIN_256BIT;
375 			break;
376 		default:
377 			WARN_ON(true);
378 			return;
379 		}
380 	}
381 
382 skip_replay_window:
383 	/* spi */
384 	attrs->spi = be32_to_cpu(x->id.spi);
385 
386 	/* source and destination IPs */
387 	memcpy(&attrs->addrs.saddr, x->props.saddr.a6,
388 	       sizeof(attrs->addrs.saddr));
389 	memcpy(&attrs->addrs.daddr, x->id.daddr.a6, sizeof(attrs->addrs.daddr));
390 	attrs->addrs.family = x->props.family;
391 	mlx5e_ipsec_state_mask(&attrs->addrs);
392 	attrs->type = x->xso.type;
393 	attrs->reqid = x->props.reqid;
394 	attrs->upspec.dport = ntohs(x->sel.dport);
395 	attrs->upspec.dport_mask = ntohs(x->sel.dport_mask);
396 	attrs->upspec.sport = ntohs(x->sel.sport);
397 	attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
398 	attrs->upspec.proto = x->sel.proto;
399 	attrs->mode = x->props.mode;
400 
401 	mlx5e_ipsec_init_limits(sa_entry, attrs);
402 	mlx5e_ipsec_init_macs(sa_entry, attrs);
403 
404 	if (x->encap) {
405 		attrs->encap = true;
406 		attrs->sport = x->encap->encap_sport;
407 		attrs->dport = x->encap->encap_dport;
408 	}
409 }
410 
411 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
412 				     struct xfrm_state *x,
413 				     struct netlink_ext_ack *extack)
414 {
415 	if (x->props.aalgo != SADB_AALG_NONE) {
416 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states");
417 		return -EINVAL;
418 	}
419 	if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
420 		NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded");
421 		return -EINVAL;
422 	}
423 	if (x->props.calgo != SADB_X_CALG_NONE) {
424 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states");
425 		return -EINVAL;
426 	}
427 	if (x->props.flags & XFRM_STATE_ESN &&
428 	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) {
429 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states");
430 		return -EINVAL;
431 	}
432 	if (x->props.family != AF_INET &&
433 	    x->props.family != AF_INET6) {
434 		NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded");
435 		return -EINVAL;
436 	}
437 	if (x->id.proto != IPPROTO_ESP) {
438 		NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded");
439 		return -EINVAL;
440 	}
441 	if (x->encap) {
442 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) {
443 			NL_SET_ERR_MSG_MOD(extack,
444 					   "Encapsulation is not supported");
445 			return -EINVAL;
446 		}
447 
448 		if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
449 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported");
450 			return -EINVAL;
451 		}
452 
453 		if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) {
454 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only");
455 			return -EINVAL;
456 		}
457 
458 		if (x->props.mode != XFRM_MODE_TRANSPORT) {
459 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only");
460 			return -EINVAL;
461 		}
462 	}
463 	if (!x->aead) {
464 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead");
465 		return -EINVAL;
466 	}
467 	if (x->aead->alg_icv_len != 128) {
468 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit");
469 		return -EINVAL;
470 	}
471 	if ((x->aead->alg_key_len != 128 + 32) &&
472 	    (x->aead->alg_key_len != 256 + 32)) {
473 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit");
474 		return -EINVAL;
475 	}
476 	if (x->tfcpad) {
477 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding");
478 		return -EINVAL;
479 	}
480 	if (!x->geniv) {
481 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv");
482 		return -EINVAL;
483 	}
484 	if (strcmp(x->geniv, "seqiv")) {
485 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv");
486 		return -EINVAL;
487 	}
488 
489 	if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP &&
490 	    x->sel.proto != IPPROTO_TCP) {
491 		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
492 		return -EINVAL;
493 	}
494 
495 	if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
496 		NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
497 		return -EINVAL;
498 	}
499 
500 	switch (x->xso.type) {
501 	case XFRM_DEV_OFFLOAD_CRYPTO:
502 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
503 			NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported");
504 			return -EINVAL;
505 		}
506 
507 		break;
508 	case XFRM_DEV_OFFLOAD_PACKET:
509 		if (!(mlx5_ipsec_device_caps(mdev) &
510 		      MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
511 			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
512 			return -EINVAL;
513 		}
514 
515 		if (x->props.mode == XFRM_MODE_TUNNEL &&
516 		    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
517 			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
518 			return -EINVAL;
519 		}
520 
521 		if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN &&
522 		    x->replay_esn->replay_window != 32 &&
523 		    x->replay_esn->replay_window != 64 &&
524 		    x->replay_esn->replay_window != 128 &&
525 		    x->replay_esn->replay_window != 256) {
526 			NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size");
527 			return -EINVAL;
528 		}
529 
530 		if (!x->props.reqid) {
531 			NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid");
532 			return -EINVAL;
533 		}
534 
535 		if (x->lft.soft_byte_limit >= x->lft.hard_byte_limit &&
536 		    x->lft.hard_byte_limit != XFRM_INF) {
537 			/* XFRM stack doesn't prevent such configuration :(. */
538 			NL_SET_ERR_MSG_MOD(extack, "Hard byte limit must be greater than soft one");
539 			return -EINVAL;
540 		}
541 
542 		if (!x->lft.soft_byte_limit || !x->lft.hard_byte_limit) {
543 			NL_SET_ERR_MSG_MOD(extack, "Soft/hard byte limits can't be 0");
544 			return -EINVAL;
545 		}
546 
547 		if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit &&
548 		    x->lft.hard_packet_limit != XFRM_INF) {
549 			/* XFRM stack doesn't prevent such configuration :(. */
550 			NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
551 			return -EINVAL;
552 		}
553 
554 		if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
555 			NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
556 			return -EINVAL;
557 		}
558 		break;
559 	default:
560 		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
561 		return -EINVAL;
562 	}
563 	return 0;
564 }
565 
566 static void mlx5e_ipsec_modify_state(struct work_struct *_work)
567 {
568 	struct mlx5e_ipsec_work *work =
569 		container_of(_work, struct mlx5e_ipsec_work, work);
570 	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
571 	struct mlx5_accel_esp_xfrm_attrs *attrs;
572 
573 	attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;
574 
575 	mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
576 }
577 
578 static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
579 {
580 	struct xfrm_state *x = sa_entry->x;
581 
582 	if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
583 	    x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
584 		return;
585 
586 	if (x->props.flags & XFRM_STATE_ESN) {
587 		sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
588 		return;
589 	}
590 
591 	sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
592 }
593 
594 static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
595 {
596 	struct mlx5e_ipsec_work *work =
597 		container_of(_work, struct mlx5e_ipsec_work, work);
598 	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
599 	struct mlx5e_ipsec_netevent_data *data = work->data;
600 	struct mlx5_accel_esp_xfrm_attrs *attrs;
601 
602 	attrs = &sa_entry->attrs;
603 
604 	switch (attrs->dir) {
605 	case XFRM_DEV_OFFLOAD_IN:
606 		ether_addr_copy(attrs->smac, data->addr);
607 		break;
608 	case XFRM_DEV_OFFLOAD_OUT:
609 		ether_addr_copy(attrs->dmac, data->addr);
610 		break;
611 	default:
612 		WARN_ON_ONCE(true);
613 	}
614 	attrs->drop = false;
615 	mlx5e_accel_ipsec_fs_modify(sa_entry);
616 }
617 
618 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
619 {
620 	struct xfrm_state *x = sa_entry->x;
621 	struct mlx5e_ipsec_work *work;
622 	void *data = NULL;
623 
624 	switch (x->xso.type) {
625 	case XFRM_DEV_OFFLOAD_CRYPTO:
626 		if (!(x->props.flags & XFRM_STATE_ESN))
627 			return 0;
628 		break;
629 	case XFRM_DEV_OFFLOAD_PACKET:
630 		if (x->props.mode != XFRM_MODE_TUNNEL)
631 			return 0;
632 		break;
633 	default:
634 		break;
635 	}
636 
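	/* An ESN crypto state needs a shadow SA entry to stage the updated
	 * attributes, while a tunnel-mode packet state needs a buffer for the
	 * MAC address learned from netevent notifications.
	 */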
637 	work = kzalloc(sizeof(*work), GFP_KERNEL);
638 	if (!work)
639 		return -ENOMEM;
640 
641 	switch (x->xso.type) {
642 	case XFRM_DEV_OFFLOAD_CRYPTO:
643 		data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
644 		if (!data)
645 			goto free_work;
646 
647 		INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
648 		break;
649 	case XFRM_DEV_OFFLOAD_PACKET:
650 		data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
651 			       GFP_KERNEL);
652 		if (!data)
653 			goto free_work;
654 
655 		INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
656 		break;
657 	default:
658 		break;
659 	}
660 
661 	work->data = data;
662 	work->sa_entry = sa_entry;
663 	sa_entry->work = work;
664 	return 0;
665 
666 free_work:
667 	kfree(work);
668 	return -ENOMEM;
669 }
670 
671 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
672 {
673 	struct xfrm_state *x = sa_entry->x;
674 	struct mlx5e_ipsec_dwork *dwork;
675 
676 	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
677 		return 0;
678 
679 	if (x->lft.soft_packet_limit == XFRM_INF &&
680 	    x->lft.hard_packet_limit == XFRM_INF &&
681 	    x->lft.soft_byte_limit == XFRM_INF &&
682 	    x->lft.hard_byte_limit == XFRM_INF)
683 		return 0;
684 
685 	dwork = kzalloc(sizeof(*dwork), GFP_KERNEL);
686 	if (!dwork)
687 		return -ENOMEM;
688 
689 	dwork->sa_entry = sa_entry;
690 	INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_sw_limits);
691 	sa_entry->dwork = dwork;
692 	return 0;
693 }
694 
695 static int mlx5e_xfrm_add_state(struct xfrm_state *x,
696 				struct netlink_ext_ack *extack)
697 {
698 	struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
699 	struct net_device *netdev = x->xso.real_dev;
700 	struct mlx5e_ipsec *ipsec;
701 	struct mlx5e_priv *priv;
702 	gfp_t gfp;
703 	int err;
704 
705 	priv = netdev_priv(netdev);
706 	if (!priv->ipsec)
707 		return -EOPNOTSUPP;
708 
709 	ipsec = priv->ipsec;
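	/* States created by the acquire flow may be added from atomic
	 * context, so a non-sleeping allocation is used for them.
	 */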
710 	gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
711 	sa_entry = kzalloc(sizeof(*sa_entry), gfp);
712 	if (!sa_entry)
713 		return -ENOMEM;
714 
715 	sa_entry->x = x;
716 	sa_entry->ipsec = ipsec;
717 	/* Check if this SA is originated from acquire flow temporary SA */
718 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
719 		goto out;
720 
721 	err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
722 	if (err)
723 		goto err_xfrm;
724 
725 	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
726 		err = -EBUSY;
727 		goto err_xfrm;
728 	}
729 
730 	/* check esn */
731 	if (x->props.flags & XFRM_STATE_ESN)
732 		mlx5e_ipsec_update_esn_state(sa_entry);
733 	else
734 		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
735 		 * the first packet sent using a given SA will contain a sequence
736 		 * number of 1.
737 		 */
738 		sa_entry->esn_state.esn = 1;
739 
740 	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
741 
742 	err = mlx5_ipsec_create_work(sa_entry);
743 	if (err)
744 		goto unblock_ipsec;
745 
746 	err = mlx5e_ipsec_create_dwork(sa_entry);
747 	if (err)
748 		goto release_work;
749 
750 	/* create hw context */
751 	err = mlx5_ipsec_create_sa_ctx(sa_entry);
752 	if (err)
753 		goto release_dwork;
754 
755 	err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
756 	if (err)
757 		goto err_hw_ctx;
758 
759 	if (x->props.mode == XFRM_MODE_TUNNEL &&
760 	    x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
761 	    !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
762 		NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
763 		err = -EINVAL;
764 		goto err_add_rule;
765 	}
766 
767 	/* We use *_bh() variant because xfrm_timer_handler(), which runs
768 	 * in softirq context, can reach our state delete logic and we need
769 	 * xa_erase_bh() there.
770 	 */
771 	err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry,
772 			   GFP_KERNEL);
773 	if (err)
774 		goto err_add_rule;
775 
776 	mlx5e_ipsec_set_esn_ops(sa_entry);
777 
778 	if (sa_entry->dwork)
779 		queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
780 				   MLX5_IPSEC_RESCHED);
781 
782 	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
783 	    x->props.mode == XFRM_MODE_TUNNEL) {
784 		xa_lock_bh(&ipsec->sadb);
785 		__xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
786 			      MLX5E_IPSEC_TUNNEL_SA);
787 		xa_unlock_bh(&ipsec->sadb);
788 	}
789 
790 out:
791 	x->xso.offload_handle = (unsigned long)sa_entry;
792 	return 0;
793 
794 err_add_rule:
795 	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
796 err_hw_ctx:
797 	mlx5_ipsec_free_sa_ctx(sa_entry);
798 release_dwork:
799 	kfree(sa_entry->dwork);
800 release_work:
801 	if (sa_entry->work)
802 		kfree(sa_entry->work->data);
803 	kfree(sa_entry->work);
804 unblock_ipsec:
805 	mlx5_eswitch_unblock_ipsec(priv->mdev);
806 err_xfrm:
807 	kfree(sa_entry);
808 	NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
809 	return err;
810 }
811 
812 static void mlx5e_xfrm_del_state(struct xfrm_state *x)
813 {
814 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
815 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
816 	struct mlx5e_ipsec_sa_entry *old;
817 
818 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
819 		return;
820 
821 	old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
822 	WARN_ON(old != sa_entry);
823 }
824 
825 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
826 {
827 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
828 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
829 
830 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
831 		goto sa_entry_free;
832 
833 	if (sa_entry->work)
834 		cancel_work_sync(&sa_entry->work->work);
835 
836 	if (sa_entry->dwork)
837 		cancel_delayed_work_sync(&sa_entry->dwork->dwork);
838 
839 	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
840 	mlx5_ipsec_free_sa_ctx(sa_entry);
841 	kfree(sa_entry->dwork);
842 	if (sa_entry->work)
843 		kfree(sa_entry->work->data);
844 	kfree(sa_entry->work);
845 	mlx5_eswitch_unblock_ipsec(ipsec->mdev);
846 sa_entry_free:
847 	kfree(sa_entry);
848 }
849 
850 static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
851 				      unsigned long event, void *ptr)
852 {
853 	struct mlx5_accel_esp_xfrm_attrs *attrs;
854 	struct mlx5e_ipsec_netevent_data *data;
855 	struct mlx5e_ipsec_sa_entry *sa_entry;
856 	struct mlx5e_ipsec *ipsec;
857 	struct neighbour *n = ptr;
858 	struct net_device *netdev;
859 	struct xfrm_state *x;
860 	unsigned long idx;
861 
862 	if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
863 		return NOTIFY_DONE;
864 
865 	ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
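	/* Only tunnel-mode packet-offload SAs are marked with
	 * MLX5E_IPSEC_TUNNEL_SA (see mlx5e_xfrm_add_state()), so the walk is
	 * limited to the states whose MACs may need refreshing.
	 */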
866 	xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
867 		attrs = &sa_entry->attrs;
868 
869 		if (attrs->addrs.family == AF_INET) {
870 			if (!neigh_key_eq32(n, &attrs->addrs.saddr.a4) &&
871 			    !neigh_key_eq32(n, &attrs->addrs.daddr.a4))
872 				continue;
873 		} else {
874 			if (!neigh_key_eq128(n, &attrs->addrs.saddr.a4) &&
875 			    !neigh_key_eq128(n, &attrs->addrs.daddr.a4))
876 				continue;
877 		}
878 
879 		x = sa_entry->x;
880 		netdev = x->xso.real_dev;
881 		data = sa_entry->work->data;
882 
883 		neigh_ha_snapshot(data->addr, n, netdev);
884 		queue_work(ipsec->wq, &sa_entry->work->work);
885 	}
886 
887 	return NOTIFY_DONE;
888 }
889 
890 void mlx5e_ipsec_init(struct mlx5e_priv *priv)
891 {
892 	struct mlx5e_ipsec *ipsec;
893 	int ret = -ENOMEM;
894 
895 	if (!mlx5_ipsec_device_caps(priv->mdev)) {
896 		netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
897 		return;
898 	}
899 
900 	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
901 	if (!ipsec)
902 		return;
903 
904 	xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
905 	ipsec->mdev = priv->mdev;
906 	init_completion(&ipsec->comp);
907 	ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
908 				    priv->netdev->name);
909 	if (!ipsec->wq)
910 		goto err_wq;
911 
912 	if (mlx5_ipsec_device_caps(priv->mdev) &
913 	    MLX5_IPSEC_CAP_PACKET_OFFLOAD) {
914 		ret = mlx5e_ipsec_aso_init(ipsec);
915 		if (ret)
916 			goto err_aso;
917 	}
918 
919 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
920 		ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
921 		ret = register_netevent_notifier(&ipsec->netevent_nb);
922 		if (ret)
923 			goto clear_aso;
924 	}
925 
926 	ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv);
927 	ret = mlx5e_accel_ipsec_fs_init(ipsec, &priv->devcom);
928 	if (ret)
929 		goto err_fs_init;
930 
931 	ipsec->fs = priv->fs;
932 	priv->ipsec = ipsec;
933 	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
934 	return;
935 
936 err_fs_init:
937 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
938 		unregister_netevent_notifier(&ipsec->netevent_nb);
939 clear_aso:
940 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
941 		mlx5e_ipsec_aso_cleanup(ipsec);
942 err_aso:
943 	destroy_workqueue(ipsec->wq);
944 err_wq:
945 	kfree(ipsec);
946 	mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret);
947 	return;
948 }
949 
950 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
951 {
952 	struct mlx5e_ipsec *ipsec = priv->ipsec;
953 
954 	if (!ipsec)
955 		return;
956 
957 	mlx5e_accel_ipsec_fs_cleanup(ipsec);
958 	if (ipsec->netevent_nb.notifier_call) {
959 		unregister_netevent_notifier(&ipsec->netevent_nb);
960 		ipsec->netevent_nb.notifier_call = NULL;
961 	}
962 	if (ipsec->aso)
963 		mlx5e_ipsec_aso_cleanup(ipsec);
964 	destroy_workqueue(ipsec->wq);
965 	kfree(ipsec);
966 	priv->ipsec = NULL;
967 }
968 
969 static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
970 {
971 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
972 	struct mlx5e_ipsec_work *work = sa_entry->work;
973 	struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
974 	bool need_update;
975 
976 	need_update = mlx5e_ipsec_update_esn_state(sa_entry);
977 	if (!need_update)
978 		return;
979 
980 	sa_entry_shadow = work->data;
981 	memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
982 	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
983 	queue_work(sa_entry->ipsec->wq, &work->work);
984 }
985 
986 static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
987 {
988 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
989 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
990 	struct net *net = dev_net(x->xso.dev);
991 	u64 trailer_packets = 0, trailer_bytes = 0;
992 	u64 replay_packets = 0, replay_bytes = 0;
993 	u64 auth_packets = 0, auth_bytes = 0;
994 	u64 success_packets, success_bytes;
995 	u64 packets, bytes, lastuse;
996 	size_t headers;
997 
998 	lockdep_assert(lockdep_is_held(&x->lock) ||
999 		       lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) ||
1000 		       lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock));
1001 
1002 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
1003 		return;
1004 
1005 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1006 		mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes,
1007 				     &auth_packets, &lastuse);
1008 		x->stats.integrity_failed += auth_packets;
1009 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets);
1010 
1011 		mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes,
1012 				     &trailer_packets, &lastuse);
1013 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets);
1014 	}
1015 
1016 	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
1017 		return;
1018 
1019 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1020 		mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes,
1021 				     &replay_packets, &lastuse);
1022 		x->stats.replay += replay_packets;
1023 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets);
1024 	}
1025 
1026 	mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
1027 	success_packets = packets - auth_packets - trailer_packets - replay_packets;
1028 	x->curlft.packets += success_packets;
1029 	/* The NIC counts all bytes passed through flow steering and has no
1030 	 * way to count the payload size, which is what is needed for the SA.
1031 	 *
1032 	 * To overcome this HW limitation, approximate the payload size by
1033 	 * subtracting the always-present headers.
1034 	 */
1035 	headers = sizeof(struct ethhdr);
1036 	if (sa_entry->attrs.addrs.family == AF_INET)
1037 		headers += sizeof(struct iphdr);
1038 	else
1039 		headers += sizeof(struct ipv6hdr);
1040 
1041 	success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;
1042 	x->curlft.bytes += success_bytes - headers * success_packets;
1043 }
1044 
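/* Convert one 32-bit word of a prefix length into a big-endian mask:
 * 1..31 set that many high bits (e.g. 24 -> 0xffffff00), a negative value
 * gives an empty mask, and 0 or more than 31 give a full mask.
 */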
1045 static __be32 word_to_mask(int prefix)
1046 {
1047 	if (prefix < 0)
1048 		return 0;
1049 
1050 	if (!prefix || prefix > 31)
1051 		return cpu_to_be32(0xFFFFFFFF);
1052 
1053 	return cpu_to_be32(((1U << prefix) - 1) << (32 - prefix));
1054 }
1055 
1056 static void mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr *addrs,
1057 				    struct xfrm_selector *sel)
1058 {
1059 	int i;
1060 
1061 	if (addrs->family == AF_INET) {
1062 		addrs->smask.m4 = word_to_mask(sel->prefixlen_s);
1063 		addrs->saddr.a4 &= addrs->smask.m4;
1064 		addrs->dmask.m4 = word_to_mask(sel->prefixlen_d);
1065 		addrs->daddr.a4 &= addrs->dmask.m4;
1066 		return;
1067 	}
1068 
1069 	for (i = 0; i < 4; i++) {
1070 		if (sel->prefixlen_s != 32 * i)
1071 			addrs->smask.m6[i] =
1072 				word_to_mask(sel->prefixlen_s - 32 * i);
1073 		addrs->saddr.a6[i] &= addrs->smask.m6[i];
1074 
1075 		if (sel->prefixlen_d != 32 * i)
1076 			addrs->dmask.m6[i] =
1077 				word_to_mask(sel->prefixlen_d - 32 * i);
1078 		addrs->daddr.a6[i] &= addrs->dmask.m6[i];
1079 	}
1080 }
1081 
1082 static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
1083 				      struct xfrm_policy *x,
1084 				      struct netlink_ext_ack *extack)
1085 {
1086 	struct xfrm_selector *sel = &x->selector;
1087 
1088 	if (x->type != XFRM_POLICY_TYPE_MAIN) {
1089 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
1090 		return -EINVAL;
1091 	}
1092 
1093 	/* Note that only a single template is supported. */
1094 	if (x->xfrm_nr > 1) {
1095 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template");
1096 		return -EINVAL;
1097 	}
1098 
1099 	if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN &&
1100 	    x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) {
1101 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy");
1102 		return -EINVAL;
1103 	}
1104 
1105 	if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
1106 	    addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
1107 		NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
1108 		return -EINVAL;
1109 	}
1110 
1111 	if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) {
1112 		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
1113 		return -EINVAL;
1114 	}
1115 
1116 	if (x->selector.proto != IPPROTO_IP &&
1117 	    x->selector.proto != IPPROTO_UDP &&
1118 	    x->selector.proto != IPPROTO_TCP) {
1119 		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
1120 		return -EINVAL;
1121 	}
1122 
1123 	if (x->priority) {
1124 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
1125 			NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
1126 			return -EINVAL;
1127 		}
1128 
1129 		if (x->priority == U32_MAX) {
1130 			NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
1131 			return -EINVAL;
1132 		}
1133 	}
1134 
1135 	if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET &&
1136 	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
1137 		NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
1138 		return -EINVAL;
1139 	}
1140 
1141 	return 0;
1142 }
1143 
1144 static void
1145 mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
1146 				  struct mlx5_accel_pol_xfrm_attrs *attrs)
1147 {
1148 	struct xfrm_policy *x = pol_entry->x;
1149 	struct xfrm_selector *sel;
1150 
1151 	sel = &x->selector;
1152 	memset(attrs, 0, sizeof(*attrs));
1153 
1154 	memcpy(&attrs->addrs.saddr, sel->saddr.a6, sizeof(attrs->addrs.saddr));
1155 	memcpy(&attrs->addrs.daddr, sel->daddr.a6, sizeof(attrs->addrs.daddr));
1156 	attrs->addrs.family = sel->family;
1157 	mlx5e_ipsec_policy_mask(&attrs->addrs, sel);
1158 	attrs->dir = x->xdo.dir;
1159 	attrs->action = x->action;
1160 	attrs->type = XFRM_DEV_OFFLOAD_PACKET;
1161 	attrs->reqid = x->xfrm_vec[0].reqid;
1162 	attrs->upspec.dport = ntohs(sel->dport);
1163 	attrs->upspec.dport_mask = ntohs(sel->dport_mask);
1164 	attrs->upspec.sport = ntohs(sel->sport);
1165 	attrs->upspec.sport_mask = ntohs(sel->sport_mask);
1166 	attrs->upspec.proto = sel->proto;
1167 	attrs->prio = x->priority;
1168 }
1169 
1170 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
1171 				 struct netlink_ext_ack *extack)
1172 {
1173 	struct net_device *netdev = x->xdo.real_dev;
1174 	struct mlx5e_ipsec_pol_entry *pol_entry;
1175 	struct mlx5e_priv *priv;
1176 	int err;
1177 
1178 	priv = netdev_priv(netdev);
1179 	if (!priv->ipsec) {
1180 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload");
1181 		return -EOPNOTSUPP;
1182 	}
1183 
1184 	err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
1185 	if (err)
1186 		return err;
1187 
1188 	pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL);
1189 	if (!pol_entry)
1190 		return -ENOMEM;
1191 
1192 	pol_entry->x = x;
1193 	pol_entry->ipsec = priv->ipsec;
1194 
1195 	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
1196 		err = -EBUSY;
1197 		goto ipsec_busy;
1198 	}
1199 
1200 	mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
1201 	err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
1202 	if (err)
1203 		goto err_fs;
1204 
1205 	x->xdo.offload_handle = (unsigned long)pol_entry;
1206 	return 0;
1207 
1208 err_fs:
1209 	mlx5_eswitch_unblock_ipsec(priv->mdev);
1210 ipsec_busy:
1211 	kfree(pol_entry);
1212 	NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
1213 	return err;
1214 }
1215 
1216 static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
1217 {
1218 	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
1219 
1220 	mlx5e_accel_ipsec_fs_del_pol(pol_entry);
1221 	mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev);
1222 }
1223 
1224 static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
1225 {
1226 	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
1227 
1228 	kfree(pol_entry);
1229 }
1230 
1231 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
1232 	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
1233 	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
1234 	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
1235 	.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
1236 
1237 	.xdo_dev_state_update_stats = mlx5e_xfrm_update_stats,
1238 	.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
1239 	.xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
1240 	.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
1241 };
1242 
1243 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
1244 {
1245 	struct mlx5_core_dev *mdev = priv->mdev;
1246 	struct net_device *netdev = priv->netdev;
1247 
1248 	if (!mlx5_ipsec_device_caps(mdev))
1249 		return;
1250 
1251 	mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
1252 
1253 	netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
1254 	netdev->features |= NETIF_F_HW_ESP;
1255 	netdev->hw_enc_features |= NETIF_F_HW_ESP;
1256 
1257 	if (!MLX5_CAP_ETH(mdev, swp_csum)) {
1258 		mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
1259 		return;
1260 	}
1261 
1262 	netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
1263 	netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
1264 
1265 	if (!MLX5_CAP_ETH(mdev, swp_lso)) {
1266 		mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
1267 		return;
1268 	}
1269 
1270 	netdev->gso_partial_features |= NETIF_F_GSO_ESP;
1271 	mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
1272 	netdev->features |= NETIF_F_GSO_ESP;
1273 	netdev->hw_features |= NETIF_F_GSO_ESP;
1274 	netdev->hw_enc_features |= NETIF_F_GSO_ESP;
1275 }
1276