/*
 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <crypto/internal/geniv.h>
#include <crypto/aead.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/netevent.h>

#include "en.h"
#include "eswitch.h"
#include "ipsec.h"
#include "ipsec_rxtx.h"
#include "en_rep.h"

#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
{
	return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
}

static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
{
	return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
}

static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work)
{
	struct mlx5e_ipsec_dwork *dwork =
		container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
	struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
	struct xfrm_state *x = sa_entry->x;

	if (sa_entry->attrs.drop)
		return;

	spin_lock_bh(&x->lock);
	if (x->km.state == XFRM_STATE_EXPIRED) {
		sa_entry->attrs.drop = true;
		spin_unlock_bh(&x->lock);

		mlx5e_accel_ipsec_fs_modify(sa_entry);
		return;
	}

	if (x->km.state != XFRM_STATE_VALID) {
		spin_unlock_bh(&x->lock);
		return;
	}

	xfrm_state_check_expire(x);
	spin_unlock_bh(&x->lock);

	queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
			   MLX5_IPSEC_RESCHED);
}

static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	struct xfrm_state *x = sa_entry->x;
	u32 seq_bottom = 0;
	u32 esn, esn_msb;
	u8 overlap;

	switch (x->xso.dir) {
	case XFRM_DEV_OFFLOAD_IN:
		esn = x->replay_esn->seq;
		esn_msb = x->replay_esn->seq_hi;
		break;
	case XFRM_DEV_OFFLOAD_OUT:
		esn = x->replay_esn->oseq;
		esn_msb = x->replay_esn->oseq_hi;
		break;
	default:
		WARN_ON(true);
		return false;
	}

	overlap = sa_entry->esn_state.overlap;

	if (!x->replay_esn->replay_window) {
		seq_bottom = esn;
	} else {
		if (esn >= x->replay_esn->replay_window)
			seq_bottom = esn - x->replay_esn->replay_window + 1;

		if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
			esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
	}

	if (sa_entry->esn_state.esn_msb)
		sa_entry->esn_state.esn = esn;
	else
		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
		 * the first packet sent using a given SA will contain a sequence
		 * number of 1.
		 */
		sa_entry->esn_state.esn = max_t(u32, esn, 1);
	sa_entry->esn_state.esn_msb = esn_msb;

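	/* Crossing MLX5E_IPSEC_ESN_SCOPE_MID (assumed here to mark the
	 * midpoint of the 32-bit sequence number space) in either direction
	 * means the SA moved into the other half of the ESN scope: for
	 * example, a window bottom that advances from below the midpoint to
	 * above it flips overlap to 1. Returning true asks the caller to
	 * reprogram the hardware with the new esn/esn_msb/overlap triplet.
	 */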
	if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
		sa_entry->esn_state.overlap = 0;
		return true;
	} else if (unlikely(!overlap &&
			    (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
		sa_entry->esn_state.overlap = 1;
		return true;
	}

	return false;
}

static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
				    struct mlx5_accel_esp_xfrm_attrs *attrs)
{
	struct xfrm_state *x = sa_entry->x;
	s64 start_value, n;

	attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
	attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
	if (x->lft.soft_packet_limit == XFRM_INF)
		return;

	/* Compute hard limit initial value and number of rounds.
	 *
	 * The counting pattern of hardware counter goes:
	 *            value  -> 2^31-1
	 *      2^31 | (2^31-1) -> 2^31-1
	 *      2^31 | (2^31-1) -> 2^31-1
	 *      [..]
	 *      2^31 | (2^31-1) -> 0
	 *
	 * The pattern is created by using an ASO operation to atomically set
	 * bit 31 after the down counter clears bit 31. This is effectively an
	 * atomic addition of 2^31 to the counter.
	 *
	 * We wish to configure the counter, within the above pattern, so that
	 * when it reaches 0, it has hit the hard limit. This is defined by this
	 * system of equations:
	 *
	 *      hard_limit == start_value + n * 2^31
	 *      n >= 0
	 *      start_value < 2^32, start_value >= 0
	 *
	 * These equations are not single-solution, there are often two choices:
	 *      hard_limit == start_value + n * 2^31
	 *      hard_limit == (start_value + 2^31) + (n - 1) * 2^31
	 *
	 * The algorithm selects the solution that keeps the counter value
	 * above 2^31 until the final iteration.
	 */
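	/* Worked example, hand-checked against the equations above: a hard
	 * limit of 2^32 + 100 first gives n = 2 and start_value = 100; the
	 * "other" solution chosen below is n = 1 and start_value = 2^31 + 100,
	 * so the counter starts above 2^31 and a single ASO round supplies
	 * the remaining 2^31 packets.
	 */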

	/* Start by estimating n and compute start_value */
	n = attrs->lft.hard_packet_limit / BIT_ULL(31);
	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);

	/* Choose the best of the two solutions: */
	if (n >= 1)
		n -= 1;

	/* Computed values solve the system of equations: */
	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);

	/* The best solution means: when there are multiple iterations we must
	 * start above 2^31 and count down to 2^31 to get the interrupt.
	 */
	attrs->lft.hard_packet_limit = lower_32_bits(start_value);
	attrs->lft.numb_rounds_hard = (u64)n;

	/* Compute soft limit initial value and number of rounds.
	 *
	 * The soft_limit is achieved by adjusting the counter's
	 * interrupt_value. This is embedded in the counting pattern created by
	 * hard packet calculations above.
	 *
	 * We wish to compute the interrupt_value for the soft_limit. This is
	 * defined by this system of equations:
	 *
	 *      soft_limit == start_value - soft_value + n * 2^31
	 *      n >= 0
	 *      soft_value < 2^32, soft_value >= 0
	 *      for n == 0 start_value > soft_value
	 *
	 * As with compute_hard_n_value() the equations are not single-solution.
	 * The algorithm selects the solution that has:
	 *      2^30 <= soft_limit < 2^31 + 2^30
	 * for the interior iterations, which guarantees a large guard band
	 * around the counter hard limit and next interrupt.
	 */
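	/* Worked example, hand-checked against the equations above: with the
	 * hard start_value of 2^31 + 100 from the previous example and a soft
	 * limit 100 packets before the hard one, soft_value = 100 and n = 1
	 * satisfy soft_limit == start_value - soft_value + n * 2^31.
	 */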

	/* Start by estimating n and compute soft_value */
	n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
		      x->lft.soft_packet_limit;

	/* Compare against constraints and adjust n */
	if (n < 0)
		n = 0;
	else if (start_value >= BIT_ULL(32))
		n -= 1;
	else if (start_value < 0)
		n += 1;

	/* Choose the best of the two solutions: */
	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
	if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
		n += 1;

	/* Note that the upper limit of soft_value happens naturally because we
	 * always select the lowest soft_value.
	 */

	/* Computed values solve the system of equations: */
	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;

	/* The best solution means: when there are multiple iterations we must
	 * not fall below 2^30 as that would get too close to the false
	 * hard_limit and when we reach an interior iteration for soft_limit it
	 * has to be far away from 2^32-1 which is the counter reset point
	 * after the +2^31 to accommodate latency.
	 */
	attrs->lft.soft_packet_limit = lower_32_bits(start_value);
	attrs->lft.numb_rounds_soft = (u64)n;
}

static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
				  struct mlx5_accel_esp_xfrm_attrs *attrs)
{
	struct mlx5e_ipsec_addr *addrs = &attrs->addrs;
	struct net_device *netdev = sa_entry->dev;
	struct xfrm_state *x = sa_entry->x;
	struct dst_entry *rt_dst_entry;
	struct flowi4 fl4 = {};
	struct flowi6 fl6 = {};
	struct neighbour *n;
	u8 addr[ETH_ALEN];
	struct rtable *rt;
	const void *pkey;
	u8 *dst, *src;

	if (attrs->mode != XFRM_MODE_TUNNEL ||
	    attrs->type != XFRM_DEV_OFFLOAD_PACKET)
		return;

	ether_addr_copy(addr, netdev->dev_addr);
	switch (attrs->dir) {
	case XFRM_DEV_OFFLOAD_IN:
		src = attrs->dmac;
		dst = attrs->smac;

		switch (addrs->family) {
		case AF_INET:
			fl4.flowi4_proto = x->sel.proto;
			fl4.daddr = addrs->saddr.a4;
			fl4.saddr = addrs->daddr.a4;
			pkey = &addrs->saddr.a4;
			break;
		case AF_INET6:
			fl6.flowi6_proto = x->sel.proto;
			memcpy(fl6.daddr.s6_addr32, addrs->saddr.a6, 16);
			memcpy(fl6.saddr.s6_addr32, addrs->daddr.a6, 16);
			pkey = &addrs->saddr.a6;
			break;
		default:
			return;
		}
		break;
	case XFRM_DEV_OFFLOAD_OUT:
		src = attrs->smac;
		dst = attrs->dmac;
		switch (addrs->family) {
		case AF_INET:
			fl4.flowi4_proto = x->sel.proto;
			fl4.daddr = addrs->daddr.a4;
			fl4.saddr = addrs->saddr.a4;
			pkey = &addrs->daddr.a4;
			break;
		case AF_INET6:
			fl6.flowi6_proto = x->sel.proto;
			memcpy(fl6.daddr.s6_addr32, addrs->daddr.a6, 16);
			memcpy(fl6.saddr.s6_addr32, addrs->saddr.a6, 16);
			pkey = &addrs->daddr.a6;
			break;
		default:
			return;
		}
		break;
	default:
		return;
	}

	ether_addr_copy(src, addr);

	/* Destination can refer to a routed network, so perform a FIB lookup
	 * to resolve the nexthop and get its MAC. Neighbour resolution is
	 * used as a fallback.
	 */
	switch (addrs->family) {
	case AF_INET:
		rt = ip_route_output_key(dev_net(netdev), &fl4);
		if (IS_ERR(rt))
			goto neigh;

		if (rt->rt_type != RTN_UNICAST) {
			ip_rt_put(rt);
			goto neigh;
		}
		rt_dst_entry = &rt->dst;
		break;
	case AF_INET6:
		if (!IS_ENABLED(CONFIG_IPV6) ||
		    ip6_dst_lookup(dev_net(netdev), NULL, &rt_dst_entry, &fl6))
			goto neigh;
		break;
	default:
		return;
	}

	n = dst_neigh_lookup(rt_dst_entry, pkey);
	if (!n) {
		dst_release(rt_dst_entry);
		goto neigh;
	}

	neigh_ha_snapshot(addr, n, netdev);
	ether_addr_copy(dst, addr);
	if (attrs->dir == XFRM_DEV_OFFLOAD_OUT &&
	    is_zero_ether_addr(addr))
		neigh_event_send(n, NULL);
	dst_release(rt_dst_entry);
	neigh_release(n);
	return;

neigh:
	n = neigh_lookup(&arp_tbl, pkey, netdev);
	if (!n) {
		n = neigh_create(&arp_tbl, pkey, netdev);
		if (IS_ERR(n))
			return;
		neigh_event_send(n, NULL);
		attrs->drop = true;
	} else {
		neigh_ha_snapshot(addr, n, netdev);
		ether_addr_copy(dst, addr);
	}
	neigh_release(n);
}

static void mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr *addrs)
{
	/*
	 * State doesn't have subnet prefixes in outer headers.
	 * The match is performed for exact source/destination addresses.
	 */
	memset(addrs->smask.m6, 0xFF, sizeof(__be32) * 4);
	memset(addrs->dmask.m6, 0xFF, sizeof(__be32) * 4);
}

void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
					struct mlx5_accel_esp_xfrm_attrs *attrs)
{
	struct xfrm_state *x = sa_entry->x;
	struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
	struct aead_geniv_ctx *geniv_ctx;
	struct crypto_aead *aead;
	unsigned int crypto_data_len, key_len;
	int ivsize;

	memset(attrs, 0, sizeof(*attrs));

	/* key */
	crypto_data_len = (x->aead->alg_key_len + 7) / 8;
	key_len = crypto_data_len - 4; /* 4 bytes salt at end */
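	/* For example, AES-128-GCM arrives as alg_key_len == 128 + 32 bits
	 * (see mlx5e_xfrm_validate_state()), so crypto_data_len is 20 bytes:
	 * a 16-byte key followed by the 4-byte salt.
	 */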

	memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
	aes_gcm->key_len = key_len * 8;

	/* salt and seq_iv */
	aead = x->data;
	geniv_ctx = crypto_aead_ctx(aead);
	ivsize = crypto_aead_ivsize(aead);
	memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
	memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
	       sizeof(aes_gcm->salt));

	attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */

	/* icv len */
	aes_gcm->icv_len = x->aead->alg_icv_len;

	attrs->dir = x->xso.dir;

	/* esn */
	if (x->props.flags & XFRM_STATE_ESN) {
		attrs->replay_esn.trigger = true;
		attrs->replay_esn.esn = sa_entry->esn_state.esn;
		attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
		attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
		if (attrs->dir == XFRM_DEV_OFFLOAD_OUT ||
		    x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
			goto skip_replay_window;

		switch (x->replay_esn->replay_window) {
		case 32:
			attrs->replay_esn.replay_window =
				MLX5_IPSEC_ASO_REPLAY_WIN_32BIT;
			break;
		case 64:
			attrs->replay_esn.replay_window =
				MLX5_IPSEC_ASO_REPLAY_WIN_64BIT;
			break;
		case 128:
			attrs->replay_esn.replay_window =
				MLX5_IPSEC_ASO_REPLAY_WIN_128BIT;
			break;
		case 256:
			attrs->replay_esn.replay_window =
				MLX5_IPSEC_ASO_REPLAY_WIN_256BIT;
			break;
		default:
			WARN_ON(true);
			return;
		}
	}

skip_replay_window:
	/* spi */
	attrs->spi = be32_to_cpu(x->id.spi);

	/* source, destination IPs */
	memcpy(&attrs->addrs.saddr, x->props.saddr.a6,
	       sizeof(attrs->addrs.saddr));
	memcpy(&attrs->addrs.daddr, x->id.daddr.a6, sizeof(attrs->addrs.daddr));
	attrs->addrs.family = x->props.family;
	mlx5e_ipsec_state_mask(&attrs->addrs);
	attrs->type = x->xso.type;
	attrs->reqid = x->props.reqid;
	attrs->upspec.dport = ntohs(x->sel.dport);
	attrs->upspec.dport_mask = ntohs(x->sel.dport_mask);
	attrs->upspec.sport = ntohs(x->sel.sport);
	attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
	attrs->upspec.proto = x->sel.proto;
	attrs->mode = x->props.mode;

	mlx5e_ipsec_init_limits(sa_entry, attrs);
	mlx5e_ipsec_init_macs(sa_entry, attrs);

	if (x->encap) {
		attrs->encap = true;
		attrs->sport = x->encap->encap_sport;
		attrs->dport = x->encap->encap_dport;
	}
}

static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
				     struct xfrm_state *x,
				     struct netlink_ext_ack *extack)
{
	if (x->props.aalgo != SADB_AALG_NONE) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states");
		return -EINVAL;
	}
	if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
		NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded");
		return -EINVAL;
	}
	if (x->props.calgo != SADB_X_CALG_NONE) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states");
		return -EINVAL;
	}
	if (x->props.flags & XFRM_STATE_ESN &&
	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states");
		return -EINVAL;
	}
	if (x->props.family != AF_INET &&
	    x->props.family != AF_INET6) {
		NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded");
		return -EINVAL;
	}
	if (x->id.proto != IPPROTO_ESP) {
		NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded");
		return -EINVAL;
	}
	if (x->encap) {
		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Encapsulation is not supported");
			return -EINVAL;
		}

		if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
			NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported");
			return -EINVAL;
		}

		if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) {
			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only");
			return -EINVAL;
		}

		if (x->props.mode != XFRM_MODE_TRANSPORT) {
			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only");
			return -EINVAL;
		}
	}
	if (!x->aead) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead");
		return -EINVAL;
	}
	if (x->aead->alg_icv_len != 128) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit");
		return -EINVAL;
	}
	if ((x->aead->alg_key_len != 128 + 32) &&
	    (x->aead->alg_key_len != 256 + 32)) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit");
		return -EINVAL;
	}
	if (x->tfcpad) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding");
		return -EINVAL;
	}
	if (!x->geniv) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv");
		return -EINVAL;
	}
	if (strcmp(x->geniv, "seqiv")) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv");
		return -EINVAL;
	}

	if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP &&
	    x->sel.proto != IPPROTO_TCP) {
		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
		return -EINVAL;
	}

	if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
		NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
		return -EINVAL;
	}

	switch (x->xso.type) {
	case XFRM_DEV_OFFLOAD_CRYPTO:
		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
			NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported");
			return -EINVAL;
		}

		break;
	case XFRM_DEV_OFFLOAD_PACKET:
		if (!(mlx5_ipsec_device_caps(mdev) &
		      MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
			return -EINVAL;
		}

		if (x->props.mode == XFRM_MODE_TUNNEL &&
		    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
			return -EINVAL;
		}

		if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN &&
		    x->replay_esn->replay_window != 32 &&
		    x->replay_esn->replay_window != 64 &&
		    x->replay_esn->replay_window != 128 &&
		    x->replay_esn->replay_window != 256) {
			NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size");
			return -EINVAL;
		}

		if (!x->props.reqid) {
			NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid");
			return -EINVAL;
		}

		if (x->lft.soft_byte_limit >= x->lft.hard_byte_limit &&
		    x->lft.hard_byte_limit != XFRM_INF) {
			/* XFRM stack doesn't prevent such configuration :(. */
			NL_SET_ERR_MSG_MOD(extack, "Hard byte limit must be greater than soft one");
			return -EINVAL;
		}

		if (!x->lft.soft_byte_limit || !x->lft.hard_byte_limit) {
			NL_SET_ERR_MSG_MOD(extack, "Soft/hard byte limits can't be 0");
			return -EINVAL;
		}

		if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit &&
		    x->lft.hard_packet_limit != XFRM_INF) {
			/* XFRM stack doesn't prevent such configuration :(. */
			NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
			return -EINVAL;
		}

		if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
			NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
			return -EINVAL;
		}
		break;
	default:
		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
		return -EINVAL;
	}
	return 0;
}

static void mlx5e_ipsec_modify_state(struct work_struct *_work)
{
	struct mlx5e_ipsec_work *work =
		container_of(_work, struct mlx5e_ipsec_work, work);
	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
	struct mlx5_accel_esp_xfrm_attrs *attrs;

	attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;

	mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
}

static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	struct xfrm_state *x = sa_entry->x;

	if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
	    x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
		return;

	if (x->props.flags & XFRM_STATE_ESN) {
		sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
		return;
	}

	sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
}

static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
{
	struct mlx5e_ipsec_work *work =
		container_of(_work, struct mlx5e_ipsec_work, work);
	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
	struct mlx5e_ipsec_netevent_data *data = work->data;
	struct mlx5_accel_esp_xfrm_attrs *attrs;

	attrs = &sa_entry->attrs;

	switch (attrs->dir) {
	case XFRM_DEV_OFFLOAD_IN:
		ether_addr_copy(attrs->smac, data->addr);
		break;
	case XFRM_DEV_OFFLOAD_OUT:
		ether_addr_copy(attrs->dmac, data->addr);
		break;
	default:
		WARN_ON_ONCE(true);
	}
	attrs->drop = false;
	mlx5e_accel_ipsec_fs_modify(sa_entry);
}

static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	struct xfrm_state *x = sa_entry->x;
	struct mlx5e_ipsec_work *work;
	void *data = NULL;

	switch (x->xso.type) {
	case XFRM_DEV_OFFLOAD_CRYPTO:
		if (!(x->props.flags & XFRM_STATE_ESN))
			return 0;
		break;
	case XFRM_DEV_OFFLOAD_PACKET:
		if (x->props.mode != XFRM_MODE_TUNNEL)
			return 0;
		break;
	default:
		break;
	}

	work = kzalloc_obj(*work);
	if (!work)
		return -ENOMEM;

	switch (x->xso.type) {
	case XFRM_DEV_OFFLOAD_CRYPTO:
		data = kzalloc_obj(*sa_entry);
		if (!data)
			goto free_work;

		INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
		break;
	case XFRM_DEV_OFFLOAD_PACKET:
		data = kzalloc_obj(struct mlx5e_ipsec_netevent_data);
		if (!data)
			goto free_work;

		INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
		break;
	default:
		break;
	}

	work->data = data;
	work->sa_entry = sa_entry;
	sa_entry->work = work;
	return 0;

free_work:
	kfree(work);
	return -ENOMEM;
}

static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
{
	struct xfrm_state *x = sa_entry->x;
	struct mlx5e_ipsec_dwork *dwork;

	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
		return 0;

	if (x->lft.soft_packet_limit == XFRM_INF &&
	    x->lft.hard_packet_limit == XFRM_INF &&
	    x->lft.soft_byte_limit == XFRM_INF &&
	    x->lft.hard_byte_limit == XFRM_INF)
		return 0;

	dwork = kzalloc_obj(*dwork);
	if (!dwork)
		return -ENOMEM;

	dwork->sa_entry = sa_entry;
	INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_sw_limits);
	sa_entry->dwork = dwork;
	return 0;
}

static int mlx5e_xfrm_add_state(struct net_device *dev,
				struct xfrm_state *x,
				struct netlink_ext_ack *extack)
{
	struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
	bool allow_tunnel_mode = false;
	struct mlx5e_ipsec *ipsec;
	struct mlx5e_priv *priv;
	gfp_t gfp;
	int err;

	priv = netdev_priv(dev);
	if (!priv->ipsec)
		return -EOPNOTSUPP;

	ipsec = priv->ipsec;
	gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
	sa_entry = kzalloc_obj(*sa_entry, gfp);
	if (!sa_entry)
		return -ENOMEM;

	sa_entry->x = x;
	sa_entry->dev = dev;
	sa_entry->ipsec = ipsec;
	/* Check if this SA originated from an acquire flow temporary SA */
	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
		goto out;

	err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
	if (err)
		goto err_xfrm;

	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
		err = -EBUSY;
		goto err_xfrm;
	}

	err = mlx5_eswitch_block_mode(priv->mdev);
	if (err)
		goto unblock_ipsec;

	if (x->props.mode == XFRM_MODE_TUNNEL &&
	    x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
		allow_tunnel_mode = mlx5e_ipsec_fs_tunnel_allowed(sa_entry);
		if (!allow_tunnel_mode) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Packet offload tunnel mode is disabled due to encap settings");
			err = -EINVAL;
			goto unblock_mode;
		}
	}

	/* check esn */
	if (x->props.flags & XFRM_STATE_ESN)
		mlx5e_ipsec_update_esn_state(sa_entry);
	else
		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
		 * the first packet sent using a given SA will contain a sequence
		 * number of 1.
		 */
		sa_entry->esn_state.esn = 1;

	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);

	err = mlx5_ipsec_create_work(sa_entry);
	if (err)
		goto unblock_encap;

	err = mlx5e_ipsec_create_dwork(sa_entry);
	if (err)
		goto release_work;

	/* create hw context */
	err = mlx5_ipsec_create_sa_ctx(sa_entry);
	if (err)
		goto release_dwork;

	err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
	if (err)
		goto err_hw_ctx;

	/* We use *_bh() variant because xfrm_timer_handler(), which runs
	 * in softirq context, can reach our state delete logic and we need
	 * xa_erase_bh() there.
	 */
	err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry,
			   GFP_KERNEL);
	if (err)
		goto err_add_rule;

	mlx5e_ipsec_set_esn_ops(sa_entry);

	if (sa_entry->dwork)
		queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
				   MLX5_IPSEC_RESCHED);

	if (allow_tunnel_mode) {
		xa_lock_bh(&ipsec->sadb);
		__xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
			      MLX5E_IPSEC_TUNNEL_SA);
		xa_unlock_bh(&ipsec->sadb);
	}

out:
	x->xso.offload_handle = (unsigned long)sa_entry;
	if (allow_tunnel_mode)
		mlx5_eswitch_unblock_encap(priv->mdev);

	mlx5_eswitch_unblock_mode(priv->mdev);

	return 0;

err_add_rule:
	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
err_hw_ctx:
	mlx5_ipsec_free_sa_ctx(sa_entry);
release_dwork:
	kfree(sa_entry->dwork);
release_work:
	if (sa_entry->work)
		kfree(sa_entry->work->data);
	kfree(sa_entry->work);
unblock_encap:
	if (allow_tunnel_mode)
		mlx5_eswitch_unblock_encap(priv->mdev);
unblock_mode:
	mlx5_eswitch_unblock_mode(priv->mdev);
unblock_ipsec:
	mlx5_eswitch_unblock_ipsec(priv->mdev);
err_xfrm:
	kfree(sa_entry);
	NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
	return err;
}

static void mlx5e_xfrm_del_state(struct net_device *dev, struct xfrm_state *x)
{
	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
	struct mlx5e_ipsec_sa_entry *old;

	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
		return;

	old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
	WARN_ON(old != sa_entry);
}

static void mlx5e_xfrm_free_state(struct net_device *dev, struct xfrm_state *x)
{
	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;

	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
		goto sa_entry_free;

	if (sa_entry->work)
		cancel_work_sync(&sa_entry->work->work);

	if (sa_entry->dwork)
		cancel_delayed_work_sync(&sa_entry->dwork->dwork);

	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
	mlx5_ipsec_free_sa_ctx(sa_entry);
	kfree(sa_entry->dwork);
	if (sa_entry->work)
		kfree(sa_entry->work->data);
	kfree(sa_entry->work);
	mlx5_eswitch_unblock_ipsec(ipsec->mdev);
sa_entry_free:
	kfree(sa_entry);
}

static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
				      unsigned long event, void *ptr)
{
	struct mlx5_accel_esp_xfrm_attrs *attrs;
	struct mlx5e_ipsec_netevent_data *data;
	struct mlx5e_ipsec_sa_entry *sa_entry;
	struct mlx5e_ipsec *ipsec;
	struct neighbour *n = ptr;
	unsigned long idx;

	if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
		return NOTIFY_DONE;

	ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
	xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
		attrs = &sa_entry->attrs;

		if (attrs->addrs.family == AF_INET) {
			if (!neigh_key_eq32(n, &attrs->addrs.saddr.a4) &&
			    !neigh_key_eq32(n, &attrs->addrs.daddr.a4))
				continue;
		} else {
			if (!neigh_key_eq128(n, &attrs->addrs.saddr.a4) &&
			    !neigh_key_eq128(n, &attrs->addrs.daddr.a4))
				continue;
		}

		data = sa_entry->work->data;

		neigh_ha_snapshot(data->addr, n, sa_entry->dev);
		queue_work(ipsec->wq, &sa_entry->work->work);
	}

	return NOTIFY_DONE;
}

void mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
	struct mlx5e_ipsec *ipsec;
	int ret = -ENOMEM;

	if (!mlx5_ipsec_device_caps(priv->mdev)) {
		netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
		return;
	}

	ipsec = kzalloc_obj(*ipsec);
	if (!ipsec)
		return;

	xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
	ipsec->mdev = priv->mdev;
	init_completion(&ipsec->comp);
	ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
				    priv->netdev->name);
	if (!ipsec->wq)
		goto err_wq;

	if (mlx5_ipsec_device_caps(priv->mdev) &
	    MLX5_IPSEC_CAP_PACKET_OFFLOAD) {
		ret = mlx5e_ipsec_aso_init(ipsec);
		if (ret)
			goto err_aso;
	}

	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
		ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
		ret = register_netevent_notifier(&ipsec->netevent_nb);
		if (ret)
			goto clear_aso;
	}

	ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv);
	ret = mlx5e_accel_ipsec_fs_init(ipsec, &priv->devcom);
	if (ret)
		goto err_fs_init;

	ipsec->fs = priv->fs;
	priv->ipsec = ipsec;
	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
	return;

err_fs_init:
	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
		unregister_netevent_notifier(&ipsec->netevent_nb);
clear_aso:
	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
		mlx5e_ipsec_aso_cleanup(ipsec);
err_aso:
	destroy_workqueue(ipsec->wq);
err_wq:
	kfree(ipsec);
	mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret);
	return;
}

void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_ipsec *ipsec = priv->ipsec;

	if (!ipsec)
		return;

	mlx5e_accel_ipsec_fs_cleanup(ipsec);
	if (ipsec->netevent_nb.notifier_call) {
		unregister_netevent_notifier(&ipsec->netevent_nb);
		ipsec->netevent_nb.notifier_call = NULL;
	}
	if (ipsec->aso)
		mlx5e_ipsec_aso_cleanup(ipsec);
	destroy_workqueue(ipsec->wq);
	kfree(ipsec);
	priv->ipsec = NULL;
}

static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
{
	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
	struct mlx5e_ipsec_work *work = sa_entry->work;
	struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
	bool need_update;

	need_update = mlx5e_ipsec_update_esn_state(sa_entry);
	if (!need_update)
		return;

	sa_entry_shadow = work->data;
	memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
	queue_work(sa_entry->ipsec->wq, &work->work);
}

static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
{
	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
	struct net *net = dev_net(x->xso.dev);
	u64 trailer_packets = 0, trailer_bytes = 0;
	u64 replay_packets = 0, replay_bytes = 0;
	u64 auth_packets = 0, auth_bytes = 0;
	u64 success_packets, success_bytes;
	u64 packets, bytes, lastuse;
	size_t headers;

	lockdep_assert(lockdep_is_held(&x->lock) ||
		       lockdep_is_held(&net->xfrm.xfrm_cfg_mutex) ||
		       lockdep_is_held(&net->xfrm.xfrm_state_lock));

	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
		return;

	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
		mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes,
				     &auth_packets, &lastuse);
		x->stats.integrity_failed += auth_packets;
		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets);

		mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes,
				     &trailer_packets, &lastuse);
		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets);
	}

	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
		return;

	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
		mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes,
				     &replay_packets, &lastuse);
		x->stats.replay += replay_packets;
		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets);
	}

	mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
	success_packets = packets - auth_packets - trailer_packets - replay_packets;
	x->curlft.packets += success_packets;
	/* NIC counts all bytes passed through flow steering and doesn't have
	 * the ability to count the payload size, which is what the SA
	 * accounting needs.
	 *
	 * To overcome the HW limitation, let's approximate the payload size
	 * by removing the always-present headers.
	 */
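	/* For example, on IPv4 each successfully handled packet is counted
	 * minus 14 (ethernet) + 20 (IPv4) header bytes; IPv6 subtracts
	 * 14 + 40 instead.
	 */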
	headers = sizeof(struct ethhdr);
	if (sa_entry->attrs.addrs.family == AF_INET)
		headers += sizeof(struct iphdr);
	else
		headers += sizeof(struct ipv6hdr);

	success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;
	x->curlft.bytes += success_bytes - headers * success_packets;
}

static __be32 word_to_mask(int prefix)
{
	if (prefix < 0)
		return 0;

	if (!prefix || prefix > 31)
		return cpu_to_be32(0xFFFFFFFF);

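	/* e.g. prefix 24 -> ((1 << 24) - 1) << 8 == 0xffffff00, returned in
	 * network byte order.
	 */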
	return cpu_to_be32(((1U << prefix) - 1) << (32 - prefix));
}

static void mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr *addrs,
				    struct xfrm_selector *sel)
{
	int i;

	if (addrs->family == AF_INET) {
		addrs->smask.m4 = word_to_mask(sel->prefixlen_s);
		addrs->saddr.a4 &= addrs->smask.m4;
		addrs->dmask.m4 = word_to_mask(sel->prefixlen_d);
		addrs->daddr.a4 &= addrs->dmask.m4;
		return;
	}

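	/* Mask the IPv6 prefix one 32-bit word at a time. For example, a /40
	 * prefix yields m6[0] = all-ones (40 - 0 > 31), m6[1] =
	 * word_to_mask(8) = 0xff000000 and zero for the last two words. The
	 * "!= 32 * i" guard skips exact word boundaries, where
	 * word_to_mask(0) would wrongly return all-ones.
	 */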
	for (i = 0; i < 4; i++) {
		if (sel->prefixlen_s != 32 * i)
			addrs->smask.m6[i] =
				word_to_mask(sel->prefixlen_s - 32 * i);
		addrs->saddr.a6[i] &= addrs->smask.m6[i];

		if (sel->prefixlen_d != 32 * i)
			addrs->dmask.m6[i] =
				word_to_mask(sel->prefixlen_d - 32 * i);
		addrs->daddr.a6[i] &= addrs->dmask.m6[i];
	}
}

static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
				      struct xfrm_policy *x,
				      struct netlink_ext_ack *extack)
{
	struct xfrm_selector *sel = &x->selector;

	if (x->type != XFRM_POLICY_TYPE_MAIN) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
		return -EINVAL;
	}

	/* Please pay attention that we support only one template */
	if (x->xfrm_nr > 1) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template");
		return -EINVAL;
	}

	if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN &&
	    x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy");
		return -EINVAL;
	}

	if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
	    addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
		return -EINVAL;
	}

	if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
		return -EINVAL;
	}

	if (x->selector.proto != IPPROTO_IP &&
	    x->selector.proto != IPPROTO_UDP &&
	    x->selector.proto != IPPROTO_TCP) {
		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
		return -EINVAL;
	}

	if (x->priority) {
		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
			NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
			return -EINVAL;
		}

		if (x->priority == U32_MAX) {
			NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
			return -EINVAL;
		}
	}

	if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET &&
	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
		NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
		return -EINVAL;
	}

	return 0;
}

static void
mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
				  struct mlx5_accel_pol_xfrm_attrs *attrs)
{
	struct xfrm_policy *x = pol_entry->x;
	struct xfrm_selector *sel;

	sel = &x->selector;
	memset(attrs, 0, sizeof(*attrs));

	memcpy(&attrs->addrs.saddr, sel->saddr.a6, sizeof(attrs->addrs.saddr));
	memcpy(&attrs->addrs.daddr, sel->daddr.a6, sizeof(attrs->addrs.daddr));
	attrs->addrs.family = sel->family;
	mlx5e_ipsec_policy_mask(&attrs->addrs, sel);
	attrs->dir = x->xdo.dir;
	attrs->action = x->action;
	attrs->type = XFRM_DEV_OFFLOAD_PACKET;
	attrs->reqid = x->xfrm_vec[0].reqid;
	attrs->upspec.dport = ntohs(sel->dport);
	attrs->upspec.dport_mask = ntohs(sel->dport_mask);
	attrs->upspec.sport = ntohs(sel->sport);
	attrs->upspec.sport_mask = ntohs(sel->sport_mask);
	attrs->upspec.proto = sel->proto;
	attrs->prio = x->priority;
}

static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
				 struct netlink_ext_ack *extack)
{
	struct net_device *netdev = x->xdo.dev;
	struct mlx5e_ipsec_pol_entry *pol_entry;
	struct mlx5e_priv *priv;
	int err;

	priv = netdev_priv(netdev);
	if (!priv->ipsec) {
		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload");
		return -EOPNOTSUPP;
	}

	err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
	if (err)
		return err;

	pol_entry = kzalloc_obj(*pol_entry);
	if (!pol_entry)
		return -ENOMEM;

	pol_entry->x = x;
	pol_entry->ipsec = priv->ipsec;

	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
		err = -EBUSY;
		goto ipsec_busy;
	}

	mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
	err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
	if (err)
		goto err_fs;

	x->xdo.offload_handle = (unsigned long)pol_entry;
	return 0;

err_fs:
	mlx5_eswitch_unblock_ipsec(priv->mdev);
ipsec_busy:
	kfree(pol_entry);
	NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
	return err;
}

static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
{
	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);

	mlx5e_accel_ipsec_fs_del_pol(pol_entry);
	mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev);
}

static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
{
	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);

	kfree(pol_entry);
}

static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
	.xdo_dev_state_add = mlx5e_xfrm_add_state,
	.xdo_dev_state_delete = mlx5e_xfrm_del_state,
	.xdo_dev_state_free = mlx5e_xfrm_free_state,
	.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,

	.xdo_dev_state_update_stats = mlx5e_xfrm_update_stats,
	.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
	.xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
	.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
};

void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct net_device *netdev = priv->netdev;

	if (!mlx5_ipsec_device_caps(mdev))
		return;

	mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");

	netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
	netdev->features |= NETIF_F_HW_ESP;
	netdev->hw_enc_features |= NETIF_F_HW_ESP;

	if (!MLX5_CAP_ETH(mdev, swp_csum)) {
		mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
		return;
	}

	netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
	netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;

	if (!MLX5_CAP_ETH(mdev, swp_lso)) {
		mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
		return;
	}

	netdev->gso_partial_features |= NETIF_F_GSO_ESP;
	mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
	netdev->features |= NETIF_F_GSO_ESP;
	netdev->hw_features |= NETIF_F_GSO_ESP;
	netdev->hw_enc_features |= NETIF_F_GSO_ESP;
}