1 /*
2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
34 #include <crypto/internal/geniv.h>
35 #include <crypto/aead.h>
36 #include <linux/inetdevice.h>
37 #include <linux/netdevice.h>
38 #include <net/netevent.h>
39
40 #include "en.h"
41 #include "eswitch.h"
42 #include "ipsec.h"
43 #include "ipsec_rxtx.h"
44 #include "en_rep.h"
45
46 #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
47 #define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
48
to_ipsec_sa_entry(struct xfrm_state * x)49 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
50 {
51 return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
52 }
53
to_ipsec_pol_entry(struct xfrm_policy * x)54 static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
55 {
56 return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
57 }
58
mlx5e_ipsec_handle_sw_limits(struct work_struct * _work)59 static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work)
60 {
61 struct mlx5e_ipsec_dwork *dwork =
62 container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
63 struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
64 struct xfrm_state *x = sa_entry->x;
65
66 if (sa_entry->attrs.drop)
67 return;
68
69 spin_lock_bh(&x->lock);
70 if (x->km.state == XFRM_STATE_EXPIRED) {
71 sa_entry->attrs.drop = true;
72 spin_unlock_bh(&x->lock);
73
74 mlx5e_accel_ipsec_fs_modify(sa_entry);
75 return;
76 }
77
78 if (x->km.state != XFRM_STATE_VALID) {
79 spin_unlock_bh(&x->lock);
80 return;
81 }
82
83 xfrm_state_check_expire(x);
84 spin_unlock_bh(&x->lock);
85
86 queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
87 MLX5_IPSEC_RESCHED);
88 }
89
mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry * sa_entry)90 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
91 {
92 struct xfrm_state *x = sa_entry->x;
93 u32 seq_bottom = 0;
94 u32 esn, esn_msb;
95 u8 overlap;
96
97 switch (x->xso.dir) {
98 case XFRM_DEV_OFFLOAD_IN:
99 esn = x->replay_esn->seq;
100 esn_msb = x->replay_esn->seq_hi;
101 break;
102 case XFRM_DEV_OFFLOAD_OUT:
103 esn = x->replay_esn->oseq;
104 esn_msb = x->replay_esn->oseq_hi;
105 break;
106 default:
107 WARN_ON(true);
108 return false;
109 }
110
111 overlap = sa_entry->esn_state.overlap;
112
113 if (!x->replay_esn->replay_window) {
114 seq_bottom = esn;
115 } else {
116 if (esn >= x->replay_esn->replay_window)
117 seq_bottom = esn - x->replay_esn->replay_window + 1;
118
119 if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
120 esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
121 }
122
123 if (sa_entry->esn_state.esn_msb)
124 sa_entry->esn_state.esn = esn;
125 else
126 /* According to RFC4303, section "3.3.3. Sequence Number Generation",
127 * the first packet sent using a given SA will contain a sequence
128 * number of 1.
129 */
130 sa_entry->esn_state.esn = max_t(u32, esn, 1);
131 sa_entry->esn_state.esn_msb = esn_msb;
132
133 if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
134 sa_entry->esn_state.overlap = 0;
135 return true;
136 } else if (unlikely(!overlap &&
137 (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
138 sa_entry->esn_state.overlap = 1;
139 return true;
140 }
141
142 return false;
143 }
144
mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry * sa_entry,struct mlx5_accel_esp_xfrm_attrs * attrs)145 static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
146 struct mlx5_accel_esp_xfrm_attrs *attrs)
147 {
148 struct xfrm_state *x = sa_entry->x;
149 s64 start_value, n;
150
151 attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
152 attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
153 if (x->lft.soft_packet_limit == XFRM_INF)
154 return;
155
156 /* Compute hard limit initial value and number of rounds.
157 *
158 * The counting pattern of hardware counter goes:
159 * value -> 2^31-1
160 * 2^31 | (2^31-1) -> 2^31-1
161 * 2^31 | (2^31-1) -> 2^31-1
162 * [..]
163 * 2^31 | (2^31-1) -> 0
164 *
165 * The pattern is created by using an ASO operation to atomically set
166 * bit 31 after the down counter clears bit 31. This is effectively an
167 * atomic addition of 2**31 to the counter.
168 *
169 * We wish to configure the counter, within the above pattern, so that
170 * when it reaches 0, it has hit the hard limit. This is defined by this
171 * system of equations:
172 *
173 * hard_limit == start_value + n * 2^31
174 * n >= 0
175 * start_value < 2^32, start_value >= 0
176 *
177 * These equations are not single-solution, there are often two choices:
178 * hard_limit == start_value + n * 2^31
179 * hard_limit == (start_value+2^31) + (n-1) * 2^31
180 *
181 * The algorithm selects the solution that keeps the counter value
182 * above 2^31 until the final iteration.
183 */
184
185 /* Start by estimating n and compute start_value */
186 n = attrs->lft.hard_packet_limit / BIT_ULL(31);
187 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
188
189 /* Choose the best of the two solutions: */
190 if (n >= 1)
191 n -= 1;
192
193 /* Computed values solve the system of equations: */
194 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
195
196 /* The best solution means: when there are multiple iterations we must
197 * start above 2^31 and count down to 2**31 to get the interrupt.
198 */
199 attrs->lft.hard_packet_limit = lower_32_bits(start_value);
200 attrs->lft.numb_rounds_hard = (u64)n;
201
202 /* Compute soft limit initial value and number of rounds.
203 *
204 * The soft_limit is achieved by adjusting the counter's
205 * interrupt_value. This is embedded in the counting pattern created by
206 * hard packet calculations above.
207 *
208 * We wish to compute the interrupt_value for the soft_limit. This is
209 * defined by this system of equations:
210 *
211 * soft_limit == start_value - soft_value + n * 2^31
212 * n >= 0
213 * soft_value < 2^32, soft_value >= 0
214 * for n == 0 start_value > soft_value
215 *
216 * As with compute_hard_n_value() the equations are not single-solution.
217 * The algorithm selects the solution that has:
218 * 2^30 <= soft_limit < 2^31 + 2^30
219 * for the interior iterations, which guarantees a large guard band
220 * around the counter hard limit and next interrupt.
221 */
222
223 /* Start by estimating n and compute soft_value */
224 n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
225 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
226 x->lft.soft_packet_limit;
227
228 /* Compare against constraints and adjust n */
229 if (n < 0)
230 n = 0;
231 else if (start_value >= BIT_ULL(32))
232 n -= 1;
233 else if (start_value < 0)
234 n += 1;
235
236 /* Choose the best of the two solutions: */
237 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
238 if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
239 n += 1;
240
241 /* Note that the upper limit of soft_value happens naturally because we
242 * always select the lowest soft_value.
243 */
244
245 /* Computed values solve the system of equations: */
246 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
247
248 /* The best solution means: when there are multiple iterations we must
249 * not fall below 2^30 as that would get too close to the false
250 * hard_limit and when we reach an interior iteration for soft_limit it
251 * has to be far away from 2**32-1 which is the counter reset point
252 * after the +2^31 to accommodate latency.
253 */
254 attrs->lft.soft_packet_limit = lower_32_bits(start_value);
255 attrs->lft.numb_rounds_soft = (u64)n;
256 }
257
mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry * sa_entry,struct mlx5_accel_esp_xfrm_attrs * attrs)258 static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
259 struct mlx5_accel_esp_xfrm_attrs *attrs)
260 {
261 struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
262 struct xfrm_state *x = sa_entry->x;
263 struct net_device *netdev;
264 struct neighbour *n;
265 u8 addr[ETH_ALEN];
266 const void *pkey;
267 u8 *dst, *src;
268
269 if (attrs->mode != XFRM_MODE_TUNNEL ||
270 attrs->type != XFRM_DEV_OFFLOAD_PACKET)
271 return;
272
273 netdev = x->xso.real_dev;
274
275 mlx5_query_mac_address(mdev, addr);
276 switch (attrs->dir) {
277 case XFRM_DEV_OFFLOAD_IN:
278 src = attrs->dmac;
279 dst = attrs->smac;
280 pkey = &attrs->addrs.saddr.a4;
281 break;
282 case XFRM_DEV_OFFLOAD_OUT:
283 src = attrs->smac;
284 dst = attrs->dmac;
285 pkey = &attrs->addrs.daddr.a4;
286 break;
287 default:
288 return;
289 }
290
291 ether_addr_copy(src, addr);
292 n = neigh_lookup(&arp_tbl, pkey, netdev);
293 if (!n) {
294 n = neigh_create(&arp_tbl, pkey, netdev);
295 if (IS_ERR(n))
296 return;
297 neigh_event_send(n, NULL);
298 attrs->drop = true;
299 } else {
300 neigh_ha_snapshot(addr, n, netdev);
301 ether_addr_copy(dst, addr);
302 }
303 neigh_release(n);
304 }
305
mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr * addrs)306 static void mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr *addrs)
307 {
308 /*
309 * State doesn't have subnet prefixes in outer headers.
310 * The match is performed for exaxt source/destination addresses.
311 */
312 memset(addrs->smask.m6, 0xFF, sizeof(__be32) * 4);
313 memset(addrs->dmask.m6, 0xFF, sizeof(__be32) * 4);
314 }
315
mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry * sa_entry,struct mlx5_accel_esp_xfrm_attrs * attrs)316 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
317 struct mlx5_accel_esp_xfrm_attrs *attrs)
318 {
319 struct xfrm_state *x = sa_entry->x;
320 struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
321 struct aead_geniv_ctx *geniv_ctx;
322 struct crypto_aead *aead;
323 unsigned int crypto_data_len, key_len;
324 int ivsize;
325
326 memset(attrs, 0, sizeof(*attrs));
327
328 /* key */
329 crypto_data_len = (x->aead->alg_key_len + 7) / 8;
330 key_len = crypto_data_len - 4; /* 4 bytes salt at end */
331
332 memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
333 aes_gcm->key_len = key_len * 8;
334
335 /* salt and seq_iv */
336 aead = x->data;
337 geniv_ctx = crypto_aead_ctx(aead);
338 ivsize = crypto_aead_ivsize(aead);
339 memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
340 memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
341 sizeof(aes_gcm->salt));
342
343 attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */
344
345 /* iv len */
346 aes_gcm->icv_len = x->aead->alg_icv_len;
347
348 attrs->dir = x->xso.dir;
349
350 /* esn */
351 if (x->props.flags & XFRM_STATE_ESN) {
352 attrs->replay_esn.trigger = true;
353 attrs->replay_esn.esn = sa_entry->esn_state.esn;
354 attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
355 attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
356 if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
357 goto skip_replay_window;
358
359 switch (x->replay_esn->replay_window) {
360 case 32:
361 attrs->replay_esn.replay_window =
362 MLX5_IPSEC_ASO_REPLAY_WIN_32BIT;
363 break;
364 case 64:
365 attrs->replay_esn.replay_window =
366 MLX5_IPSEC_ASO_REPLAY_WIN_64BIT;
367 break;
368 case 128:
369 attrs->replay_esn.replay_window =
370 MLX5_IPSEC_ASO_REPLAY_WIN_128BIT;
371 break;
372 case 256:
373 attrs->replay_esn.replay_window =
374 MLX5_IPSEC_ASO_REPLAY_WIN_256BIT;
375 break;
376 default:
377 WARN_ON(true);
378 return;
379 }
380 }
381
382 skip_replay_window:
383 /* spi */
384 attrs->spi = be32_to_cpu(x->id.spi);
385
386 /* source , destination ips */
387 memcpy(&attrs->addrs.saddr, x->props.saddr.a6,
388 sizeof(attrs->addrs.saddr));
389 memcpy(&attrs->addrs.daddr, x->id.daddr.a6, sizeof(attrs->addrs.daddr));
390 attrs->addrs.family = x->props.family;
391 mlx5e_ipsec_state_mask(&attrs->addrs);
392 attrs->type = x->xso.type;
393 attrs->reqid = x->props.reqid;
394 attrs->upspec.dport = ntohs(x->sel.dport);
395 attrs->upspec.dport_mask = ntohs(x->sel.dport_mask);
396 attrs->upspec.sport = ntohs(x->sel.sport);
397 attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
398 attrs->upspec.proto = x->sel.proto;
399 attrs->mode = x->props.mode;
400
401 mlx5e_ipsec_init_limits(sa_entry, attrs);
402 mlx5e_ipsec_init_macs(sa_entry, attrs);
403
404 if (x->encap) {
405 attrs->encap = true;
406 attrs->sport = x->encap->encap_sport;
407 attrs->dport = x->encap->encap_dport;
408 }
409 }
410
mlx5e_xfrm_validate_state(struct mlx5_core_dev * mdev,struct xfrm_state * x,struct netlink_ext_ack * extack)411 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
412 struct xfrm_state *x,
413 struct netlink_ext_ack *extack)
414 {
415 if (x->props.aalgo != SADB_AALG_NONE) {
416 NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states");
417 return -EINVAL;
418 }
419 if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
420 NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded");
421 return -EINVAL;
422 }
423 if (x->props.calgo != SADB_X_CALG_NONE) {
424 NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states");
425 return -EINVAL;
426 }
427 if (x->props.flags & XFRM_STATE_ESN &&
428 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) {
429 NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states");
430 return -EINVAL;
431 }
432 if (x->props.family != AF_INET &&
433 x->props.family != AF_INET6) {
434 NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded");
435 return -EINVAL;
436 }
437 if (x->id.proto != IPPROTO_ESP) {
438 NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded");
439 return -EINVAL;
440 }
441 if (x->encap) {
442 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) {
443 NL_SET_ERR_MSG_MOD(extack,
444 "Encapsulation is not supported");
445 return -EINVAL;
446 }
447
448 if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
449 NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported");
450 return -EINVAL;
451 }
452
453 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) {
454 NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only");
455 return -EINVAL;
456 }
457
458 if (x->props.mode != XFRM_MODE_TRANSPORT) {
459 NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only");
460 return -EINVAL;
461 }
462 }
463 if (!x->aead) {
464 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead");
465 return -EINVAL;
466 }
467 if (x->aead->alg_icv_len != 128) {
468 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit");
469 return -EINVAL;
470 }
471 if ((x->aead->alg_key_len != 128 + 32) &&
472 (x->aead->alg_key_len != 256 + 32)) {
473 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit");
474 return -EINVAL;
475 }
476 if (x->tfcpad) {
477 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding");
478 return -EINVAL;
479 }
480 if (!x->geniv) {
481 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv");
482 return -EINVAL;
483 }
484 if (strcmp(x->geniv, "seqiv")) {
485 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv");
486 return -EINVAL;
487 }
488
489 if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP &&
490 x->sel.proto != IPPROTO_TCP) {
491 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
492 return -EINVAL;
493 }
494
495 if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
496 NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
497 return -EINVAL;
498 }
499
500 switch (x->xso.type) {
501 case XFRM_DEV_OFFLOAD_CRYPTO:
502 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
503 NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported");
504 return -EINVAL;
505 }
506
507 break;
508 case XFRM_DEV_OFFLOAD_PACKET:
509 if (!(mlx5_ipsec_device_caps(mdev) &
510 MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
511 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
512 return -EINVAL;
513 }
514
515 if (x->props.mode == XFRM_MODE_TUNNEL &&
516 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
517 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
518 return -EINVAL;
519 }
520
521 if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN &&
522 x->replay_esn->replay_window != 32 &&
523 x->replay_esn->replay_window != 64 &&
524 x->replay_esn->replay_window != 128 &&
525 x->replay_esn->replay_window != 256) {
526 NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size");
527 return -EINVAL;
528 }
529
530 if (!x->props.reqid) {
531 NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid");
532 return -EINVAL;
533 }
534
535 if (x->lft.soft_byte_limit >= x->lft.hard_byte_limit &&
536 x->lft.hard_byte_limit != XFRM_INF) {
537 /* XFRM stack doesn't prevent such configuration :(. */
538 NL_SET_ERR_MSG_MOD(extack, "Hard byte limit must be greater than soft one");
539 return -EINVAL;
540 }
541
542 if (!x->lft.soft_byte_limit || !x->lft.hard_byte_limit) {
543 NL_SET_ERR_MSG_MOD(extack, "Soft/hard byte limits can't be 0");
544 return -EINVAL;
545 }
546
547 if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit &&
548 x->lft.hard_packet_limit != XFRM_INF) {
549 /* XFRM stack doesn't prevent such configuration :(. */
550 NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
551 return -EINVAL;
552 }
553
554 if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
555 NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
556 return -EINVAL;
557 }
558 break;
559 default:
560 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
561 return -EINVAL;
562 }
563 return 0;
564 }
565
mlx5e_ipsec_modify_state(struct work_struct * _work)566 static void mlx5e_ipsec_modify_state(struct work_struct *_work)
567 {
568 struct mlx5e_ipsec_work *work =
569 container_of(_work, struct mlx5e_ipsec_work, work);
570 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
571 struct mlx5_accel_esp_xfrm_attrs *attrs;
572
573 attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;
574
575 mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
576 }
577
mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry * sa_entry)578 static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
579 {
580 struct xfrm_state *x = sa_entry->x;
581
582 if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
583 x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
584 return;
585
586 if (x->props.flags & XFRM_STATE_ESN) {
587 sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
588 return;
589 }
590
591 sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
592 }
593
mlx5e_ipsec_handle_netdev_event(struct work_struct * _work)594 static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
595 {
596 struct mlx5e_ipsec_work *work =
597 container_of(_work, struct mlx5e_ipsec_work, work);
598 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
599 struct mlx5e_ipsec_netevent_data *data = work->data;
600 struct mlx5_accel_esp_xfrm_attrs *attrs;
601
602 attrs = &sa_entry->attrs;
603
604 switch (attrs->dir) {
605 case XFRM_DEV_OFFLOAD_IN:
606 ether_addr_copy(attrs->smac, data->addr);
607 break;
608 case XFRM_DEV_OFFLOAD_OUT:
609 ether_addr_copy(attrs->dmac, data->addr);
610 break;
611 default:
612 WARN_ON_ONCE(true);
613 }
614 attrs->drop = false;
615 mlx5e_accel_ipsec_fs_modify(sa_entry);
616 }
617
mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry * sa_entry)618 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
619 {
620 struct xfrm_state *x = sa_entry->x;
621 struct mlx5e_ipsec_work *work;
622 void *data = NULL;
623
624 switch (x->xso.type) {
625 case XFRM_DEV_OFFLOAD_CRYPTO:
626 if (!(x->props.flags & XFRM_STATE_ESN))
627 return 0;
628 break;
629 case XFRM_DEV_OFFLOAD_PACKET:
630 if (x->props.mode != XFRM_MODE_TUNNEL)
631 return 0;
632 break;
633 default:
634 break;
635 }
636
637 work = kzalloc(sizeof(*work), GFP_KERNEL);
638 if (!work)
639 return -ENOMEM;
640
641 switch (x->xso.type) {
642 case XFRM_DEV_OFFLOAD_CRYPTO:
643 data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
644 if (!data)
645 goto free_work;
646
647 INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
648 break;
649 case XFRM_DEV_OFFLOAD_PACKET:
650 data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
651 GFP_KERNEL);
652 if (!data)
653 goto free_work;
654
655 INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
656 break;
657 default:
658 break;
659 }
660
661 work->data = data;
662 work->sa_entry = sa_entry;
663 sa_entry->work = work;
664 return 0;
665
666 free_work:
667 kfree(work);
668 return -ENOMEM;
669 }
670
mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry * sa_entry)671 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
672 {
673 struct xfrm_state *x = sa_entry->x;
674 struct mlx5e_ipsec_dwork *dwork;
675
676 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
677 return 0;
678
679 if (x->lft.soft_packet_limit == XFRM_INF &&
680 x->lft.hard_packet_limit == XFRM_INF &&
681 x->lft.soft_byte_limit == XFRM_INF &&
682 x->lft.hard_byte_limit == XFRM_INF)
683 return 0;
684
685 dwork = kzalloc(sizeof(*dwork), GFP_KERNEL);
686 if (!dwork)
687 return -ENOMEM;
688
689 dwork->sa_entry = sa_entry;
690 INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_sw_limits);
691 sa_entry->dwork = dwork;
692 return 0;
693 }
694
mlx5e_xfrm_add_state(struct xfrm_state * x,struct netlink_ext_ack * extack)695 static int mlx5e_xfrm_add_state(struct xfrm_state *x,
696 struct netlink_ext_ack *extack)
697 {
698 struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
699 struct net_device *netdev = x->xso.real_dev;
700 struct mlx5e_ipsec *ipsec;
701 struct mlx5e_priv *priv;
702 gfp_t gfp;
703 int err;
704
705 priv = netdev_priv(netdev);
706 if (!priv->ipsec)
707 return -EOPNOTSUPP;
708
709 ipsec = priv->ipsec;
710 gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
711 sa_entry = kzalloc(sizeof(*sa_entry), gfp);
712 if (!sa_entry)
713 return -ENOMEM;
714
715 sa_entry->x = x;
716 sa_entry->ipsec = ipsec;
717 /* Check if this SA is originated from acquire flow temporary SA */
718 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
719 goto out;
720
721 err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
722 if (err)
723 goto err_xfrm;
724
725 if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
726 err = -EBUSY;
727 goto err_xfrm;
728 }
729
730 /* check esn */
731 if (x->props.flags & XFRM_STATE_ESN)
732 mlx5e_ipsec_update_esn_state(sa_entry);
733 else
734 /* According to RFC4303, section "3.3.3. Sequence Number Generation",
735 * the first packet sent using a given SA will contain a sequence
736 * number of 1.
737 */
738 sa_entry->esn_state.esn = 1;
739
740 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
741
742 err = mlx5_ipsec_create_work(sa_entry);
743 if (err)
744 goto unblock_ipsec;
745
746 err = mlx5e_ipsec_create_dwork(sa_entry);
747 if (err)
748 goto release_work;
749
750 /* create hw context */
751 err = mlx5_ipsec_create_sa_ctx(sa_entry);
752 if (err)
753 goto release_dwork;
754
755 err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
756 if (err)
757 goto err_hw_ctx;
758
759 if (x->props.mode == XFRM_MODE_TUNNEL &&
760 x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
761 !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
762 NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
763 err = -EINVAL;
764 goto err_add_rule;
765 }
766
767 /* We use *_bh() variant because xfrm_timer_handler(), which runs
768 * in softirq context, can reach our state delete logic and we need
769 * xa_erase_bh() there.
770 */
771 err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry,
772 GFP_KERNEL);
773 if (err)
774 goto err_add_rule;
775
776 mlx5e_ipsec_set_esn_ops(sa_entry);
777
778 if (sa_entry->dwork)
779 queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
780 MLX5_IPSEC_RESCHED);
781
782 if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
783 x->props.mode == XFRM_MODE_TUNNEL) {
784 xa_lock_bh(&ipsec->sadb);
785 __xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
786 MLX5E_IPSEC_TUNNEL_SA);
787 xa_unlock_bh(&ipsec->sadb);
788 }
789
790 out:
791 x->xso.offload_handle = (unsigned long)sa_entry;
792 return 0;
793
794 err_add_rule:
795 mlx5e_accel_ipsec_fs_del_rule(sa_entry);
796 err_hw_ctx:
797 mlx5_ipsec_free_sa_ctx(sa_entry);
798 release_dwork:
799 kfree(sa_entry->dwork);
800 release_work:
801 if (sa_entry->work)
802 kfree(sa_entry->work->data);
803 kfree(sa_entry->work);
804 unblock_ipsec:
805 mlx5_eswitch_unblock_ipsec(priv->mdev);
806 err_xfrm:
807 kfree(sa_entry);
808 NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
809 return err;
810 }
811
mlx5e_xfrm_del_state(struct xfrm_state * x)812 static void mlx5e_xfrm_del_state(struct xfrm_state *x)
813 {
814 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
815 struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
816 struct mlx5e_ipsec_sa_entry *old;
817
818 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
819 return;
820
821 old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
822 WARN_ON(old != sa_entry);
823 }
824
mlx5e_xfrm_free_state(struct xfrm_state * x)825 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
826 {
827 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
828 struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
829
830 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
831 goto sa_entry_free;
832
833 if (sa_entry->work)
834 cancel_work_sync(&sa_entry->work->work);
835
836 if (sa_entry->dwork)
837 cancel_delayed_work_sync(&sa_entry->dwork->dwork);
838
839 mlx5e_accel_ipsec_fs_del_rule(sa_entry);
840 mlx5_ipsec_free_sa_ctx(sa_entry);
841 kfree(sa_entry->dwork);
842 if (sa_entry->work)
843 kfree(sa_entry->work->data);
844 kfree(sa_entry->work);
845 mlx5_eswitch_unblock_ipsec(ipsec->mdev);
846 sa_entry_free:
847 kfree(sa_entry);
848 }
849
mlx5e_ipsec_netevent_event(struct notifier_block * nb,unsigned long event,void * ptr)850 static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
851 unsigned long event, void *ptr)
852 {
853 struct mlx5_accel_esp_xfrm_attrs *attrs;
854 struct mlx5e_ipsec_netevent_data *data;
855 struct mlx5e_ipsec_sa_entry *sa_entry;
856 struct mlx5e_ipsec *ipsec;
857 struct neighbour *n = ptr;
858 struct net_device *netdev;
859 struct xfrm_state *x;
860 unsigned long idx;
861
862 if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
863 return NOTIFY_DONE;
864
865 ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
866 xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
867 attrs = &sa_entry->attrs;
868
869 if (attrs->addrs.family == AF_INET) {
870 if (!neigh_key_eq32(n, &attrs->addrs.saddr.a4) &&
871 !neigh_key_eq32(n, &attrs->addrs.daddr.a4))
872 continue;
873 } else {
874 if (!neigh_key_eq128(n, &attrs->addrs.saddr.a4) &&
875 !neigh_key_eq128(n, &attrs->addrs.daddr.a4))
876 continue;
877 }
878
879 x = sa_entry->x;
880 netdev = x->xso.real_dev;
881 data = sa_entry->work->data;
882
883 neigh_ha_snapshot(data->addr, n, netdev);
884 queue_work(ipsec->wq, &sa_entry->work->work);
885 }
886
887 return NOTIFY_DONE;
888 }
889
mlx5e_ipsec_init(struct mlx5e_priv * priv)890 void mlx5e_ipsec_init(struct mlx5e_priv *priv)
891 {
892 struct mlx5e_ipsec *ipsec;
893 int ret = -ENOMEM;
894
895 if (!mlx5_ipsec_device_caps(priv->mdev)) {
896 netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
897 return;
898 }
899
900 ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
901 if (!ipsec)
902 return;
903
904 xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
905 ipsec->mdev = priv->mdev;
906 init_completion(&ipsec->comp);
907 ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
908 priv->netdev->name);
909 if (!ipsec->wq)
910 goto err_wq;
911
912 if (mlx5_ipsec_device_caps(priv->mdev) &
913 MLX5_IPSEC_CAP_PACKET_OFFLOAD) {
914 ret = mlx5e_ipsec_aso_init(ipsec);
915 if (ret)
916 goto err_aso;
917 }
918
919 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
920 ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
921 ret = register_netevent_notifier(&ipsec->netevent_nb);
922 if (ret)
923 goto clear_aso;
924 }
925
926 ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv);
927 ret = mlx5e_accel_ipsec_fs_init(ipsec, &priv->devcom);
928 if (ret)
929 goto err_fs_init;
930
931 ipsec->fs = priv->fs;
932 priv->ipsec = ipsec;
933 netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
934 return;
935
936 err_fs_init:
937 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
938 unregister_netevent_notifier(&ipsec->netevent_nb);
939 clear_aso:
940 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
941 mlx5e_ipsec_aso_cleanup(ipsec);
942 err_aso:
943 destroy_workqueue(ipsec->wq);
944 err_wq:
945 kfree(ipsec);
946 mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret);
947 return;
948 }
949
mlx5e_ipsec_cleanup(struct mlx5e_priv * priv)950 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
951 {
952 struct mlx5e_ipsec *ipsec = priv->ipsec;
953
954 if (!ipsec)
955 return;
956
957 mlx5e_accel_ipsec_fs_cleanup(ipsec);
958 if (ipsec->netevent_nb.notifier_call) {
959 unregister_netevent_notifier(&ipsec->netevent_nb);
960 ipsec->netevent_nb.notifier_call = NULL;
961 }
962 if (ipsec->aso)
963 mlx5e_ipsec_aso_cleanup(ipsec);
964 destroy_workqueue(ipsec->wq);
965 kfree(ipsec);
966 priv->ipsec = NULL;
967 }
968
mlx5e_xfrm_advance_esn_state(struct xfrm_state * x)969 static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
970 {
971 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
972 struct mlx5e_ipsec_work *work = sa_entry->work;
973 struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
974 bool need_update;
975
976 need_update = mlx5e_ipsec_update_esn_state(sa_entry);
977 if (!need_update)
978 return;
979
980 sa_entry_shadow = work->data;
981 memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
982 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
983 queue_work(sa_entry->ipsec->wq, &work->work);
984 }
985
/* xfrmdev_ops callback: fold the hardware flow counters of an offloaded SA
 * into the xfrm state's statistics (x->stats), its lifetime counters
 * (x->curlft) and the XFRM MIB counters of the netns of x->xso.dev.
 *
 * The caller must hold x->lock, or the xfrm_cfg_mutex or xfrm_state_lock of
 * the netns that x->xso.real_dev belongs to; this is checked by the
 * lockdep_assert() below.
 *
 * NOTE(review): every reading is accumulated with "+=", which presumably
 * relies on mlx5_fc_query_cached() reporting the increment since the
 * previous query rather than an absolute value - confirm against the flow
 * counter API.
 */
static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
{
	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
	struct net *net = dev_net(x->xso.dev);
	/* Failure counters stay 0 unless the inbound branches below fill them */
	u64 trailer_packets = 0, trailer_bytes = 0;
	u64 replay_packets = 0, replay_bytes = 0;
	u64 auth_packets = 0, auth_bytes = 0;
	u64 success_packets, success_bytes;
	u64 packets, bytes, lastuse;
	size_t headers;

	lockdep_assert(lockdep_is_held(&x->lock) ||
		       lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) ||
		       lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock));

	/* States flagged XFRM_DEV_OFFLOAD_FLAG_ACQ are not accounted at all */
	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
		return;

	/* Inbound SAs: authentication and trailer failures are collected for
	 * every offload type, i.e. before the packet-offload check below.
	 */
	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
		mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes,
				     &auth_packets, &lastuse);
		x->stats.integrity_failed += auth_packets;
		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets);

		mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes,
				     &trailer_packets, &lastuse);
		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets);
	}

	/* The remaining counters are only collected for packet offload */
	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
		return;

	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
		mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes,
				     &replay_packets, &lastuse);
		x->stats.replay += replay_packets;
		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets);
	}

	/* Lifetime accounting excludes the failures counted above; for
	 * outbound SAs all three failure counters are still 0 here.
	 */
	mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
	success_packets = packets - auth_packets - trailer_packets - replay_packets;
	x->curlft.packets += success_packets;
	/* NIC counts all bytes passed through flow steering and doesn't have
	 * an ability to count payload data size which is needed for SA.
	 *
	 * To overcome this HW limitation, let's approximate the payload size
	 * by removing always available headers.
	 */
	headers = sizeof(struct ethhdr);
	if (sa_entry->attrs.addrs.family == AF_INET)
		headers += sizeof(struct iphdr);
	else
		headers += sizeof(struct ipv6hdr);

	success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;
	/* One Ethernet + IP header is subtracted per successfully handled packet */
	x->curlft.bytes += success_bytes - headers * success_packets;
}
1044
word_to_mask(int prefix)1045 static __be32 word_to_mask(int prefix)
1046 {
1047 if (prefix < 0)
1048 return 0;
1049
1050 if (!prefix || prefix > 31)
1051 return cpu_to_be32(0xFFFFFFFF);
1052
1053 return cpu_to_be32(((1U << prefix) - 1) << (32 - prefix));
1054 }
1055
mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr * addrs,struct xfrm_selector * sel)1056 static void mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr *addrs,
1057 struct xfrm_selector *sel)
1058 {
1059 int i;
1060
1061 if (addrs->family == AF_INET) {
1062 addrs->smask.m4 = word_to_mask(sel->prefixlen_s);
1063 addrs->saddr.a4 &= addrs->smask.m4;
1064 addrs->dmask.m4 = word_to_mask(sel->prefixlen_d);
1065 addrs->daddr.a4 &= addrs->dmask.m4;
1066 return;
1067 }
1068
1069 for (i = 0; i < 4; i++) {
1070 if (sel->prefixlen_s != 32 * i)
1071 addrs->smask.m6[i] =
1072 word_to_mask(sel->prefixlen_s - 32 * i);
1073 addrs->saddr.a6[i] &= addrs->smask.m6[i];
1074
1075 if (sel->prefixlen_d != 32 * i)
1076 addrs->dmask.m6[i] =
1077 word_to_mask(sel->prefixlen_d - 32 * i);
1078 addrs->daddr.a6[i] &= addrs->dmask.m6[i];
1079 }
1080 }
1081
mlx5e_xfrm_validate_policy(struct mlx5_core_dev * mdev,struct xfrm_policy * x,struct netlink_ext_ack * extack)1082 static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
1083 struct xfrm_policy *x,
1084 struct netlink_ext_ack *extack)
1085 {
1086 struct xfrm_selector *sel = &x->selector;
1087
1088 if (x->type != XFRM_POLICY_TYPE_MAIN) {
1089 NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
1090 return -EINVAL;
1091 }
1092
1093 /* Please pay attention that we support only one template */
1094 if (x->xfrm_nr > 1) {
1095 NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template");
1096 return -EINVAL;
1097 }
1098
1099 if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN &&
1100 x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) {
1101 NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy");
1102 return -EINVAL;
1103 }
1104
1105 if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
1106 addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
1107 NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
1108 return -EINVAL;
1109 }
1110
1111 if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) {
1112 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
1113 return -EINVAL;
1114 }
1115
1116 if (x->selector.proto != IPPROTO_IP &&
1117 x->selector.proto != IPPROTO_UDP &&
1118 x->selector.proto != IPPROTO_TCP) {
1119 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
1120 return -EINVAL;
1121 }
1122
1123 if (x->priority) {
1124 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
1125 NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
1126 return -EINVAL;
1127 }
1128
1129 if (x->priority == U32_MAX) {
1130 NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
1131 return -EINVAL;
1132 }
1133 }
1134
1135 if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET &&
1136 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
1137 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
1138 return -EINVAL;
1139 }
1140
1141 return 0;
1142 }
1143
1144 static void
mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry * pol_entry,struct mlx5_accel_pol_xfrm_attrs * attrs)1145 mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
1146 struct mlx5_accel_pol_xfrm_attrs *attrs)
1147 {
1148 struct xfrm_policy *x = pol_entry->x;
1149 struct xfrm_selector *sel;
1150
1151 sel = &x->selector;
1152 memset(attrs, 0, sizeof(*attrs));
1153
1154 memcpy(&attrs->addrs.saddr, sel->saddr.a6, sizeof(attrs->addrs.saddr));
1155 memcpy(&attrs->addrs.daddr, sel->daddr.a6, sizeof(attrs->addrs.daddr));
1156 attrs->addrs.family = sel->family;
1157 mlx5e_ipsec_policy_mask(&attrs->addrs, sel);
1158 attrs->dir = x->xdo.dir;
1159 attrs->action = x->action;
1160 attrs->type = XFRM_DEV_OFFLOAD_PACKET;
1161 attrs->reqid = x->xfrm_vec[0].reqid;
1162 attrs->upspec.dport = ntohs(sel->dport);
1163 attrs->upspec.dport_mask = ntohs(sel->dport_mask);
1164 attrs->upspec.sport = ntohs(sel->sport);
1165 attrs->upspec.sport_mask = ntohs(sel->sport_mask);
1166 attrs->upspec.proto = sel->proto;
1167 attrs->prio = x->priority;
1168 }
1169
mlx5e_xfrm_add_policy(struct xfrm_policy * x,struct netlink_ext_ack * extack)1170 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
1171 struct netlink_ext_ack *extack)
1172 {
1173 struct net_device *netdev = x->xdo.real_dev;
1174 struct mlx5e_ipsec_pol_entry *pol_entry;
1175 struct mlx5e_priv *priv;
1176 int err;
1177
1178 priv = netdev_priv(netdev);
1179 if (!priv->ipsec) {
1180 NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload");
1181 return -EOPNOTSUPP;
1182 }
1183
1184 err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
1185 if (err)
1186 return err;
1187
1188 pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL);
1189 if (!pol_entry)
1190 return -ENOMEM;
1191
1192 pol_entry->x = x;
1193 pol_entry->ipsec = priv->ipsec;
1194
1195 if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
1196 err = -EBUSY;
1197 goto ipsec_busy;
1198 }
1199
1200 mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
1201 err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
1202 if (err)
1203 goto err_fs;
1204
1205 x->xdo.offload_handle = (unsigned long)pol_entry;
1206 return 0;
1207
1208 err_fs:
1209 mlx5_eswitch_unblock_ipsec(priv->mdev);
1210 ipsec_busy:
1211 kfree(pol_entry);
1212 NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
1213 return err;
1214 }
1215
/* xfrmdev_ops callback: remove the offloaded policy @x from the device.
 *
 * Deletes the policy's flow-steering state and calls
 * mlx5_eswitch_unblock_ipsec(), pairing with the mlx5_eswitch_block_ipsec()
 * call made in mlx5e_xfrm_add_policy(). The policy entry itself is not freed
 * here; that is left to mlx5e_xfrm_free_policy().
 */
static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
{
	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);

	mlx5e_accel_ipsec_fs_del_pol(pol_entry);
	mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev);
}
1223
/* xfrmdev_ops callback: free the driver's policy entry associated with @x.
 *
 * NOTE(review): to_ipsec_pol_entry() presumably returns the entry that
 * mlx5e_xfrm_add_policy() stored in x->xdo.offload_handle - confirm.
 */
static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
{
	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);

	kfree(pol_entry);
}
1230
/* xfrm device offload callbacks of this driver; installed on the netdev by
 * mlx5e_ipsec_build_netdev().
 */
static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
	/* SA (xfrm state) handling */
	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
	.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,

	.xdo_dev_state_update_stats = mlx5e_xfrm_update_stats,
	/* Policy handling */
	.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
	.xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
	.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
};
1242
mlx5e_ipsec_build_netdev(struct mlx5e_priv * priv)1243 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
1244 {
1245 struct mlx5_core_dev *mdev = priv->mdev;
1246 struct net_device *netdev = priv->netdev;
1247
1248 if (!mlx5_ipsec_device_caps(mdev))
1249 return;
1250
1251 mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
1252
1253 netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
1254 netdev->features |= NETIF_F_HW_ESP;
1255 netdev->hw_enc_features |= NETIF_F_HW_ESP;
1256
1257 if (!MLX5_CAP_ETH(mdev, swp_csum)) {
1258 mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
1259 return;
1260 }
1261
1262 netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
1263 netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
1264
1265 if (!MLX5_CAP_ETH(mdev, swp_lso)) {
1266 mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
1267 return;
1268 }
1269
1270 netdev->gso_partial_features |= NETIF_F_GSO_ESP;
1271 mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
1272 netdev->features |= NETIF_F_GSO_ESP;
1273 netdev->hw_features |= NETIF_F_GSO_ESP;
1274 netdev->hw_enc_features |= NETIF_F_GSO_ESP;
1275 }
1276