1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Support for AES-NI and VAES instructions. This file contains glue code.
4 * The real AES implementations are in aesni-intel_asm.S and other .S files.
5 *
6 * Copyright (C) 2008, Intel Corp.
7 * Author: Huang Ying <ying.huang@intel.com>
8 *
9 * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
10 * interface for 64-bit kernels.
11 * Authors: Adrian Hoban <adrian.hoban@intel.com>
12 * Gabriele Paoloni <gabriele.paoloni@intel.com>
13 * Tadeusz Struk (tadeusz.struk@intel.com)
14 * Aidan O'Mahony (aidan.o.mahony@intel.com)
15 * Copyright (c) 2010, Intel Corporation.
16 *
17 * Copyright 2024 Google LLC
18 */
19
20 #include <linux/hardirq.h>
21 #include <linux/types.h>
22 #include <linux/module.h>
23 #include <linux/err.h>
24 #include <crypto/algapi.h>
25 #include <crypto/aes.h>
26 #include <crypto/b128ops.h>
27 #include <crypto/gcm.h>
28 #include <crypto/xts.h>
29 #include <asm/cpu_device_id.h>
30 #include <asm/simd.h>
31 #include <crypto/scatterwalk.h>
32 #include <crypto/internal/aead.h>
33 #include <crypto/internal/simd.h>
34 #include <crypto/internal/skcipher.h>
35 #include <linux/jump_label.h>
36 #include <linux/workqueue.h>
37 #include <linux/spinlock.h>
38 #include <linux/static_call.h>
39
40
41 #define AESNI_ALIGN 16
42 #define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
43 #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1))
44 #define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
45 #define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
46 #define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
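/*
 * Worked example of the macro arithmetic above (assuming the common case of
 * CRYPTO_MINALIGN == 8):
 *
 *   AES_BLOCK_MASK    = ~15, so "nbytes & AES_BLOCK_MASK" keeps only whole
 *                       16-byte blocks (e.g. 70 & ~15 == 64).
 *   AESNI_ALIGN_EXTRA = 15 & ~7 = 8, i.e. enough slack in CRYPTO_AES_CTX_SIZE
 *                       and XTS_AES_CTX_SIZE for aes_align_addr() below to
 *                       bump an 8-byte-aligned context up to a 16-byte
 *                       boundary without running past the allocation.
 */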
47
48 struct aesni_xts_ctx {
49 struct crypto_aes_ctx tweak_ctx AESNI_ALIGN_ATTR;
50 struct crypto_aes_ctx crypt_ctx AESNI_ALIGN_ATTR;
51 };
52
53 static inline void *aes_align_addr(void *addr)
54 {
55 if (crypto_tfm_ctx_alignment() >= AESNI_ALIGN)
56 return addr;
57 return PTR_ALIGN(addr, AESNI_ALIGN);
58 }
59
60 asmlinkage void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
61 unsigned int key_len);
62 asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in);
63 asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in);
64 asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
65 const u8 *in, unsigned int len);
66 asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
67 const u8 *in, unsigned int len);
68 asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
69 const u8 *in, unsigned int len, u8 *iv);
70 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
71 const u8 *in, unsigned int len, u8 *iv);
72 asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
73 const u8 *in, unsigned int len, u8 *iv);
74 asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
75 const u8 *in, unsigned int len, u8 *iv);
76
77 asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out,
78 const u8 *in, unsigned int len, u8 *iv);
79
80 asmlinkage void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *out,
81 const u8 *in, unsigned int len, u8 *iv);
82
83 #ifdef CONFIG_X86_64
84 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
85 const u8 *in, unsigned int len, u8 *iv);
86 #endif
87
88 static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
89 {
90 return aes_align_addr(raw_ctx);
91 }
92
93 static inline struct aesni_xts_ctx *aes_xts_ctx(struct crypto_skcipher *tfm)
94 {
95 return aes_align_addr(crypto_skcipher_ctx(tfm));
96 }
97
98 static int aes_set_key_common(struct crypto_aes_ctx *ctx,
99 const u8 *in_key, unsigned int key_len)
100 {
101 int err;
102
103 if (!crypto_simd_usable())
104 return aes_expandkey(ctx, in_key, key_len);
105
106 err = aes_check_keylen(key_len);
107 if (err)
108 return err;
109
110 kernel_fpu_begin();
111 aesni_set_key(ctx, in_key, key_len);
112 kernel_fpu_end();
113 return 0;
114 }
115
116 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
117 unsigned int key_len)
118 {
119 return aes_set_key_common(aes_ctx(crypto_tfm_ctx(tfm)), in_key,
120 key_len);
121 }
122
123 static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
124 {
125 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
126
127 if (!crypto_simd_usable()) {
128 aes_encrypt(ctx, dst, src);
129 } else {
130 kernel_fpu_begin();
131 aesni_enc(ctx, dst, src);
132 kernel_fpu_end();
133 }
134 }
135
136 static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
137 {
138 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
139
140 if (!crypto_simd_usable()) {
141 aes_decrypt(ctx, dst, src);
142 } else {
143 kernel_fpu_begin();
144 aesni_dec(ctx, dst, src);
145 kernel_fpu_end();
146 }
147 }
148
149 static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
150 unsigned int len)
151 {
152 return aes_set_key_common(aes_ctx(crypto_skcipher_ctx(tfm)), key, len);
153 }
154
155 static int ecb_encrypt(struct skcipher_request *req)
156 {
157 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
158 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
159 struct skcipher_walk walk;
160 unsigned int nbytes;
161 int err;
162
163 err = skcipher_walk_virt(&walk, req, false);
164
165 while ((nbytes = walk.nbytes)) {
166 kernel_fpu_begin();
167 aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
168 nbytes & AES_BLOCK_MASK);
169 kernel_fpu_end();
170 nbytes &= AES_BLOCK_SIZE - 1;
171 err = skcipher_walk_done(&walk, nbytes);
172 }
173
174 return err;
175 }
176
177 static int ecb_decrypt(struct skcipher_request *req)
178 {
179 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
180 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
181 struct skcipher_walk walk;
182 unsigned int nbytes;
183 int err;
184
185 err = skcipher_walk_virt(&walk, req, false);
186
187 while ((nbytes = walk.nbytes)) {
188 kernel_fpu_begin();
189 aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
190 nbytes & AES_BLOCK_MASK);
191 kernel_fpu_end();
192 nbytes &= AES_BLOCK_SIZE - 1;
193 err = skcipher_walk_done(&walk, nbytes);
194 }
195
196 return err;
197 }
198
199 static int cbc_encrypt(struct skcipher_request *req)
200 {
201 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
202 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
203 struct skcipher_walk walk;
204 unsigned int nbytes;
205 int err;
206
207 err = skcipher_walk_virt(&walk, req, false);
208
209 while ((nbytes = walk.nbytes)) {
210 kernel_fpu_begin();
211 aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
212 nbytes & AES_BLOCK_MASK, walk.iv);
213 kernel_fpu_end();
214 nbytes &= AES_BLOCK_SIZE - 1;
215 err = skcipher_walk_done(&walk, nbytes);
216 }
217
218 return err;
219 }
220
221 static int cbc_decrypt(struct skcipher_request *req)
222 {
223 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
224 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
225 struct skcipher_walk walk;
226 unsigned int nbytes;
227 int err;
228
229 err = skcipher_walk_virt(&walk, req, false);
230
231 while ((nbytes = walk.nbytes)) {
232 kernel_fpu_begin();
233 aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
234 nbytes & AES_BLOCK_MASK, walk.iv);
235 kernel_fpu_end();
236 nbytes &= AES_BLOCK_SIZE - 1;
237 err = skcipher_walk_done(&walk, nbytes);
238 }
239
240 return err;
241 }
242
243 static int cts_cbc_encrypt(struct skcipher_request *req)
244 {
245 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
246 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
247 int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
248 struct scatterlist *src = req->src, *dst = req->dst;
249 struct scatterlist sg_src[2], sg_dst[2];
250 struct skcipher_request subreq;
251 struct skcipher_walk walk;
252 int err;
253
254 skcipher_request_set_tfm(&subreq, tfm);
255 skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
256 NULL, NULL);
257
258 if (req->cryptlen <= AES_BLOCK_SIZE) {
259 if (req->cryptlen < AES_BLOCK_SIZE)
260 return -EINVAL;
261 cbc_blocks = 1;
262 }
263
264 if (cbc_blocks > 0) {
265 skcipher_request_set_crypt(&subreq, req->src, req->dst,
266 cbc_blocks * AES_BLOCK_SIZE,
267 req->iv);
268
269 err = cbc_encrypt(&subreq);
270 if (err)
271 return err;
272
273 if (req->cryptlen == AES_BLOCK_SIZE)
274 return 0;
275
276 dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
277 if (req->dst != req->src)
278 dst = scatterwalk_ffwd(sg_dst, req->dst,
279 subreq.cryptlen);
280 }
281
282 /* handle ciphertext stealing */
283 skcipher_request_set_crypt(&subreq, src, dst,
284 req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
285 req->iv);
286
287 err = skcipher_walk_virt(&walk, &subreq, false);
288 if (err)
289 return err;
290
291 kernel_fpu_begin();
292 aesni_cts_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
293 walk.nbytes, walk.iv);
294 kernel_fpu_end();
295
296 return skcipher_walk_done(&walk, 0);
297 }
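/*
 * Example of the request split above (illustration only): for a 100-byte
 * cts(cbc(aes)) encryption, cbc_blocks = DIV_ROUND_UP(100, 16) - 2 = 5, so
 * the first 80 bytes go through plain cbc_encrypt() and the remaining
 * 20 bytes (one full block plus a 4-byte partial block) are handed to
 * aesni_cts_cbc_enc() in a single call, which performs the ciphertext
 * stealing.  cts_cbc_decrypt() below splits the request the same way.
 */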
298
299 static int cts_cbc_decrypt(struct skcipher_request *req)
300 {
301 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
302 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
303 int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
304 struct scatterlist *src = req->src, *dst = req->dst;
305 struct scatterlist sg_src[2], sg_dst[2];
306 struct skcipher_request subreq;
307 struct skcipher_walk walk;
308 int err;
309
310 skcipher_request_set_tfm(&subreq, tfm);
311 skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
312 NULL, NULL);
313
314 if (req->cryptlen <= AES_BLOCK_SIZE) {
315 if (req->cryptlen < AES_BLOCK_SIZE)
316 return -EINVAL;
317 cbc_blocks = 1;
318 }
319
320 if (cbc_blocks > 0) {
321 skcipher_request_set_crypt(&subreq, req->src, req->dst,
322 cbc_blocks * AES_BLOCK_SIZE,
323 req->iv);
324
325 err = cbc_decrypt(&subreq);
326 if (err)
327 return err;
328
329 if (req->cryptlen == AES_BLOCK_SIZE)
330 return 0;
331
332 dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
333 if (req->dst != req->src)
334 dst = scatterwalk_ffwd(sg_dst, req->dst,
335 subreq.cryptlen);
336 }
337
338 /* handle ciphertext stealing */
339 skcipher_request_set_crypt(&subreq, src, dst,
340 req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
341 req->iv);
342
343 err = skcipher_walk_virt(&walk, &subreq, false);
344 if (err)
345 return err;
346
347 kernel_fpu_begin();
348 aesni_cts_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
349 walk.nbytes, walk.iv);
350 kernel_fpu_end();
351
352 return skcipher_walk_done(&walk, 0);
353 }
354
355 #ifdef CONFIG_X86_64
356 /* This is the non-AVX version. */
357 static int ctr_crypt_aesni(struct skcipher_request *req)
358 {
359 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
360 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
361 u8 keystream[AES_BLOCK_SIZE];
362 struct skcipher_walk walk;
363 unsigned int nbytes;
364 int err;
365
366 err = skcipher_walk_virt(&walk, req, false);
367
368 while ((nbytes = walk.nbytes) > 0) {
369 kernel_fpu_begin();
370 if (nbytes & AES_BLOCK_MASK)
371 aesni_ctr_enc(ctx, walk.dst.virt.addr,
372 walk.src.virt.addr,
373 nbytes & AES_BLOCK_MASK, walk.iv);
374 nbytes &= ~AES_BLOCK_MASK;
375
376 if (walk.nbytes == walk.total && nbytes > 0) {
377 aesni_enc(ctx, keystream, walk.iv);
378 crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - nbytes,
379 walk.src.virt.addr + walk.nbytes - nbytes,
380 keystream, nbytes);
381 crypto_inc(walk.iv, AES_BLOCK_SIZE);
382 nbytes = 0;
383 }
384 kernel_fpu_end();
385 err = skcipher_walk_done(&walk, nbytes);
386 }
387 return err;
388 }
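/*
 * Example of the final-partial-block handling above (illustration only):
 * for a 70-byte request, 64 bytes are processed by aesni_ctr_enc(), then the
 * remaining 6 bytes are encrypted by generating one extra keystream block
 * with aesni_enc() and XOR'ing it into the data with crypto_xor_cpy().
 */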
389 #endif
390
391 static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key,
392 unsigned int keylen)
393 {
394 struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
395 int err;
396
397 err = xts_verify_key(tfm, key, keylen);
398 if (err)
399 return err;
400
401 keylen /= 2;
402
403 /* first half of xts-key is for crypt */
404 err = aes_set_key_common(&ctx->crypt_ctx, key, keylen);
405 if (err)
406 return err;
407
408 /* second half of xts-key is for tweak */
409 return aes_set_key_common(&ctx->tweak_ctx, key + keylen, keylen);
410 }
411
412 typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key,
413 u8 iv[AES_BLOCK_SIZE]);
414 typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key,
415 const u8 *src, u8 *dst, int len,
416 u8 tweak[AES_BLOCK_SIZE]);
417
418 /* This handles cases where the source and/or destination span pages. */
419 static noinline int
420 xts_crypt_slowpath(struct skcipher_request *req, xts_crypt_func crypt_func)
421 {
422 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
423 const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
424 int tail = req->cryptlen % AES_BLOCK_SIZE;
425 struct scatterlist sg_src[2], sg_dst[2];
426 struct skcipher_request subreq;
427 struct skcipher_walk walk;
428 struct scatterlist *src, *dst;
429 int err;
430
431 /*
432 * If the message length isn't divisible by the AES block size, then
433 * separate off the last full block and the partial block. This ensures
434 * that they are processed in the same call to the assembly function,
435 * which is required for ciphertext stealing.
436 */
437 if (tail) {
438 skcipher_request_set_tfm(&subreq, tfm);
439 skcipher_request_set_callback(&subreq,
440 skcipher_request_flags(req),
441 NULL, NULL);
442 skcipher_request_set_crypt(&subreq, req->src, req->dst,
443 req->cryptlen - tail - AES_BLOCK_SIZE,
444 req->iv);
445 req = &subreq;
446 }
447
448 err = skcipher_walk_virt(&walk, req, false);
449
450 while (walk.nbytes) {
451 kernel_fpu_begin();
452 (*crypt_func)(&ctx->crypt_ctx,
453 walk.src.virt.addr, walk.dst.virt.addr,
454 walk.nbytes & ~(AES_BLOCK_SIZE - 1), req->iv);
455 kernel_fpu_end();
456 err = skcipher_walk_done(&walk,
457 walk.nbytes & (AES_BLOCK_SIZE - 1));
458 }
459
460 if (err || !tail)
461 return err;
462
463 /* Do ciphertext stealing with the last full block and partial block. */
464
465 dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
466 if (req->dst != req->src)
467 dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
468
469 skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
470 req->iv);
471
472 err = skcipher_walk_virt(&walk, req, false);
473 if (err)
474 return err;
475
476 kernel_fpu_begin();
477 (*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr,
478 walk.nbytes, req->iv);
479 kernel_fpu_end();
480
481 return skcipher_walk_done(&walk, 0);
482 }
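/*
 * Example of the tail handling above (illustration only): for a 600-byte
 * message, tail = 600 % 16 = 8, so the subrequest first covers
 * 600 - 8 - 16 = 576 bytes, and the final 16 + 8 = 24 bytes are then passed
 * to the assembly in one call so it can do the ciphertext stealing between
 * the last full block and the partial block.
 */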
483
484 /* __always_inline to avoid indirect call in fastpath */
485 static __always_inline int
486 xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv,
487 xts_crypt_func crypt_func)
488 {
489 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
490 const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
491
492 if (unlikely(req->cryptlen < AES_BLOCK_SIZE))
493 return -EINVAL;
494
495 kernel_fpu_begin();
496 (*encrypt_iv)(&ctx->tweak_ctx, req->iv);
497
498 /*
499 * In practice, virtually all XTS plaintexts and ciphertexts are either
500 * 512 or 4096 bytes and do not use multiple scatterlist elements. To
501 * optimize the performance of these cases, the below fast-path handles
502 * single-scatterlist-element messages as efficiently as possible. The
503 * code is 64-bit specific, as it assumes no page mapping is needed.
504 */
505 if (IS_ENABLED(CONFIG_X86_64) &&
506 likely(req->src->length >= req->cryptlen &&
507 req->dst->length >= req->cryptlen)) {
508 (*crypt_func)(&ctx->crypt_ctx, sg_virt(req->src),
509 sg_virt(req->dst), req->cryptlen, req->iv);
510 kernel_fpu_end();
511 return 0;
512 }
513 kernel_fpu_end();
514 return xts_crypt_slowpath(req, crypt_func);
515 }
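/*
 * Note on the fast path above: a typical 512-byte or 4096-byte request whose
 * source and destination each fit in their first scatterlist element is
 * handled with a single assembly call and no skcipher_walk at all; any other
 * request (and all requests on 32-bit kernels) falls back to
 * xts_crypt_slowpath().
 */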
516
517 static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
518 u8 iv[AES_BLOCK_SIZE])
519 {
520 aesni_enc(tweak_key, iv, iv);
521 }
522
523 static void aesni_xts_encrypt(const struct crypto_aes_ctx *key,
524 const u8 *src, u8 *dst, int len,
525 u8 tweak[AES_BLOCK_SIZE])
526 {
527 aesni_xts_enc(key, dst, src, len, tweak);
528 }
529
530 static void aesni_xts_decrypt(const struct crypto_aes_ctx *key,
531 const u8 *src, u8 *dst, int len,
532 u8 tweak[AES_BLOCK_SIZE])
533 {
534 aesni_xts_dec(key, dst, src, len, tweak);
535 }
536
537 static int xts_encrypt_aesni(struct skcipher_request *req)
538 {
539 return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_encrypt);
540 }
541
542 static int xts_decrypt_aesni(struct skcipher_request *req)
543 {
544 return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_decrypt);
545 }
546
547 static struct crypto_alg aesni_cipher_alg = {
548 .cra_name = "aes",
549 .cra_driver_name = "aes-aesni",
550 .cra_priority = 300,
551 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
552 .cra_blocksize = AES_BLOCK_SIZE,
553 .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
554 .cra_module = THIS_MODULE,
555 .cra_u = {
556 .cipher = {
557 .cia_min_keysize = AES_MIN_KEY_SIZE,
558 .cia_max_keysize = AES_MAX_KEY_SIZE,
559 .cia_setkey = aes_set_key,
560 .cia_encrypt = aesni_encrypt,
561 .cia_decrypt = aesni_decrypt
562 }
563 }
564 };
565
566 static struct skcipher_alg aesni_skciphers[] = {
567 {
568 .base = {
569 .cra_name = "__ecb(aes)",
570 .cra_driver_name = "__ecb-aes-aesni",
571 .cra_priority = 400,
572 .cra_flags = CRYPTO_ALG_INTERNAL,
573 .cra_blocksize = AES_BLOCK_SIZE,
574 .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
575 .cra_module = THIS_MODULE,
576 },
577 .min_keysize = AES_MIN_KEY_SIZE,
578 .max_keysize = AES_MAX_KEY_SIZE,
579 .setkey = aesni_skcipher_setkey,
580 .encrypt = ecb_encrypt,
581 .decrypt = ecb_decrypt,
582 }, {
583 .base = {
584 .cra_name = "__cbc(aes)",
585 .cra_driver_name = "__cbc-aes-aesni",
586 .cra_priority = 400,
587 .cra_flags = CRYPTO_ALG_INTERNAL,
588 .cra_blocksize = AES_BLOCK_SIZE,
589 .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
590 .cra_module = THIS_MODULE,
591 },
592 .min_keysize = AES_MIN_KEY_SIZE,
593 .max_keysize = AES_MAX_KEY_SIZE,
594 .ivsize = AES_BLOCK_SIZE,
595 .setkey = aesni_skcipher_setkey,
596 .encrypt = cbc_encrypt,
597 .decrypt = cbc_decrypt,
598 }, {
599 .base = {
600 .cra_name = "__cts(cbc(aes))",
601 .cra_driver_name = "__cts-cbc-aes-aesni",
602 .cra_priority = 400,
603 .cra_flags = CRYPTO_ALG_INTERNAL,
604 .cra_blocksize = AES_BLOCK_SIZE,
605 .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
606 .cra_module = THIS_MODULE,
607 },
608 .min_keysize = AES_MIN_KEY_SIZE,
609 .max_keysize = AES_MAX_KEY_SIZE,
610 .ivsize = AES_BLOCK_SIZE,
611 .walksize = 2 * AES_BLOCK_SIZE,
612 .setkey = aesni_skcipher_setkey,
613 .encrypt = cts_cbc_encrypt,
614 .decrypt = cts_cbc_decrypt,
615 #ifdef CONFIG_X86_64
616 }, {
617 .base = {
618 .cra_name = "__ctr(aes)",
619 .cra_driver_name = "__ctr-aes-aesni",
620 .cra_priority = 400,
621 .cra_flags = CRYPTO_ALG_INTERNAL,
622 .cra_blocksize = 1,
623 .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
624 .cra_module = THIS_MODULE,
625 },
626 .min_keysize = AES_MIN_KEY_SIZE,
627 .max_keysize = AES_MAX_KEY_SIZE,
628 .ivsize = AES_BLOCK_SIZE,
629 .chunksize = AES_BLOCK_SIZE,
630 .setkey = aesni_skcipher_setkey,
631 .encrypt = ctr_crypt_aesni,
632 .decrypt = ctr_crypt_aesni,
633 #endif
634 }, {
635 .base = {
636 .cra_name = "__xts(aes)",
637 .cra_driver_name = "__xts-aes-aesni",
638 .cra_priority = 401,
639 .cra_flags = CRYPTO_ALG_INTERNAL,
640 .cra_blocksize = AES_BLOCK_SIZE,
641 .cra_ctxsize = XTS_AES_CTX_SIZE,
642 .cra_module = THIS_MODULE,
643 },
644 .min_keysize = 2 * AES_MIN_KEY_SIZE,
645 .max_keysize = 2 * AES_MAX_KEY_SIZE,
646 .ivsize = AES_BLOCK_SIZE,
647 .walksize = 2 * AES_BLOCK_SIZE,
648 .setkey = xts_setkey_aesni,
649 .encrypt = xts_encrypt_aesni,
650 .decrypt = xts_decrypt_aesni,
651 }
652 };
653
654 static
655 struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
656
657 #ifdef CONFIG_X86_64
658 asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
659 u8 iv[AES_BLOCK_SIZE]);
660
661 /* __always_inline to avoid indirect call */
662 static __always_inline int
663 ctr_crypt(struct skcipher_request *req,
664 void (*ctr64_func)(const struct crypto_aes_ctx *key,
665 const u8 *src, u8 *dst, int len,
666 const u64 le_ctr[2]))
667 {
668 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
669 const struct crypto_aes_ctx *key = aes_ctx(crypto_skcipher_ctx(tfm));
670 unsigned int nbytes, p1_nbytes, nblocks;
671 struct skcipher_walk walk;
672 u64 le_ctr[2];
673 u64 ctr64;
674 int err;
675
676 ctr64 = le_ctr[0] = get_unaligned_be64(&req->iv[8]);
677 le_ctr[1] = get_unaligned_be64(&req->iv[0]);
678
679 err = skcipher_walk_virt(&walk, req, false);
680
681 while ((nbytes = walk.nbytes) != 0) {
682 if (nbytes < walk.total) {
683 /* Not the end yet, so keep the length block-aligned. */
684 nbytes = round_down(nbytes, AES_BLOCK_SIZE);
685 nblocks = nbytes / AES_BLOCK_SIZE;
686 } else {
687 /* It's the end, so include any final partial block. */
688 nblocks = DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE);
689 }
690 ctr64 += nblocks;
691
692 kernel_fpu_begin();
693 if (likely(ctr64 >= nblocks)) {
694 /* The low 64 bits of the counter won't overflow. */
695 (*ctr64_func)(key, walk.src.virt.addr,
696 walk.dst.virt.addr, nbytes, le_ctr);
697 } else {
698 /*
699 * The low 64 bits of the counter will overflow. The
700 * assembly doesn't handle this case, so split the
701 * operation into two at the point where the overflow
702 * will occur. After the first part, add the carry bit.
703 */
704 p1_nbytes = min_t(unsigned int, nbytes,
705 (nblocks - ctr64) * AES_BLOCK_SIZE);
706 (*ctr64_func)(key, walk.src.virt.addr,
707 walk.dst.virt.addr, p1_nbytes, le_ctr);
708 le_ctr[0] = 0;
709 le_ctr[1]++;
710 (*ctr64_func)(key, walk.src.virt.addr + p1_nbytes,
711 walk.dst.virt.addr + p1_nbytes,
712 nbytes - p1_nbytes, le_ctr);
713 }
714 kernel_fpu_end();
715 le_ctr[0] = ctr64;
716
717 err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
718 }
719
720 put_unaligned_be64(ctr64, &req->iv[8]);
721 put_unaligned_be64(le_ctr[1], &req->iv[0]);
722
723 return err;
724 }
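/*
 * Worked example of the overflow split above (illustration only): if the low
 * 64 bits of the counter are 0xFFFFFFFFFFFFFFFE and this walk step covers
 * nblocks = 4, then ctr64 wraps around to 2, which is < nblocks, so the first
 * (4 - 2) * 16 = 32 bytes are processed with the old upper half, the carry is
 * propagated (le_ctr[0] = 0, le_ctr[1]++), and the remaining bytes are
 * processed in a second call.
 */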
725
726 /* __always_inline to avoid indirect call */
727 static __always_inline int
728 xctr_crypt(struct skcipher_request *req,
729 void (*xctr_func)(const struct crypto_aes_ctx *key,
730 const u8 *src, u8 *dst, int len,
731 const u8 iv[AES_BLOCK_SIZE], u64 ctr))
732 {
733 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
734 const struct crypto_aes_ctx *key = aes_ctx(crypto_skcipher_ctx(tfm));
735 struct skcipher_walk walk;
736 unsigned int nbytes;
737 u64 ctr = 1;
738 int err;
739
740 err = skcipher_walk_virt(&walk, req, false);
741 while ((nbytes = walk.nbytes) != 0) {
742 if (nbytes < walk.total)
743 nbytes = round_down(nbytes, AES_BLOCK_SIZE);
744
745 kernel_fpu_begin();
746 (*xctr_func)(key, walk.src.virt.addr, walk.dst.virt.addr,
747 nbytes, req->iv, ctr);
748 kernel_fpu_end();
749
750 ctr += DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE);
751 err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
752 }
753 return err;
754 }
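/*
 * Unlike ctr_crypt() above, XCTR (the CTR variant used by HCTR2) XORs a
 * little-endian block counter into the IV instead of incrementing a
 * big-endian counter carried in the IV, so no overflow splitting is needed
 * here; the glue code just tracks the 64-bit block counter, starting at 1.
 */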
755
756 #define DEFINE_AVX_SKCIPHER_ALGS(suffix, driver_name_suffix, priority) \
757 \
758 asmlinkage void \
759 aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \
760 u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]); \
761 asmlinkage void \
762 aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \
763 u8 *dst, int len, u8 tweak[AES_BLOCK_SIZE]); \
764 \
765 static int xts_encrypt_##suffix(struct skcipher_request *req) \
766 { \
767 return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_encrypt_##suffix); \
768 } \
769 \
770 static int xts_decrypt_##suffix(struct skcipher_request *req) \
771 { \
772 return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix); \
773 } \
774 \
775 asmlinkage void \
776 aes_ctr64_crypt_##suffix(const struct crypto_aes_ctx *key, \
777 const u8 *src, u8 *dst, int len, const u64 le_ctr[2]);\
778 \
779 static int ctr_crypt_##suffix(struct skcipher_request *req) \
780 { \
781 return ctr_crypt(req, aes_ctr64_crypt_##suffix); \
782 } \
783 \
784 asmlinkage void \
785 aes_xctr_crypt_##suffix(const struct crypto_aes_ctx *key, \
786 const u8 *src, u8 *dst, int len, \
787 const u8 iv[AES_BLOCK_SIZE], u64 ctr); \
788 \
789 static int xctr_crypt_##suffix(struct skcipher_request *req) \
790 { \
791 return xctr_crypt(req, aes_xctr_crypt_##suffix); \
792 } \
793 \
794 static struct skcipher_alg skcipher_algs_##suffix[] = {{ \
795 .base.cra_name = "__xts(aes)", \
796 .base.cra_driver_name = "__xts-aes-" driver_name_suffix, \
797 .base.cra_priority = priority, \
798 .base.cra_flags = CRYPTO_ALG_INTERNAL, \
799 .base.cra_blocksize = AES_BLOCK_SIZE, \
800 .base.cra_ctxsize = XTS_AES_CTX_SIZE, \
801 .base.cra_module = THIS_MODULE, \
802 .min_keysize = 2 * AES_MIN_KEY_SIZE, \
803 .max_keysize = 2 * AES_MAX_KEY_SIZE, \
804 .ivsize = AES_BLOCK_SIZE, \
805 .walksize = 2 * AES_BLOCK_SIZE, \
806 .setkey = xts_setkey_aesni, \
807 .encrypt = xts_encrypt_##suffix, \
808 .decrypt = xts_decrypt_##suffix, \
809 }, { \
810 .base.cra_name = "__ctr(aes)", \
811 .base.cra_driver_name = "__ctr-aes-" driver_name_suffix, \
812 .base.cra_priority = priority, \
813 .base.cra_flags = CRYPTO_ALG_INTERNAL, \
814 .base.cra_blocksize = 1, \
815 .base.cra_ctxsize = CRYPTO_AES_CTX_SIZE, \
816 .base.cra_module = THIS_MODULE, \
817 .min_keysize = AES_MIN_KEY_SIZE, \
818 .max_keysize = AES_MAX_KEY_SIZE, \
819 .ivsize = AES_BLOCK_SIZE, \
820 .chunksize = AES_BLOCK_SIZE, \
821 .setkey = aesni_skcipher_setkey, \
822 .encrypt = ctr_crypt_##suffix, \
823 .decrypt = ctr_crypt_##suffix, \
824 }, { \
825 .base.cra_name = "__xctr(aes)", \
826 .base.cra_driver_name = "__xctr-aes-" driver_name_suffix, \
827 .base.cra_priority = priority, \
828 .base.cra_flags = CRYPTO_ALG_INTERNAL, \
829 .base.cra_blocksize = 1, \
830 .base.cra_ctxsize = CRYPTO_AES_CTX_SIZE, \
831 .base.cra_module = THIS_MODULE, \
832 .min_keysize = AES_MIN_KEY_SIZE, \
833 .max_keysize = AES_MAX_KEY_SIZE, \
834 .ivsize = AES_BLOCK_SIZE, \
835 .chunksize = AES_BLOCK_SIZE, \
836 .setkey = aesni_skcipher_setkey, \
837 .encrypt = xctr_crypt_##suffix, \
838 .decrypt = xctr_crypt_##suffix, \
839 }}; \
840 \
841 static struct simd_skcipher_alg * \
842 simd_skcipher_algs_##suffix[ARRAY_SIZE(skcipher_algs_##suffix)]
843
844 DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500);
845 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
846 DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600);
847 DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_256, "vaes-avx10_256", 700);
848 DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_512, "vaes-avx10_512", 800);
849 #endif
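/*
 * For example, DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500) above
 * expands to the xts_encrypt_aesni_avx(), xts_decrypt_aesni_avx(),
 * ctr_crypt_aesni_avx() and xctr_crypt_aesni_avx() wrappers plus the
 * skcipher_algs_aesni_avx[] array, which registers "__xts(aes)",
 * "__ctr(aes)" and "__xctr(aes)" implementations at priority 500.
 */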
850
851 /* The common part of the x86_64 AES-GCM key struct */
852 struct aes_gcm_key {
853 /* Expanded AES key and the AES key length in bytes */
854 struct crypto_aes_ctx aes_key;
855
856 /* RFC4106 nonce (used only by the rfc4106 algorithms) */
857 u32 rfc4106_nonce;
858 };
859
860 /* Key struct used by the AES-NI implementations of AES-GCM */
861 struct aes_gcm_key_aesni {
862 /*
863 * Common part of the key. The assembly code requires 16-byte alignment
864 * for the round keys; we get this by them being located at the start of
865 * the struct and the whole struct being 16-byte aligned.
866 */
867 struct aes_gcm_key base;
868
869 /*
870 * Powers of the hash key H^8 through H^1. These are 128-bit values.
871 * They all have an extra factor of x^-1 and are byte-reversed. 16-byte
872 * alignment is required by the assembly code.
873 */
874 u64 h_powers[8][2] __aligned(16);
875
876 /*
877 * h_powers_xored[i] contains the two 64-bit halves of h_powers[i] XOR'd
878 * together. It's used for Karatsuba multiplication. 16-byte alignment
879 * is required by the assembly code.
880 */
881 u64 h_powers_xored[8] __aligned(16);
882
883 /*
884 * H^1 times x^64 (and also the usual extra factor of x^-1). 16-byte
885 * alignment is required by the assembly code.
886 */
887 u64 h_times_x64[2] __aligned(16);
888 };
889 #define AES_GCM_KEY_AESNI(key) \
890 container_of((key), struct aes_gcm_key_aesni, base)
891 #define AES_GCM_KEY_AESNI_SIZE \
892 (sizeof(struct aes_gcm_key_aesni) + (15 & ~(CRYPTO_MINALIGN - 1)))
893
894 /* Key struct used by the VAES + AVX10 implementations of AES-GCM */
895 struct aes_gcm_key_avx10 {
896 /*
897 * Common part of the key. The assembly code prefers 16-byte alignment
898 * for the round keys; we get this by them being located at the start of
899 * the struct and the whole struct being 64-byte aligned.
900 */
901 struct aes_gcm_key base;
902
903 /*
904 * Powers of the hash key H^16 through H^1. These are 128-bit values.
905 * They all have an extra factor of x^-1 and are byte-reversed. This
906 * array is aligned to a 64-byte boundary to make it naturally aligned
907 * for 512-bit loads, which can improve performance. (The assembly code
908 * doesn't *need* the alignment; this is just an optimization.)
909 */
910 u64 h_powers[16][2] __aligned(64);
911
912 /* Three padding blocks required by the assembly code */
913 u64 padding[3][2];
914 };
915 #define AES_GCM_KEY_AVX10(key) \
916 container_of((key), struct aes_gcm_key_avx10, base)
917 #define AES_GCM_KEY_AVX10_SIZE \
918 (sizeof(struct aes_gcm_key_avx10) + (63 & ~(CRYPTO_MINALIGN - 1)))
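/*
 * Example of the context-size slack above (assuming CRYPTO_MINALIGN == 8):
 * AES_GCM_KEY_AVX10_SIZE reserves 63 & ~7 = 56 extra bytes beyond
 * sizeof(struct aes_gcm_key_avx10) so that the PTR_ALIGN(..., 64) done in
 * aes_gcm_key_get() below always stays inside the allocated tfm context;
 * AES_GCM_KEY_AESNI_SIZE does the same with 15 & ~7 = 8 extra bytes for
 * 16-byte alignment.
 */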
919
920 /*
921 * These flags are passed to the AES-GCM helper functions to specify the
922 * specific version of AES-GCM (RFC4106 or not), whether it's encryption or
923 * decryption, and which assembly functions should be called. Assembly
924 * functions are selected using flags instead of function pointers to avoid
925 * indirect calls (which are very expensive on x86) regardless of inlining.
926 */
927 #define FLAG_RFC4106 BIT(0)
928 #define FLAG_ENC BIT(1)
929 #define FLAG_AVX BIT(2)
930 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
931 # define FLAG_AVX10_256 BIT(3)
932 # define FLAG_AVX10_512 BIT(4)
933 #else
934 /*
935 * This should cause all calls to the AVX10 assembly functions to be
936 * optimized out, avoiding the need to ifdef each call individually.
937 */
938 # define FLAG_AVX10_256 0
939 # define FLAG_AVX10_512 0
940 #endif
941
942 static inline struct aes_gcm_key *
943 aes_gcm_key_get(struct crypto_aead *tfm, int flags)
944 {
945 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
946 return PTR_ALIGN(crypto_aead_ctx(tfm), 64);
947 else
948 return PTR_ALIGN(crypto_aead_ctx(tfm), 16);
949 }
950
951 asmlinkage void
952 aes_gcm_precompute_aesni(struct aes_gcm_key_aesni *key);
953 asmlinkage void
954 aes_gcm_precompute_aesni_avx(struct aes_gcm_key_aesni *key);
955 asmlinkage void
956 aes_gcm_precompute_vaes_avx10_256(struct aes_gcm_key_avx10 *key);
957 asmlinkage void
958 aes_gcm_precompute_vaes_avx10_512(struct aes_gcm_key_avx10 *key);
959
960 static void aes_gcm_precompute(struct aes_gcm_key *key, int flags)
961 {
962 /*
963 * To make things a bit easier on the assembly side, the AVX10
964 * implementations use the same key format. Therefore, a single
965 * function using 256-bit vectors would suffice here. However, it's
966 * straightforward to provide a 512-bit one because of how the assembly
967 * code is structured, and it works nicely because the total size of the
968 * key powers is a multiple of 512 bits. So we take advantage of that.
969 *
970 * A similar situation applies to the AES-NI implementations.
971 */
972 if (flags & FLAG_AVX10_512)
973 aes_gcm_precompute_vaes_avx10_512(AES_GCM_KEY_AVX10(key));
974 else if (flags & FLAG_AVX10_256)
975 aes_gcm_precompute_vaes_avx10_256(AES_GCM_KEY_AVX10(key));
976 else if (flags & FLAG_AVX)
977 aes_gcm_precompute_aesni_avx(AES_GCM_KEY_AESNI(key));
978 else
979 aes_gcm_precompute_aesni(AES_GCM_KEY_AESNI(key));
980 }
981
982 asmlinkage void
983 aes_gcm_aad_update_aesni(const struct aes_gcm_key_aesni *key,
984 u8 ghash_acc[16], const u8 *aad, int aadlen);
985 asmlinkage void
986 aes_gcm_aad_update_aesni_avx(const struct aes_gcm_key_aesni *key,
987 u8 ghash_acc[16], const u8 *aad, int aadlen);
988 asmlinkage void
989 aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key,
990 u8 ghash_acc[16], const u8 *aad, int aadlen);
991
992 static void aes_gcm_aad_update(const struct aes_gcm_key *key, u8 ghash_acc[16],
993 const u8 *aad, int aadlen, int flags)
994 {
995 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
996 aes_gcm_aad_update_vaes_avx10(AES_GCM_KEY_AVX10(key), ghash_acc,
997 aad, aadlen);
998 else if (flags & FLAG_AVX)
999 aes_gcm_aad_update_aesni_avx(AES_GCM_KEY_AESNI(key), ghash_acc,
1000 aad, aadlen);
1001 else
1002 aes_gcm_aad_update_aesni(AES_GCM_KEY_AESNI(key), ghash_acc,
1003 aad, aadlen);
1004 }
1005
1006 asmlinkage void
1007 aes_gcm_enc_update_aesni(const struct aes_gcm_key_aesni *key,
1008 const u32 le_ctr[4], u8 ghash_acc[16],
1009 const u8 *src, u8 *dst, int datalen);
1010 asmlinkage void
1011 aes_gcm_enc_update_aesni_avx(const struct aes_gcm_key_aesni *key,
1012 const u32 le_ctr[4], u8 ghash_acc[16],
1013 const u8 *src, u8 *dst, int datalen);
1014 asmlinkage void
1015 aes_gcm_enc_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
1016 const u32 le_ctr[4], u8 ghash_acc[16],
1017 const u8 *src, u8 *dst, int datalen);
1018 asmlinkage void
1019 aes_gcm_enc_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
1020 const u32 le_ctr[4], u8 ghash_acc[16],
1021 const u8 *src, u8 *dst, int datalen);
1022
1023 asmlinkage void
1024 aes_gcm_dec_update_aesni(const struct aes_gcm_key_aesni *key,
1025 const u32 le_ctr[4], u8 ghash_acc[16],
1026 const u8 *src, u8 *dst, int datalen);
1027 asmlinkage void
1028 aes_gcm_dec_update_aesni_avx(const struct aes_gcm_key_aesni *key,
1029 const u32 le_ctr[4], u8 ghash_acc[16],
1030 const u8 *src, u8 *dst, int datalen);
1031 asmlinkage void
1032 aes_gcm_dec_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
1033 const u32 le_ctr[4], u8 ghash_acc[16],
1034 const u8 *src, u8 *dst, int datalen);
1035 asmlinkage void
1036 aes_gcm_dec_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
1037 const u32 le_ctr[4], u8 ghash_acc[16],
1038 const u8 *src, u8 *dst, int datalen);
1039
1040 /* __always_inline to optimize out the branches based on @flags */
1041 static __always_inline void
1042 aes_gcm_update(const struct aes_gcm_key *key,
1043 const u32 le_ctr[4], u8 ghash_acc[16],
1044 const u8 *src, u8 *dst, int datalen, int flags)
1045 {
1046 if (flags & FLAG_ENC) {
1047 if (flags & FLAG_AVX10_512)
1048 aes_gcm_enc_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
1049 le_ctr, ghash_acc,
1050 src, dst, datalen);
1051 else if (flags & FLAG_AVX10_256)
1052 aes_gcm_enc_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
1053 le_ctr, ghash_acc,
1054 src, dst, datalen);
1055 else if (flags & FLAG_AVX)
1056 aes_gcm_enc_update_aesni_avx(AES_GCM_KEY_AESNI(key),
1057 le_ctr, ghash_acc,
1058 src, dst, datalen);
1059 else
1060 aes_gcm_enc_update_aesni(AES_GCM_KEY_AESNI(key), le_ctr,
1061 ghash_acc, src, dst, datalen);
1062 } else {
1063 if (flags & FLAG_AVX10_512)
1064 aes_gcm_dec_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
1065 le_ctr, ghash_acc,
1066 src, dst, datalen);
1067 else if (flags & FLAG_AVX10_256)
1068 aes_gcm_dec_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
1069 le_ctr, ghash_acc,
1070 src, dst, datalen);
1071 else if (flags & FLAG_AVX)
1072 aes_gcm_dec_update_aesni_avx(AES_GCM_KEY_AESNI(key),
1073 le_ctr, ghash_acc,
1074 src, dst, datalen);
1075 else
1076 aes_gcm_dec_update_aesni(AES_GCM_KEY_AESNI(key),
1077 le_ctr, ghash_acc,
1078 src, dst, datalen);
1079 }
1080 }
1081
1082 asmlinkage void
1083 aes_gcm_enc_final_aesni(const struct aes_gcm_key_aesni *key,
1084 const u32 le_ctr[4], u8 ghash_acc[16],
1085 u64 total_aadlen, u64 total_datalen);
1086 asmlinkage void
1087 aes_gcm_enc_final_aesni_avx(const struct aes_gcm_key_aesni *key,
1088 const u32 le_ctr[4], u8 ghash_acc[16],
1089 u64 total_aadlen, u64 total_datalen);
1090 asmlinkage void
1091 aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
1092 const u32 le_ctr[4], u8 ghash_acc[16],
1093 u64 total_aadlen, u64 total_datalen);
1094
1095 /* __always_inline to optimize out the branches based on @flags */
1096 static __always_inline void
1097 aes_gcm_enc_final(const struct aes_gcm_key *key,
1098 const u32 le_ctr[4], u8 ghash_acc[16],
1099 u64 total_aadlen, u64 total_datalen, int flags)
1100 {
1101 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
1102 aes_gcm_enc_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
1103 le_ctr, ghash_acc,
1104 total_aadlen, total_datalen);
1105 else if (flags & FLAG_AVX)
1106 aes_gcm_enc_final_aesni_avx(AES_GCM_KEY_AESNI(key),
1107 le_ctr, ghash_acc,
1108 total_aadlen, total_datalen);
1109 else
1110 aes_gcm_enc_final_aesni(AES_GCM_KEY_AESNI(key),
1111 le_ctr, ghash_acc,
1112 total_aadlen, total_datalen);
1113 }
1114
1115 asmlinkage bool __must_check
1116 aes_gcm_dec_final_aesni(const struct aes_gcm_key_aesni *key,
1117 const u32 le_ctr[4], const u8 ghash_acc[16],
1118 u64 total_aadlen, u64 total_datalen,
1119 const u8 tag[16], int taglen);
1120 asmlinkage bool __must_check
1121 aes_gcm_dec_final_aesni_avx(const struct aes_gcm_key_aesni *key,
1122 const u32 le_ctr[4], const u8 ghash_acc[16],
1123 u64 total_aadlen, u64 total_datalen,
1124 const u8 tag[16], int taglen);
1125 asmlinkage bool __must_check
1126 aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
1127 const u32 le_ctr[4], const u8 ghash_acc[16],
1128 u64 total_aadlen, u64 total_datalen,
1129 const u8 tag[16], int taglen);
1130
1131 /* __always_inline to optimize out the branches based on @flags */
1132 static __always_inline bool __must_check
1133 aes_gcm_dec_final(const struct aes_gcm_key *key, const u32 le_ctr[4],
1134 u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen,
1135 u8 tag[16], int taglen, int flags)
1136 {
1137 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
1138 return aes_gcm_dec_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
1139 le_ctr, ghash_acc,
1140 total_aadlen, total_datalen,
1141 tag, taglen);
1142 else if (flags & FLAG_AVX)
1143 return aes_gcm_dec_final_aesni_avx(AES_GCM_KEY_AESNI(key),
1144 le_ctr, ghash_acc,
1145 total_aadlen, total_datalen,
1146 tag, taglen);
1147 else
1148 return aes_gcm_dec_final_aesni(AES_GCM_KEY_AESNI(key),
1149 le_ctr, ghash_acc,
1150 total_aadlen, total_datalen,
1151 tag, taglen);
1152 }
1153
1154 /*
1155 * This is the Integrity Check Value (aka the authentication tag) length and can
1156 * be 8, 12 or 16 bytes long.
1157 */
1158 static int common_rfc4106_set_authsize(struct crypto_aead *aead,
1159 unsigned int authsize)
1160 {
1161 switch (authsize) {
1162 case 8:
1163 case 12:
1164 case 16:
1165 break;
1166 default:
1167 return -EINVAL;
1168 }
1169
1170 return 0;
1171 }
1172
1173 static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
1174 unsigned int authsize)
1175 {
1176 switch (authsize) {
1177 case 4:
1178 case 8:
1179 case 12:
1180 case 13:
1181 case 14:
1182 case 15:
1183 case 16:
1184 break;
1185 default:
1186 return -EINVAL;
1187 }
1188
1189 return 0;
1190 }
1191
1192 /*
1193 * This is the setkey function for the x86_64 implementations of AES-GCM. It
1194 * saves the RFC4106 nonce if applicable, expands the AES key, and precomputes
1195 * powers of the hash key.
1196 *
1197 * To comply with the crypto_aead API, this has to be usable in no-SIMD context.
1198 * For that reason, this function includes a portable C implementation of the
1199 * needed logic. However, the portable C implementation is very slow, taking
1200 * about the same time as encrypting 37 KB of data. To be ready for users that
1201 * may set a key even somewhat frequently, we therefore also include a SIMD
1202 * assembly implementation, expanding the AES key using AES-NI and precomputing
1203 * the hash key powers using PCLMULQDQ or VPCLMULQDQ.
1204 */
1205 static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
1206 unsigned int keylen, int flags)
1207 {
1208 struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags);
1209 int err;
1210
1211 if (flags & FLAG_RFC4106) {
1212 if (keylen < 4)
1213 return -EINVAL;
1214 keylen -= 4;
1215 key->rfc4106_nonce = get_unaligned_be32(raw_key + keylen);
1216 }
1217
1218 /* The assembly code assumes the following offsets. */
1219 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_enc) != 0);
1220 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_length) != 480);
1221 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496);
1222 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624);
1223 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688);
1224 BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_enc) != 0);
1225 BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_length) != 480);
1226 BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, h_powers) != 512);
1227 BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, padding) != 768);
1228
1229 if (likely(crypto_simd_usable())) {
1230 err = aes_check_keylen(keylen);
1231 if (err)
1232 return err;
1233 kernel_fpu_begin();
1234 aesni_set_key(&key->aes_key, raw_key, keylen);
1235 aes_gcm_precompute(key, flags);
1236 kernel_fpu_end();
1237 } else {
1238 static const u8 x_to_the_minus1[16] __aligned(__alignof__(be128)) = {
1239 [0] = 0xc2, [15] = 1
1240 };
1241 static const u8 x_to_the_63[16] __aligned(__alignof__(be128)) = {
1242 [7] = 1,
1243 };
1244 be128 h1 = {};
1245 be128 h;
1246 int i;
1247
1248 err = aes_expandkey(&key->aes_key, raw_key, keylen);
1249 if (err)
1250 return err;
1251
1252 /* Encrypt the all-zeroes block to get the hash key H^1 */
1253 aes_encrypt(&key->aes_key, (u8 *)&h1, (u8 *)&h1);
1254
1255 /* Compute H^1 * x^-1 */
1256 h = h1;
1257 gf128mul_lle(&h, (const be128 *)x_to_the_minus1);
1258
1259 /* Compute the needed key powers */
1260 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) {
1261 struct aes_gcm_key_avx10 *k = AES_GCM_KEY_AVX10(key);
1262
1263 for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) {
1264 k->h_powers[i][0] = be64_to_cpu(h.b);
1265 k->h_powers[i][1] = be64_to_cpu(h.a);
1266 gf128mul_lle(&h, &h1);
1267 }
1268 memset(k->padding, 0, sizeof(k->padding));
1269 } else {
1270 struct aes_gcm_key_aesni *k = AES_GCM_KEY_AESNI(key);
1271
1272 for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) {
1273 k->h_powers[i][0] = be64_to_cpu(h.b);
1274 k->h_powers[i][1] = be64_to_cpu(h.a);
1275 k->h_powers_xored[i] = k->h_powers[i][0] ^
1276 k->h_powers[i][1];
1277 gf128mul_lle(&h, &h1);
1278 }
1279 gf128mul_lle(&h1, (const be128 *)x_to_the_63);
1280 k->h_times_x64[0] = be64_to_cpu(h1.b);
1281 k->h_times_x64[1] = be64_to_cpu(h1.a);
1282 }
1283 }
1284 return 0;
1285 }
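/*
 * Layout produced by the no-SIMD fallback above (illustration only): for the
 * AES-NI key format, the loop stores H^1 * x^-1 in h_powers[7] down to
 * H^8 * x^-1 in h_powers[0] (byte-swapped as described in the struct
 * definitions), matching the "H^8 through H^1" ordering; the AVX10 format is
 * the same idea with 16 powers.
 */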
1286
1287 /*
1288 * Initialize @ghash_acc, then pass all @assoclen bytes of associated data
1289 * (a.k.a. additional authenticated data) from @sg_src through the GHASH update
1290 * assembly function. kernel_fpu_begin() must have already been called.
1291 */
1292 static void gcm_process_assoc(const struct aes_gcm_key *key, u8 ghash_acc[16],
1293 struct scatterlist *sg_src, unsigned int assoclen,
1294 int flags)
1295 {
1296 struct scatter_walk walk;
1297 /*
1298 * The assembly function requires that the length of any non-last
1299 * segment of associated data be a multiple of 16 bytes, so this
1300 * function does the buffering needed to achieve that.
1301 */
1302 unsigned int pos = 0;
1303 u8 buf[16];
1304
1305 memset(ghash_acc, 0, 16);
1306 scatterwalk_start(&walk, sg_src);
1307
1308 while (assoclen) {
1309 unsigned int orig_len_this_step = scatterwalk_next(
1310 &walk, assoclen);
1311 unsigned int len_this_step = orig_len_this_step;
1312 unsigned int len;
1313 const u8 *src = walk.addr;
1314
1315 if (unlikely(pos)) {
1316 len = min(len_this_step, 16 - pos);
1317 memcpy(&buf[pos], src, len);
1318 pos += len;
1319 src += len;
1320 len_this_step -= len;
1321 if (pos < 16)
1322 goto next;
1323 aes_gcm_aad_update(key, ghash_acc, buf, 16, flags);
1324 pos = 0;
1325 }
1326 len = len_this_step;
1327 if (unlikely(assoclen)) /* Not the last segment yet? */
1328 len = round_down(len, 16);
1329 aes_gcm_aad_update(key, ghash_acc, src, len, flags);
1330 src += len;
1331 len_this_step -= len;
1332 if (unlikely(len_this_step)) {
1333 memcpy(buf, src, len_this_step);
1334 pos = len_this_step;
1335 }
1336 next:
1337 scatterwalk_done_src(&walk, orig_len_this_step);
1338 if (need_resched()) {
1339 kernel_fpu_end();
1340 kernel_fpu_begin();
1341 }
1342 assoclen -= orig_len_this_step;
1343 }
1344 if (unlikely(pos))
1345 aes_gcm_aad_update(key, ghash_acc, buf, pos, flags);
1346 }
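/*
 * Example of the buffering above (illustration only): if 32 bytes of
 * associated data arrive as scatterlist segments of 20 and 12 bytes, the
 * first pass hashes 16 bytes directly and buffers the trailing 4; the second
 * pass tops the buffer up to 16 bytes and hashes it as one block, so the
 * assembly only ever sees 16-byte multiples for non-final updates.
 */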
1347
1348
1349 /* __always_inline to optimize out the branches based on @flags */
1350 static __always_inline int
1351 gcm_crypt(struct aead_request *req, int flags)
1352 {
1353 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
1354 const struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags);
1355 unsigned int assoclen = req->assoclen;
1356 struct skcipher_walk walk;
1357 unsigned int nbytes;
1358 u8 ghash_acc[16]; /* GHASH accumulator */
1359 u32 le_ctr[4]; /* Counter in little-endian format */
1360 int taglen;
1361 int err;
1362
1363 /* Initialize the counter and determine the associated data length. */
1364 le_ctr[0] = 2;
1365 if (flags & FLAG_RFC4106) {
1366 if (unlikely(assoclen != 16 && assoclen != 20))
1367 return -EINVAL;
1368 assoclen -= 8;
1369 le_ctr[1] = get_unaligned_be32(req->iv + 4);
1370 le_ctr[2] = get_unaligned_be32(req->iv + 0);
1371 le_ctr[3] = key->rfc4106_nonce; /* already byte-swapped */
1372 } else {
1373 le_ctr[1] = get_unaligned_be32(req->iv + 8);
1374 le_ctr[2] = get_unaligned_be32(req->iv + 4);
1375 le_ctr[3] = get_unaligned_be32(req->iv + 0);
1376 }
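	/*
	 * le_ctr[] now holds the 128-bit GCM counter block as 32-bit words,
	 * least significant word first.  The low word starts at 2 because,
	 * with a 96-bit IV, counter value 1 is reserved for encrypting the
	 * authentication tag; the finalize functions handle that block
	 * themselves.
	 */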
1377
1378 /* Begin walking through the plaintext or ciphertext. */
1379 if (flags & FLAG_ENC)
1380 err = skcipher_walk_aead_encrypt(&walk, req, false);
1381 else
1382 err = skcipher_walk_aead_decrypt(&walk, req, false);
1383 if (err)
1384 return err;
1385
1386 /*
1387 * Since the AES-GCM assembly code requires that at least three assembly
1388 * functions be called to process any message (this is needed to support
1389 * incremental updates cleanly), to reduce overhead we try to do all
1390 * three calls in the same kernel FPU section if possible. We close the
1391 * section and start a new one if there are multiple data segments or if
1392 * rescheduling is needed while processing the associated data.
1393 */
1394 kernel_fpu_begin();
1395
1396 /* Pass the associated data through GHASH. */
1397 gcm_process_assoc(key, ghash_acc, req->src, assoclen, flags);
1398
1399 /* En/decrypt the data and pass the ciphertext through GHASH. */
1400 while (unlikely((nbytes = walk.nbytes) < walk.total)) {
1401 /*
1402 * Non-last segment. In this case, the assembly function
1403 * requires that the length be a multiple of 16 (AES_BLOCK_SIZE)
1404 * bytes. The needed buffering of up to 16 bytes is handled by
1405 * the skcipher_walk. Here we just need to round down to a
1406 * multiple of 16.
1407 */
1408 nbytes = round_down(nbytes, AES_BLOCK_SIZE);
1409 aes_gcm_update(key, le_ctr, ghash_acc, walk.src.virt.addr,
1410 walk.dst.virt.addr, nbytes, flags);
1411 le_ctr[0] += nbytes / AES_BLOCK_SIZE;
1412 kernel_fpu_end();
1413 err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
1414 if (err)
1415 return err;
1416 kernel_fpu_begin();
1417 }
1418 /* Last segment: process all remaining data. */
1419 aes_gcm_update(key, le_ctr, ghash_acc, walk.src.virt.addr,
1420 walk.dst.virt.addr, nbytes, flags);
1421 /*
1422 * The low word of the counter isn't used by the finalize, so there's no
1423 * need to increment it here.
1424 */
1425
1426 /* Finalize */
1427 taglen = crypto_aead_authsize(tfm);
1428 if (flags & FLAG_ENC) {
1429 /* Finish computing the auth tag. */
1430 aes_gcm_enc_final(key, le_ctr, ghash_acc, assoclen,
1431 req->cryptlen, flags);
1432
1433 /* Store the computed auth tag in the dst scatterlist. */
1434 scatterwalk_map_and_copy(ghash_acc, req->dst, req->assoclen +
1435 req->cryptlen, taglen, 1);
1436 } else {
1437 unsigned int datalen = req->cryptlen - taglen;
1438 u8 tag[16];
1439
1440 /* Get the transmitted auth tag from the src scatterlist. */
1441 scatterwalk_map_and_copy(tag, req->src, req->assoclen + datalen,
1442 taglen, 0);
1443 /*
1444 * Finish computing the auth tag and compare it to the
1445 * transmitted one. The assembly function does the actual tag
1446 * comparison. Here, just check the boolean result.
1447 */
1448 if (!aes_gcm_dec_final(key, le_ctr, ghash_acc, assoclen,
1449 datalen, tag, taglen, flags))
1450 err = -EBADMSG;
1451 }
1452 kernel_fpu_end();
1453 if (nbytes)
1454 skcipher_walk_done(&walk, 0);
1455 return err;
1456 }
1457
1458 #define DEFINE_GCM_ALGS(suffix, flags, generic_driver_name, rfc_driver_name, \
1459 ctxsize, priority) \
1460 \
1461 static int gcm_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key, \
1462 unsigned int keylen) \
1463 { \
1464 return gcm_setkey(tfm, raw_key, keylen, (flags)); \
1465 } \
1466 \
1467 static int gcm_encrypt_##suffix(struct aead_request *req) \
1468 { \
1469 return gcm_crypt(req, (flags) | FLAG_ENC); \
1470 } \
1471 \
1472 static int gcm_decrypt_##suffix(struct aead_request *req) \
1473 { \
1474 return gcm_crypt(req, (flags)); \
1475 } \
1476 \
1477 static int rfc4106_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key, \
1478 unsigned int keylen) \
1479 { \
1480 return gcm_setkey(tfm, raw_key, keylen, (flags) | FLAG_RFC4106); \
1481 } \
1482 \
1483 static int rfc4106_encrypt_##suffix(struct aead_request *req) \
1484 { \
1485 return gcm_crypt(req, (flags) | FLAG_RFC4106 | FLAG_ENC); \
1486 } \
1487 \
1488 static int rfc4106_decrypt_##suffix(struct aead_request *req) \
1489 { \
1490 return gcm_crypt(req, (flags) | FLAG_RFC4106); \
1491 } \
1492 \
1493 static struct aead_alg aes_gcm_algs_##suffix[] = { { \
1494 .setkey = gcm_setkey_##suffix, \
1495 .setauthsize = generic_gcmaes_set_authsize, \
1496 .encrypt = gcm_encrypt_##suffix, \
1497 .decrypt = gcm_decrypt_##suffix, \
1498 .ivsize = GCM_AES_IV_SIZE, \
1499 .chunksize = AES_BLOCK_SIZE, \
1500 .maxauthsize = 16, \
1501 .base = { \
1502 .cra_name = "__gcm(aes)", \
1503 .cra_driver_name = "__" generic_driver_name, \
1504 .cra_priority = (priority), \
1505 .cra_flags = CRYPTO_ALG_INTERNAL, \
1506 .cra_blocksize = 1, \
1507 .cra_ctxsize = (ctxsize), \
1508 .cra_module = THIS_MODULE, \
1509 }, \
1510 }, { \
1511 .setkey = rfc4106_setkey_##suffix, \
1512 .setauthsize = common_rfc4106_set_authsize, \
1513 .encrypt = rfc4106_encrypt_##suffix, \
1514 .decrypt = rfc4106_decrypt_##suffix, \
1515 .ivsize = GCM_RFC4106_IV_SIZE, \
1516 .chunksize = AES_BLOCK_SIZE, \
1517 .maxauthsize = 16, \
1518 .base = { \
1519 .cra_name = "__rfc4106(gcm(aes))", \
1520 .cra_driver_name = "__" rfc_driver_name, \
1521 .cra_priority = (priority), \
1522 .cra_flags = CRYPTO_ALG_INTERNAL, \
1523 .cra_blocksize = 1, \
1524 .cra_ctxsize = (ctxsize), \
1525 .cra_module = THIS_MODULE, \
1526 }, \
1527 } }; \
1528 \
1529 static struct simd_aead_alg *aes_gcm_simdalgs_##suffix[2] \
1530
1531 /* aes_gcm_algs_aesni */
1532 DEFINE_GCM_ALGS(aesni, /* no flags */ 0,
1533 "generic-gcm-aesni", "rfc4106-gcm-aesni",
1534 AES_GCM_KEY_AESNI_SIZE, 400);
1535
1536 /* aes_gcm_algs_aesni_avx */
1537 DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX,
1538 "generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx",
1539 AES_GCM_KEY_AESNI_SIZE, 500);
1540
1541 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
1542 /* aes_gcm_algs_vaes_avx10_256 */
1543 DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
1544 "generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256",
1545 AES_GCM_KEY_AVX10_SIZE, 700);
1546
1547 /* aes_gcm_algs_vaes_avx10_512 */
1548 DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
1549 "generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
1550 AES_GCM_KEY_AVX10_SIZE, 800);
1551 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
1552
1553 static int __init register_avx_algs(void)
1554 {
1555 int err;
1556
1557 if (!boot_cpu_has(X86_FEATURE_AVX))
1558 return 0;
1559 err = simd_register_skciphers_compat(skcipher_algs_aesni_avx,
1560 ARRAY_SIZE(skcipher_algs_aesni_avx),
1561 simd_skcipher_algs_aesni_avx);
1562 if (err)
1563 return err;
1564 err = simd_register_aeads_compat(aes_gcm_algs_aesni_avx,
1565 ARRAY_SIZE(aes_gcm_algs_aesni_avx),
1566 aes_gcm_simdalgs_aesni_avx);
1567 if (err)
1568 return err;
1569 /*
1570 * Note: not all the algorithms registered below actually require
1571 * VPCLMULQDQ. But in practice every CPU with VAES also has VPCLMULQDQ.
1572 * Similarly, the assembler support for both was added at about the same time.
1573 * For simplicity, just always check for VAES and VPCLMULQDQ together.
1574 */
1575 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
1576 if (!boot_cpu_has(X86_FEATURE_AVX2) ||
1577 !boot_cpu_has(X86_FEATURE_VAES) ||
1578 !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) ||
1579 !boot_cpu_has(X86_FEATURE_PCLMULQDQ) ||
1580 !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
1581 return 0;
1582 err = simd_register_skciphers_compat(skcipher_algs_vaes_avx2,
1583 ARRAY_SIZE(skcipher_algs_vaes_avx2),
1584 simd_skcipher_algs_vaes_avx2);
1585 if (err)
1586 return err;
1587
1588 if (!boot_cpu_has(X86_FEATURE_AVX512BW) ||
1589 !boot_cpu_has(X86_FEATURE_AVX512VL) ||
1590 !boot_cpu_has(X86_FEATURE_BMI2) ||
1591 !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
1592 XFEATURE_MASK_AVX512, NULL))
1593 return 0;
1594
1595 err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_256,
1596 ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
1597 simd_skcipher_algs_vaes_avx10_256);
1598 if (err)
1599 return err;
1600 err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256,
1601 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
1602 aes_gcm_simdalgs_vaes_avx10_256);
1603 if (err)
1604 return err;
1605
1606 if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
1607 int i;
1608
1609 for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx10_512); i++)
1610 skcipher_algs_vaes_avx10_512[i].base.cra_priority = 1;
1611 for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
1612 aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
1613 }
1614
1615 err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_512,
1616 ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
1617 simd_skcipher_algs_vaes_avx10_512);
1618 if (err)
1619 return err;
1620 err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512,
1621 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
1622 aes_gcm_simdalgs_vaes_avx10_512);
1623 if (err)
1624 return err;
1625 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
1626 return 0;
1627 }
1628
1629 static void unregister_avx_algs(void)
1630 {
1631 if (simd_skcipher_algs_aesni_avx[0])
1632 simd_unregister_skciphers(skcipher_algs_aesni_avx,
1633 ARRAY_SIZE(skcipher_algs_aesni_avx),
1634 simd_skcipher_algs_aesni_avx);
1635 if (aes_gcm_simdalgs_aesni_avx[0])
1636 simd_unregister_aeads(aes_gcm_algs_aesni_avx,
1637 ARRAY_SIZE(aes_gcm_algs_aesni_avx),
1638 aes_gcm_simdalgs_aesni_avx);
1639 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
1640 if (simd_skcipher_algs_vaes_avx2[0])
1641 simd_unregister_skciphers(skcipher_algs_vaes_avx2,
1642 ARRAY_SIZE(skcipher_algs_vaes_avx2),
1643 simd_skcipher_algs_vaes_avx2);
1644 if (simd_skcipher_algs_vaes_avx10_256[0])
1645 simd_unregister_skciphers(skcipher_algs_vaes_avx10_256,
1646 ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
1647 simd_skcipher_algs_vaes_avx10_256);
1648 if (aes_gcm_simdalgs_vaes_avx10_256[0])
1649 simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256,
1650 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
1651 aes_gcm_simdalgs_vaes_avx10_256);
1652 if (simd_skcipher_algs_vaes_avx10_512[0])
1653 simd_unregister_skciphers(skcipher_algs_vaes_avx10_512,
1654 ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
1655 simd_skcipher_algs_vaes_avx10_512);
1656 if (aes_gcm_simdalgs_vaes_avx10_512[0])
1657 simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512,
1658 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),
1659 aes_gcm_simdalgs_vaes_avx10_512);
1660 #endif
1661 }
1662 #else /* CONFIG_X86_64 */
1663 static struct aead_alg aes_gcm_algs_aesni[0];
1664 static struct simd_aead_alg *aes_gcm_simdalgs_aesni[0];
1665
1666 static int __init register_avx_algs(void)
1667 {
1668 return 0;
1669 }
1670
1671 static void unregister_avx_algs(void)
1672 {
1673 }
1674 #endif /* !CONFIG_X86_64 */
1675
1676 static const struct x86_cpu_id aesni_cpu_id[] = {
1677 X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
1678 {}
1679 };
1680 MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
1681
1682 static int __init aesni_init(void)
1683 {
1684 int err;
1685
1686 if (!x86_match_cpu(aesni_cpu_id))
1687 return -ENODEV;
1688
1689 err = crypto_register_alg(&aesni_cipher_alg);
1690 if (err)
1691 return err;
1692
1693 err = simd_register_skciphers_compat(aesni_skciphers,
1694 ARRAY_SIZE(aesni_skciphers),
1695 aesni_simd_skciphers);
1696 if (err)
1697 goto unregister_cipher;
1698
1699 err = simd_register_aeads_compat(aes_gcm_algs_aesni,
1700 ARRAY_SIZE(aes_gcm_algs_aesni),
1701 aes_gcm_simdalgs_aesni);
1702 if (err)
1703 goto unregister_skciphers;
1704
1705 err = register_avx_algs();
1706 if (err)
1707 goto unregister_avx;
1708
1709 return 0;
1710
1711 unregister_avx:
1712 unregister_avx_algs();
1713 simd_unregister_aeads(aes_gcm_algs_aesni,
1714 ARRAY_SIZE(aes_gcm_algs_aesni),
1715 aes_gcm_simdalgs_aesni);
1716 unregister_skciphers:
1717 simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
1718 aesni_simd_skciphers);
1719 unregister_cipher:
1720 crypto_unregister_alg(&aesni_cipher_alg);
1721 return err;
1722 }
1723
1724 static void __exit aesni_exit(void)
1725 {
1726 simd_unregister_aeads(aes_gcm_algs_aesni,
1727 ARRAY_SIZE(aes_gcm_algs_aesni),
1728 aes_gcm_simdalgs_aesni);
1729 simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
1730 aesni_simd_skciphers);
1731 crypto_unregister_alg(&aesni_cipher_alg);
1732 unregister_avx_algs();
1733 }
1734
1735 module_init(aesni_init);
1736 module_exit(aesni_exit);
1737
1738 MODULE_DESCRIPTION("AES cipher and modes, optimized with AES-NI or VAES instructions");
1739 MODULE_LICENSE("GPL");
1740 MODULE_ALIAS_CRYPTO("aes");
1741