// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
 *
 * Copyright (C) 2015 - 2018 Linaro Ltd.
 * Copyright (C) 2023 Google LLC.
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/gcm.h>
#include <crypto/b128ops.h>
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/gf128mul.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>

MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("ghash");
MODULE_ALIAS_CRYPTO("gcm(aes)");
MODULE_ALIAS_CRYPTO("rfc4106(gcm(aes))");

#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16

#define RFC4106_NONCE_SIZE	4

struct ghash_key {
	be128	k;
	u64	h[][2];
};

struct gcm_key {
	u64	h[4][2];
	u32	rk[AES_MAX_KEYLENGTH_U32];
	int	rounds;
	u8	nonce[];	// for RFC4106 nonce
};

struct ghash_desc_ctx {
	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
	u8 buf[GHASH_BLOCK_SIZE];
	u32 count;
};

asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
				       u64 const h[][2], const char *head);

asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
				      u64 const h[][2], const char *head);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);

static int ghash_init(struct shash_desc *desc)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}

static void ghash_do_update(int blocks, u64 dg[], const char *src,
			    struct ghash_key *key, const char *head)
{
	kernel_neon_begin();
	if (static_branch_likely(&use_p64))
		pmull_ghash_update_p64(blocks, dg, src, key->h, head);
	else
		pmull_ghash_update_p8(blocks, dg, src, key->h, head);
	kernel_neon_end();
}

static int ghash_update(struct shash_desc *desc, const u8 *src,
			unsigned int len)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	ctx->count += len;

	if ((partial + len) >= GHASH_BLOCK_SIZE) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
		int blocks;

		if (partial) {
			int p = GHASH_BLOCK_SIZE - partial;

			memcpy(ctx->buf + partial, src, p);
			src += p;
			len -= p;
		}

		blocks = len / GHASH_BLOCK_SIZE;
		len %= GHASH_BLOCK_SIZE;

		ghash_do_update(blocks, ctx->digest, src, key,
				partial ? ctx->buf : NULL);
		src += blocks * GHASH_BLOCK_SIZE;
		partial = 0;
	}
	if (len)
		memcpy(ctx->buf + partial, src, len);
	return 0;
}

static int ghash_final(struct shash_desc *desc, u8 *dst)
{
	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

	if (partial) {
		struct ghash_key *key = crypto_shash_ctx(desc->tfm);

		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
	}
	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);

	*ctx = (struct ghash_desc_ctx){};
	return 0;
}
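
/*
 * Convert the GHASH key H into the pre-shifted, half-swapped form expected
 * by the vmull.p64/vmull.p8 assembly: swap the two 64-bit halves, shift
 * left by one bit, and fold the bit shifted out at the top back in using
 * the GHASH reduction constant (0xc2 << 56).
 */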
static void ghash_reflect(u64 h[], const be128 *k)
{
	u64 carry = be64_to_cpu(k->a) >> 63;

	h[0] = (be64_to_cpu(k->b) << 1) | carry;
	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);

	if (carry)
		h[1] ^= 0xc200000000000000UL;
}

static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *inkey, unsigned int keylen)
{
	struct ghash_key *key = crypto_shash_ctx(tfm);

	if (keylen != GHASH_BLOCK_SIZE)
		return -EINVAL;

	/* needed for the fallback */
	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
	ghash_reflect(key->h[0], &key->k);

	if (static_branch_likely(&use_p64)) {
		be128 h = key->k;

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[1], &h);

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[2], &h);

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[3], &h);
	}
	return 0;
}

static struct shash_alg ghash_alg = {
	.digestsize		= GHASH_DIGEST_SIZE,
	.init			= ghash_init,
	.update			= ghash_update,
	.final			= ghash_final,
	.setkey			= ghash_setkey,
	.descsize		= sizeof(struct ghash_desc_ctx),

	.base.cra_name		= "ghash",
	.base.cra_driver_name	= "ghash-ce",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct ghash_key) + sizeof(u64[2]),
	.base.cra_module	= THIS_MODULE,
};

void pmull_gcm_encrypt(int blocks, u64 dg[], const char *src,
		       struct gcm_key const *k, char *dst,
		       const char *iv, int rounds, u32 counter);

void pmull_gcm_enc_final(int blocks, u64 dg[], char *tag,
			 struct gcm_key const *k, char *head,
			 const char *iv, int rounds, u32 counter);

void pmull_gcm_decrypt(int bytes, u64 dg[], const char *src,
		       struct gcm_key const *k, char *dst,
		       const char *iv, int rounds, u32 counter);

int pmull_gcm_dec_final(int bytes, u64 dg[], char *tag,
			struct gcm_key const *k, char *head,
			const char *iv, int rounds, u32 counter,
			const char *otag, int authsize);
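
/*
 * Expand the AES key, derive the hash key H by encrypting an all-zeroes
 * block, and precompute the reflected powers H, H^2, H^3 and H^4 so that
 * the vmull.p64 asm code can aggregate several blocks per call.
 */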
static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey,
			  unsigned int keylen)
{
	struct gcm_key *ctx = crypto_aead_ctx(tfm);
	struct crypto_aes_ctx aes_ctx;
	be128 h, k;
	int ret;

	ret = aes_expandkey(&aes_ctx, inkey, keylen);
	if (ret)
		return -EINVAL;

	aes_encrypt(&aes_ctx, (u8 *)&k, (u8[AES_BLOCK_SIZE]){});

	memcpy(ctx->rk, aes_ctx.key_enc, sizeof(ctx->rk));
	ctx->rounds = 6 + keylen / 4;

	memzero_explicit(&aes_ctx, sizeof(aes_ctx));

	ghash_reflect(ctx->h[0], &k);

	h = k;
	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[1], &h);

	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[2], &h);

	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[3], &h);

	return 0;
}

static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	return crypto_gcm_check_authsize(authsize);
}

static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
			   int *buf_count, struct gcm_key *ctx)
{
	if (*buf_count > 0) {
		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);

		memcpy(&buf[*buf_count], src, buf_added);

		*buf_count += buf_added;
		src += buf_added;
		count -= buf_added;
	}

	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
		int blocks = count / GHASH_BLOCK_SIZE;

		pmull_ghash_update_p64(blocks, dg, src, ctx->h,
				       *buf_count ? buf : NULL);

		src += blocks * GHASH_BLOCK_SIZE;
		count %= GHASH_BLOCK_SIZE;
		*buf_count = 0;
	}

	if (count > 0) {
		memcpy(buf, src, count);
		*buf_count = count;
	}
}

static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	u8 buf[GHASH_BLOCK_SIZE];
	struct scatter_walk walk;
	int buf_count = 0;

	scatterwalk_start(&walk, req->src);

	do {
		unsigned int n;

		n = scatterwalk_next(&walk, len);
		gcm_update_mac(dg, walk.addr, n, buf, &buf_count, ctx);
		scatterwalk_done_src(&walk, n);

		if (unlikely(len / SZ_4K > (len - n) / SZ_4K)) {
			kernel_neon_end();
			kernel_neon_begin();
		}

		len -= n;
	} while (len);

	if (buf_count) {
		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
		pmull_ghash_update_p64(1, dg, buf, ctx->h, NULL);
	}
}
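
/*
 * Note that the CTR counter starts at 2: with a 96-bit IV, counter value 1
 * is reserved for the block that masks the authentication tag, which is
 * dealt with in the pmull_gcm_enc_final()/pmull_gcm_dec_final() asm routines.
 */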
static int gcm_encrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	u32 counter = 2;
	u64 dg[2] = {};
	be128 lengths;
	const u8 *src;
	u8 *tag, *dst;
	int tail, err;

	if (WARN_ON_ONCE(!may_use_simd()))
		return -EBUSY;

	err = skcipher_walk_aead_encrypt(&walk, req, false);

	kernel_neon_begin();

	if (assoclen)
		gcm_calculate_auth_mac(req, dg, assoclen);

	src = walk.src.virt.addr;
	dst = walk.dst.virt.addr;

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		int nblocks = walk.nbytes / AES_BLOCK_SIZE;

		pmull_gcm_encrypt(nblocks, dg, src, ctx, dst, iv,
				  ctx->rounds, counter);
		counter += nblocks;

		if (walk.nbytes == walk.total) {
			src += nblocks * AES_BLOCK_SIZE;
			dst += nblocks * AES_BLOCK_SIZE;
			break;
		}

		kernel_neon_end();

		err = skcipher_walk_done(&walk,
					 walk.nbytes % AES_BLOCK_SIZE);
		if (err)
			return err;

		src = walk.src.virt.addr;
		dst = walk.dst.virt.addr;

		kernel_neon_begin();
	}

	lengths.a = cpu_to_be64(assoclen * 8);
	lengths.b = cpu_to_be64(req->cryptlen * 8);

	tag = (u8 *)&lengths;
	tail = walk.nbytes % AES_BLOCK_SIZE;

	/*
	 * Bounce via a buffer unless we are encrypting in place and src/dst
	 * are not pointing to the start of the walk buffer. In that case, we
	 * can do a NEON load/xor/store sequence in place as long as we move
	 * the plain/ciphertext and keystream to the start of the register. If
	 * not, do a memcpy() to the end of the buffer so we can reuse the same
	 * logic.
	 */
	if (unlikely(tail && (tail == walk.nbytes || src != dst)))
		src = memcpy(buf + sizeof(buf) - tail, src, tail);

	pmull_gcm_enc_final(tail, dg, tag, ctx, (u8 *)src, iv,
			    ctx->rounds, counter);
	kernel_neon_end();

	if (unlikely(tail && src != dst))
		memcpy(dst, src, tail);

	if (walk.nbytes) {
		err = skcipher_walk_done(&walk, 0);
		if (err)
			return err;
	}

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);

	return 0;
}

static int gcm_decrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	int authsize = crypto_aead_authsize(aead);
	struct skcipher_walk walk;
	u8 otag[AES_BLOCK_SIZE];
	u8 buf[AES_BLOCK_SIZE];
	u32 counter = 2;
	u64 dg[2] = {};
	be128 lengths;
	const u8 *src;
	u8 *tag, *dst;
	int tail, err, ret;

	if (WARN_ON_ONCE(!may_use_simd()))
		return -EBUSY;

	scatterwalk_map_and_copy(otag, req->src,
				 req->assoclen + req->cryptlen - authsize,
				 authsize, 0);

	err = skcipher_walk_aead_decrypt(&walk, req, false);

	kernel_neon_begin();

	if (assoclen)
		gcm_calculate_auth_mac(req, dg, assoclen);

	src = walk.src.virt.addr;
	dst = walk.dst.virt.addr;

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		int nblocks = walk.nbytes / AES_BLOCK_SIZE;

		pmull_gcm_decrypt(nblocks, dg, src, ctx, dst, iv,
				  ctx->rounds, counter);
		counter += nblocks;

		if (walk.nbytes == walk.total) {
			src += nblocks * AES_BLOCK_SIZE;
			dst += nblocks * AES_BLOCK_SIZE;
			break;
		}

		kernel_neon_end();

		err = skcipher_walk_done(&walk,
					 walk.nbytes % AES_BLOCK_SIZE);
		if (err)
			return err;

		src = walk.src.virt.addr;
		dst = walk.dst.virt.addr;

		kernel_neon_begin();
	}

	lengths.a = cpu_to_be64(assoclen * 8);
	lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);

	tag = (u8 *)&lengths;
	tail = walk.nbytes % AES_BLOCK_SIZE;

	if (unlikely(tail && (tail == walk.nbytes || src != dst)))
		src = memcpy(buf + sizeof(buf) - tail, src, tail);

	ret = pmull_gcm_dec_final(tail, dg, tag, ctx, (u8 *)src, iv,
				  ctx->rounds, counter, otag, authsize);
	kernel_neon_end();

	if (unlikely(tail && src != dst))
		memcpy(dst, src, tail);

	if (walk.nbytes) {
		err = skcipher_walk_done(&walk, 0);
		if (err)
			return err;
	}

	return ret ? -EBADMSG : 0;
}

static int gcm_aes_encrypt(struct aead_request *req)
{
	return gcm_encrypt(req, req->iv, req->assoclen);
}

static int gcm_aes_decrypt(struct aead_request *req)
{
	return gcm_decrypt(req, req->iv, req->assoclen);
}
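
/*
 * RFC 4106 key material consists of the AES key followed by a 4-byte salt;
 * the salt is kept in the context and forms the fixed leading part of the
 * per-request IV.
 */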
static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
			  unsigned int keylen)
{
	struct gcm_key *ctx = crypto_aead_ctx(tfm);
	int err;

	keylen -= RFC4106_NONCE_SIZE;
	err = gcm_aes_setkey(tfm, inkey, keylen);
	if (err)
		return err;

	memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
	return 0;
}

static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	return crypto_rfc4106_check_authsize(authsize);
}

static int rfc4106_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	u8 iv[GCM_AES_IV_SIZE];

	memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
	memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);

	return crypto_ipsec_check_assoclen(req->assoclen) ?:
	       gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}

static int rfc4106_decrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	u8 iv[GCM_AES_IV_SIZE];

	memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
	memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);

	return crypto_ipsec_check_assoclen(req->assoclen) ?:
	       gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}

static struct aead_alg gcm_aes_algs[] = {{
	.ivsize			= GCM_AES_IV_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= gcm_aes_setkey,
	.setauthsize		= gcm_aes_setauthsize,
	.encrypt		= gcm_aes_encrypt,
	.decrypt		= gcm_aes_decrypt,

	.base.cra_name		= "gcm(aes)",
	.base.cra_driver_name	= "gcm-aes-ce",
	.base.cra_priority	= 400,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_key),
	.base.cra_module	= THIS_MODULE,
}, {
	.ivsize			= GCM_RFC4106_IV_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= rfc4106_setkey,
	.setauthsize		= rfc4106_setauthsize,
	.encrypt		= rfc4106_encrypt,
	.decrypt		= rfc4106_decrypt,

	.base.cra_name		= "rfc4106(gcm(aes))",
	.base.cra_driver_name	= "rfc4106-gcm-aes-ce",
	.base.cra_priority	= 400,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_key) + RFC4106_NONCE_SIZE,
	.base.cra_module	= THIS_MODULE,
}};
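
/*
 * The GCM AEADs require the 64x64-bit polynomial multiply (vmull.p64)
 * provided by the PMULL extension, so they are only registered (and the
 * use_p64 static key only enabled) when HWCAP2_PMULL is present. In that
 * case the GHASH context is also enlarged to hold the three extra key
 * powers set up by ghash_setkey().
 */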
static int __init ghash_ce_mod_init(void)
{
	int err;

	if (!(elf_hwcap & HWCAP_NEON))
		return -ENODEV;

	if (elf_hwcap2 & HWCAP2_PMULL) {
		err = crypto_register_aeads(gcm_aes_algs,
					    ARRAY_SIZE(gcm_aes_algs));
		if (err)
			return err;
		ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
		static_branch_enable(&use_p64);
	}

	err = crypto_register_shash(&ghash_alg);
	if (err)
		goto err_aead;

	return 0;

err_aead:
	if (elf_hwcap2 & HWCAP2_PMULL)
		crypto_unregister_aeads(gcm_aes_algs,
					ARRAY_SIZE(gcm_aes_algs));
	return err;
}

static void __exit ghash_ce_mod_exit(void)
{
	crypto_unregister_shash(&ghash_alg);
	if (elf_hwcap2 & HWCAP2_PMULL)
		crypto_unregister_aeads(gcm_aes_algs,
					ARRAY_SIZE(gcm_aes_algs));
}

module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);