// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
 *
 * Copyright (C) 2015 - 2018 Linaro Ltd.
 * Copyright (C) 2023 Google LLC.
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
#include <crypto/gf128mul.h>
#include <crypto/ghash.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/errno.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/unaligned.h>

MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("ghash");
MODULE_ALIAS_CRYPTO("gcm(aes)");
MODULE_ALIAS_CRYPTO("rfc4106(gcm(aes))");

#define RFC4106_NONCE_SIZE	4

struct ghash_key {
	be128	k;
	u64	h[][2];
};

struct gcm_key {
	u64	h[4][2];
	u32	rk[AES_MAX_KEYLENGTH_U32];
	int	rounds;
	u8	nonce[];	// for RFC4106 nonce
};

struct arm_ghash_desc_ctx {
	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
};

asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
				       u64 const h[][2], const char *head);

asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
				      u64 const h[][2], const char *head);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);

static int ghash_init(struct shash_desc *desc)
{
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);

	*ctx = (struct arm_ghash_desc_ctx){};
	return 0;
}

static void ghash_do_update(int blocks, u64 dg[], const char *src,
			    struct ghash_key *key, const char *head)
{
	kernel_neon_begin();
	if (static_branch_likely(&use_p64))
		pmull_ghash_update_p64(blocks, dg, src, key->h, head);
	else
		pmull_ghash_update_p8(blocks, dg, src, key->h, head);
	kernel_neon_end();
}

static int ghash_update(struct shash_desc *desc, const u8 *src,
			unsigned int len)
{
	struct ghash_key *key = crypto_shash_ctx(desc->tfm);
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	int blocks;

	blocks = len / GHASH_BLOCK_SIZE;
	ghash_do_update(blocks, ctx->digest, src, key, NULL);
	return len - blocks * GHASH_BLOCK_SIZE;
}

static int ghash_export(struct shash_desc *desc, void *out)
{
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	u8 *dst = out;

	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);
	return 0;
}

static int ghash_import(struct shash_desc *desc, const void *in)
{
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	const u8 *src = in;

	ctx->digest[1] = get_unaligned_be64(src);
	ctx->digest[0] = get_unaligned_be64(src + 8);
	return 0;
}

static int ghash_finup(struct shash_desc *desc, const u8 *src,
		       unsigned int len, u8 *dst)
{
	struct ghash_key *key = crypto_shash_ctx(desc->tfm);
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);

	if (len) {
		u8 buf[GHASH_BLOCK_SIZE] = {};

		memcpy(buf, src, len);
		ghash_do_update(1, ctx->digest, buf, key, NULL);
		memzero_explicit(buf, sizeof(buf));
	}
	return ghash_export(desc, dst);
}

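/*
 * Reformat the GHASH key for the NEON code: the 128-bit key is rotated left
 * by one bit (effectively multiplying H by x in GHASH's bit-reflected
 * convention), and whenever the top bit wraps around, the reduction constant
 * 0xc2 << 56 is folded into the upper half.  The result is stored as
 * { low, high } 64-bit words.
 */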
static void ghash_reflect(u64 h[], const be128 *k)
{
	u64 carry = be64_to_cpu(k->a) >> 63;

	h[0] = (be64_to_cpu(k->b) << 1) | carry;
	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);

	if (carry)
		h[1] ^= 0xc200000000000000UL;
}

static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *inkey, unsigned int keylen)
{
	struct ghash_key *key = crypto_shash_ctx(tfm);

	if (keylen != GHASH_BLOCK_SIZE)
		return -EINVAL;

	/* needed for the fallback */
	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
	ghash_reflect(key->h[0], &key->k);

	if (static_branch_likely(&use_p64)) {
		be128 h = key->k;

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[1], &h);

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[2], &h);

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[3], &h);
	}
	return 0;
}

static struct shash_alg ghash_alg = {
	.digestsize		= GHASH_DIGEST_SIZE,
	.init			= ghash_init,
	.update			= ghash_update,
	.finup			= ghash_finup,
	.setkey			= ghash_setkey,
	.export			= ghash_export,
	.import			= ghash_import,
	.descsize		= sizeof(struct arm_ghash_desc_ctx),
	.statesize		= sizeof(struct ghash_desc_ctx),

	.base.cra_name		= "ghash",
	.base.cra_driver_name	= "ghash-ce",
	.base.cra_priority	= 300,
	.base.cra_flags		= CRYPTO_AHASH_ALG_BLOCK_ONLY,
	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct ghash_key) + sizeof(u64[2]),
	.base.cra_module	= THIS_MODULE,
};

asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], const char *src,
				  struct gcm_key const *k, char *dst,
				  const char *iv, int rounds, u32 counter);

asmlinkage void pmull_gcm_enc_final(int blocks, u64 dg[], char *tag,
				    struct gcm_key const *k, char *head,
				    const char *iv, int rounds, u32 counter);

asmlinkage void pmull_gcm_decrypt(int bytes, u64 dg[], const char *src,
				  struct gcm_key const *k, char *dst,
				  const char *iv, int rounds, u32 counter);

asmlinkage int pmull_gcm_dec_final(int bytes, u64 dg[], char *tag,
				   struct gcm_key const *k, char *head,
				   const char *iv, int rounds, u32 counter,
				   const char *otag, int authsize);

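/*
 * Derive the GHASH key H by encrypting the all-zero block with the expanded
 * AES key, then precompute H^2, H^3 and H^4 (in reflected form) so that the
 * NEON code can process multiple blocks per invocation.
 */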
static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey,
			  unsigned int keylen)
{
	struct gcm_key *ctx = crypto_aead_ctx(tfm);
	struct crypto_aes_ctx aes_ctx;
	be128 h, k;
	int ret;

	ret = aes_expandkey(&aes_ctx, inkey, keylen);
	if (ret)
		return -EINVAL;

	aes_encrypt(&aes_ctx, (u8 *)&k, (u8[AES_BLOCK_SIZE]){});

	memcpy(ctx->rk, aes_ctx.key_enc, sizeof(ctx->rk));
	ctx->rounds = 6 + keylen / 4;

	memzero_explicit(&aes_ctx, sizeof(aes_ctx));

	ghash_reflect(ctx->h[0], &k);

	h = k;
	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[1], &h);

	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[2], &h);

	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[3], &h);

	return 0;
}

static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	return crypto_gcm_check_authsize(authsize);
}

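/*
 * Hash the associated data.  Input from the scatterlist is accumulated in
 * buf[] until a full GHASH block is available, so pmull_ghash_update_p64()
 * is only ever handed whole blocks; any remainder is zero-padded at the end.
 */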
static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
			   int *buf_count, struct gcm_key *ctx)
{
	if (*buf_count > 0) {
		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);

		memcpy(&buf[*buf_count], src, buf_added);

		*buf_count += buf_added;
		src += buf_added;
		count -= buf_added;
	}

	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
		int blocks = count / GHASH_BLOCK_SIZE;

		pmull_ghash_update_p64(blocks, dg, src, ctx->h,
				       *buf_count ? buf : NULL);

		src += blocks * GHASH_BLOCK_SIZE;
		count %= GHASH_BLOCK_SIZE;
		*buf_count = 0;
	}

	if (count > 0) {
		memcpy(buf, src, count);
		*buf_count = count;
	}
}

static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	u8 buf[GHASH_BLOCK_SIZE];
	struct scatter_walk walk;
	int buf_count = 0;

	scatterwalk_start(&walk, req->src);

	do {
		unsigned int n;

		n = scatterwalk_next(&walk, len);
		gcm_update_mac(dg, walk.addr, n, buf, &buf_count, ctx);
		scatterwalk_done_src(&walk, n);

		if (unlikely(len / SZ_4K > (len - n) / SZ_4K)) {
			kernel_neon_end();
			kernel_neon_begin();
		}

		len -= n;
	} while (len);

	if (buf_count) {
		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
		pmull_ghash_update_p64(1, dg, buf, ctx->h, NULL);
	}
}

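/*
 * GCM encryption: the NEON helper performs AES-CTR and GHASH in a single
 * pass over the data, with the block counter starting at 2 (counter block 1
 * is reserved for the tag).  The NEON unit is released and reacquired around
 * each skcipher walk step.
 */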
static int gcm_encrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	u32 counter = 2;
	u64 dg[2] = {};
	be128 lengths;
	const u8 *src;
	u8 *tag, *dst;
	int tail, err;

	err = skcipher_walk_aead_encrypt(&walk, req, false);

	kernel_neon_begin();

	if (assoclen)
		gcm_calculate_auth_mac(req, dg, assoclen);

	src = walk.src.virt.addr;
	dst = walk.dst.virt.addr;

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		int nblocks = walk.nbytes / AES_BLOCK_SIZE;

		pmull_gcm_encrypt(nblocks, dg, src, ctx, dst, iv,
				  ctx->rounds, counter);
		counter += nblocks;

		if (walk.nbytes == walk.total) {
			src += nblocks * AES_BLOCK_SIZE;
			dst += nblocks * AES_BLOCK_SIZE;
			break;
		}

		kernel_neon_end();

		err = skcipher_walk_done(&walk,
					 walk.nbytes % AES_BLOCK_SIZE);
		if (err)
			return err;

		src = walk.src.virt.addr;
		dst = walk.dst.virt.addr;

		kernel_neon_begin();
	}

	lengths.a = cpu_to_be64(assoclen * 8);
	lengths.b = cpu_to_be64(req->cryptlen * 8);

	tag = (u8 *)&lengths;
	tail = walk.nbytes % AES_BLOCK_SIZE;

	/*
	 * Bounce via a buffer unless we are encrypting in place and src/dst
	 * are not pointing to the start of the walk buffer. In that case, we
	 * can do a NEON load/xor/store sequence in place as long as we move
	 * the plain/ciphertext and keystream to the start of the register. If
	 * not, do a memcpy() to the end of the buffer so we can reuse the same
	 * logic.
	 */
	if (unlikely(tail && (tail == walk.nbytes || src != dst)))
		src = memcpy(buf + sizeof(buf) - tail, src, tail);

	pmull_gcm_enc_final(tail, dg, tag, ctx, (u8 *)src, iv,
			    ctx->rounds, counter);
	kernel_neon_end();

	if (unlikely(tail && src != dst))
		memcpy(dst, src, tail);

	if (walk.nbytes) {
		err = skcipher_walk_done(&walk, 0);
		if (err)
			return err;
	}

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);

	return 0;
}

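/*
 * GCM decryption mirrors encryption, except that the expected tag is copied
 * from the end of the source scatterlist up front and handed to
 * pmull_gcm_dec_final(), which performs the tag comparison so that we can
 * return -EBADMSG on failure.
 */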
static int gcm_decrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	int authsize = crypto_aead_authsize(aead);
	struct skcipher_walk walk;
	u8 otag[AES_BLOCK_SIZE];
	u8 buf[AES_BLOCK_SIZE];
	u32 counter = 2;
	u64 dg[2] = {};
	be128 lengths;
	const u8 *src;
	u8 *tag, *dst;
	int tail, err, ret;

	scatterwalk_map_and_copy(otag, req->src,
				 req->assoclen + req->cryptlen - authsize,
				 authsize, 0);

	err = skcipher_walk_aead_decrypt(&walk, req, false);

	kernel_neon_begin();

	if (assoclen)
		gcm_calculate_auth_mac(req, dg, assoclen);

	src = walk.src.virt.addr;
	dst = walk.dst.virt.addr;

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		int nblocks = walk.nbytes / AES_BLOCK_SIZE;

		pmull_gcm_decrypt(nblocks, dg, src, ctx, dst, iv,
				  ctx->rounds, counter);
		counter += nblocks;

		if (walk.nbytes == walk.total) {
			src += nblocks * AES_BLOCK_SIZE;
			dst += nblocks * AES_BLOCK_SIZE;
			break;
		}

		kernel_neon_end();

		err = skcipher_walk_done(&walk,
					 walk.nbytes % AES_BLOCK_SIZE);
		if (err)
			return err;

		src = walk.src.virt.addr;
		dst = walk.dst.virt.addr;

		kernel_neon_begin();
	}

	lengths.a = cpu_to_be64(assoclen * 8);
	lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);

	tag = (u8 *)&lengths;
	tail = walk.nbytes % AES_BLOCK_SIZE;

	if (unlikely(tail && (tail == walk.nbytes || src != dst)))
		src = memcpy(buf + sizeof(buf) - tail, src, tail);

	ret = pmull_gcm_dec_final(tail, dg, tag, ctx, (u8 *)src, iv,
				  ctx->rounds, counter, otag, authsize);
	kernel_neon_end();

	if (unlikely(tail && src != dst))
		memcpy(dst, src, tail);

	if (walk.nbytes) {
		err = skcipher_walk_done(&walk, 0);
		if (err)
			return err;
	}

	return ret ? -EBADMSG : 0;
}

static int gcm_aes_encrypt(struct aead_request *req)
{
	return gcm_encrypt(req, req->iv, req->assoclen);
}

static int gcm_aes_decrypt(struct aead_request *req)
{
	return gcm_decrypt(req, req->iv, req->assoclen);
}

/* RFC4106 keys carry a trailing 4-byte nonce that seeds the GCM IV. */
static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
			  unsigned int keylen)
{
	struct gcm_key *ctx = crypto_aead_ctx(tfm);
	int err;

	keylen -= RFC4106_NONCE_SIZE;
	err = gcm_aes_setkey(tfm, inkey, keylen);
	if (err)
		return err;

	memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
	return 0;
}

static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	return crypto_rfc4106_check_authsize(authsize);
}

static int rfc4106_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	u8 iv[GCM_AES_IV_SIZE];

	memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
	memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);

	return crypto_ipsec_check_assoclen(req->assoclen) ?:
	       gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}

static int rfc4106_decrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	u8 iv[GCM_AES_IV_SIZE];

	memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
	memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);

	return crypto_ipsec_check_assoclen(req->assoclen) ?:
	       gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}

static struct aead_alg gcm_aes_algs[] = {{
	.ivsize			= GCM_AES_IV_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= gcm_aes_setkey,
	.setauthsize		= gcm_aes_setauthsize,
	.encrypt		= gcm_aes_encrypt,
	.decrypt		= gcm_aes_decrypt,

	.base.cra_name		= "gcm(aes)",
	.base.cra_driver_name	= "gcm-aes-ce",
	.base.cra_priority	= 400,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_key),
	.base.cra_module	= THIS_MODULE,
}, {
	.ivsize			= GCM_RFC4106_IV_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= rfc4106_setkey,
	.setauthsize		= rfc4106_setauthsize,
	.encrypt		= rfc4106_encrypt,
	.decrypt		= rfc4106_decrypt,

	.base.cra_name		= "rfc4106(gcm(aes))",
	.base.cra_driver_name	= "rfc4106-gcm-aes-ce",
	.base.cra_priority	= 400,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_key) + RFC4106_NONCE_SIZE,
	.base.cra_module	= THIS_MODULE,
}};

/*
 * GHASH is always registered, using the plain NEON (p8) fallback when the
 * vmull.p64 instruction is not available.  The GCM AEADs require PMULL, so
 * they are only registered when HWCAP2_PMULL is set; in that case the ghash
 * context is also enlarged to hold the extra key powers H^2..H^4.
 */
static int __init ghash_ce_mod_init(void)
{
	int err;

	if (!(elf_hwcap & HWCAP_NEON))
		return -ENODEV;

	if (elf_hwcap2 & HWCAP2_PMULL) {
		err = crypto_register_aeads(gcm_aes_algs,
					    ARRAY_SIZE(gcm_aes_algs));
		if (err)
			return err;
		ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
		static_branch_enable(&use_p64);
	}

	err = crypto_register_shash(&ghash_alg);
	if (err)
		goto err_aead;

	return 0;

err_aead:
	if (elf_hwcap2 & HWCAP2_PMULL)
		crypto_unregister_aeads(gcm_aes_algs,
					ARRAY_SIZE(gcm_aes_algs));
	return err;
}

static void __exit ghash_ce_mod_exit(void)
{
	crypto_unregister_shash(&ghash_alg);
	if (elf_hwcap2 & HWCAP2_PMULL)
		crypto_unregister_aeads(gcm_aes_algs,
					ARRAY_SIZE(gcm_aes_algs));
}

module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);