13d176751SEric Biggers // SPDX-License-Identifier: GPL-2.0-or-later
23d176751SEric Biggers /*
361f66c52SEric Biggers * GF(2^128) polynomial hashing: GHASH and POLYVAL
43d176751SEric Biggers *
53d176751SEric Biggers * Copyright 2025 Google LLC
63d176751SEric Biggers */
73d176751SEric Biggers
861f66c52SEric Biggers #include <crypto/gf128hash.h>
93d176751SEric Biggers #include <linux/export.h>
103d176751SEric Biggers #include <linux/module.h>
113d176751SEric Biggers #include <linux/string.h>
123d176751SEric Biggers #include <linux/unaligned.h>
133d176751SEric Biggers
143d176751SEric Biggers /*
15*c417e704SEric Biggers * GHASH and POLYVAL are almost-XOR-universal hash functions. They interpret
16*c417e704SEric Biggers * the message as the coefficients of a polynomial in the finite field GF(2^128)
17*c417e704SEric Biggers * and evaluate that polynomial at a secret point.
183d176751SEric Biggers *
19*c417e704SEric Biggers * Neither GHASH nor POLYVAL is a cryptographic hash function. They should be
20*c417e704SEric Biggers * used only by algorithms that are specifically designed to use them.
213d176751SEric Biggers *
22*c417e704SEric Biggers * GHASH is the older variant, defined as part of GCM in NIST SP 800-38D
23*c417e704SEric Biggers * (https://nvlpubs.nist.gov/nistpubs/legacy/sp/nistspecialpublication800-38d.pdf).
24*c417e704SEric Biggers * GHASH is hard to implement directly, due to its backwards mapping between
 * bits and polynomial coefficients. GHASH implementations typically pre- and
 * post-process the inputs and outputs (mainly by byte-swapping) to convert the
27*c417e704SEric Biggers * GHASH computation into an equivalent computation over a different,
28*c417e704SEric Biggers * easier-to-use representation of GF(2^128).
293d176751SEric Biggers *
30*c417e704SEric Biggers * POLYVAL is a newer GF(2^128) polynomial hash, originally defined as part of
31*c417e704SEric Biggers * AES-GCM-SIV (https://datatracker.ietf.org/doc/html/rfc8452) and also used by
32*c417e704SEric Biggers * HCTR2 (https://eprint.iacr.org/2021/1441.pdf). It uses that easier-to-use
33*c417e704SEric Biggers * field representation directly, eliminating the data conversion steps.
343d176751SEric Biggers *
35*c417e704SEric Biggers * This file provides library APIs for GHASH and POLYVAL. These APIs can
36*c417e704SEric Biggers * delegate to either a generic implementation or an architecture-optimized
37*c417e704SEric Biggers * implementation. Due to the mathematical relationship between GHASH and
38*c417e704SEric Biggers * POLYVAL, in some cases code for one is reused with the other.
393d176751SEric Biggers *
403d176751SEric Biggers * For the generic implementation, we don't use the traditional table approach
413d176751SEric Biggers * to GF(2^128) multiplication. That approach is not constant-time and requires
423d176751SEric Biggers * a lot of memory. Instead, we use a different approach which emulates
433d176751SEric Biggers * carryless multiplication using standard multiplications by spreading the data
443d176751SEric Biggers * bits apart using "holes". This allows the carries to spill harmlessly. This
453d176751SEric Biggers * approach is borrowed from BoringSSL, which in turn credits BearSSL's
463d176751SEric Biggers * documentation (https://bearssl.org/constanttime.html#ghash-for-gcm) for the
473d176751SEric Biggers * "holes" trick and a presentation by Shay Gueron
483d176751SEric Biggers * (https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf) for the
493d176751SEric Biggers * 256-bit => 128-bit reduction algorithm.
503d176751SEric Biggers */
513d176751SEric Biggers
523d176751SEric Biggers #ifdef CONFIG_ARCH_SUPPORTS_INT128
533d176751SEric Biggers
/* Do a 64 x 64 => 128 bit carryless multiplication. */
static void clmul64(u64 a, u64 b, u64 *out_lo, u64 *out_hi)
{
	/*
	 * With 64-bit multiplicands and one term every 4 bits, there would be
	 * up to 64 / 4 = 16 one bits per column when each multiplication is
	 * written out as a series of additions in the schoolbook manner.
	 * Unfortunately, that doesn't work since the value 16 is 1 too large to
	 * fit in 4 bits. Carries would sometimes overflow into the next term.
	 *
	 * Using one term every 5 bits would work. However, that would cost
	 * 5 x 5 = 25 multiplications instead of 4 x 4 = 16.
	 *
	 * Instead, mask off 4 bits from one multiplicand, giving a max of 15
	 * one bits per column. Then handle those 4 bits separately.
	 */
	u64 a0 = a & 0x1111111111111110;
	u64 a1 = a & 0x2222222222222220;
	u64 a2 = a & 0x4444444444444440;
	u64 a3 = a & 0x8888888888888880;

	u64 b0 = b & 0x1111111111111111;
	u64 b1 = b & 0x2222222222222222;
	u64 b2 = b & 0x4444444444444444;
	u64 b3 = b & 0x8888888888888888;

	/*
	 * Multiply the high 60 bits of @a by @b. Since @ai has its bits at
	 * positions i (mod 4) and @bj has its bits at positions j (mod 4),
	 * each product ai * bj contributes result bits only at positions
	 * (i + j) mod 4. Thus @ck below accumulates the result bits at
	 * positions k (mod 4), with carries confined to the "hole" bits in
	 * between, which get masked off at the end.
	 */
	u128 c0 = (a0 * (u128)b0) ^ (a1 * (u128)b3) ^
		  (a2 * (u128)b2) ^ (a3 * (u128)b1);
	u128 c1 = (a0 * (u128)b1) ^ (a1 * (u128)b0) ^
		  (a2 * (u128)b3) ^ (a3 * (u128)b2);
	u128 c2 = (a0 * (u128)b2) ^ (a1 * (u128)b1) ^
		  (a2 * (u128)b0) ^ (a3 * (u128)b3);
	u128 c3 = (a0 * (u128)b3) ^ (a1 * (u128)b2) ^
		  (a2 * (u128)b1) ^ (a3 * (u128)b0);

	/*
	 * Multiply the low 4 bits of @a by @b. Each -(bit) expands a single
	 * bit of @a into an all-ones or all-zeroes mask, selecting @b or 0.
	 */
	u64 e0 = -(a & 1) & b;
	u64 e1 = -((a >> 1) & 1) & b;
	u64 e2 = -((a >> 2) & 1) & b;
	u64 e3 = -((a >> 3) & 1) & b;
	u64 extra_lo = e0 ^ (e1 << 1) ^ (e2 << 2) ^ (e3 << 3);
	u64 extra_hi = (e1 >> 63) ^ (e2 >> 62) ^ (e3 >> 61);

	/*
	 * Add all the intermediate products together. Masking each @ck down
	 * to the positions k (mod 4) discards the carry bits that spilled
	 * into the holes.
	 */
	*out_lo = (((u64)c0) & 0x1111111111111111) ^
		  (((u64)c1) & 0x2222222222222222) ^
		  (((u64)c2) & 0x4444444444444444) ^
		  (((u64)c3) & 0x8888888888888888) ^ extra_lo;
	*out_hi = (((u64)(c0 >> 64)) & 0x1111111111111111) ^
		  (((u64)(c1 >> 64)) & 0x2222222222222222) ^
		  (((u64)(c2 >> 64)) & 0x4444444444444444) ^
		  (((u64)(c3 >> 64)) & 0x8888888888888888) ^ extra_hi;
}
1083d176751SEric Biggers
1093d176751SEric Biggers #else /* CONFIG_ARCH_SUPPORTS_INT128 */
1103d176751SEric Biggers
/* Do a 32 x 32 => 64 bit carryless multiplication. */
static u64 clmul32(u32 a, u32 b)
{
	/*
	 * With 32-bit multiplicands and one term every 4 bits, there are up to
	 * 32 / 4 = 8 one bits per column when each multiplication is written
	 * out as a series of additions in the schoolbook manner. The value 8
	 * fits in 4 bits, so the carries don't overflow into the next term.
	 */
	u32 a0 = a & 0x11111111;
	u32 a1 = a & 0x22222222;
	u32 a2 = a & 0x44444444;
	u32 a3 = a & 0x88888888;

	u32 b0 = b & 0x11111111;
	u32 b1 = b & 0x22222222;
	u32 b2 = b & 0x44444444;
	u32 b3 = b & 0x88888888;

	/*
	 * Since @ai has its bits at positions i (mod 4) and @bj has its bits
	 * at positions j (mod 4), each product ai * bj contributes result
	 * bits only at positions (i + j) mod 4. Thus @ck accumulates the
	 * result bits at positions k (mod 4); carries stay in the "holes".
	 */
	u64 c0 = (a0 * (u64)b0) ^ (a1 * (u64)b3) ^
		 (a2 * (u64)b2) ^ (a3 * (u64)b1);
	u64 c1 = (a0 * (u64)b1) ^ (a1 * (u64)b0) ^
		 (a2 * (u64)b3) ^ (a3 * (u64)b2);
	u64 c2 = (a0 * (u64)b2) ^ (a1 * (u64)b1) ^
		 (a2 * (u64)b0) ^ (a3 * (u64)b3);
	u64 c3 = (a0 * (u64)b3) ^ (a1 * (u64)b2) ^
		 (a2 * (u64)b1) ^ (a3 * (u64)b0);

	/*
	 * Add all the intermediate products together. Masking each @ck down
	 * to the positions k (mod 4) discards the carries in the holes.
	 */
	return (c0 & 0x1111111111111111) ^
	       (c1 & 0x2222222222222222) ^
	       (c2 & 0x4444444444444444) ^
	       (c3 & 0x8888888888888888);
}
1453d176751SEric Biggers
1463d176751SEric Biggers /* Do a 64 x 64 => 128 bit carryless multiplication. */
clmul64(u64 a,u64 b,u64 * out_lo,u64 * out_hi)1473d176751SEric Biggers static void clmul64(u64 a, u64 b, u64 *out_lo, u64 *out_hi)
1483d176751SEric Biggers {
1493d176751SEric Biggers u32 a_lo = (u32)a;
1503d176751SEric Biggers u32 a_hi = a >> 32;
1513d176751SEric Biggers u32 b_lo = (u32)b;
1523d176751SEric Biggers u32 b_hi = b >> 32;
1533d176751SEric Biggers
1543d176751SEric Biggers /* Karatsuba multiplication */
1553d176751SEric Biggers u64 lo = clmul32(a_lo, b_lo);
1563d176751SEric Biggers u64 hi = clmul32(a_hi, b_hi);
1573d176751SEric Biggers u64 mi = clmul32(a_lo ^ a_hi, b_lo ^ b_hi) ^ lo ^ hi;
1583d176751SEric Biggers
1593d176751SEric Biggers *out_lo = lo ^ (mi << 32);
1603d176751SEric Biggers *out_hi = hi ^ (mi >> 32);
1613d176751SEric Biggers }
1623d176751SEric Biggers #endif /* !CONFIG_ARCH_SUPPORTS_INT128 */
1633d176751SEric Biggers
/* Compute @a = @a * @b * x^-128 in the POLYVAL field. */
static void __maybe_unused
polyval_mul_generic(struct polyval_elem *a, const struct polyval_elem *b)
{
	u64 c0, c1, c2, c3, mi0, mi1;

	/*
	 * Carryless-multiply @a by @b using Karatsuba multiplication. Store
	 * the 256-bit product in @c0 (low) through @c3 (high).
	 */
	clmul64(le64_to_cpu(a->lo), le64_to_cpu(b->lo), &c0, &c1);
	clmul64(le64_to_cpu(a->hi), le64_to_cpu(b->hi), &c2, &c3);
	/* XOR-ing the halves before the byte-swap is fine; XOR commutes. */
	clmul64(le64_to_cpu(a->lo ^ a->hi), le64_to_cpu(b->lo ^ b->hi),
		&mi0, &mi1);
	/* Karatsuba middle term: (lo + hi)*(lo' + hi') + lo*lo' + hi*hi'. */
	mi0 ^= c0 ^ c2;
	mi1 ^= c1 ^ c3;
	/* Fold the 128-bit middle term into the middle of the product. */
	c1 ^= mi0;
	c2 ^= mi1;

	/*
	 * Cancel out the low 128 bits of the product by adding multiples of
	 * G(x) = x^128 + x^127 + x^126 + x^121 + 1. Do this in two steps, each
	 * of which cancels out 64 bits. Note that we break G(x) into three
	 * parts: 1, x^64 * (x^63 + x^62 + x^57), and x^128 * 1.
	 */

	/*
	 * First, add G(x) times c0 as follows:
	 *
	 * (c0, c1, c2) = (0,
	 *		   c1 + (c0 * (x^63 + x^62 + x^57) mod x^64),
	 *		   c2 + c0 + floor((c0 * (x^63 + x^62 + x^57)) / x^64))
	 */
	c1 ^= (c0 << 63) ^ (c0 << 62) ^ (c0 << 57);
	c2 ^= c0 ^ (c0 >> 1) ^ (c0 >> 2) ^ (c0 >> 7);

	/*
	 * Second, add G(x) times the new c1:
	 *
	 * (c1, c2, c3) = (0,
	 *		   c2 + (c1 * (x^63 + x^62 + x^57) mod x^64),
	 *		   c3 + c1 + floor((c1 * (x^63 + x^62 + x^57)) / x^64))
	 */
	c2 ^= (c1 << 63) ^ (c1 << 62) ^ (c1 << 57);
	c3 ^= c1 ^ (c1 >> 1) ^ (c1 >> 2) ^ (c1 >> 7);

	/* Return (c2, c3). This implicitly multiplies by x^-128. */
	a->lo = cpu_to_le64(c2);
	a->hi = cpu_to_le64(c3);
}
2143d176751SEric Biggers
ghash_blocks_generic(struct polyval_elem * acc,const struct polyval_elem * key,const u8 * data,size_t nblocks)215*c417e704SEric Biggers static void __maybe_unused ghash_blocks_generic(struct polyval_elem *acc,
216*c417e704SEric Biggers const struct polyval_elem *key,
217*c417e704SEric Biggers const u8 *data, size_t nblocks)
218*c417e704SEric Biggers {
219*c417e704SEric Biggers do {
220*c417e704SEric Biggers acc->lo ^=
221*c417e704SEric Biggers cpu_to_le64(get_unaligned_be64((__be64 *)(data + 8)));
222*c417e704SEric Biggers acc->hi ^= cpu_to_le64(get_unaligned_be64((__be64 *)data));
223*c417e704SEric Biggers polyval_mul_generic(acc, key);
224*c417e704SEric Biggers data += GHASH_BLOCK_SIZE;
225*c417e704SEric Biggers } while (--nblocks);
226*c417e704SEric Biggers }
227*c417e704SEric Biggers
2283d176751SEric Biggers static void __maybe_unused
polyval_blocks_generic(struct polyval_elem * acc,const struct polyval_elem * key,const u8 * data,size_t nblocks)2293d176751SEric Biggers polyval_blocks_generic(struct polyval_elem *acc, const struct polyval_elem *key,
2303d176751SEric Biggers const u8 *data, size_t nblocks)
2313d176751SEric Biggers {
2323d176751SEric Biggers do {
2333d176751SEric Biggers acc->lo ^= get_unaligned((__le64 *)data);
2343d176751SEric Biggers acc->hi ^= get_unaligned((__le64 *)(data + 8));
2353d176751SEric Biggers polyval_mul_generic(acc, key);
2363d176751SEric Biggers data += POLYVAL_BLOCK_SIZE;
2373d176751SEric Biggers } while (--nblocks);
2383d176751SEric Biggers }
2393d176751SEric Biggers
240*c417e704SEric Biggers /* Convert the key from GHASH format to POLYVAL format. */
ghash_key_to_polyval(const u8 in[GHASH_BLOCK_SIZE],struct polyval_elem * out)241*c417e704SEric Biggers static void __maybe_unused ghash_key_to_polyval(const u8 in[GHASH_BLOCK_SIZE],
242*c417e704SEric Biggers struct polyval_elem *out)
243*c417e704SEric Biggers {
244*c417e704SEric Biggers u64 hi = get_unaligned_be64(&in[0]);
245*c417e704SEric Biggers u64 lo = get_unaligned_be64(&in[8]);
246*c417e704SEric Biggers u64 mask = (s64)hi >> 63;
247*c417e704SEric Biggers
248*c417e704SEric Biggers hi = (hi << 1) ^ (lo >> 63) ^ (mask & ((u64)0xc2 << 56));
249*c417e704SEric Biggers lo = (lo << 1) ^ (mask & 1);
250*c417e704SEric Biggers out->lo = cpu_to_le64(lo);
251*c417e704SEric Biggers out->hi = cpu_to_le64(hi);
252*c417e704SEric Biggers }
253*c417e704SEric Biggers
254*c417e704SEric Biggers /* Convert the accumulator from POLYVAL format to GHASH format. */
polyval_acc_to_ghash(const struct polyval_elem * in,u8 out[GHASH_BLOCK_SIZE])255*c417e704SEric Biggers static void polyval_acc_to_ghash(const struct polyval_elem *in,
256*c417e704SEric Biggers u8 out[GHASH_BLOCK_SIZE])
257*c417e704SEric Biggers {
258*c417e704SEric Biggers put_unaligned_be64(le64_to_cpu(in->hi), &out[0]);
259*c417e704SEric Biggers put_unaligned_be64(le64_to_cpu(in->lo), &out[8]);
260*c417e704SEric Biggers }
261*c417e704SEric Biggers
262*c417e704SEric Biggers /* Convert the accumulator from GHASH format to POLYVAL format. */
ghash_acc_to_polyval(const u8 in[GHASH_BLOCK_SIZE],struct polyval_elem * out)263*c417e704SEric Biggers static void __maybe_unused ghash_acc_to_polyval(const u8 in[GHASH_BLOCK_SIZE],
264*c417e704SEric Biggers struct polyval_elem *out)
265*c417e704SEric Biggers {
266*c417e704SEric Biggers out->lo = cpu_to_le64(get_unaligned_be64(&in[8]));
267*c417e704SEric Biggers out->hi = cpu_to_le64(get_unaligned_be64(&in[0]));
268*c417e704SEric Biggers }
269*c417e704SEric Biggers
27061f66c52SEric Biggers #ifdef CONFIG_CRYPTO_LIB_GF128HASH_ARCH
27161f66c52SEric Biggers #include "gf128hash.h" /* $(SRCARCH)/gf128hash.h */
272b3b6e8f9SEric Biggers #endif
273b3b6e8f9SEric Biggers
/* Expand @raw_key into @key for subsequent use by the GHASH functions. */
void ghash_preparekey(struct ghash_key *key, const u8 raw_key[GHASH_BLOCK_SIZE])
{
#ifdef ghash_preparekey_arch
	ghash_preparekey_arch(key, raw_key);
#else
	/* Store the key in POLYVAL format, which the generic code uses. */
	ghash_key_to_polyval(raw_key, &key->h);
#endif
}
EXPORT_SYMBOL_GPL(ghash_preparekey);
283*c417e704SEric Biggers
/* Multiply the accumulator by the key, without absorbing any new data. */
static void ghash_mul(struct ghash_ctx *ctx)
{
#ifdef ghash_mul_arch
	ghash_mul_arch(&ctx->acc, ctx->key);
#elif defined(ghash_blocks_arch)
	/*
	 * No dedicated arch multiply; processing one all-zeroes block is
	 * equivalent, since it XORs nothing in before the multiplication.
	 */
	static const u8 zeroes[GHASH_BLOCK_SIZE];

	ghash_blocks_arch(&ctx->acc, ctx->key, zeroes, 1);
#else
	polyval_mul_generic(&ctx->acc, &ctx->key->h);
#endif
}
296*c417e704SEric Biggers
/* nblocks is always >= 1. */
static void ghash_blocks(struct ghash_ctx *ctx, const u8 *data, size_t nblocks)
{
	/* Dispatch to the arch-optimized implementation if one is defined. */
#ifdef ghash_blocks_arch
	ghash_blocks_arch(&ctx->acc, ctx->key, data, nblocks);
#else
	ghash_blocks_generic(&ctx->acc, &ctx->key->h, data, nblocks);
#endif
}
306*c417e704SEric Biggers
/* Absorb @len bytes of message data into the GHASH state. */
void ghash_update(struct ghash_ctx *ctx, const u8 *data, size_t len)
{
	/* First, try to finish any partially-filled block. */
	if (unlikely(ctx->partial)) {
		size_t n = min(len, GHASH_BLOCK_SIZE - ctx->partial);

		len -= n;
		/*
		 * The accumulator is kept in POLYVAL format, whose byte
		 * order is the reverse of GHASH's; hence the reversed index.
		 */
		while (n--)
			ctx->acc.bytes[GHASH_BLOCK_SIZE - 1 - ctx->partial++] ^=
				*data++;
		if (ctx->partial < GHASH_BLOCK_SIZE)
			return;
		/* The block is now full; multiply it into the hash state. */
		ghash_mul(ctx);
	}
	/* Process as many whole blocks as possible. */
	if (len >= GHASH_BLOCK_SIZE) {
		size_t nblocks = len / GHASH_BLOCK_SIZE;

		ghash_blocks(ctx, data, nblocks);
		data += len & ~(GHASH_BLOCK_SIZE - 1);
		len &= GHASH_BLOCK_SIZE - 1;
	}
	/* Buffer any remaining partial block by XOR-ing it into the acc. */
	for (size_t i = 0; i < len; i++)
		ctx->acc.bytes[GHASH_BLOCK_SIZE - 1 - i] ^= data[i];
	ctx->partial = len;
}
EXPORT_SYMBOL_GPL(ghash_update);
332*c417e704SEric Biggers
/* Finish the hash and write the 16-byte GHASH value to @out. */
void ghash_final(struct ghash_ctx *ctx, u8 out[GHASH_BLOCK_SIZE])
{
	/* A buffered partial block still needs to be multiplied in. */
	if (unlikely(ctx->partial))
		ghash_mul(ctx);
	polyval_acc_to_ghash(&ctx->acc, out);
	/* Wipe the context, as the hash state is derived from secret data. */
	memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL_GPL(ghash_final);
341*c417e704SEric Biggers
/* Expand @raw_key into @key for subsequent use by the POLYVAL functions. */
void polyval_preparekey(struct polyval_key *key,
			const u8 raw_key[POLYVAL_BLOCK_SIZE])
{
#ifdef polyval_preparekey_arch
	polyval_preparekey_arch(key, raw_key);
#else
	/* The generic code uses the raw key directly; no conversion needed. */
	memcpy(key->h.bytes, raw_key, POLYVAL_BLOCK_SIZE);
#endif
}
EXPORT_SYMBOL_GPL(polyval_preparekey);
3523d176751SEric Biggers
3533d176751SEric Biggers /*
3543d176751SEric Biggers * polyval_mul_generic() and polyval_blocks_generic() take the key as a
3553d176751SEric Biggers * polyval_elem rather than a polyval_key, so that arch-optimized
3563d176751SEric Biggers * implementations with a different key format can use it as a fallback (if they
3573d176751SEric Biggers * have H^1 stored somewhere in their struct). Thus, the following dispatch
3583d176751SEric Biggers * code is needed to pass the appropriate key argument.
3593d176751SEric Biggers */
3603d176751SEric Biggers
/* Multiply the accumulator by the key, without absorbing any new data. */
static void polyval_mul(struct polyval_ctx *ctx)
{
#ifdef polyval_mul_arch
	polyval_mul_arch(&ctx->acc, ctx->key);
#elif defined(polyval_blocks_arch)
	/*
	 * No dedicated arch multiply; processing one all-zeroes block is
	 * equivalent, since it XORs nothing in before the multiplication.
	 */
	static const u8 zeroes[POLYVAL_BLOCK_SIZE];

	polyval_blocks_arch(&ctx->acc, ctx->key, zeroes, 1);
#else
	polyval_mul_generic(&ctx->acc, &ctx->key->h);
#endif
}
3733d176751SEric Biggers
/* nblocks is always >= 1. */
static void polyval_blocks(struct polyval_ctx *ctx,
			   const u8 *data, size_t nblocks)
{
	/* Dispatch to the arch-optimized implementation if one is defined. */
#ifdef polyval_blocks_arch
	polyval_blocks_arch(&ctx->acc, ctx->key, data, nblocks);
#else
	polyval_blocks_generic(&ctx->acc, &ctx->key->h, data, nblocks);
#endif
}
3843d176751SEric Biggers
/* Absorb @len bytes of message data into the POLYVAL state. */
void polyval_update(struct polyval_ctx *ctx, const u8 *data, size_t len)
{
	/* First, try to complete any partially-filled block. */
	if (unlikely(ctx->partial)) {
		size_t take = min(len, POLYVAL_BLOCK_SIZE - ctx->partial);

		for (size_t i = 0; i < take; i++)
			ctx->acc.bytes[ctx->partial + i] ^= data[i];
		ctx->partial += take;
		data += take;
		len -= take;
		if (ctx->partial < POLYVAL_BLOCK_SIZE)
			return;
		/* The block is now full; multiply it into the hash state. */
		polyval_mul(ctx);
	}
	/* Process as many whole blocks as possible. */
	if (len >= POLYVAL_BLOCK_SIZE) {
		size_t whole = len & ~(POLYVAL_BLOCK_SIZE - 1);

		polyval_blocks(ctx, data, len / POLYVAL_BLOCK_SIZE);
		data += whole;
		len -= whole;
	}
	/* Buffer any remaining partial block by XOR-ing it into the acc. */
	for (size_t i = 0; i < len; i++)
		ctx->acc.bytes[i] ^= data[i];
	ctx->partial = len;
}
EXPORT_SYMBOL_GPL(polyval_update);
4093d176751SEric Biggers
/* Finish the hash and write the 16-byte POLYVAL value to @out. */
void polyval_final(struct polyval_ctx *ctx, u8 out[POLYVAL_BLOCK_SIZE])
{
	/* A buffered partial block still needs to be multiplied in. */
	if (unlikely(ctx->partial))
		polyval_mul(ctx);
	memcpy(out, &ctx->acc, POLYVAL_BLOCK_SIZE);
	/* Wipe the context, as the hash state is derived from secret data. */
	memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL_GPL(polyval_final);
4183d176751SEric Biggers
#ifdef gf128hash_mod_init_arch
static int __init gf128hash_mod_init(void)
{
	/*
	 * Run the arch-specific initialization (presumably CPU feature
	 * detection; see $(SRCARCH)/gf128hash.h — TODO confirm).
	 */
	gf128hash_mod_init_arch();
	return 0;
}
subsys_initcall(gf128hash_mod_init);

static void __exit gf128hash_mod_exit(void)
{
	/* Nothing to tear down; this exists so the module can be unloaded. */
}
module_exit(gf128hash_mod_exit);
#endif
4323d176751SEric Biggers
43361f66c52SEric Biggers MODULE_DESCRIPTION("GF(2^128) polynomial hashing: GHASH and POLYVAL");
4343d176751SEric Biggers MODULE_LICENSE("GPL");
435