xref: /linux/lib/crypto/gf128hash.c (revision c417e7045b70345f59643fb2db67b0e7fbd7fbd0)
13d176751SEric Biggers // SPDX-License-Identifier: GPL-2.0-or-later
23d176751SEric Biggers /*
361f66c52SEric Biggers  * GF(2^128) polynomial hashing: GHASH and POLYVAL
43d176751SEric Biggers  *
53d176751SEric Biggers  * Copyright 2025 Google LLC
63d176751SEric Biggers  */
73d176751SEric Biggers 
861f66c52SEric Biggers #include <crypto/gf128hash.h>
93d176751SEric Biggers #include <linux/export.h>
103d176751SEric Biggers #include <linux/module.h>
113d176751SEric Biggers #include <linux/string.h>
123d176751SEric Biggers #include <linux/unaligned.h>
133d176751SEric Biggers 
143d176751SEric Biggers /*
15*c417e704SEric Biggers  * GHASH and POLYVAL are almost-XOR-universal hash functions.  They interpret
16*c417e704SEric Biggers  * the message as the coefficients of a polynomial in the finite field GF(2^128)
17*c417e704SEric Biggers  * and evaluate that polynomial at a secret point.
183d176751SEric Biggers  *
19*c417e704SEric Biggers  * Neither GHASH nor POLYVAL is a cryptographic hash function.  They should be
20*c417e704SEric Biggers  * used only by algorithms that are specifically designed to use them.
213d176751SEric Biggers  *
22*c417e704SEric Biggers  * GHASH is the older variant, defined as part of GCM in NIST SP 800-38D
23*c417e704SEric Biggers  * (https://nvlpubs.nist.gov/nistpubs/legacy/sp/nistspecialpublication800-38d.pdf).
24*c417e704SEric Biggers  * GHASH is hard to implement directly, due to its backwards mapping between
 * bits and polynomial coefficients.  GHASH implementations typically pre- and
26*c417e704SEric Biggers  * post-process the inputs and outputs (mainly by byte-swapping) to convert the
27*c417e704SEric Biggers  * GHASH computation into an equivalent computation over a different,
28*c417e704SEric Biggers  * easier-to-use representation of GF(2^128).
293d176751SEric Biggers  *
30*c417e704SEric Biggers  * POLYVAL is a newer GF(2^128) polynomial hash, originally defined as part of
31*c417e704SEric Biggers  * AES-GCM-SIV (https://datatracker.ietf.org/doc/html/rfc8452) and also used by
32*c417e704SEric Biggers  * HCTR2 (https://eprint.iacr.org/2021/1441.pdf).  It uses that easier-to-use
33*c417e704SEric Biggers  * field representation directly, eliminating the data conversion steps.
343d176751SEric Biggers  *
35*c417e704SEric Biggers  * This file provides library APIs for GHASH and POLYVAL.  These APIs can
36*c417e704SEric Biggers  * delegate to either a generic implementation or an architecture-optimized
37*c417e704SEric Biggers  * implementation.  Due to the mathematical relationship between GHASH and
38*c417e704SEric Biggers  * POLYVAL, in some cases code for one is reused with the other.
393d176751SEric Biggers  *
403d176751SEric Biggers  * For the generic implementation, we don't use the traditional table approach
413d176751SEric Biggers  * to GF(2^128) multiplication.  That approach is not constant-time and requires
423d176751SEric Biggers  * a lot of memory.  Instead, we use a different approach which emulates
433d176751SEric Biggers  * carryless multiplication using standard multiplications by spreading the data
443d176751SEric Biggers  * bits apart using "holes".  This allows the carries to spill harmlessly.  This
453d176751SEric Biggers  * approach is borrowed from BoringSSL, which in turn credits BearSSL's
463d176751SEric Biggers  * documentation (https://bearssl.org/constanttime.html#ghash-for-gcm) for the
473d176751SEric Biggers  * "holes" trick and a presentation by Shay Gueron
483d176751SEric Biggers  * (https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf) for the
493d176751SEric Biggers  * 256-bit => 128-bit reduction algorithm.
503d176751SEric Biggers  */
513d176751SEric Biggers 
523d176751SEric Biggers #ifdef CONFIG_ARCH_SUPPORTS_INT128
533d176751SEric Biggers 
543d176751SEric Biggers /* Do a 64 x 64 => 128 bit carryless multiplication. */
static void clmul64(u64 a, u64 b, u64 *out_lo, u64 *out_hi)
{
	/*
	 * With 64-bit multiplicands and one term every 4 bits, there would be
	 * up to 64 / 4 = 16 one bits per column when each multiplication is
	 * written out as a series of additions in the schoolbook manner.
	 * Unfortunately, that doesn't work since the value 16 is 1 too large to
	 * fit in 4 bits.  Carries would sometimes overflow into the next term.
	 *
	 * Using one term every 5 bits would work.  However, that would cost
	 * 5 x 5 = 25 multiplications instead of 4 x 4 = 16.
	 *
	 * Instead, mask off 4 bits from one multiplicand, giving a max of 15
	 * one bits per column.  Then handle those 4 bits separately.
	 */
	/* Note the trailing 0 nibble: bits 0-3 of @a are excluded here. */
	u64 a0 = a & 0x1111111111111110;
	u64 a1 = a & 0x2222222222222220;
	u64 a2 = a & 0x4444444444444440;
	u64 a3 = a & 0x8888888888888880;

	u64 b0 = b & 0x1111111111111111;
	u64 b1 = b & 0x2222222222222222;
	u64 b2 = b & 0x4444444444444444;
	u64 b3 = b & 0x8888888888888888;

	/* Multiply the high 60 bits of @a by @b. */
	u128 c0 = (a0 * (u128)b0) ^ (a1 * (u128)b3) ^
		  (a2 * (u128)b2) ^ (a3 * (u128)b1);
	u128 c1 = (a0 * (u128)b1) ^ (a1 * (u128)b0) ^
		  (a2 * (u128)b3) ^ (a3 * (u128)b2);
	u128 c2 = (a0 * (u128)b2) ^ (a1 * (u128)b1) ^
		  (a2 * (u128)b0) ^ (a3 * (u128)b3);
	u128 c3 = (a0 * (u128)b3) ^ (a1 * (u128)b2) ^
		  (a2 * (u128)b1) ^ (a3 * (u128)b0);

	/*
	 * Multiply the low 4 bits of @a by @b.  Each @ei is @b under an
	 * all-ones or all-zeroes mask derived from bit i of @a, i.e. a
	 * branch-free conditional select of @b.
	 */
	u64 e0 = -(a & 1) & b;
	u64 e1 = -((a >> 1) & 1) & b;
	u64 e2 = -((a >> 2) & 1) & b;
	u64 e3 = -((a >> 3) & 1) & b;
	u64 extra_lo = e0 ^ (e1 << 1) ^ (e2 << 2) ^ (e3 << 3);
	u64 extra_hi = (e1 >> 63) ^ (e2 >> 62) ^ (e3 >> 61);

	/*
	 * Add all the intermediate products together.  Each mask keeps only
	 * the bit positions belonging to that term of the carryless product
	 * and discards the carries that spilled into the "holes".
	 */
	*out_lo = (((u64)c0) & 0x1111111111111111) ^
		  (((u64)c1) & 0x2222222222222222) ^
		  (((u64)c2) & 0x4444444444444444) ^
		  (((u64)c3) & 0x8888888888888888) ^ extra_lo;
	*out_hi = (((u64)(c0 >> 64)) & 0x1111111111111111) ^
		  (((u64)(c1 >> 64)) & 0x2222222222222222) ^
		  (((u64)(c2 >> 64)) & 0x4444444444444444) ^
		  (((u64)(c3 >> 64)) & 0x8888888888888888) ^ extra_hi;
}
1083d176751SEric Biggers 
1093d176751SEric Biggers #else /* CONFIG_ARCH_SUPPORTS_INT128 */
1103d176751SEric Biggers 
1113d176751SEric Biggers /* Do a 32 x 32 => 64 bit carryless multiplication. */
static u64 clmul32(u32 a, u32 b)
{
	/*
	 * With 32-bit multiplicands and one term every 4 bits, there are up to
	 * 32 / 4 = 8 one bits per column when each multiplication is written
	 * out as a series of additions in the schoolbook manner.  The value 8
	 * fits in 4 bits, so the carries don't overflow into the next term.
	 * (Unlike the 64-bit u128 version, no bits need special handling.)
	 */
	u32 a0 = a & 0x11111111;
	u32 a1 = a & 0x22222222;
	u32 a2 = a & 0x44444444;
	u32 a3 = a & 0x88888888;

	u32 b0 = b & 0x11111111;
	u32 b1 = b & 0x22222222;
	u32 b2 = b & 0x44444444;
	u32 b3 = b & 0x88888888;

	/* Each @ci collects the product terms whose indices sum to i mod 4. */
	u64 c0 = (a0 * (u64)b0) ^ (a1 * (u64)b3) ^
		 (a2 * (u64)b2) ^ (a3 * (u64)b1);
	u64 c1 = (a0 * (u64)b1) ^ (a1 * (u64)b0) ^
		 (a2 * (u64)b3) ^ (a3 * (u64)b2);
	u64 c2 = (a0 * (u64)b2) ^ (a1 * (u64)b1) ^
		 (a2 * (u64)b0) ^ (a3 * (u64)b3);
	u64 c3 = (a0 * (u64)b3) ^ (a1 * (u64)b2) ^
		 (a2 * (u64)b1) ^ (a3 * (u64)b0);

	/*
	 * Add all the intermediate products together.  The masks keep only the
	 * bit positions each term owns, discarding the spilled carry bits.
	 */
	return (c0 & 0x1111111111111111) ^
	       (c1 & 0x2222222222222222) ^
	       (c2 & 0x4444444444444444) ^
	       (c3 & 0x8888888888888888);
}
1453d176751SEric Biggers 
1463d176751SEric Biggers /* Do a 64 x 64 => 128 bit carryless multiplication. */
static void clmul64(u64 a, u64 b, u64 *out_lo, u64 *out_hi)
{
	u32 al = (u32)a, ah = a >> 32;
	u32 bl = (u32)b, bh = b >> 32;

	/*
	 * Karatsuba: three 32 x 32 carryless multiplications instead of four.
	 * Since (al^ah)*(bl^bh) = al*bl ^ al*bh ^ ah*bl ^ ah*bh over GF(2),
	 * XORing out the low and high products leaves just the cross terms.
	 */
	u64 prod_lo = clmul32(al, bl);
	u64 prod_hi = clmul32(ah, bh);
	u64 prod_mid = clmul32(al ^ ah, bl ^ bh) ^ prod_lo ^ prod_hi;

	/* Fold the middle product into the middle 64 bits of the result. */
	*out_lo = prod_lo ^ (prod_mid << 32);
	*out_hi = prod_hi ^ (prod_mid >> 32);
}
1623d176751SEric Biggers #endif /* !CONFIG_ARCH_SUPPORTS_INT128 */
1633d176751SEric Biggers 
1643d176751SEric Biggers /* Compute @a = @a * @b * x^-128 in the POLYVAL field. */
static void __maybe_unused
polyval_mul_generic(struct polyval_elem *a, const struct polyval_elem *b)
{
	u64 c0, c1, c2, c3, mi0, mi1;

	/*
	 * Carryless-multiply @a by @b using Karatsuba multiplication.  Store
	 * the 256-bit product in @c0 (low) through @c3 (high).
	 */
	clmul64(le64_to_cpu(a->lo), le64_to_cpu(b->lo), &c0, &c1);
	clmul64(le64_to_cpu(a->hi), le64_to_cpu(b->hi), &c2, &c3);
	clmul64(le64_to_cpu(a->lo ^ a->hi), le64_to_cpu(b->lo ^ b->hi),
		&mi0, &mi1);
	/* Recover the 128-bit cross term from the (lo^hi)*(lo^hi) product... */
	mi0 ^= c0 ^ c2;
	mi1 ^= c1 ^ c3;
	/* ...and fold it into the middle 128 bits of the 256-bit product. */
	c1 ^= mi0;
	c2 ^= mi1;

	/*
	 * Cancel out the low 128 bits of the product by adding multiples of
	 * G(x) = x^128 + x^127 + x^126 + x^121 + 1.  Do this in two steps, each
	 * of which cancels out 64 bits.  Note that we break G(x) into three
	 * parts: 1, x^64 * (x^63 + x^62 + x^57), and x^128 * 1.
	 */

	/*
	 * First, add G(x) times c0 as follows:
	 *
	 * (c0, c1, c2) = (0,
	 *                 c1 + (c0 * (x^63 + x^62 + x^57) mod x^64),
	 *		   c2 + c0 + floor((c0 * (x^63 + x^62 + x^57)) / x^64))
	 */
	c1 ^= (c0 << 63) ^ (c0 << 62) ^ (c0 << 57);
	c2 ^= c0 ^ (c0 >> 1) ^ (c0 >> 2) ^ (c0 >> 7);

	/*
	 * Second, add G(x) times the new c1:
	 *
	 * (c1, c2, c3) = (0,
	 *                 c2 + (c1 * (x^63 + x^62 + x^57) mod x^64),
	 *		   c3 + c1 + floor((c1 * (x^63 + x^62 + x^57)) / x^64))
	 */
	c2 ^= (c1 << 63) ^ (c1 << 62) ^ (c1 << 57);
	c3 ^= c1 ^ (c1 >> 1) ^ (c1 >> 2) ^ (c1 >> 7);

	/* Return (c2, c3).  This implicitly multiplies by x^-128. */
	a->lo = cpu_to_le64(c2);
	a->hi = cpu_to_le64(c3);
}
2143d176751SEric Biggers 
ghash_blocks_generic(struct polyval_elem * acc,const struct polyval_elem * key,const u8 * data,size_t nblocks)215*c417e704SEric Biggers static void __maybe_unused ghash_blocks_generic(struct polyval_elem *acc,
216*c417e704SEric Biggers 						const struct polyval_elem *key,
217*c417e704SEric Biggers 						const u8 *data, size_t nblocks)
218*c417e704SEric Biggers {
219*c417e704SEric Biggers 	do {
220*c417e704SEric Biggers 		acc->lo ^=
221*c417e704SEric Biggers 			cpu_to_le64(get_unaligned_be64((__be64 *)(data + 8)));
222*c417e704SEric Biggers 		acc->hi ^= cpu_to_le64(get_unaligned_be64((__be64 *)data));
223*c417e704SEric Biggers 		polyval_mul_generic(acc, key);
224*c417e704SEric Biggers 		data += GHASH_BLOCK_SIZE;
225*c417e704SEric Biggers 	} while (--nblocks);
226*c417e704SEric Biggers }
227*c417e704SEric Biggers 
2283d176751SEric Biggers static void __maybe_unused
polyval_blocks_generic(struct polyval_elem * acc,const struct polyval_elem * key,const u8 * data,size_t nblocks)2293d176751SEric Biggers polyval_blocks_generic(struct polyval_elem *acc, const struct polyval_elem *key,
2303d176751SEric Biggers 		       const u8 *data, size_t nblocks)
2313d176751SEric Biggers {
2323d176751SEric Biggers 	do {
2333d176751SEric Biggers 		acc->lo ^= get_unaligned((__le64 *)data);
2343d176751SEric Biggers 		acc->hi ^= get_unaligned((__le64 *)(data + 8));
2353d176751SEric Biggers 		polyval_mul_generic(acc, key);
2363d176751SEric Biggers 		data += POLYVAL_BLOCK_SIZE;
2373d176751SEric Biggers 	} while (--nblocks);
2383d176751SEric Biggers }
2393d176751SEric Biggers 
240*c417e704SEric Biggers /* Convert the key from GHASH format to POLYVAL format. */
ghash_key_to_polyval(const u8 in[GHASH_BLOCK_SIZE],struct polyval_elem * out)241*c417e704SEric Biggers static void __maybe_unused ghash_key_to_polyval(const u8 in[GHASH_BLOCK_SIZE],
242*c417e704SEric Biggers 						struct polyval_elem *out)
243*c417e704SEric Biggers {
244*c417e704SEric Biggers 	u64 hi = get_unaligned_be64(&in[0]);
245*c417e704SEric Biggers 	u64 lo = get_unaligned_be64(&in[8]);
246*c417e704SEric Biggers 	u64 mask = (s64)hi >> 63;
247*c417e704SEric Biggers 
248*c417e704SEric Biggers 	hi = (hi << 1) ^ (lo >> 63) ^ (mask & ((u64)0xc2 << 56));
249*c417e704SEric Biggers 	lo = (lo << 1) ^ (mask & 1);
250*c417e704SEric Biggers 	out->lo = cpu_to_le64(lo);
251*c417e704SEric Biggers 	out->hi = cpu_to_le64(hi);
252*c417e704SEric Biggers }
253*c417e704SEric Biggers 
254*c417e704SEric Biggers /* Convert the accumulator from POLYVAL format to GHASH format. */
static void polyval_acc_to_ghash(const struct polyval_elem *in,
				 u8 out[GHASH_BLOCK_SIZE])
{
	/*
	 * GHASH byte order is the reverse of the POLYVAL element's
	 * little-endian layout: emit the high half big-endian first,
	 * then the low half.
	 */
	u64 acc_hi = le64_to_cpu(in->hi);
	u64 acc_lo = le64_to_cpu(in->lo);

	put_unaligned_be64(acc_hi, &out[0]);
	put_unaligned_be64(acc_lo, &out[8]);
}
261*c417e704SEric Biggers 
262*c417e704SEric Biggers /* Convert the accumulator from GHASH format to POLYVAL format. */
ghash_acc_to_polyval(const u8 in[GHASH_BLOCK_SIZE],struct polyval_elem * out)263*c417e704SEric Biggers static void __maybe_unused ghash_acc_to_polyval(const u8 in[GHASH_BLOCK_SIZE],
264*c417e704SEric Biggers 						struct polyval_elem *out)
265*c417e704SEric Biggers {
266*c417e704SEric Biggers 	out->lo = cpu_to_le64(get_unaligned_be64(&in[8]));
267*c417e704SEric Biggers 	out->hi = cpu_to_le64(get_unaligned_be64(&in[0]));
268*c417e704SEric Biggers }
269*c417e704SEric Biggers 
27061f66c52SEric Biggers #ifdef CONFIG_CRYPTO_LIB_GF128HASH_ARCH
27161f66c52SEric Biggers #include "gf128hash.h" /* $(SRCARCH)/gf128hash.h */
272b3b6e8f9SEric Biggers #endif
273b3b6e8f9SEric Biggers 
/*
 * ghash_preparekey() - prepare a raw 16-byte GHASH key for use.
 *
 * The generic path converts the key to POLYVAL format once here, so that all
 * later multiplications can run in the easier-to-use field representation.
 * Arch implementations may store the key in their own format instead.
 */
void ghash_preparekey(struct ghash_key *key, const u8 raw_key[GHASH_BLOCK_SIZE])
{
#ifdef ghash_preparekey_arch
	ghash_preparekey_arch(key, raw_key);
#else
	ghash_key_to_polyval(raw_key, &key->h);
#endif
}
EXPORT_SYMBOL_GPL(ghash_preparekey);
283*c417e704SEric Biggers 
/* GF(2^128) multiply of the accumulator by the key. */
static void ghash_mul(struct ghash_ctx *ctx)
{
#ifdef ghash_mul_arch
	ghash_mul_arch(&ctx->acc, ctx->key);
#elif defined(ghash_blocks_arch)
	/*
	 * No dedicated single-multiply hook; processing one all-zeroes block
	 * is equivalent, since each block is XORed in before the multiply.
	 */
	static const u8 zeroes[GHASH_BLOCK_SIZE];

	ghash_blocks_arch(&ctx->acc, ctx->key, zeroes, 1);
#else
	polyval_mul_generic(&ctx->acc, &ctx->key->h);
#endif
}
296*c417e704SEric Biggers 
/* nblocks is always >= 1. */
static void ghash_blocks(struct ghash_ctx *ctx, const u8 *data, size_t nblocks)
{
	/* Dispatch to the arch-optimized bulk routine when one exists. */
#ifdef ghash_blocks_arch
	ghash_blocks_arch(&ctx->acc, ctx->key, data, nblocks);
#else
	ghash_blocks_generic(&ctx->acc, &ctx->key->h, data, nblocks);
#endif
}
306*c417e704SEric Biggers 
/*
 * ghash_update() - absorb @len bytes of message data into the GHASH state.
 *
 * Partial blocks are buffered by XORing the bytes directly into the
 * accumulator; @ctx->partial records how many bytes have been absorbed
 * since the last multiplication.
 */
void ghash_update(struct ghash_ctx *ctx, const u8 *data, size_t len)
{
	if (unlikely(ctx->partial)) {
		size_t n = min(len, GHASH_BLOCK_SIZE - ctx->partial);

		len -= n;
		/*
		 * The accumulator is kept in POLYVAL (byte-reversed) format,
		 * so byte i of a GHASH block lands at offset 15 - i.
		 */
		while (n--)
			ctx->acc.bytes[GHASH_BLOCK_SIZE - 1 - ctx->partial++] ^=
				*data++;
		if (ctx->partial < GHASH_BLOCK_SIZE)
			return;
		/* The buffered block is now complete; multiply it in. */
		ghash_mul(ctx);
	}
	if (len >= GHASH_BLOCK_SIZE) {
		size_t nblocks = len / GHASH_BLOCK_SIZE;

		ghash_blocks(ctx, data, nblocks);
		data += len & ~(GHASH_BLOCK_SIZE - 1);
		len &= GHASH_BLOCK_SIZE - 1;
	}
	/* Buffer any remaining partial block (byte-reversed, as above). */
	for (size_t i = 0; i < len; i++)
		ctx->acc.bytes[GHASH_BLOCK_SIZE - 1 - i] ^= data[i];
	ctx->partial = len;
}
EXPORT_SYMBOL_GPL(ghash_update);
332*c417e704SEric Biggers 
/*
 * ghash_final() - finish a GHASH computation and write the 16-byte digest.
 *
 * Flushes any buffered partial block (implicitly zero-padded), converts the
 * accumulator back to GHASH byte order, and wipes the context, since it may
 * contain key-dependent material.
 */
void ghash_final(struct ghash_ctx *ctx, u8 out[GHASH_BLOCK_SIZE])
{
	if (unlikely(ctx->partial))
		ghash_mul(ctx);
	polyval_acc_to_ghash(&ctx->acc, out);
	memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL_GPL(ghash_final);
341*c417e704SEric Biggers 
/*
 * polyval_preparekey() - prepare a raw 16-byte POLYVAL key for use.
 *
 * Arch implementations may precompute key powers or use their own key
 * format; the generic fallback just stores the raw key.
 */
void polyval_preparekey(struct polyval_key *key,
			const u8 raw_key[POLYVAL_BLOCK_SIZE])
{
#ifdef polyval_preparekey_arch
	polyval_preparekey_arch(key, raw_key);
#else
	memcpy(key->h.bytes, raw_key, POLYVAL_BLOCK_SIZE);
#endif
}
EXPORT_SYMBOL_GPL(polyval_preparekey);
3523d176751SEric Biggers 
3533d176751SEric Biggers /*
3543d176751SEric Biggers  * polyval_mul_generic() and polyval_blocks_generic() take the key as a
3553d176751SEric Biggers  * polyval_elem rather than a polyval_key, so that arch-optimized
3563d176751SEric Biggers  * implementations with a different key format can use it as a fallback (if they
3573d176751SEric Biggers  * have H^1 stored somewhere in their struct).  Thus, the following dispatch
3583d176751SEric Biggers  * code is needed to pass the appropriate key argument.
3593d176751SEric Biggers  */
3603d176751SEric Biggers 
/* GF(2^128) multiply of the accumulator by the key. */
static void polyval_mul(struct polyval_ctx *ctx)
{
#ifdef polyval_mul_arch
	polyval_mul_arch(&ctx->acc, ctx->key);
#elif defined(polyval_blocks_arch)
	/*
	 * No dedicated single-multiply hook; processing one all-zeroes block
	 * is equivalent, since each block is XORed in before the multiply.
	 */
	static const u8 zeroes[POLYVAL_BLOCK_SIZE];

	polyval_blocks_arch(&ctx->acc, ctx->key, zeroes, 1);
#else
	polyval_mul_generic(&ctx->acc, &ctx->key->h);
#endif
}
3733d176751SEric Biggers 
/* nblocks is always >= 1. */
static void polyval_blocks(struct polyval_ctx *ctx,
			   const u8 *data, size_t nblocks)
{
	/* Dispatch to the arch-optimized bulk routine when one exists. */
#ifdef polyval_blocks_arch
	polyval_blocks_arch(&ctx->acc, ctx->key, data, nblocks);
#else
	polyval_blocks_generic(&ctx->acc, &ctx->key->h, data, nblocks);
#endif
}
3843d176751SEric Biggers 
/*
 * polyval_update() - absorb @len bytes of message data into the POLYVAL state.
 *
 * Partial blocks are buffered by XORing the bytes directly into the
 * accumulator; @ctx->partial records how many bytes have been absorbed
 * since the last multiplication.
 */
void polyval_update(struct polyval_ctx *ctx, const u8 *data, size_t len)
{
	if (unlikely(ctx->partial)) {
		size_t n = min(len, POLYVAL_BLOCK_SIZE - ctx->partial);

		len -= n;
		/* Finish filling the previously buffered partial block. */
		while (n--)
			ctx->acc.bytes[ctx->partial++] ^= *data++;
		if (ctx->partial < POLYVAL_BLOCK_SIZE)
			return;
		/* The buffered block is now complete; multiply it in. */
		polyval_mul(ctx);
	}
	if (len >= POLYVAL_BLOCK_SIZE) {
		size_t nblocks = len / POLYVAL_BLOCK_SIZE;

		polyval_blocks(ctx, data, nblocks);
		data += len & ~(POLYVAL_BLOCK_SIZE - 1);
		len &= POLYVAL_BLOCK_SIZE - 1;
	}
	/* Buffer any remaining partial block. */
	for (size_t i = 0; i < len; i++)
		ctx->acc.bytes[i] ^= data[i];
	ctx->partial = len;
}
EXPORT_SYMBOL_GPL(polyval_update);
4093d176751SEric Biggers 
/*
 * polyval_final() - finish a POLYVAL computation and write the 16-byte digest.
 *
 * Flushes any buffered partial block (implicitly zero-padded), copies out the
 * accumulator, and wipes the context, since it may contain key-dependent
 * material.
 */
void polyval_final(struct polyval_ctx *ctx, u8 out[POLYVAL_BLOCK_SIZE])
{
	if (unlikely(ctx->partial))
		polyval_mul(ctx);
	memcpy(out, &ctx->acc, POLYVAL_BLOCK_SIZE);
	memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL_GPL(polyval_final);
4183d176751SEric Biggers 
#ifdef gf128hash_mod_init_arch
/*
 * Run the arch-specific one-time initialization (presumably CPU feature
 * detection -- see the arch's gf128hash.h; TODO confirm per arch).
 */
static int __init gf128hash_mod_init(void)
{
	gf128hash_mod_init_arch();
	return 0;
}
subsys_initcall(gf128hash_mod_init);

/* Empty exit stub so the module can be unloaded. */
static void __exit gf128hash_mod_exit(void)
{
}
module_exit(gf128hash_mod_exit);
#endif
4323d176751SEric Biggers 
43361f66c52SEric Biggers MODULE_DESCRIPTION("GF(2^128) polynomial hashing: GHASH and POLYVAL");
4343d176751SEric Biggers MODULE_LICENSE("GPL");
435