1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
4 * instructions. This file contains glue code.
5 *
6 * Copyright (c) 2009 Intel Corp.
7 * Author: Huang Ying <ying.huang@intel.com>
8 */
9
10 #include <linux/err.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/kernel.h>
14 #include <linux/crypto.h>
15 #include <crypto/algapi.h>
16 #include <crypto/cryptd.h>
17 #include <crypto/gf128mul.h>
18 #include <crypto/internal/hash.h>
19 #include <crypto/internal/simd.h>
20 #include <asm/cpu_device_id.h>
21 #include <asm/simd.h>
22 #include <linux/unaligned.h>
23
/* GHASH operates on 16-byte blocks and produces a 16-byte digest. */
#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16
26
27 void clmul_ghash_mul(char *dst, const le128 *shash);
28
29 void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
30 const le128 *shash);
31
/*
 * Per-transform context of the asynchronous "ghash" algorithm.
 *
 * @cryptd_tfm: cryptd handle wrapping the internal "__ghash-pclmulqdqni"
 *              shash; allocated in ghash_async_init_tfm() and released in
 *              ghash_async_exit_tfm().
 */
struct ghash_async_ctx {
	struct cryptd_ahash *cryptd_tfm;
};
35
36 struct ghash_ctx {
37 le128 shash;
38 };
39
40 struct ghash_desc_ctx {
41 u8 buffer[GHASH_BLOCK_SIZE];
42 u32 bytes;
43 };
44
ghash_init(struct shash_desc * desc)45 static int ghash_init(struct shash_desc *desc)
46 {
47 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
48
49 memset(dctx, 0, sizeof(*dctx));
50
51 return 0;
52 }
53
ghash_setkey(struct crypto_shash * tfm,const u8 * key,unsigned int keylen)54 static int ghash_setkey(struct crypto_shash *tfm,
55 const u8 *key, unsigned int keylen)
56 {
57 struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
58 u64 a, b;
59
60 if (keylen != GHASH_BLOCK_SIZE)
61 return -EINVAL;
62
63 /*
64 * GHASH maps bits to polynomial coefficients backwards, which makes it
65 * hard to implement. But it can be shown that the GHASH multiplication
66 *
67 * D * K (mod x^128 + x^7 + x^2 + x + 1)
68 *
69 * (where D is a data block and K is the key) is equivalent to:
70 *
71 * bitreflect(D) * bitreflect(K) * x^(-127)
72 * (mod x^128 + x^127 + x^126 + x^121 + 1)
73 *
74 * So, the code below precomputes:
75 *
76 * bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1)
77 *
78 * ... but in Montgomery form (so that Montgomery multiplication can be
79 * used), i.e. with an extra x^128 factor, which means actually:
80 *
81 * bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1)
82 *
83 * The within-a-byte part of bitreflect() cancels out GHASH's built-in
84 * reflection, and thus bitreflect() is actually a byteswap.
85 */
86 a = get_unaligned_be64(key);
87 b = get_unaligned_be64(key + 8);
88 ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63));
89 ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63));
90 if (a >> 63)
91 ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56);
92 return 0;
93 }
94
ghash_update(struct shash_desc * desc,const u8 * src,unsigned int srclen)95 static int ghash_update(struct shash_desc *desc,
96 const u8 *src, unsigned int srclen)
97 {
98 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
99 struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
100 u8 *dst = dctx->buffer;
101
102 kernel_fpu_begin();
103 if (dctx->bytes) {
104 int n = min(srclen, dctx->bytes);
105 u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
106
107 dctx->bytes -= n;
108 srclen -= n;
109
110 while (n--)
111 *pos++ ^= *src++;
112
113 if (!dctx->bytes)
114 clmul_ghash_mul(dst, &ctx->shash);
115 }
116
117 clmul_ghash_update(dst, src, srclen, &ctx->shash);
118 kernel_fpu_end();
119
120 if (srclen & 0xf) {
121 src += srclen - (srclen & 0xf);
122 srclen &= 0xf;
123 dctx->bytes = GHASH_BLOCK_SIZE - srclen;
124 while (srclen--)
125 *dst++ ^= *src++;
126 }
127
128 return 0;
129 }
130
ghash_flush(struct ghash_ctx * ctx,struct ghash_desc_ctx * dctx)131 static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
132 {
133 u8 *dst = dctx->buffer;
134
135 if (dctx->bytes) {
136 u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
137
138 while (dctx->bytes--)
139 *tmp++ ^= 0;
140
141 kernel_fpu_begin();
142 clmul_ghash_mul(dst, &ctx->shash);
143 kernel_fpu_end();
144 }
145
146 dctx->bytes = 0;
147 }
148
/*
 * Complete the hash: flush any pending partial block and copy the
 * GHASH_BLOCK_SIZE-byte accumulator to @dst.  Always returns 0.
 */
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);

	ghash_flush(ctx, dctx);
	memcpy(dst, dctx->buffer, GHASH_BLOCK_SIZE);

	return 0;
}
160
161 static struct shash_alg ghash_alg = {
162 .digestsize = GHASH_DIGEST_SIZE,
163 .init = ghash_init,
164 .update = ghash_update,
165 .final = ghash_final,
166 .setkey = ghash_setkey,
167 .descsize = sizeof(struct ghash_desc_ctx),
168 .base = {
169 .cra_name = "__ghash",
170 .cra_driver_name = "__ghash-pclmulqdqni",
171 .cra_priority = 0,
172 .cra_flags = CRYPTO_ALG_INTERNAL,
173 .cra_blocksize = GHASH_BLOCK_SIZE,
174 .cra_ctxsize = sizeof(struct ghash_ctx),
175 .cra_module = THIS_MODULE,
176 },
177 };
178
ghash_async_init(struct ahash_request * req)179 static int ghash_async_init(struct ahash_request *req)
180 {
181 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
182 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
183 struct ahash_request *cryptd_req = ahash_request_ctx(req);
184 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
185 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
186 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
187
188 desc->tfm = child;
189 return crypto_shash_init(desc);
190 }
191
ghash_init_cryptd_req(struct ahash_request * req)192 static void ghash_init_cryptd_req(struct ahash_request *req)
193 {
194 struct ahash_request *cryptd_req = ahash_request_ctx(req);
195 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
196 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
197 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
198
199 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
200 ahash_request_set_callback(cryptd_req, req->base.flags,
201 req->base.complete, req->base.data);
202 ahash_request_set_crypt(cryptd_req, req->src, req->result,
203 req->nbytes);
204 }
205
ghash_async_update(struct ahash_request * req)206 static int ghash_async_update(struct ahash_request *req)
207 {
208 struct ahash_request *cryptd_req = ahash_request_ctx(req);
209 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
210 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
211 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
212
213 if (!crypto_simd_usable() ||
214 (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
215 ghash_init_cryptd_req(req);
216 return crypto_ahash_update(cryptd_req);
217 } else {
218 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
219 return shash_ahash_update(req, desc);
220 }
221 }
222
ghash_async_final(struct ahash_request * req)223 static int ghash_async_final(struct ahash_request *req)
224 {
225 struct ahash_request *cryptd_req = ahash_request_ctx(req);
226 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
227 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
228 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
229
230 if (!crypto_simd_usable() ||
231 (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
232 ghash_init_cryptd_req(req);
233 return crypto_ahash_final(cryptd_req);
234 } else {
235 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
236 return crypto_shash_final(desc, req->result);
237 }
238 }
239
ghash_async_import(struct ahash_request * req,const void * in)240 static int ghash_async_import(struct ahash_request *req, const void *in)
241 {
242 struct ahash_request *cryptd_req = ahash_request_ctx(req);
243 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
244 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
245
246 ghash_async_init(req);
247 memcpy(dctx, in, sizeof(*dctx));
248 return 0;
249
250 }
251
ghash_async_export(struct ahash_request * req,void * out)252 static int ghash_async_export(struct ahash_request *req, void *out)
253 {
254 struct ahash_request *cryptd_req = ahash_request_ctx(req);
255 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
256 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
257
258 memcpy(out, dctx, sizeof(*dctx));
259 return 0;
260
261 }
262
ghash_async_digest(struct ahash_request * req)263 static int ghash_async_digest(struct ahash_request *req)
264 {
265 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
266 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
267 struct ahash_request *cryptd_req = ahash_request_ctx(req);
268 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
269
270 if (!crypto_simd_usable() ||
271 (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
272 ghash_init_cryptd_req(req);
273 return crypto_ahash_digest(cryptd_req);
274 } else {
275 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
276 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
277
278 desc->tfm = child;
279 return shash_ahash_digest(req, desc);
280 }
281 }
282
ghash_async_setkey(struct crypto_ahash * tfm,const u8 * key,unsigned int keylen)283 static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
284 unsigned int keylen)
285 {
286 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
287 struct crypto_ahash *child = &ctx->cryptd_tfm->base;
288
289 crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
290 crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
291 & CRYPTO_TFM_REQ_MASK);
292 return crypto_ahash_setkey(child, key, keylen);
293 }
294
ghash_async_init_tfm(struct crypto_tfm * tfm)295 static int ghash_async_init_tfm(struct crypto_tfm *tfm)
296 {
297 struct cryptd_ahash *cryptd_tfm;
298 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
299
300 cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
301 CRYPTO_ALG_INTERNAL,
302 CRYPTO_ALG_INTERNAL);
303 if (IS_ERR(cryptd_tfm))
304 return PTR_ERR(cryptd_tfm);
305 ctx->cryptd_tfm = cryptd_tfm;
306 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
307 sizeof(struct ahash_request) +
308 crypto_ahash_reqsize(&cryptd_tfm->base));
309
310 return 0;
311 }
312
ghash_async_exit_tfm(struct crypto_tfm * tfm)313 static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
314 {
315 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
316
317 cryptd_free_ahash(ctx->cryptd_tfm);
318 }
319
320 static struct ahash_alg ghash_async_alg = {
321 .init = ghash_async_init,
322 .update = ghash_async_update,
323 .final = ghash_async_final,
324 .setkey = ghash_async_setkey,
325 .digest = ghash_async_digest,
326 .export = ghash_async_export,
327 .import = ghash_async_import,
328 .halg = {
329 .digestsize = GHASH_DIGEST_SIZE,
330 .statesize = sizeof(struct ghash_desc_ctx),
331 .base = {
332 .cra_name = "ghash",
333 .cra_driver_name = "ghash-clmulni",
334 .cra_priority = 400,
335 .cra_ctxsize = sizeof(struct ghash_async_ctx),
336 .cra_flags = CRYPTO_ALG_ASYNC,
337 .cra_blocksize = GHASH_BLOCK_SIZE,
338 .cra_module = THIS_MODULE,
339 .cra_init = ghash_async_init_tfm,
340 .cra_exit = ghash_async_exit_tfm,
341 },
342 },
343 };
344
345 static const struct x86_cpu_id pcmul_cpu_id[] = {
346 X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), /* Pickle-Mickle-Duck */
347 {}
348 };
349 MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
350
ghash_pclmulqdqni_mod_init(void)351 static int __init ghash_pclmulqdqni_mod_init(void)
352 {
353 int err;
354
355 if (!x86_match_cpu(pcmul_cpu_id))
356 return -ENODEV;
357
358 err = crypto_register_shash(&ghash_alg);
359 if (err)
360 goto err_out;
361 err = crypto_register_ahash(&ghash_async_alg);
362 if (err)
363 goto err_shash;
364
365 return 0;
366
367 err_shash:
368 crypto_unregister_shash(&ghash_alg);
369 err_out:
370 return err;
371 }
372
ghash_pclmulqdqni_mod_exit(void)373 static void __exit ghash_pclmulqdqni_mod_exit(void)
374 {
375 crypto_unregister_ahash(&ghash_async_alg);
376 crypto_unregister_shash(&ghash_alg);
377 }
378
379 module_init(ghash_pclmulqdqni_mod_init);
380 module_exit(ghash_pclmulqdqni_mod_exit);
381
382 MODULE_LICENSE("GPL");
383 MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI");
384 MODULE_ALIAS_CRYPTO("ghash");
385