// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cryptographic API.
 *
 * Support for VIA PadLock hardware crypto engine.
 *
 * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
 */

#include <asm/cpu_device_id.h>
#include <crypto/internal/hash.h>
#include <crypto/padlock.h>
#include <crypto/sha1.h>
#include <crypto/sha2.h>
#include <linux/cpufeature.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/module.h>

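/*
 * The descriptor holds the hash state in a buffer large enough for the
 * PadLock microcode (128 bytes) plus enough slack to realign it to
 * PADLOCK_ALIGNMENT on top of the API's CRYPTO_MINALIGN guarantee.
 */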
#define PADLOCK_SHA_DESCSIZE (128 + ((PADLOCK_ALIGNMENT - 1) & \
				     ~(CRYPTO_MINALIGN - 1)))

struct padlock_sha_ctx {
	struct crypto_ahash *fallback;
};

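/* Return the descriptor state buffer aligned for the PadLock instructions. */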
static inline void *padlock_shash_desc_ctx(struct shash_desc *desc)
{
	return PTR_ALIGN(shash_desc_ctx(desc), PADLOCK_ALIGNMENT);
}

static int padlock_sha1_init(struct shash_desc *desc)
{
	struct sha1_state *sctx = padlock_shash_desc_ctx(desc);

	*sctx = (struct sha1_state){
		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
	};

	return 0;
}

static int padlock_sha256_init(struct shash_desc *desc)
{
	struct crypto_sha256_state *sctx = padlock_shash_desc_ctx(desc);

	sha256_block_init(sctx);
	return 0;
}

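/*
 * The hardware is only used in finup(); intermediate data is run through
 * the software fallback on full blocks and the partial state is carried
 * in the descriptor.  The number of leftover bytes is returned so the
 * shash core can buffer them (CRYPTO_AHASH_ALG_BLOCK_ONLY).
 */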
static int padlock_sha_update(struct shash_desc *desc,
			      const u8 *data, unsigned int length)
{
	u8 *state = padlock_shash_desc_ctx(desc);
	struct crypto_shash *tfm = desc->tfm;
	int err, remain;

	remain = length - round_down(length, crypto_shash_blocksize(tfm));
	{
		struct padlock_sha_ctx *ctx = crypto_shash_ctx(tfm);
		HASH_REQUEST_ON_STACK(req, ctx->fallback);

		ahash_request_set_callback(req, 0, NULL, NULL);
		ahash_request_set_virt(req, data, NULL, length - remain);
		err = crypto_ahash_import_core(req, state) ?:
		      crypto_ahash_update(req) ?:
		      crypto_ahash_export_core(req, state);
		HASH_REQUEST_ZERO(req);
	}

	return err ?: remain;
}

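/* Only the core state (hash words and byte count) is exported/imported. */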
static int padlock_sha_export(struct shash_desc *desc, void *out)
{
	memcpy(out, padlock_shash_desc_ctx(desc),
	       crypto_shash_coresize(desc->tfm));
	return 0;
}

static int padlock_sha_import(struct shash_desc *desc, const void *in)
{
	unsigned int bs = crypto_shash_blocksize(desc->tfm);
	unsigned int ss = crypto_shash_coresize(desc->tfm);
	u64 *state = padlock_shash_desc_ctx(desc);

	memcpy(state, in, ss);

	/*
	 * Stop evil imports from generating a fault: round the byte count
	 * in the last word of the state down to a multiple of the block
	 * size without touching its upper bits.
	 */
	state[ss / 8 - 1] &= ~(u64)(bs - 1);

	return 0;
}


static inline void padlock_output_block(uint32_t *src,
					uint32_t *dst, size_t count)
{
	while (count--)
		*dst++ = swab32(*src++);
}

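/*
 * Software-fallback finup, used when the total byte count would not fit
 * in an unsigned long (see the ULONG_MAX checks below).
 */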
static int padlock_sha_finup(struct shash_desc *desc, const u8 *in,
			     unsigned int count, u8 *out)
{
	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
	HASH_REQUEST_ON_STACK(req, ctx->fallback);

	ahash_request_set_callback(req, 0, NULL, NULL);
	ahash_request_set_virt(req, in, out, count);
	return crypto_ahash_import_core(req, padlock_shash_desc_ctx(desc)) ?:
	       crypto_ahash_finup(req);
}

static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
			      unsigned int count, u8 *out)
{
	/*
	 * We can't store directly to *out as it may be unaligned.
	 * Note: do not shrink the state buffer below 128 bytes; the
	 * PadLock microcode needs it that big.
	 */
	struct sha1_state *state = padlock_shash_desc_ctx(desc);
	u64 start = state->count;

	if (start + count > ULONG_MAX)
		return padlock_sha_finup(desc, in, count, out);

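	/*
	 * "rep xsha1": ESI = input, EDI = 16-byte aligned state buffer,
	 * EAX = bytes already hashed, ECX = total message length.  The
	 * engine consumes the remaining bytes and leaves the final,
	 * padded digest in the state buffer.
	 */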
	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
		      :
		      : "c"((unsigned long)start + count),
			"a"((unsigned long)start),
			"S"(in), "D"(state));

	padlock_output_block(state->state, (uint32_t *)out, 5);
	return 0;
}

static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
				unsigned int count, u8 *out)
{
	/*
	 * We can't store directly to *out as it may be unaligned.
	 * Note: do not shrink the state buffer below 128 bytes; the
	 * PadLock microcode needs it that big.
	 */
	struct sha256_state *state = padlock_shash_desc_ctx(desc);
	u64 start = state->count;

	if (start + count > ULONG_MAX)
		return padlock_sha_finup(desc, in, count, out);

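	/* "rep xsha256": same register usage as the SHA-1 case above. */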
	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
		      :
		      : "c"((unsigned long)start + count),
			"a"((unsigned long)start),
			"S"(in), "D"(state));

	padlock_output_block(state->state, (uint32_t *)out, 8);
	return 0;
}

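/*
 * Allocate the software fallback used by update() and by the finup()
 * overflow path, and reject it if its exported state layout does not
 * match ours.
 */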
static int padlock_init_tfm(struct crypto_shash *hash)
{
	const char *fallback_driver_name = crypto_shash_alg_name(hash);
	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
	struct crypto_ahash *fallback_tfm;

	/* Allocate a fallback and abort if it fails. */
	fallback_tfm = crypto_alloc_ahash(fallback_driver_name, 0,
					  CRYPTO_ALG_NEED_FALLBACK |
					  CRYPTO_ALG_ASYNC);
	if (IS_ERR(fallback_tfm)) {
		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
		       fallback_driver_name);
		return PTR_ERR(fallback_tfm);
	}

	if (crypto_shash_statesize(hash) !=
	    crypto_ahash_statesize(fallback_tfm)) {
		crypto_free_ahash(fallback_tfm);
		return -EINVAL;
	}

	ctx->fallback = fallback_tfm;

	return 0;
}

static void padlock_exit_tfm(struct crypto_shash *hash)
{
	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);

	crypto_free_ahash(ctx->fallback);
}

static struct shash_alg sha1_alg = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	padlock_sha1_init,
	.update		=	padlock_sha_update,
	.finup		=	padlock_sha1_finup,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	.init_tfm	=	padlock_init_tfm,
	.exit_tfm	=	padlock_exit_tfm,
	.descsize	=	PADLOCK_SHA_DESCSIZE,
	.statesize	=	SHA1_STATE_SIZE,
	.base		=	{
		.cra_name		=	"sha1",
		.cra_driver_name	=	"sha1-padlock",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK |
						CRYPTO_AHASH_ALG_BLOCK_ONLY |
						CRYPTO_AHASH_ALG_FINUP_MAX,
		.cra_blocksize		=	SHA1_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
		.cra_module		=	THIS_MODULE,
	}
};

static struct shash_alg sha256_alg = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	padlock_sha256_init,
	.update		=	padlock_sha_update,
	.finup		=	padlock_sha256_finup,
	.init_tfm	=	padlock_init_tfm,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	.exit_tfm	=	padlock_exit_tfm,
	.descsize	=	PADLOCK_SHA_DESCSIZE,
	.statesize	=	sizeof(struct crypto_sha256_state),
	.base		=	{
		.cra_name		=	"sha256",
		.cra_driver_name	=	"sha256-padlock",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK |
						CRYPTO_AHASH_ALG_BLOCK_ONLY |
						CRYPTO_AHASH_ALG_FINUP_MAX,
		.cra_blocksize		=	SHA256_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
		.cra_module		=	THIS_MODULE,
	}
};

/*
 * Two additional shash_alg instances for the hardware multi-part hash
 * support found on the VIA Nano processor.
 */

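/*
 * The Nano hash engine can hash complete blocks without finalizing:
 * EAX = -1 selects this partial mode and ECX then holds the number of
 * blocks rather than a byte count.
 */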
static int padlock_sha1_update_nano(struct shash_desc *desc,
				    const u8 *src, unsigned int len)
{
	/* The PHE requires a 128-byte, 16-byte aligned output buffer. */
	struct sha1_state *state = padlock_shash_desc_ctx(desc);
	int blocks = len / SHA1_BLOCK_SIZE;

	len -= blocks * SHA1_BLOCK_SIZE;
	state->count += blocks * SHA1_BLOCK_SIZE;

	/* Hash the complete blocks; the leftover bytes are returned to the caller. */
	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
		      : "+S"(src), "+D"(state)
		      : "a"((long)-1),
			"c"((unsigned long)blocks));
	return len;
}

260 
261 static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *src,
262 			  unsigned int len)
263 {
264 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
265 	struct crypto_sha256_state *state = padlock_shash_desc_ctx(desc);
266 	int blocks = len / SHA256_BLOCK_SIZE;
267 
268 	len -= blocks * SHA256_BLOCK_SIZE;
269 	state->count += blocks * SHA256_BLOCK_SIZE;
270 
271 	/* Process the left bytes from input data*/
272 	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
273 		      : "+S"(src), "+D"(state)
274 		      : "a"((long)-1),
275 		      "c"((unsigned long)blocks));
276 	return len;
277 }

static struct shash_alg sha1_alg_nano = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	padlock_sha1_init,
	.update		=	padlock_sha1_update_nano,
	.finup		=	padlock_sha1_finup,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	.descsize	=	PADLOCK_SHA_DESCSIZE,
	.statesize	=	SHA1_STATE_SIZE,
	.base		=	{
		.cra_name		=	"sha1",
		.cra_driver_name	=	"sha1-padlock-nano",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
						CRYPTO_AHASH_ALG_FINUP_MAX,
		.cra_blocksize		=	SHA1_BLOCK_SIZE,
		.cra_module		=	THIS_MODULE,
	}
};

static struct shash_alg sha256_alg_nano = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	padlock_sha256_init,
	.update		=	padlock_sha256_update_nano,
	.finup		=	padlock_sha256_finup,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	.descsize	=	PADLOCK_SHA_DESCSIZE,
	.statesize	=	sizeof(struct crypto_sha256_state),
	.base		=	{
		.cra_name		=	"sha256",
		.cra_driver_name	=	"sha256-padlock-nano",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
						CRYPTO_AHASH_ALG_FINUP_MAX,
		.cra_blocksize		=	SHA256_BLOCK_SIZE,
		.cra_module		=	THIS_MODULE,
	}
};

static const struct x86_cpu_id padlock_sha_ids[] = {
	X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);

static int __init padlock_init(void)
{
	int rc = -ENODEV;
	struct cpuinfo_x86 *c = &cpu_data(0);
	struct shash_alg *sha1;
	struct shash_alg *sha256;

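	/* Require both the PHE feature and the PHE-enabled flag. */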
	if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
		return -ENODEV;

	/*
	 * Register the Nano-specific algorithms on VIA Nano processors
	 * (model >= 0x0f), otherwise the classic PadLock ones.
	 */
	if (c->x86_model < 0x0f) {
		sha1 = &sha1_alg;
		sha256 = &sha256_alg;
	} else {
		sha1 = &sha1_alg_nano;
		sha256 = &sha256_alg_nano;
	}

	rc = crypto_register_shash(sha1);
	if (rc)
		goto out;

	rc = crypto_register_shash(sha256);
	if (rc)
		goto out_unreg1;

	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");

	return 0;

out_unreg1:
	crypto_unregister_shash(sha1);

out:
	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
	return rc;
}

static void __exit padlock_fini(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);

	if (c->x86_model >= 0x0f) {
		crypto_unregister_shash(&sha1_alg_nano);
		crypto_unregister_shash(&sha256_alg_nano);
	} else {
		crypto_unregister_shash(&sha1_alg);
		crypto_unregister_shash(&sha256_alg);
	}
}

module_init(padlock_init);
module_exit(padlock_fini);

MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michal Ludvig");

MODULE_ALIAS_CRYPTO("sha1-all");
MODULE_ALIAS_CRYPTO("sha256-all");
MODULE_ALIAS_CRYPTO("sha1-padlock");
MODULE_ALIAS_CRYPTO("sha256-padlock");