// SPDX-License-Identifier: GPL-2.0
/*
 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/algapi.h>
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

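/*
 * These routines are implemented in assembly: scalar ARM and NEON
 * variants of the ChaCha permutation, plus the HChaCha block function
 * used by the XChaCha construction.
 */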
asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
				      int nrounds);
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
				       int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const u32 *state, int nrounds);

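/*
 * Enabled at init time only when NEON is present and the core is not
 * one where the scalar code is known to be faster; see
 * chacha_simd_mod_init() below.
 */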
static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}

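/*
 * Process @bytes of input with NEON: anything larger than one block is
 * handled in chunks of up to four blocks by chacha_4block_xor_neon(),
 * and a final chunk of at most one block goes through the single-block
 * routine, bounced via a stack buffer if it is not a full block. The
 * block counter in state[12] is advanced as we go.
 */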
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
	if (bytes) {
		const u8 *s = src;
		u8 *d = dst;

		if (bytes != CHACHA_BLOCK_SIZE)
			s = d = memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, d, s, nrounds);
		if (d != dst)
			memcpy(dst, buf, bytes);
		state[12]++;
	}
}

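/*
 * HChaCha is the core building block of the XChaCha construction: it
 * derives a 256-bit subkey from the key and the first 128 bits of the
 * extended nonce (see do_xchacha() below).
 */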
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, stream, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);

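/*
 * Fall back to the scalar code for short inputs or when NEON may not be
 * used in the current context; otherwise process the data in chunks of
 * at most SZ_4K per kernel_neon_begin()/kernel_neon_end() section,
 * presumably so that the time spent with the NEON unit claimed stays
 * bounded.
 */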
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
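
/*
 * Minimal usage sketch (illustrative only): library users normally go
 * through chacha_crypt() in <crypto/chacha.h>, which dispatches to the
 * routine exported above:
 *
 *	u32 state[16];
 *
 *	chacha_init(state, key, iv);	// u32 key[8], u8 iv[16]
 *	chacha_crypt_arch(state, dst, src, len, 20);
 */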

static int chacha_stream_xor(struct skcipher_request *req,
			     const struct chacha_ctx *ctx, const u8 *iv,
			     bool neon)
{
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	chacha_init(state, ctx->key, iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		/* Intermediate steps must be a multiple of the walk stride. */
		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
				     nbytes, state, ctx->nrounds);
			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
		} else {
			kernel_neon_begin();
			chacha_doneon(state, walk.dst.virt.addr,
				      walk.src.virt.addr, nbytes, ctx->nrounds);
			kernel_neon_end();
		}
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

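/*
 * The skcipher entry points come in two flavours: the -arm algorithms
 * always take the scalar path, while the -neon ones check at request
 * time whether NEON may currently be used.
 */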
static int do_chacha(struct skcipher_request *req, bool neon)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);

	return chacha_stream_xor(req, ctx, req->iv, neon);
}

static int chacha_arm(struct skcipher_request *req)
{
	return do_chacha(req, false);
}

static int chacha_neon(struct skcipher_request *req)
{
	return do_chacha(req, neon_usable());
}

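/*
 * XChaCha: run HChaCha on the key and the first 16 bytes of the 32-byte
 * IV to derive a subkey, then run regular ChaCha with that subkey and a
 * freshly constructed 16-byte IV.
 */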
static int do_xchacha(struct skcipher_request *req, bool neon)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct chacha_ctx subctx;
	u32 state[16];
	u8 real_iv[16];

	chacha_init(state, ctx->key, req->iv);

	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
		hchacha_block_arm(state, subctx.key, ctx->nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, subctx.key, ctx->nrounds);
		kernel_neon_end();
	}
	subctx.nrounds = ctx->nrounds;

	memcpy(&real_iv[0], req->iv + 24, 8);	/* stream position */
	memcpy(&real_iv[8], req->iv + 16, 8);	/* remaining 64 nonce bits */
	return chacha_stream_xor(req, &subctx, real_iv, neon);
}

static int xchacha_arm(struct skcipher_request *req)
{
	return do_xchacha(req, false);
}

static int xchacha_neon(struct skcipher_request *req)
{
	return do_xchacha(req, neon_usable());
}

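/*
 * The scalar algorithms register at priority 200 and the NEON ones at
 * 300, so the crypto API prefers the NEON variants where both exist
 * (unless their priority is zeroed at init time, see below).
 */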
static struct skcipher_alg arm_algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_arm,
		.decrypt		= chacha_arm,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_arm,
		.decrypt		= xchacha_arm,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_arm,
		.decrypt		= xchacha_arm,
	},
};

static struct skcipher_alg neon_algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_neon,
		.decrypt		= chacha_neon,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}
};

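/*
 * Register the scalar algorithms unconditionally; the NEON ones are
 * registered only when the CPU advertises NEON. On Cortex-A5/A7 they
 * are registered at priority 0 and the use_neon key is left off, since
 * the scalar code is faster there.
 */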
static int __init chacha_simd_mod_init(void)
{
	int err = 0;

	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
		err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
		if (err)
			return err;
	}

	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		int i;

		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 do not perform well with
			 * the NEON implementation but do remarkably well with
			 * the scalar one and use less power.
			 */
			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
				neon_algs[i].base.cra_priority = 0;
			break;
		default:
			static_branch_enable(&use_neon);
		}

		if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
			err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
			if (err)
				crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
		}
	}
	return err;
}

static void __exit chacha_simd_mod_fini(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
		crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
			crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
	}
}

module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-arm");
#ifdef CONFIG_KERNEL_MODE_NEON
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha12-neon");
#endif