1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
4 * including ChaCha20 (RFC7539)
5 *
6 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
7 * Copyright (C) 2015 Martin Willi
8 */
9
10 #include <crypto/algapi.h>
11 #include <crypto/internal/chacha.h>
12 #include <crypto/internal/simd.h>
13 #include <crypto/internal/skcipher.h>
14 #include <linux/jump_label.h>
15 #include <linux/kernel.h>
16 #include <linux/module.h>
17
18 #include <asm/cputype.h>
19 #include <asm/hwcap.h>
20 #include <asm/neon.h>
21 #include <asm/simd.h>
22
23 asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
24 int nrounds);
25 asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
26 int nrounds, unsigned int nbytes);
27 asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
28 asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
29
30 asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
31 const u32 *state, int nrounds);
32
33 static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
34
neon_usable(void)35 static inline bool neon_usable(void)
36 {
37 return static_branch_likely(&use_neon) && crypto_simd_usable();
38 }
39
/*
 * NEON bulk en/decryption.  Must be called between kernel_neon_begin() and
 * kernel_neon_end().  Advances the block counter in state[12] by the number
 * of full/partial blocks consumed.
 */
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 tmp[CHACHA_BLOCK_SIZE];

	/* Process up to four blocks at a time while more than one block remains. */
	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int chunk = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, chunk);
		bytes -= chunk;
		src += chunk;
		dst += chunk;
		state[12] += DIV_ROUND_UP(chunk, CHACHA_BLOCK_SIZE);
	}

	/* Final (possibly partial) block: bounce through a stack buffer if short. */
	if (bytes) {
		const u8 *in = src;
		u8 *out = dst;

		if (bytes != CHACHA_BLOCK_SIZE)
			in = out = memcpy(tmp, src, bytes);
		chacha_block_xor_neon(state, out, in, nrounds);
		if (out != dst)
			memcpy(dst, tmp, bytes);
		state[12]++;
	}
}
66
/* One HChaCha block: NEON when safe and enabled, scalar otherwise. */
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && neon_usable()) {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
	} else {
		hchacha_block_arm(state, stream, nrounds);
	}
}
EXPORT_SYMBOL(hchacha_block_arch);
78
/*
 * Arch entry point for the ChaCha library interface.  Uses the scalar code
 * for small inputs or when NEON cannot be used; otherwise processes the data
 * with NEON in bounded chunks.
 */
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	/*
	 * Scalar fallback: NEON not built in, not usable here, or not worth
	 * it for a single block or less.  chacha_doarm() does not touch the
	 * counter, so advance it here.
	 */
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	/* Cap each NEON section at 4 KiB to keep kernel-mode NEON sections short. */
	while (bytes) {
		unsigned int chunk = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, chunk, nrounds);
		kernel_neon_end();

		bytes -= chunk;
		src += chunk;
		dst += chunk;
	}
}
EXPORT_SYMBOL(chacha_crypt_arch);
102
/*
 * XOR the (X)ChaCha keystream over the request's data, walking the
 * scatterlists in virtually-mapped steps.  @neon selects the NEON path for
 * each step when kernel-mode NEON is built in.  The block counter lives in
 * state[12]; the NEON helper advances it itself, the scalar path does not.
 */
static int chacha_stream_xor(struct skcipher_request *req,
			     const struct chacha_ctx *ctx, const u8 *iv,
			     bool neon)
{
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	chacha_init(state, ctx->key, iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		/* Every step but the last must be a multiple of the walk stride. */
		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
				     nbytes, state, ctx->nrounds);
			/* Scalar routine leaves the counter to the caller. */
			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
		} else {
			kernel_neon_begin();
			chacha_doneon(state, walk.dst.virt.addr,
				      walk.src.virt.addr, nbytes, ctx->nrounds);
			kernel_neon_end();
		}
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}
136
/* Fetch the tfm context and run the plain ChaCha stream cipher. */
static int do_chacha(struct skcipher_request *req, bool neon)
{
	struct chacha_ctx *ctx =
		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));

	return chacha_stream_xor(req, ctx, req->iv, neon);
}
144
chacha_arm(struct skcipher_request * req)145 static int chacha_arm(struct skcipher_request *req)
146 {
147 return do_chacha(req, false);
148 }
149
/* skcipher en/decrypt entry point: ChaCha, NEON if currently usable. */
static int chacha_neon(struct skcipher_request *req)
{
	return do_chacha(req, neon_usable());
}
154
/*
 * XChaCha: derive a one-time subkey with HChaCha from the first 16 bytes of
 * the 24-byte IV, then run regular ChaCha with the remaining IV bytes.
 */
static int do_xchacha(struct skcipher_request *req, bool neon)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct chacha_ctx subctx;
	u32 state[16];
	u8 real_iv[16];

	chacha_init(state, ctx->key, req->iv);

	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && neon) {
		kernel_neon_begin();
		hchacha_block_neon(state, subctx.key, ctx->nrounds);
		kernel_neon_end();
	} else {
		hchacha_block_arm(state, subctx.key, ctx->nrounds);
	}
	subctx.nrounds = ctx->nrounds;

	/* ChaCha IV: XChaCha IV bytes 24-31 first, then bytes 16-23. */
	memcpy(&real_iv[0], req->iv + 24, 8);
	memcpy(&real_iv[8], req->iv + 16, 8);
	return chacha_stream_xor(req, &subctx, real_iv, neon);
}
178
xchacha_arm(struct skcipher_request * req)179 static int xchacha_arm(struct skcipher_request *req)
180 {
181 return do_xchacha(req, false);
182 }
183
/* skcipher en/decrypt entry point: XChaCha, NEON if currently usable. */
static int xchacha_neon(struct skcipher_request *req)
{
	return do_xchacha(req, neon_usable());
}
188
/*
 * Scalar (ARM core-register) implementations.  Always registered when the
 * skcipher API is reachable; priority 200 places them above the generic C
 * code but below the NEON variants.
 */
static struct skcipher_alg arm_algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_arm,
		.decrypt		= chacha_arm,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_arm,
		.decrypt		= xchacha_arm,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_arm,
		.decrypt		= xchacha_arm,
	},
};
237
/*
 * NEON implementations.  Registered only when NEON is built in and present;
 * priority 300 beats the scalar variants unless demoted to 0 at init time
 * for cores where NEON is a loss (see chacha_simd_mod_init()).  walksize of
 * four blocks lets chacha_4block_xor_neon() see full 4-block chunks.
 */
static struct skcipher_alg neon_algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_neon,
		.decrypt		= chacha_neon,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}
};
289
chacha_simd_mod_init(void)290 static int __init chacha_simd_mod_init(void)
291 {
292 int err = 0;
293
294 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
295 err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
296 if (err)
297 return err;
298 }
299
300 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
301 int i;
302
303 switch (read_cpuid_part()) {
304 case ARM_CPU_PART_CORTEX_A7:
305 case ARM_CPU_PART_CORTEX_A5:
306 /*
307 * The Cortex-A7 and Cortex-A5 do not perform well with
308 * the NEON implementation but do incredibly with the
309 * scalar one and use less power.
310 */
311 for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
312 neon_algs[i].base.cra_priority = 0;
313 break;
314 default:
315 static_branch_enable(&use_neon);
316 }
317
318 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
319 err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
320 if (err)
321 crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
322 }
323 }
324 return err;
325 }
326
chacha_simd_mod_fini(void)327 static void __exit chacha_simd_mod_fini(void)
328 {
329 if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
330 crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
331 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
332 crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
333 }
334 }
335
336 module_init(chacha_simd_mod_init);
337 module_exit(chacha_simd_mod_fini);
338
339 MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
340 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
341 MODULE_LICENSE("GPL v2");
342 MODULE_ALIAS_CRYPTO("chacha20");
343 MODULE_ALIAS_CRYPTO("chacha20-arm");
344 MODULE_ALIAS_CRYPTO("xchacha20");
345 MODULE_ALIAS_CRYPTO("xchacha20-arm");
346 MODULE_ALIAS_CRYPTO("xchacha12");
347 MODULE_ALIAS_CRYPTO("xchacha12-arm");
348 #ifdef CONFIG_KERNEL_MODE_NEON
349 MODULE_ALIAS_CRYPTO("chacha20-neon");
350 MODULE_ALIAS_CRYPTO("xchacha20-neon");
351 MODULE_ALIAS_CRYPTO("xchacha12-neon");
352 #endif
353