1 /*
2 * crypto_helper.c - emulate v8 Crypto Extensions instructions
3 *
4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 */
11
12 #include "qemu/osdep.h"
13 #include "qemu/bitops.h"
14
15 #include "tcg/tcg-gvec-desc.h"
16 #include "crypto/aes-round.h"
17 #include "crypto/sm4.h"
18 #include "vec_internal.h"
19
20 #define HELPER_H "tcg/helper.h"
21 #include "exec/helper-proto.h.inc"
22
/*
 * One 128-bit vector value, viewable as two 64-bit halves, four 32-bit
 * words or sixteen bytes.  The helpers below load and store it through
 * .l and address individual elements through the CR_ST_* accessors.
 */
union CRYPTO_STATE {
    uint64_t    l[2];
    uint32_t    words[4];
    uint8_t     bytes[16];
};
28
/*
 * Element accessors for union CRYPTO_STATE.  The state is always loaded
 * as two host-order uint64_t values, so on a big-endian host the array
 * index of a given byte/word has to be remapped; on a little-endian host
 * the index is used unchanged.
 */
#if HOST_BIG_ENDIAN
#define CR_ST_BYTE_IDX(i)   ((15 - (i)) ^ 8)
#define CR_ST_WORD_IDX(i)   ((3 - (i)) ^ 2)
#else
#define CR_ST_BYTE_IDX(i)   (i)
#define CR_ST_WORD_IDX(i)   (i)
#endif

#define CR_ST_BYTE(state, i)   ((state).bytes[CR_ST_BYTE_IDX(i)])
#define CR_ST_WORD(state, i)   ((state).words[CR_ST_WORD_IDX(i)])
36
/*
 * Used by helpers that have not been converted to full gvec and thus
 * only write the low 16 bytes of the vector register.  The operation
 * size encoded in @desc must be exactly 16; clear_tail() is then invoked
 * for the range between that size and simd_maxsz(desc).
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    const int oprsz = simd_oprsz(desc);

    assert(oprsz == 16);
    clear_tail(vd, oprsz, simd_maxsz(desc));
}
49
50 static const AESState aes_zero = { };
51
HELPER(crypto_aese)52 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
53 {
54 intptr_t i, opr_sz = simd_oprsz(desc);
55
56 for (i = 0; i < opr_sz; i += 16) {
57 AESState *ad = (AESState *)(vd + i);
58 AESState *st = (AESState *)(vn + i);
59 AESState *rk = (AESState *)(vm + i);
60 AESState t;
61
62 /*
63 * Our uint64_t are in the wrong order for big-endian.
64 * The Arm AddRoundKey comes first, while the API AddRoundKey
65 * comes last: perform the xor here, and provide zero to API.
66 */
67 if (HOST_BIG_ENDIAN) {
68 t.d[0] = st->d[1] ^ rk->d[1];
69 t.d[1] = st->d[0] ^ rk->d[0];
70 aesenc_SB_SR_AK(&t, &t, &aes_zero, false);
71 ad->d[0] = t.d[1];
72 ad->d[1] = t.d[0];
73 } else {
74 t.v = st->v ^ rk->v;
75 aesenc_SB_SR_AK(ad, &t, &aes_zero, false);
76 }
77 }
78 clear_tail(vd, opr_sz, simd_maxsz(desc));
79 }
80
HELPER(crypto_aesd)81 void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc)
82 {
83 intptr_t i, opr_sz = simd_oprsz(desc);
84
85 for (i = 0; i < opr_sz; i += 16) {
86 AESState *ad = (AESState *)(vd + i);
87 AESState *st = (AESState *)(vn + i);
88 AESState *rk = (AESState *)(vm + i);
89 AESState t;
90
91 /* Our uint64_t are in the wrong order for big-endian. */
92 if (HOST_BIG_ENDIAN) {
93 t.d[0] = st->d[1] ^ rk->d[1];
94 t.d[1] = st->d[0] ^ rk->d[0];
95 aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false);
96 ad->d[0] = t.d[1];
97 ad->d[1] = t.d[0];
98 } else {
99 t.v = st->v ^ rk->v;
100 aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false);
101 }
102 }
103 clear_tail(vd, opr_sz, simd_maxsz(desc));
104 }
105
HELPER(crypto_aesmc)106 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
107 {
108 intptr_t i, opr_sz = simd_oprsz(desc);
109
110 for (i = 0; i < opr_sz; i += 16) {
111 AESState *ad = (AESState *)(vd + i);
112 AESState *st = (AESState *)(vm + i);
113 AESState t;
114
115 /* Our uint64_t are in the wrong order for big-endian. */
116 if (HOST_BIG_ENDIAN) {
117 t.d[0] = st->d[1];
118 t.d[1] = st->d[0];
119 aesenc_MC(&t, &t, false);
120 ad->d[0] = t.d[1];
121 ad->d[1] = t.d[0];
122 } else {
123 aesenc_MC(ad, st, false);
124 }
125 }
126 clear_tail(vd, opr_sz, simd_maxsz(desc));
127 }
128
HELPER(crypto_aesimc)129 void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc)
130 {
131 intptr_t i, opr_sz = simd_oprsz(desc);
132
133 for (i = 0; i < opr_sz; i += 16) {
134 AESState *ad = (AESState *)(vd + i);
135 AESState *st = (AESState *)(vm + i);
136 AESState t;
137
138 /* Our uint64_t are in the wrong order for big-endian. */
139 if (HOST_BIG_ENDIAN) {
140 t.d[0] = st->d[1];
141 t.d[1] = st->d[0];
142 aesdec_IMC(&t, &t, false);
143 ad->d[0] = t.d[1];
144 ad->d[1] = t.d[0];
145 } else {
146 aesdec_IMC(ad, st, false);
147 }
148 }
149 clear_tail(vd, opr_sz, simd_maxsz(desc));
150 }
151
/*
 * 32-bit logical functions (choose, parity, majority), shared by the
 * SHA-1, SHA-256 and SM3 helpers below
 */
155
/*
 * "Choose": each result bit comes from @y where the corresponding bit of
 * @x is set, and from @z where it is clear.
 */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t differ = y ^ z;  /* bits on which y and z disagree */

    return z ^ (differ & x);
}
160
/* "Parity": bitwise XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t xy = x ^ y;

    return xy ^ z;
}
165
/*
 * "Majority": a result bit is set when at least two of the corresponding
 * bits of @x, @y and @z are set.
 */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t both = x & y;
    const uint32_t either = x | y;

    return both | (either & z);
}
170
HELPER(crypto_sha1su0)171 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
172 {
173 uint64_t *d = vd, *n = vn, *m = vm;
174 uint64_t d0, d1;
175
176 d0 = d[1] ^ d[0] ^ m[0];
177 d1 = n[0] ^ d[1] ^ m[1];
178 d[0] = d0;
179 d[1] = d1;
180
181 clear_tail_16(vd, desc);
182 }
183
crypto_sha1_3reg(uint64_t * rd,uint64_t * rn,uint64_t * rm,uint32_t desc,uint32_t (* fn)(union CRYPTO_STATE * d))184 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
185 uint64_t *rm, uint32_t desc,
186 uint32_t (*fn)(union CRYPTO_STATE *d))
187 {
188 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
189 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
190 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
191 int i;
192
193 for (i = 0; i < 4; i++) {
194 uint32_t t = fn(&d);
195
196 t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
197 + CR_ST_WORD(m, i);
198
199 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
200 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
201 CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
202 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
203 CR_ST_WORD(d, 0) = t;
204 }
205 rd[0] = d.l[0];
206 rd[1] = d.l[1];
207
208 clear_tail_16(rd, desc);
209 }
210
do_sha1c(union CRYPTO_STATE * d)211 static uint32_t do_sha1c(union CRYPTO_STATE *d)
212 {
213 return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
214 }
215
HELPER(crypto_sha1c)216 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
217 {
218 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
219 }
220
do_sha1p(union CRYPTO_STATE * d)221 static uint32_t do_sha1p(union CRYPTO_STATE *d)
222 {
223 return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
224 }
225
HELPER(crypto_sha1p)226 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
227 {
228 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
229 }
230
do_sha1m(union CRYPTO_STATE * d)231 static uint32_t do_sha1m(union CRYPTO_STATE *d)
232 {
233 return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
234 }
235
HELPER(crypto_sha1m)236 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
237 {
238 crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
239 }
240
HELPER(crypto_sha1h)241 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
242 {
243 uint64_t *rd = vd;
244 uint64_t *rm = vm;
245 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
246
247 CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
248 CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
249
250 rd[0] = m.l[0];
251 rd[1] = m.l[1];
252
253 clear_tail_16(vd, desc);
254 }
255
HELPER(crypto_sha1su1)256 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
257 {
258 uint64_t *rd = vd;
259 uint64_t *rm = vm;
260 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
261 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
262
263 CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
264 CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
265 CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
266 CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
267
268 rd[0] = d.l[0];
269 rd[1] = d.l[1];
270
271 clear_tail_16(vd, desc);
272 }
273
274 /*
275 * The SHA-256 logical functions, according to
276 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
277 */
278
/* XOR of @x rotated right by 2, 13 and 22 bits. */
static uint32_t S0(uint32_t x)
{
    const uint32_t r2 = ror32(x, 2);
    const uint32_t r13 = ror32(x, 13);
    const uint32_t r22 = ror32(x, 22);

    return r2 ^ r13 ^ r22;
}
283
/* XOR of @x rotated right by 6, 11 and 25 bits. */
static uint32_t S1(uint32_t x)
{
    const uint32_t r6 = ror32(x, 6);
    const uint32_t r11 = ror32(x, 11);
    const uint32_t r25 = ror32(x, 25);

    return r6 ^ r11 ^ r25;
}
288
/* XOR of @x rotated right by 7, rotated right by 18 and shifted right by 3. */
static uint32_t s0(uint32_t x)
{
    const uint32_t r7 = ror32(x, 7);
    const uint32_t r18 = ror32(x, 18);
    const uint32_t sh3 = x >> 3;

    return r7 ^ r18 ^ sh3;
}
293
/* XOR of @x rotated right by 17, rotated right by 19 and shifted right by 10. */
static uint32_t s1(uint32_t x)
{
    const uint32_t r17 = ror32(x, 17);
    const uint32_t r19 = ror32(x, 19);
    const uint32_t sh10 = x >> 10;

    return r17 ^ r19 ^ sh10;
}
298
HELPER(crypto_sha256h)299 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
300 {
301 uint64_t *rd = vd;
302 uint64_t *rn = vn;
303 uint64_t *rm = vm;
304 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
305 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
306 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
307 int i;
308
309 for (i = 0; i < 4; i++) {
310 uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
311 + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
312 + CR_ST_WORD(m, i);
313
314 CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
315 CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
316 CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
317 CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
318
319 t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
320 + S0(CR_ST_WORD(d, 0));
321
322 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
323 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
324 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
325 CR_ST_WORD(d, 0) = t;
326 }
327
328 rd[0] = d.l[0];
329 rd[1] = d.l[1];
330
331 clear_tail_16(vd, desc);
332 }
333
HELPER(crypto_sha256h2)334 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
335 {
336 uint64_t *rd = vd;
337 uint64_t *rn = vn;
338 uint64_t *rm = vm;
339 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
340 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
341 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
342 int i;
343
344 for (i = 0; i < 4; i++) {
345 uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
346 + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
347 + CR_ST_WORD(m, i);
348
349 CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
350 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
351 CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
352 CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
353 }
354
355 rd[0] = d.l[0];
356 rd[1] = d.l[1];
357
358 clear_tail_16(vd, desc);
359 }
360
HELPER(crypto_sha256su0)361 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
362 {
363 uint64_t *rd = vd;
364 uint64_t *rm = vm;
365 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
366 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
367
368 CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
369 CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
370 CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
371 CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
372
373 rd[0] = d.l[0];
374 rd[1] = d.l[1];
375
376 clear_tail_16(vd, desc);
377 }
378
HELPER(crypto_sha256su1)379 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
380 {
381 uint64_t *rd = vd;
382 uint64_t *rn = vn;
383 uint64_t *rm = vm;
384 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
385 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
386 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
387
388 CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
389 CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
390 CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
391 CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
392
393 rd[0] = d.l[0];
394 rd[1] = d.l[1];
395
396 clear_tail_16(vd, desc);
397 }
398
399 /*
400 * The SHA-512 logical functions (same as above but using 64-bit operands)
401 */
402
/*
 * 64-bit "choose": each result bit comes from @y where the corresponding
 * bit of @x is set, and from @z where it is clear.
 */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t differ = y ^ z;  /* bits on which y and z disagree */

    return z ^ (differ & x);
}
407
/*
 * 64-bit "majority": a result bit is set when at least two of the
 * corresponding bits of @x, @y and @z are set.
 */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t both = x & y;
    const uint64_t either = x | y;

    return both | (either & z);
}
412
/* XOR of @x rotated right by 28, 34 and 39 bits. */
static uint64_t S0_512(uint64_t x)
{
    const uint64_t r28 = ror64(x, 28);
    const uint64_t r34 = ror64(x, 34);
    const uint64_t r39 = ror64(x, 39);

    return r28 ^ r34 ^ r39;
}
417
/* XOR of @x rotated right by 14, 18 and 41 bits. */
static uint64_t S1_512(uint64_t x)
{
    const uint64_t r14 = ror64(x, 14);
    const uint64_t r18 = ror64(x, 18);
    const uint64_t r41 = ror64(x, 41);

    return r14 ^ r18 ^ r41;
}
422
/* XOR of @x rotated right by 1, rotated right by 8 and shifted right by 7. */
static uint64_t s0_512(uint64_t x)
{
    const uint64_t r1 = ror64(x, 1);
    const uint64_t r8 = ror64(x, 8);
    const uint64_t sh7 = x >> 7;

    return r1 ^ r8 ^ sh7;
}
427
/* XOR of @x rotated right by 19, rotated right by 61 and shifted right by 6. */
static uint64_t s1_512(uint64_t x)
{
    const uint64_t r19 = ror64(x, 19);
    const uint64_t r61 = ror64(x, 61);
    const uint64_t sh6 = x >> 6;

    return r19 ^ r61 ^ sh6;
}
432
HELPER(crypto_sha512h)433 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
434 {
435 uint64_t *rd = vd;
436 uint64_t *rn = vn;
437 uint64_t *rm = vm;
438 uint64_t d0 = rd[0];
439 uint64_t d1 = rd[1];
440
441 d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
442 d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
443
444 rd[0] = d0;
445 rd[1] = d1;
446
447 clear_tail_16(vd, desc);
448 }
449
HELPER(crypto_sha512h2)450 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
451 {
452 uint64_t *rd = vd;
453 uint64_t *rn = vn;
454 uint64_t *rm = vm;
455 uint64_t d0 = rd[0];
456 uint64_t d1 = rd[1];
457
458 d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
459 d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
460
461 rd[0] = d0;
462 rd[1] = d1;
463
464 clear_tail_16(vd, desc);
465 }
466
HELPER(crypto_sha512su0)467 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
468 {
469 uint64_t *rd = vd;
470 uint64_t *rn = vn;
471 uint64_t d0 = rd[0];
472 uint64_t d1 = rd[1];
473
474 d0 += s0_512(rd[1]);
475 d1 += s0_512(rn[0]);
476
477 rd[0] = d0;
478 rd[1] = d1;
479
480 clear_tail_16(vd, desc);
481 }
482
HELPER(crypto_sha512su1)483 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
484 {
485 uint64_t *rd = vd;
486 uint64_t *rn = vn;
487 uint64_t *rm = vm;
488
489 rd[0] += s1_512(rn[0]) + rm[0];
490 rd[1] += s1_512(rn[1]) + rm[1];
491
492 clear_tail_16(vd, desc);
493 }
494
HELPER(crypto_sm3partw1)495 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
496 {
497 uint64_t *rd = vd;
498 uint64_t *rn = vn;
499 uint64_t *rm = vm;
500 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
501 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
502 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
503 uint32_t t;
504
505 t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
506 CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
507
508 t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
509 CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
510
511 t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
512 CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
513
514 t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
515 CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
516
517 rd[0] = d.l[0];
518 rd[1] = d.l[1];
519
520 clear_tail_16(vd, desc);
521 }
522
HELPER(crypto_sm3partw2)523 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
524 {
525 uint64_t *rd = vd;
526 uint64_t *rn = vn;
527 uint64_t *rm = vm;
528 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
529 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
530 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
531 uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
532
533 CR_ST_WORD(d, 0) ^= t;
534 CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
535 CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
536 CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
537 ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
538
539 rd[0] = d.l[0];
540 rd[1] = d.l[1];
541
542 clear_tail_16(vd, desc);
543 }
544
545 static inline void QEMU_ALWAYS_INLINE
crypto_sm3tt(uint64_t * rd,uint64_t * rn,uint64_t * rm,uint32_t desc,uint32_t opcode)546 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
547 uint32_t desc, uint32_t opcode)
548 {
549 union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
550 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
551 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
552 uint32_t imm2 = simd_data(desc);
553 uint32_t t;
554
555 assert(imm2 < 4);
556
557 if (opcode == 0 || opcode == 2) {
558 /* SM3TT1A, SM3TT2A */
559 t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
560 } else if (opcode == 1) {
561 /* SM3TT1B */
562 t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
563 } else if (opcode == 3) {
564 /* SM3TT2B */
565 t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
566 } else {
567 qemu_build_not_reached();
568 }
569
570 t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
571
572 CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
573
574 if (opcode < 2) {
575 /* SM3TT1A, SM3TT1B */
576 t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
577
578 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
579 } else {
580 /* SM3TT2A, SM3TT2B */
581 t += CR_ST_WORD(n, 3);
582 t ^= rol32(t, 9) ^ rol32(t, 17);
583
584 CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
585 }
586
587 CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
588 CR_ST_WORD(d, 3) = t;
589
590 rd[0] = d.l[0];
591 rd[1] = d.l[1];
592
593 clear_tail_16(rd, desc);
594 }
595
596 #define DO_SM3TT(NAME, OPCODE) \
597 void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
598 { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
599
600 DO_SM3TT(crypto_sm3tt1a, 0)
601 DO_SM3TT(crypto_sm3tt1b, 1)
602 DO_SM3TT(crypto_sm3tt2a, 2)
603 DO_SM3TT(crypto_sm3tt2b, 3)
604
605 #undef DO_SM3TT
606
do_crypto_sm4e(uint64_t * rd,uint64_t * rn,uint64_t * rm)607 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
608 {
609 union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
610 union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
611 uint32_t t, i;
612
613 for (i = 0; i < 4; i++) {
614 t = CR_ST_WORD(d, (i + 1) % 4) ^
615 CR_ST_WORD(d, (i + 2) % 4) ^
616 CR_ST_WORD(d, (i + 3) % 4) ^
617 CR_ST_WORD(n, i);
618
619 t = sm4_subword(t);
620
621 CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
622 rol32(t, 24);
623 }
624
625 rd[0] = d.l[0];
626 rd[1] = d.l[1];
627 }
628
HELPER(crypto_sm4e)629 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
630 {
631 intptr_t i, opr_sz = simd_oprsz(desc);
632
633 for (i = 0; i < opr_sz; i += 16) {
634 do_crypto_sm4e(vd + i, vn + i, vm + i);
635 }
636 clear_tail(vd, opr_sz, simd_maxsz(desc));
637 }
638
do_crypto_sm4ekey(uint64_t * rd,uint64_t * rn,uint64_t * rm)639 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
640 {
641 union CRYPTO_STATE d;
642 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
643 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
644 uint32_t t, i;
645
646 d = n;
647 for (i = 0; i < 4; i++) {
648 t = CR_ST_WORD(d, (i + 1) % 4) ^
649 CR_ST_WORD(d, (i + 2) % 4) ^
650 CR_ST_WORD(d, (i + 3) % 4) ^
651 CR_ST_WORD(m, i);
652
653 t = sm4_subword(t);
654
655 CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
656 }
657
658 rd[0] = d.l[0];
659 rd[1] = d.l[1];
660 }
661
HELPER(crypto_sm4ekey)662 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
663 {
664 intptr_t i, opr_sz = simd_oprsz(desc);
665
666 for (i = 0; i < opr_sz; i += 16) {
667 do_crypto_sm4ekey(vd + i, vn + i, vm + i);
668 }
669 clear_tail(vd, opr_sz, simd_maxsz(desc));
670 }
671
HELPER(crypto_rax1)672 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
673 {
674 intptr_t i, opr_sz = simd_oprsz(desc);
675 uint64_t *d = vd, *n = vn, *m = vm;
676
677 for (i = 0; i < opr_sz / 8; ++i) {
678 d[i] = n[i] ^ rol64(m[i], 1);
679 }
680 clear_tail(vd, opr_sz, simd_maxsz(desc));
681 }
682