xref: /qemu/target/arm/tcg/crypto_helper.c (revision 9f8d002499e06f60878f803cb6ad70f1220a3ce4)
1 /*
2  * crypto_helper.c - emulate v8 Crypto Extensions instructions
3  *
4  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/bitops.h"
14 
15 #include "tcg/tcg-gvec-desc.h"
16 #include "crypto/aes-round.h"
17 #include "crypto/sm4.h"
18 #include "vec_internal.h"
19 
20 #define HELPER_H "tcg/helper.h"
21 #include "exec/helper-proto.h.inc"
22 
/*
 * 128-bit scratch value with three overlapping views.  The helpers in
 * this file fill and drain it through .l[] and address individual
 * lanes through the CR_ST_BYTE / CR_ST_WORD accessors.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];   /* sixteen 8-bit lanes */
    uint32_t   words[4];    /* four 32-bit lanes */
    uint64_t   l[2];        /* two 64-bit halves */
};
28 
/*
 * Lane accessors for union CRYPTO_STATE; both expand to lvalues.
 * On big-endian hosts the index is remapped, (15 - i) ^ 8 for bytes
 * and (3 - i) ^ 2 for words, which reverses the lane order within
 * each 64-bit half.  On little-endian hosts the index is used as is.
 */
#if HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif
36 
/*
 * Tail handling for helpers that have not been converted to full gvec
 * and therefore only modify the low 16 bytes of the vector register.
 *
 * Asserts that the operation size encoded in @desc is exactly 16 and
 * passes @vd with the operation and maximum sizes on to clear_tail().
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    const int oprsz = simd_oprsz(desc);
    const int maxsz = simd_maxsz(desc);

    assert(oprsz == 16);
    clear_tail(vd, oprsz, maxsz);
}
49 
/* All-zero AESState, passed as the round key to the AES round API. */
static const AESState aes_zero = { };
51 
HELPER(crypto_aese)52 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
53 {
54     intptr_t i, opr_sz = simd_oprsz(desc);
55 
56     for (i = 0; i < opr_sz; i += 16) {
57         AESState *ad = (AESState *)(vd + i);
58         AESState *st = (AESState *)(vn + i);
59         AESState *rk = (AESState *)(vm + i);
60         AESState t;
61 
62         /*
63          * Our uint64_t are in the wrong order for big-endian.
64          * The Arm AddRoundKey comes first, while the API AddRoundKey
65          * comes last: perform the xor here, and provide zero to API.
66          */
67         if (HOST_BIG_ENDIAN) {
68             t.d[0] = st->d[1] ^ rk->d[1];
69             t.d[1] = st->d[0] ^ rk->d[0];
70             aesenc_SB_SR_AK(&t, &t, &aes_zero, false);
71             ad->d[0] = t.d[1];
72             ad->d[1] = t.d[0];
73         } else {
74             t.v = st->v ^ rk->v;
75             aesenc_SB_SR_AK(ad, &t, &aes_zero, false);
76         }
77     }
78     clear_tail(vd, opr_sz, simd_maxsz(desc));
79 }
80 
HELPER(crypto_aesd)81 void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc)
82 {
83     intptr_t i, opr_sz = simd_oprsz(desc);
84 
85     for (i = 0; i < opr_sz; i += 16) {
86         AESState *ad = (AESState *)(vd + i);
87         AESState *st = (AESState *)(vn + i);
88         AESState *rk = (AESState *)(vm + i);
89         AESState t;
90 
91         /* Our uint64_t are in the wrong order for big-endian. */
92         if (HOST_BIG_ENDIAN) {
93             t.d[0] = st->d[1] ^ rk->d[1];
94             t.d[1] = st->d[0] ^ rk->d[0];
95             aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false);
96             ad->d[0] = t.d[1];
97             ad->d[1] = t.d[0];
98         } else {
99             t.v = st->v ^ rk->v;
100             aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false);
101         }
102     }
103     clear_tail(vd, opr_sz, simd_maxsz(desc));
104 }
105 
HELPER(crypto_aesmc)106 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
107 {
108     intptr_t i, opr_sz = simd_oprsz(desc);
109 
110     for (i = 0; i < opr_sz; i += 16) {
111         AESState *ad = (AESState *)(vd + i);
112         AESState *st = (AESState *)(vm + i);
113         AESState t;
114 
115         /* Our uint64_t are in the wrong order for big-endian. */
116         if (HOST_BIG_ENDIAN) {
117             t.d[0] = st->d[1];
118             t.d[1] = st->d[0];
119             aesenc_MC(&t, &t, false);
120             ad->d[0] = t.d[1];
121             ad->d[1] = t.d[0];
122         } else {
123             aesenc_MC(ad, st, false);
124         }
125     }
126     clear_tail(vd, opr_sz, simd_maxsz(desc));
127 }
128 
HELPER(crypto_aesimc)129 void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc)
130 {
131     intptr_t i, opr_sz = simd_oprsz(desc);
132 
133     for (i = 0; i < opr_sz; i += 16) {
134         AESState *ad = (AESState *)(vd + i);
135         AESState *st = (AESState *)(vm + i);
136         AESState t;
137 
138         /* Our uint64_t are in the wrong order for big-endian. */
139         if (HOST_BIG_ENDIAN) {
140             t.d[0] = st->d[1];
141             t.d[1] = st->d[0];
142             aesdec_IMC(&t, &t, false);
143             ad->d[0] = t.d[1];
144             ad->d[1] = t.d[0];
145         } else {
146             aesdec_IMC(ad, st, false);
147         }
148     }
149     clear_tail(vd, opr_sz, simd_maxsz(desc));
150 }
151 
152 /*
153  * SHA-1 logical functions
154  */
155 
/*
 * Bitwise select: each result bit is taken from y where the matching
 * bit of x is set, and from z where it is clear.
 */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t from_y = x & y;
    const uint32_t from_z = ~x & z;

    return from_y | from_z;
}
160 
/* Bitwise parity: exclusive-or of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
165 
/*
 * Bitwise majority: a result bit is set when at least two of the
 * matching bits of x, y and z are set.
 */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t xy = x & y;
    const uint32_t xz = x & z;
    const uint32_t yz = y & z;

    return xy | xz | yz;
}
170 
HELPER(crypto_sha1su0)171 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
172 {
173     uint64_t *d = vd, *n = vn, *m = vm;
174     uint64_t d0, d1;
175 
176     d0 = d[1] ^ d[0] ^ m[0];
177     d1 = n[0] ^ d[1] ^ m[1];
178     d[0] = d0;
179     d[1] = d1;
180 
181     clear_tail_16(vd, desc);
182 }
183 
crypto_sha1_3reg(uint64_t * rd,uint64_t * rn,uint64_t * rm,uint32_t desc,uint32_t (* fn)(union CRYPTO_STATE * d))184 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
185                                     uint64_t *rm, uint32_t desc,
186                                     uint32_t (*fn)(union CRYPTO_STATE *d))
187 {
188     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
189     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
190     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
191     int i;
192 
193     for (i = 0; i < 4; i++) {
194         uint32_t t = fn(&d);
195 
196         t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
197              + CR_ST_WORD(m, i);
198 
199         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
200         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
201         CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
202         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
203         CR_ST_WORD(d, 0) = t;
204     }
205     rd[0] = d.l[0];
206     rd[1] = d.l[1];
207 
208     clear_tail_16(rd, desc);
209 }
210 
do_sha1c(union CRYPTO_STATE * d)211 static uint32_t do_sha1c(union CRYPTO_STATE *d)
212 {
213     return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
214 }
215 
HELPER(crypto_sha1c)216 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
217 {
218     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
219 }
220 
do_sha1p(union CRYPTO_STATE * d)221 static uint32_t do_sha1p(union CRYPTO_STATE *d)
222 {
223     return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
224 }
225 
HELPER(crypto_sha1p)226 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
227 {
228     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
229 }
230 
do_sha1m(union CRYPTO_STATE * d)231 static uint32_t do_sha1m(union CRYPTO_STATE *d)
232 {
233     return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
234 }
235 
HELPER(crypto_sha1m)236 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
237 {
238     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
239 }
240 
HELPER(crypto_sha1h)241 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
242 {
243     uint64_t *rd = vd;
244     uint64_t *rm = vm;
245     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
246 
247     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
248     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
249 
250     rd[0] = m.l[0];
251     rd[1] = m.l[1];
252 
253     clear_tail_16(vd, desc);
254 }
255 
HELPER(crypto_sha1su1)256 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
257 {
258     uint64_t *rd = vd;
259     uint64_t *rm = vm;
260     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
261     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
262 
263     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
264     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
265     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
266     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
267 
268     rd[0] = d.l[0];
269     rd[1] = d.l[1];
270 
271     clear_tail_16(vd, desc);
272 }
273 
274 /*
275  * The SHA-256 logical functions, according to
276  * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
277  */
278 
/* Xor of x rotated right by 2, 13 and 22 bits. */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
283 
/* Xor of x rotated right by 6, 11 and 25 bits. */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
288 
/* Xor of x rotated right by 7 and 18 bits and x shifted right by 3. */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = ror32(x, 7);

    acc ^= ror32(x, 18);
    acc ^= x >> 3;
    return acc;
}
293 
/* Xor of x rotated right by 17 and 19 bits and x shifted right by 10. */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = ror32(x, 17);

    acc ^= ror32(x, 19);
    acc ^= x >> 10;
    return acc;
}
298 
HELPER(crypto_sha256h)299 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
300 {
301     uint64_t *rd = vd;
302     uint64_t *rn = vn;
303     uint64_t *rm = vm;
304     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
305     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
306     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
307     int i;
308 
309     for (i = 0; i < 4; i++) {
310         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
311                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
312                      + CR_ST_WORD(m, i);
313 
314         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
315         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
316         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
317         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
318 
319         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
320              + S0(CR_ST_WORD(d, 0));
321 
322         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
323         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
324         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
325         CR_ST_WORD(d, 0) = t;
326     }
327 
328     rd[0] = d.l[0];
329     rd[1] = d.l[1];
330 
331     clear_tail_16(vd, desc);
332 }
333 
HELPER(crypto_sha256h2)334 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
335 {
336     uint64_t *rd = vd;
337     uint64_t *rn = vn;
338     uint64_t *rm = vm;
339     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
340     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
341     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
342     int i;
343 
344     for (i = 0; i < 4; i++) {
345         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
346                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
347                      + CR_ST_WORD(m, i);
348 
349         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
350         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
351         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
352         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
353     }
354 
355     rd[0] = d.l[0];
356     rd[1] = d.l[1];
357 
358     clear_tail_16(vd, desc);
359 }
360 
HELPER(crypto_sha256su0)361 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
362 {
363     uint64_t *rd = vd;
364     uint64_t *rm = vm;
365     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
366     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
367 
368     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
369     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
370     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
371     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
372 
373     rd[0] = d.l[0];
374     rd[1] = d.l[1];
375 
376     clear_tail_16(vd, desc);
377 }
378 
HELPER(crypto_sha256su1)379 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
380 {
381     uint64_t *rd = vd;
382     uint64_t *rn = vn;
383     uint64_t *rm = vm;
384     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
385     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
386     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
387 
388     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
389     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
390     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
391     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
392 
393     rd[0] = d.l[0];
394     rd[1] = d.l[1];
395 
396     clear_tail_16(vd, desc);
397 }
398 
399 /*
400  * The SHA-512 logical functions (same as above but using 64-bit operands)
401  */
402 
/*
 * 64-bit bitwise select: each result bit is taken from y where the
 * matching bit of x is set, and from z where it is clear.
 */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t from_y = x & y;
    const uint64_t from_z = ~x & z;

    return from_y | from_z;
}
407 
/*
 * 64-bit bitwise majority: a result bit is set when at least two of
 * the matching bits of x, y and z are set.
 */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t xy = x & y;
    const uint64_t xz = x & z;
    const uint64_t yz = y & z;

    return xy | xz | yz;
}
412 
/* Xor of x rotated right by 28, 34 and 39 bits. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
417 
/* Xor of x rotated right by 14, 18 and 41 bits. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
422 
/* Xor of x rotated right by 1 and 8 bits and x shifted right by 7. */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 1);

    acc ^= ror64(x, 8);
    acc ^= x >> 7;
    return acc;
}
427 
/* Xor of x rotated right by 19 and 61 bits and x shifted right by 6. */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 19);

    acc ^= ror64(x, 61);
    acc ^= x >> 6;
    return acc;
}
432 
HELPER(crypto_sha512h)433 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
434 {
435     uint64_t *rd = vd;
436     uint64_t *rn = vn;
437     uint64_t *rm = vm;
438     uint64_t d0 = rd[0];
439     uint64_t d1 = rd[1];
440 
441     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
442     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
443 
444     rd[0] = d0;
445     rd[1] = d1;
446 
447     clear_tail_16(vd, desc);
448 }
449 
HELPER(crypto_sha512h2)450 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
451 {
452     uint64_t *rd = vd;
453     uint64_t *rn = vn;
454     uint64_t *rm = vm;
455     uint64_t d0 = rd[0];
456     uint64_t d1 = rd[1];
457 
458     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
459     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
460 
461     rd[0] = d0;
462     rd[1] = d1;
463 
464     clear_tail_16(vd, desc);
465 }
466 
HELPER(crypto_sha512su0)467 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
468 {
469     uint64_t *rd = vd;
470     uint64_t *rn = vn;
471     uint64_t d0 = rd[0];
472     uint64_t d1 = rd[1];
473 
474     d0 += s0_512(rd[1]);
475     d1 += s0_512(rn[0]);
476 
477     rd[0] = d0;
478     rd[1] = d1;
479 
480     clear_tail_16(vd, desc);
481 }
482 
HELPER(crypto_sha512su1)483 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
484 {
485     uint64_t *rd = vd;
486     uint64_t *rn = vn;
487     uint64_t *rm = vm;
488 
489     rd[0] += s1_512(rn[0]) + rm[0];
490     rd[1] += s1_512(rn[1]) + rm[1];
491 
492     clear_tail_16(vd, desc);
493 }
494 
HELPER(crypto_sm3partw1)495 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
496 {
497     uint64_t *rd = vd;
498     uint64_t *rn = vn;
499     uint64_t *rm = vm;
500     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
501     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
502     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
503     uint32_t t;
504 
505     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
506     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
507 
508     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
509     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
510 
511     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
512     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
513 
514     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
515     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
516 
517     rd[0] = d.l[0];
518     rd[1] = d.l[1];
519 
520     clear_tail_16(vd, desc);
521 }
522 
HELPER(crypto_sm3partw2)523 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
524 {
525     uint64_t *rd = vd;
526     uint64_t *rn = vn;
527     uint64_t *rm = vm;
528     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
529     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
530     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
531     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
532 
533     CR_ST_WORD(d, 0) ^= t;
534     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
535     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
536     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
537                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
538 
539     rd[0] = d.l[0];
540     rd[1] = d.l[1];
541 
542     clear_tail_16(vd, desc);
543 }
544 
545 static inline void QEMU_ALWAYS_INLINE
crypto_sm3tt(uint64_t * rd,uint64_t * rn,uint64_t * rm,uint32_t desc,uint32_t opcode)546 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
547              uint32_t desc, uint32_t opcode)
548 {
549     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
550     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
551     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
552     uint32_t imm2 = simd_data(desc);
553     uint32_t t;
554 
555     assert(imm2 < 4);
556 
557     if (opcode == 0 || opcode == 2) {
558         /* SM3TT1A, SM3TT2A */
559         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
560     } else if (opcode == 1) {
561         /* SM3TT1B */
562         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
563     } else if (opcode == 3) {
564         /* SM3TT2B */
565         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
566     } else {
567         qemu_build_not_reached();
568     }
569 
570     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
571 
572     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
573 
574     if (opcode < 2) {
575         /* SM3TT1A, SM3TT1B */
576         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
577 
578         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
579     } else {
580         /* SM3TT2A, SM3TT2B */
581         t += CR_ST_WORD(n, 3);
582         t ^= rol32(t, 9) ^ rol32(t, 17);
583 
584         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
585     }
586 
587     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
588     CR_ST_WORD(d, 3) = t;
589 
590     rd[0] = d.l[0];
591     rd[1] = d.l[1];
592 
593     clear_tail_16(rd, desc);
594 }
595 
/*
 * Emit one out-of-line helper per SM3TT variant.  Each helper simply
 * forwards to crypto_sm3tt() with a literal OPCODE:
 * 0 = SM3TT1A, 1 = SM3TT1B, 2 = SM3TT2A, 3 = SM3TT2B.
 */
#define DO_SM3TT(NAME, OPCODE) \
    void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
    { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }

DO_SM3TT(crypto_sm3tt1a, 0)
DO_SM3TT(crypto_sm3tt1b, 1)
DO_SM3TT(crypto_sm3tt2a, 2)
DO_SM3TT(crypto_sm3tt2b, 3)

#undef DO_SM3TT
606 
do_crypto_sm4e(uint64_t * rd,uint64_t * rn,uint64_t * rm)607 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
608 {
609     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
610     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
611     uint32_t t, i;
612 
613     for (i = 0; i < 4; i++) {
614         t = CR_ST_WORD(d, (i + 1) % 4) ^
615             CR_ST_WORD(d, (i + 2) % 4) ^
616             CR_ST_WORD(d, (i + 3) % 4) ^
617             CR_ST_WORD(n, i);
618 
619         t = sm4_subword(t);
620 
621         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
622                             rol32(t, 24);
623     }
624 
625     rd[0] = d.l[0];
626     rd[1] = d.l[1];
627 }
628 
HELPER(crypto_sm4e)629 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
630 {
631     intptr_t i, opr_sz = simd_oprsz(desc);
632 
633     for (i = 0; i < opr_sz; i += 16) {
634         do_crypto_sm4e(vd + i, vn + i, vm + i);
635     }
636     clear_tail(vd, opr_sz, simd_maxsz(desc));
637 }
638 
do_crypto_sm4ekey(uint64_t * rd,uint64_t * rn,uint64_t * rm)639 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
640 {
641     union CRYPTO_STATE d;
642     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
643     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
644     uint32_t t, i;
645 
646     d = n;
647     for (i = 0; i < 4; i++) {
648         t = CR_ST_WORD(d, (i + 1) % 4) ^
649             CR_ST_WORD(d, (i + 2) % 4) ^
650             CR_ST_WORD(d, (i + 3) % 4) ^
651             CR_ST_WORD(m, i);
652 
653         t = sm4_subword(t);
654 
655         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
656     }
657 
658     rd[0] = d.l[0];
659     rd[1] = d.l[1];
660 }
661 
HELPER(crypto_sm4ekey)662 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
663 {
664     intptr_t i, opr_sz = simd_oprsz(desc);
665 
666     for (i = 0; i < opr_sz; i += 16) {
667         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
668     }
669     clear_tail(vd, opr_sz, simd_maxsz(desc));
670 }
671 
HELPER(crypto_rax1)672 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
673 {
674     intptr_t i, opr_sz = simd_oprsz(desc);
675     uint64_t *d = vd, *n = vn, *m = vm;
676 
677     for (i = 0; i < opr_sz / 8; ++i) {
678         d[i] = n[i] ^ rol64(m[i], 1);
679     }
680     clear_tail(vd, opr_sz, simd_maxsz(desc));
681 }
682