xref: /qemu/target/arm/tcg/crypto_helper.c (revision 0f23908c5c9809f59ebe23e7ec49b5c774bc3ab5)
1 /*
2  * crypto_helper.c - emulate v8 Crypto Extensions instructions
3  *
4  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "cpu.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/aes.h"
18 #include "crypto/sm4.h"
19 #include "vec_internal.h"
20 
/*
 * One 128-bit chunk of vector data.  The three members alias the same
 * 16 bytes of storage, so a value loaded through .l can be inspected or
 * modified as bytes or as 32-bit words.
 */
union CRYPTO_STATE {
    uint8_t bytes[16];      /* sixteen 8-bit elements */
    uint32_t words[4];      /* four 32-bit elements */
    uint64_t l[2];          /* two 64-bit elements */
};
26 
/*
 * Element accessors for union CRYPTO_STATE.  Index i is used directly on
 * little-endian hosts; on big-endian hosts the index is remapped so that
 * the same logical element is addressed within each 64-bit half.
 */
#if !HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#endif
34 
/*
 * For helpers whose caller has not been converted to full gvec: such a
 * helper only modifies the low 16 bytes of the vector register, so the
 * operation size encoded in desc must be exactly 16.  Everything between
 * that and the maximum size is handed to clear_tail().
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    int oprsz = simd_oprsz(desc);

    assert(oprsz == 16);
    clear_tail(vd, oprsz, simd_maxsz(desc));
}
47 
48 static void do_crypto_aese(uint64_t *rd, uint64_t *rn, uint64_t *rm,
49                            const uint8_t *sbox, const uint8_t *shift)
50 {
51     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
52     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
53     int i;
54 
55     /* xor state vector with round key */
56     rk.l[0] ^= st.l[0];
57     rk.l[1] ^= st.l[1];
58 
59     /* combine ShiftRows operation and sbox substitution */
60     for (i = 0; i < 16; i++) {
61         CR_ST_BYTE(st, i) = sbox[CR_ST_BYTE(rk, shift[i])];
62     }
63 
64     rd[0] = st.l[0];
65     rd[1] = st.l[1];
66 }
67 
68 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
69 {
70     intptr_t i, opr_sz = simd_oprsz(desc);
71 
72     for (i = 0; i < opr_sz; i += 16) {
73         do_crypto_aese(vd + i, vn + i, vm + i, AES_sbox, AES_shifts);
74     }
75     clear_tail(vd, opr_sz, simd_maxsz(desc));
76 }
77 
78 void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc)
79 {
80     intptr_t i, opr_sz = simd_oprsz(desc);
81 
82     for (i = 0; i < opr_sz; i += 16) {
83         do_crypto_aese(vd + i, vn + i, vm + i, AES_isbox, AES_ishifts);
84     }
85     clear_tail(vd, opr_sz, simd_maxsz(desc));
86 }
87 
88 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, const uint32_t *mc)
89 {
90     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
91     int i;
92 
93     for (i = 0; i < 16; i += 4) {
94         CR_ST_WORD(st, i >> 2) =
95             mc[CR_ST_BYTE(st, i)] ^
96             rol32(mc[CR_ST_BYTE(st, i + 1)], 8) ^
97             rol32(mc[CR_ST_BYTE(st, i + 2)], 16) ^
98             rol32(mc[CR_ST_BYTE(st, i + 3)], 24);
99     }
100 
101     rd[0] = st.l[0];
102     rd[1] = st.l[1];
103 }
104 
105 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
106 {
107     intptr_t i, opr_sz = simd_oprsz(desc);
108 
109     for (i = 0; i < opr_sz; i += 16) {
110         do_crypto_aesmc(vd + i, vm + i, AES_mc_rot);
111     }
112     clear_tail(vd, opr_sz, simd_maxsz(desc));
113 }
114 
115 void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc)
116 {
117     intptr_t i, opr_sz = simd_oprsz(desc);
118 
119     for (i = 0; i < opr_sz; i += 16) {
120         do_crypto_aesmc(vd + i, vm + i, AES_imc_rot);
121     }
122     clear_tail(vd, opr_sz, simd_maxsz(desc));
123 }
124 
125 /*
126  * SHA-1 logical functions
127  */
128 
/* Bitwise "choose": each result bit is taken from y where x is 1, else z. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t from_y = x & y;
    const uint32_t from_z = ~x & z;

    return from_y | from_z;
}
133 
/* Bitwise parity: XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
138 
/* Bitwise majority: a result bit is 1 when at least two input bits are. */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t xy = x & y;
    const uint32_t xz = x & z;
    const uint32_t yz = y & z;

    return xy ^ xz ^ yz;
}
143 
144 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
145 {
146     uint64_t *d = vd, *n = vn, *m = vm;
147     uint64_t d0, d1;
148 
149     d0 = d[1] ^ d[0] ^ m[0];
150     d1 = n[0] ^ d[1] ^ m[1];
151     d[0] = d0;
152     d[1] = d1;
153 
154     clear_tail_16(vd, desc);
155 }
156 
157 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
158                                     uint64_t *rm, uint32_t desc,
159                                     uint32_t (*fn)(union CRYPTO_STATE *d))
160 {
161     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
162     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
163     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
164     int i;
165 
166     for (i = 0; i < 4; i++) {
167         uint32_t t = fn(&d);
168 
169         t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
170              + CR_ST_WORD(m, i);
171 
172         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
173         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
174         CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
175         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
176         CR_ST_WORD(d, 0) = t;
177     }
178     rd[0] = d.l[0];
179     rd[1] = d.l[1];
180 
181     clear_tail_16(rd, desc);
182 }
183 
184 static uint32_t do_sha1c(union CRYPTO_STATE *d)
185 {
186     return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
187 }
188 
189 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
190 {
191     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
192 }
193 
194 static uint32_t do_sha1p(union CRYPTO_STATE *d)
195 {
196     return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
197 }
198 
199 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
200 {
201     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
202 }
203 
204 static uint32_t do_sha1m(union CRYPTO_STATE *d)
205 {
206     return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
207 }
208 
209 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
210 {
211     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
212 }
213 
214 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
215 {
216     uint64_t *rd = vd;
217     uint64_t *rm = vm;
218     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
219 
220     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
221     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
222 
223     rd[0] = m.l[0];
224     rd[1] = m.l[1];
225 
226     clear_tail_16(vd, desc);
227 }
228 
229 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
230 {
231     uint64_t *rd = vd;
232     uint64_t *rm = vm;
233     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
234     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
235 
236     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
237     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
238     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
239     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
240 
241     rd[0] = d.l[0];
242     rd[1] = d.l[1];
243 
244     clear_tail_16(vd, desc);
245 }
246 
247 /*
248  * The SHA-256 logical functions, according to
249  * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
250  */
251 
/* SHA-256 upper-case sigma 0: XOR of x rotated right by 2, 13 and 22. */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
256 
/* SHA-256 upper-case sigma 1: XOR of x rotated right by 6, 11 and 25. */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
261 
/*
 * SHA-256 lower-case sigma 0: x rotated right by 7 and by 18, XORed with
 * x shifted right (not rotated) by 3.
 */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = ror32(x, 7);

    acc ^= ror32(x, 18);
    acc ^= x >> 3;
    return acc;
}
266 
/*
 * SHA-256 lower-case sigma 1: x rotated right by 17 and by 19, XORed with
 * x shifted right (not rotated) by 10.
 */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = ror32(x, 17);

    acc ^= ror32(x, 19);
    acc ^= x >> 10;
    return acc;
}
271 
272 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
273 {
274     uint64_t *rd = vd;
275     uint64_t *rn = vn;
276     uint64_t *rm = vm;
277     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
278     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
279     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
280     int i;
281 
282     for (i = 0; i < 4; i++) {
283         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
284                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
285                      + CR_ST_WORD(m, i);
286 
287         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
288         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
289         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
290         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
291 
292         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
293              + S0(CR_ST_WORD(d, 0));
294 
295         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
296         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
297         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
298         CR_ST_WORD(d, 0) = t;
299     }
300 
301     rd[0] = d.l[0];
302     rd[1] = d.l[1];
303 
304     clear_tail_16(vd, desc);
305 }
306 
307 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
308 {
309     uint64_t *rd = vd;
310     uint64_t *rn = vn;
311     uint64_t *rm = vm;
312     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
313     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
314     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
315     int i;
316 
317     for (i = 0; i < 4; i++) {
318         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
319                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
320                      + CR_ST_WORD(m, i);
321 
322         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
323         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
324         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
325         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
326     }
327 
328     rd[0] = d.l[0];
329     rd[1] = d.l[1];
330 
331     clear_tail_16(vd, desc);
332 }
333 
334 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
335 {
336     uint64_t *rd = vd;
337     uint64_t *rm = vm;
338     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
339     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
340 
341     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
342     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
343     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
344     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
345 
346     rd[0] = d.l[0];
347     rd[1] = d.l[1];
348 
349     clear_tail_16(vd, desc);
350 }
351 
352 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
353 {
354     uint64_t *rd = vd;
355     uint64_t *rn = vn;
356     uint64_t *rm = vm;
357     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
358     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
359     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
360 
361     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
362     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
363     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
364     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
365 
366     rd[0] = d.l[0];
367     rd[1] = d.l[1];
368 
369     clear_tail_16(vd, desc);
370 }
371 
372 /*
373  * The SHA-512 logical functions (same as above but using 64-bit operands)
374  */
375 
/* 64-bit "choose": each result bit comes from y where x is 1, else z. */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t from_y = x & y;
    const uint64_t from_z = ~x & z;

    return from_y | from_z;
}
380 
/* 64-bit majority: a result bit is 1 when at least two input bits are. */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t xy = x & y;
    const uint64_t xz = x & z;
    const uint64_t yz = y & z;

    return xy ^ xz ^ yz;
}
385 
/* SHA-512 upper-case sigma 0: XOR of x rotated right by 28, 34 and 39. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
390 
/* SHA-512 upper-case sigma 1: XOR of x rotated right by 14, 18 and 41. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
395 
/*
 * SHA-512 lower-case sigma 0: x rotated right by 1 and by 8, XORed with
 * x shifted right (not rotated) by 7.
 */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 1);

    acc ^= ror64(x, 8);
    acc ^= x >> 7;
    return acc;
}
400 
/*
 * SHA-512 lower-case sigma 1: x rotated right by 19 and by 61, XORed with
 * x shifted right (not rotated) by 6.
 */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 19);

    acc ^= ror64(x, 61);
    acc ^= x >> 6;
    return acc;
}
405 
406 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
407 {
408     uint64_t *rd = vd;
409     uint64_t *rn = vn;
410     uint64_t *rm = vm;
411     uint64_t d0 = rd[0];
412     uint64_t d1 = rd[1];
413 
414     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
415     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
416 
417     rd[0] = d0;
418     rd[1] = d1;
419 
420     clear_tail_16(vd, desc);
421 }
422 
423 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
424 {
425     uint64_t *rd = vd;
426     uint64_t *rn = vn;
427     uint64_t *rm = vm;
428     uint64_t d0 = rd[0];
429     uint64_t d1 = rd[1];
430 
431     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
432     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
433 
434     rd[0] = d0;
435     rd[1] = d1;
436 
437     clear_tail_16(vd, desc);
438 }
439 
440 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
441 {
442     uint64_t *rd = vd;
443     uint64_t *rn = vn;
444     uint64_t d0 = rd[0];
445     uint64_t d1 = rd[1];
446 
447     d0 += s0_512(rd[1]);
448     d1 += s0_512(rn[0]);
449 
450     rd[0] = d0;
451     rd[1] = d1;
452 
453     clear_tail_16(vd, desc);
454 }
455 
456 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
457 {
458     uint64_t *rd = vd;
459     uint64_t *rn = vn;
460     uint64_t *rm = vm;
461 
462     rd[0] += s1_512(rn[0]) + rm[0];
463     rd[1] += s1_512(rn[1]) + rm[1];
464 
465     clear_tail_16(vd, desc);
466 }
467 
468 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
469 {
470     uint64_t *rd = vd;
471     uint64_t *rn = vn;
472     uint64_t *rm = vm;
473     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
474     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
475     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
476     uint32_t t;
477 
478     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
479     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
480 
481     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
482     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
483 
484     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
485     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
486 
487     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
488     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
489 
490     rd[0] = d.l[0];
491     rd[1] = d.l[1];
492 
493     clear_tail_16(vd, desc);
494 }
495 
496 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
497 {
498     uint64_t *rd = vd;
499     uint64_t *rn = vn;
500     uint64_t *rm = vm;
501     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
502     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
503     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
504     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
505 
506     CR_ST_WORD(d, 0) ^= t;
507     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
508     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
509     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
510                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
511 
512     rd[0] = d.l[0];
513     rd[1] = d.l[1];
514 
515     clear_tail_16(vd, desc);
516 }
517 
518 static inline void QEMU_ALWAYS_INLINE
519 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
520              uint32_t desc, uint32_t opcode)
521 {
522     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
523     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
524     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
525     uint32_t imm2 = simd_data(desc);
526     uint32_t t;
527 
528     assert(imm2 < 4);
529 
530     if (opcode == 0 || opcode == 2) {
531         /* SM3TT1A, SM3TT2A */
532         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
533     } else if (opcode == 1) {
534         /* SM3TT1B */
535         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
536     } else if (opcode == 3) {
537         /* SM3TT2B */
538         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
539     } else {
540         qemu_build_not_reached();
541     }
542 
543     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
544 
545     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
546 
547     if (opcode < 2) {
548         /* SM3TT1A, SM3TT1B */
549         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
550 
551         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
552     } else {
553         /* SM3TT2A, SM3TT2B */
554         t += CR_ST_WORD(n, 3);
555         t ^= rol32(t, 9) ^ rol32(t, 17);
556 
557         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
558     }
559 
560     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
561     CR_ST_WORD(d, 3) = t;
562 
563     rd[0] = d.l[0];
564     rd[1] = d.l[1];
565 
566     clear_tail_16(rd, desc);
567 }
568 
569 #define DO_SM3TT(NAME, OPCODE) \
570     void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
571     { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
572 
573 DO_SM3TT(crypto_sm3tt1a, 0)
574 DO_SM3TT(crypto_sm3tt1b, 1)
575 DO_SM3TT(crypto_sm3tt2a, 2)
576 DO_SM3TT(crypto_sm3tt2b, 3)
577 
578 #undef DO_SM3TT
579 
580 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
581 {
582     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
583     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
584     uint32_t t, i;
585 
586     for (i = 0; i < 4; i++) {
587         t = CR_ST_WORD(d, (i + 1) % 4) ^
588             CR_ST_WORD(d, (i + 2) % 4) ^
589             CR_ST_WORD(d, (i + 3) % 4) ^
590             CR_ST_WORD(n, i);
591 
592         t = sm4_sbox[t & 0xff] |
593             sm4_sbox[(t >> 8) & 0xff] << 8 |
594             sm4_sbox[(t >> 16) & 0xff] << 16 |
595             sm4_sbox[(t >> 24) & 0xff] << 24;
596 
597         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
598                             rol32(t, 24);
599     }
600 
601     rd[0] = d.l[0];
602     rd[1] = d.l[1];
603 }
604 
605 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
606 {
607     intptr_t i, opr_sz = simd_oprsz(desc);
608 
609     for (i = 0; i < opr_sz; i += 16) {
610         do_crypto_sm4e(vd + i, vn + i, vm + i);
611     }
612     clear_tail(vd, opr_sz, simd_maxsz(desc));
613 }
614 
615 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
616 {
617     union CRYPTO_STATE d;
618     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
619     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
620     uint32_t t, i;
621 
622     d = n;
623     for (i = 0; i < 4; i++) {
624         t = CR_ST_WORD(d, (i + 1) % 4) ^
625             CR_ST_WORD(d, (i + 2) % 4) ^
626             CR_ST_WORD(d, (i + 3) % 4) ^
627             CR_ST_WORD(m, i);
628 
629         t = sm4_sbox[t & 0xff] |
630             sm4_sbox[(t >> 8) & 0xff] << 8 |
631             sm4_sbox[(t >> 16) & 0xff] << 16 |
632             sm4_sbox[(t >> 24) & 0xff] << 24;
633 
634         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
635     }
636 
637     rd[0] = d.l[0];
638     rd[1] = d.l[1];
639 }
640 
641 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
642 {
643     intptr_t i, opr_sz = simd_oprsz(desc);
644 
645     for (i = 0; i < opr_sz; i += 16) {
646         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
647     }
648     clear_tail(vd, opr_sz, simd_maxsz(desc));
649 }
650 
651 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
652 {
653     intptr_t i, opr_sz = simd_oprsz(desc);
654     uint64_t *d = vd, *n = vn, *m = vm;
655 
656     for (i = 0; i < opr_sz / 8; ++i) {
657         d[i] = n[i] ^ rol64(m[i], 1);
658     }
659     clear_tail(vd, opr_sz, simd_maxsz(desc));
660 }
661