xref: /qemu/target/arm/tcg/crypto_helper.c (revision aaffebd6d3135b8aed7e61932af53b004d261579)
1 /*
2  * crypto_helper.c - emulate v8 Crypto Extensions instructions
3  *
4  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "cpu.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/aes.h"
18 #include "vec_internal.h"
19 
/*
 * One 128-bit value, viewable at three granularities. All members
 * overlay the same 16 bytes of storage. Code in this file fills the
 * union through .l (two 64-bit halves) and then picks individual
 * elements with CR_ST_BYTE() / CR_ST_WORD().
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];   /* sixteen 8-bit elements */
    uint32_t   words[4];    /* four 32-bit elements */
    uint64_t   l[2];        /* two 64-bit halves */
};
25 
/*
 * Element accessors for union CRYPTO_STATE; both expand to lvalues.
 *
 * On a big-endian host the index is remapped so that it is reversed
 * within each 64-bit half: byte i becomes (15 - i) ^ 8 and word i
 * becomes (3 - i) ^ 2. Otherwise the index is used directly.
 *
 * Both macro arguments are parenthesised so that an expression such as
 * *ptr or arr[k] can be passed as the state.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[(i)])
#define CR_ST_WORD(state, i)   ((state).words[(i)])
#endif
33 
/*
 * Tail handling for helpers whose caller has not been converted to full
 * gvec and therefore only modifies the low 16 bytes of the vector
 * register.
 *
 * vd:   destination vector.
 * desc: descriptor; its operation size must be exactly 16 bytes
 *       (asserted), and its maximum size is passed on to clear_tail().
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    const int oprsz = simd_oprsz(desc);
    const int maxsz = simd_maxsz(desc);

    /* These helpers only ever operate on a single 128-bit segment. */
    assert(oprsz == 16);

    clear_tail(vd, oprsz, maxsz);
}
46 
47 static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
48                            uint64_t *rm, bool decrypt)
49 {
50     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
51     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
52     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
53     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
54     int i;
55 
56     /* xor state vector with round key */
57     rk.l[0] ^= st.l[0];
58     rk.l[1] ^= st.l[1];
59 
60     /* combine ShiftRows operation and sbox substitution */
61     for (i = 0; i < 16; i++) {
62         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
63     }
64 
65     rd[0] = st.l[0];
66     rd[1] = st.l[1];
67 }
68 
69 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
70 {
71     intptr_t i, opr_sz = simd_oprsz(desc);
72     bool decrypt = simd_data(desc);
73 
74     for (i = 0; i < opr_sz; i += 16) {
75         do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
76     }
77     clear_tail(vd, opr_sz, simd_maxsz(desc));
78 }
79 
80 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
81 {
82     static uint32_t const mc[][256] = { {
83         /* MixColumns lookup table */
84         0x00000000, 0x03010102, 0x06020204, 0x05030306,
85         0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
86         0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
87         0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
88         0x30101020, 0x33111122, 0x36121224, 0x35131326,
89         0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
90         0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
91         0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
92         0x60202040, 0x63212142, 0x66222244, 0x65232346,
93         0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
94         0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
95         0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
96         0x50303060, 0x53313162, 0x56323264, 0x55333366,
97         0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
98         0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
99         0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
100         0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
101         0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
102         0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
103         0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
104         0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
105         0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
106         0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
107         0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
108         0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
109         0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
110         0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
111         0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
112         0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
113         0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
114         0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
115         0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
116         0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
117         0x97848413, 0x94858511, 0x91868617, 0x92878715,
118         0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
119         0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
120         0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
121         0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
122         0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
123         0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
124         0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
125         0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
126         0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
127         0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
128         0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
129         0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
130         0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
131         0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
132         0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
133         0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
134         0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
135         0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
136         0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
137         0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
138         0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
139         0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
140         0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
141         0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
142         0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
143         0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
144         0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
145         0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
146         0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
147         0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
148     }, {
149         /* Inverse MixColumns lookup table */
150         0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
151         0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
152         0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
153         0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
154         0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
155         0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
156         0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
157         0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
158         0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
159         0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
160         0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
161         0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
162         0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
163         0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
164         0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
165         0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
166         0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
167         0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
168         0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
169         0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
170         0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
171         0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
172         0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
173         0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
174         0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
175         0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
176         0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
177         0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
178         0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
179         0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
180         0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
181         0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
182         0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
183         0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
184         0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
185         0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
186         0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
187         0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
188         0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
189         0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
190         0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
191         0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
192         0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
193         0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
194         0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
195         0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
196         0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
197         0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
198         0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
199         0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
200         0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
201         0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
202         0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
203         0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
204         0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
205         0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
206         0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
207         0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
208         0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
209         0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
210         0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
211         0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
212         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
213         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
214     } };
215 
216     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
217     int i;
218 
219     for (i = 0; i < 16; i += 4) {
220         CR_ST_WORD(st, i >> 2) =
221             mc[decrypt][CR_ST_BYTE(st, i)] ^
222             rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
223             rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
224             rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
225     }
226 
227     rd[0] = st.l[0];
228     rd[1] = st.l[1];
229 }
230 
231 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
232 {
233     intptr_t i, opr_sz = simd_oprsz(desc);
234     bool decrypt = simd_data(desc);
235 
236     for (i = 0; i < opr_sz; i += 16) {
237         do_crypto_aesmc(vd + i, vm + i, decrypt);
238     }
239     clear_tail(vd, opr_sz, simd_maxsz(desc));
240 }
241 
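/*
 * SHA-1 logical functions (choose, parity, majority).
 * cho() and maj() are also used by the SHA-256 helpers below, and all
 * three are used by the SM3TT helper.
 */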
245 
/*
 * Bitwise "choose": for every bit position, take the bit from y where
 * x has a 1 and the bit from z where x has a 0.
 */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t from_y = x & y;
    uint32_t from_z = ~x & z;

    return from_y | from_z;
}
250 
/* Bitwise parity: each result bit is the XOR of the three input bits. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
255 
/*
 * Bitwise majority: each result bit is 1 when at least two of the three
 * corresponding input bits are 1.
 */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & y) | (x & z) | (y & z);
}
260 
261 void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
262 {
263     uint64_t *rd = vd;
264     uint64_t *rn = vn;
265     uint64_t *rm = vm;
266     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
267     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
268     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
269 
270     if (op == 3) { /* sha1su0 */
271         d.l[0] ^= d.l[1] ^ m.l[0];
272         d.l[1] ^= n.l[0] ^ m.l[1];
273     } else {
274         int i;
275 
276         for (i = 0; i < 4; i++) {
277             uint32_t t;
278 
279             switch (op) {
280             case 0: /* sha1c */
281                 t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
282                 break;
283             case 1: /* sha1p */
284                 t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
285                 break;
286             case 2: /* sha1m */
287                 t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
288                 break;
289             default:
290                 g_assert_not_reached();
291             }
292             t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
293                  + CR_ST_WORD(m, i);
294 
295             CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
296             CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
297             CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
298             CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
299             CR_ST_WORD(d, 0) = t;
300         }
301     }
302     rd[0] = d.l[0];
303     rd[1] = d.l[1];
304 }
305 
306 void HELPER(crypto_sha1h)(void *vd, void *vm)
307 {
308     uint64_t *rd = vd;
309     uint64_t *rm = vm;
310     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
311 
312     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
313     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
314 
315     rd[0] = m.l[0];
316     rd[1] = m.l[1];
317 }
318 
319 void HELPER(crypto_sha1su1)(void *vd, void *vm)
320 {
321     uint64_t *rd = vd;
322     uint64_t *rm = vm;
323     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
324     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
325 
326     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
327     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
328     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
329     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
330 
331     rd[0] = d.l[0];
332     rd[1] = d.l[1];
333 }
334 
/*
 * The SHA-256 logical functions, according to
 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 * (the same functions are specified in NIST FIPS 180-4, section 4.1.2).
 */
339 
/* SHA-256 "big sigma 0": XOR of x rotated right by 2, 13 and 22 bits. */
static uint32_t S0(uint32_t x)
{
    uint32_t r = ror32(x, 2);

    r ^= ror32(x, 13);
    r ^= ror32(x, 22);
    return r;
}
344 
/* SHA-256 "big sigma 1": XOR of x rotated right by 6, 11 and 25 bits. */
static uint32_t S1(uint32_t x)
{
    uint32_t r = ror32(x, 6);

    r ^= ror32(x, 11);
    r ^= ror32(x, 25);
    return r;
}
349 
/*
 * SHA-256 "small sigma 0": XOR of x rotated right by 7 and 18 bits and
 * x shifted right (not rotated) by 3 bits.
 */
static uint32_t s0(uint32_t x)
{
    uint32_t r = ror32(x, 7);

    r ^= ror32(x, 18);
    r ^= x >> 3;
    return r;
}
354 
/*
 * SHA-256 "small sigma 1": XOR of x rotated right by 17 and 19 bits and
 * x shifted right (not rotated) by 10 bits.
 */
static uint32_t s1(uint32_t x)
{
    uint32_t r = ror32(x, 17);

    r ^= ror32(x, 19);
    r ^= x >> 10;
    return r;
}
359 
360 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
361 {
362     uint64_t *rd = vd;
363     uint64_t *rn = vn;
364     uint64_t *rm = vm;
365     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
366     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
367     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
368     int i;
369 
370     for (i = 0; i < 4; i++) {
371         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
372                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
373                      + CR_ST_WORD(m, i);
374 
375         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
376         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
377         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
378         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
379 
380         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
381              + S0(CR_ST_WORD(d, 0));
382 
383         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
384         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
385         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
386         CR_ST_WORD(d, 0) = t;
387     }
388 
389     rd[0] = d.l[0];
390     rd[1] = d.l[1];
391 }
392 
393 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
394 {
395     uint64_t *rd = vd;
396     uint64_t *rn = vn;
397     uint64_t *rm = vm;
398     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
399     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
400     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
401     int i;
402 
403     for (i = 0; i < 4; i++) {
404         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
405                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
406                      + CR_ST_WORD(m, i);
407 
408         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
409         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
410         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
411         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
412     }
413 
414     rd[0] = d.l[0];
415     rd[1] = d.l[1];
416 }
417 
418 void HELPER(crypto_sha256su0)(void *vd, void *vm)
419 {
420     uint64_t *rd = vd;
421     uint64_t *rm = vm;
422     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
423     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
424 
425     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
426     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
427     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
428     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
429 
430     rd[0] = d.l[0];
431     rd[1] = d.l[1];
432 }
433 
434 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
435 {
436     uint64_t *rd = vd;
437     uint64_t *rn = vn;
438     uint64_t *rm = vm;
439     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
440     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
441     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
442 
443     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
444     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
445     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
446     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
447 
448     rd[0] = d.l[0];
449     rd[1] = d.l[1];
450 }
451 
/*
 * The SHA-512 logical functions. They have the same structure as the
 * SHA-256 ones above, but take 64-bit operands and use different
 * rotate and shift counts.
 */
455 
/*
 * 64-bit bitwise "choose": for every bit position, take the bit from y
 * where x has a 1 and the bit from z where x has a 0.
 */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    return (x & y) | (~x & z);
}
460 
/*
 * 64-bit bitwise majority: each result bit is 1 when at least two of
 * the three corresponding input bits are 1.
 */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t xy = x & y;
    uint64_t xz = x & z;
    uint64_t yz = y & z;

    return xy | xz | yz;
}
465 
/* SHA-512 "big sigma 0": XOR of x rotated right by 28, 34 and 39 bits. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t r = ror64(x, 28);

    r ^= ror64(x, 34);
    r ^= ror64(x, 39);
    return r;
}
470 
/* SHA-512 "big sigma 1": XOR of x rotated right by 14, 18 and 41 bits. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t r = ror64(x, 14);

    r ^= ror64(x, 18);
    r ^= ror64(x, 41);
    return r;
}
475 
/*
 * SHA-512 "small sigma 0": XOR of x rotated right by 1 and 8 bits and
 * x shifted right (not rotated) by 7 bits.
 */
static uint64_t s0_512(uint64_t x)
{
    uint64_t r = ror64(x, 1);

    r ^= ror64(x, 8);
    r ^= x >> 7;
    return r;
}
480 
/*
 * SHA-512 "small sigma 1": XOR of x rotated right by 19 and 61 bits and
 * x shifted right (not rotated) by 6 bits.
 */
static uint64_t s1_512(uint64_t x)
{
    uint64_t r = ror64(x, 19);

    r ^= ror64(x, 61);
    r ^= x >> 6;
    return r;
}
485 
486 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
487 {
488     uint64_t *rd = vd;
489     uint64_t *rn = vn;
490     uint64_t *rm = vm;
491     uint64_t d0 = rd[0];
492     uint64_t d1 = rd[1];
493 
494     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
495     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
496 
497     rd[0] = d0;
498     rd[1] = d1;
499 
500     clear_tail_16(vd, desc);
501 }
502 
503 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
504 {
505     uint64_t *rd = vd;
506     uint64_t *rn = vn;
507     uint64_t *rm = vm;
508     uint64_t d0 = rd[0];
509     uint64_t d1 = rd[1];
510 
511     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
512     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
513 
514     rd[0] = d0;
515     rd[1] = d1;
516 
517     clear_tail_16(vd, desc);
518 }
519 
520 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
521 {
522     uint64_t *rd = vd;
523     uint64_t *rn = vn;
524     uint64_t d0 = rd[0];
525     uint64_t d1 = rd[1];
526 
527     d0 += s0_512(rd[1]);
528     d1 += s0_512(rn[0]);
529 
530     rd[0] = d0;
531     rd[1] = d1;
532 
533     clear_tail_16(vd, desc);
534 }
535 
536 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
537 {
538     uint64_t *rd = vd;
539     uint64_t *rn = vn;
540     uint64_t *rm = vm;
541 
542     rd[0] += s1_512(rn[0]) + rm[0];
543     rd[1] += s1_512(rn[1]) + rm[1];
544 
545     clear_tail_16(vd, desc);
546 }
547 
548 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
549 {
550     uint64_t *rd = vd;
551     uint64_t *rn = vn;
552     uint64_t *rm = vm;
553     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
554     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
555     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
556     uint32_t t;
557 
558     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
559     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
560 
561     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
562     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
563 
564     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
565     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
566 
567     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
568     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
569 
570     rd[0] = d.l[0];
571     rd[1] = d.l[1];
572 
573     clear_tail_16(vd, desc);
574 }
575 
576 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
577 {
578     uint64_t *rd = vd;
579     uint64_t *rn = vn;
580     uint64_t *rm = vm;
581     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
582     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
583     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
584     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
585 
586     CR_ST_WORD(d, 0) ^= t;
587     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
588     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
589     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
590                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
591 
592     rd[0] = d.l[0];
593     rd[1] = d.l[1];
594 
595     clear_tail_16(vd, desc);
596 }
597 
598 void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
599                           uint32_t opcode)
600 {
601     uint64_t *rd = vd;
602     uint64_t *rn = vn;
603     uint64_t *rm = vm;
604     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
605     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
606     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
607     uint32_t t;
608 
609     assert(imm2 < 4);
610 
611     if (opcode == 0 || opcode == 2) {
612         /* SM3TT1A, SM3TT2A */
613         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
614     } else if (opcode == 1) {
615         /* SM3TT1B */
616         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
617     } else if (opcode == 3) {
618         /* SM3TT2B */
619         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
620     } else {
621         g_assert_not_reached();
622     }
623 
624     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
625 
626     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
627 
628     if (opcode < 2) {
629         /* SM3TT1A, SM3TT1B */
630         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
631 
632         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
633     } else {
634         /* SM3TT2A, SM3TT2B */
635         t += CR_ST_WORD(n, 3);
636         t ^= rol32(t, 9) ^ rol32(t, 17);
637 
638         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
639     }
640 
641     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
642     CR_ST_WORD(d, 3) = t;
643 
644     rd[0] = d.l[0];
645     rd[1] = d.l[1];
646 }
647 
/*
 * SM4 byte substitution table, indexed by the input byte. It is applied
 * to each byte of a 32-bit word by do_crypto_sm4e() and
 * do_crypto_sm4ekey().
 */
static const uint8_t sm4_sbox[256] = {
    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
682 
683 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
684 {
685     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
686     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
687     uint32_t t, i;
688 
689     for (i = 0; i < 4; i++) {
690         t = CR_ST_WORD(d, (i + 1) % 4) ^
691             CR_ST_WORD(d, (i + 2) % 4) ^
692             CR_ST_WORD(d, (i + 3) % 4) ^
693             CR_ST_WORD(n, i);
694 
695         t = sm4_sbox[t & 0xff] |
696             sm4_sbox[(t >> 8) & 0xff] << 8 |
697             sm4_sbox[(t >> 16) & 0xff] << 16 |
698             sm4_sbox[(t >> 24) & 0xff] << 24;
699 
700         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
701                             rol32(t, 24);
702     }
703 
704     rd[0] = d.l[0];
705     rd[1] = d.l[1];
706 }
707 
708 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
709 {
710     intptr_t i, opr_sz = simd_oprsz(desc);
711 
712     for (i = 0; i < opr_sz; i += 16) {
713         do_crypto_sm4e(vd + i, vn + i, vm + i);
714     }
715     clear_tail(vd, opr_sz, simd_maxsz(desc));
716 }
717 
718 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
719 {
720     union CRYPTO_STATE d;
721     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
722     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
723     uint32_t t, i;
724 
725     d = n;
726     for (i = 0; i < 4; i++) {
727         t = CR_ST_WORD(d, (i + 1) % 4) ^
728             CR_ST_WORD(d, (i + 2) % 4) ^
729             CR_ST_WORD(d, (i + 3) % 4) ^
730             CR_ST_WORD(m, i);
731 
732         t = sm4_sbox[t & 0xff] |
733             sm4_sbox[(t >> 8) & 0xff] << 8 |
734             sm4_sbox[(t >> 16) & 0xff] << 16 |
735             sm4_sbox[(t >> 24) & 0xff] << 24;
736 
737         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
738     }
739 
740     rd[0] = d.l[0];
741     rd[1] = d.l[1];
742 }
743 
744 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
745 {
746     intptr_t i, opr_sz = simd_oprsz(desc);
747 
748     for (i = 0; i < opr_sz; i += 16) {
749         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
750     }
751     clear_tail(vd, opr_sz, simd_maxsz(desc));
752 }
753 
754 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
755 {
756     intptr_t i, opr_sz = simd_oprsz(desc);
757     uint64_t *d = vd, *n = vn, *m = vm;
758 
759     for (i = 0; i < opr_sz / 8; ++i) {
760         d[i] = n[i] ^ rol64(m[i], 1);
761     }
762     clear_tail(vd, opr_sz, simd_maxsz(desc));
763 }
764