xref: /qemu/target/arm/tcg/crypto_helper.c (revision 50f57e09fda4b7ffbc5ba62aad6cebf660824023)
1 /*
2  * crypto_helper.c - emulate v8 Crypto Extensions instructions
3  *
4  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "cpu.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/aes.h"
18 #include "vec_internal.h"
19 
/*
 * 128-bit working state shared by the helpers in this file.  The same
 * 16 bytes can be viewed as bytes, 32-bit words or 64-bit halves.
 */
20 union CRYPTO_STATE {
21     uint8_t    bytes[16];  /* byte view; index it through CR_ST_BYTE() */
22     uint32_t   words[4];   /* 32-bit view; index it through CR_ST_WORD() */
23     uint64_t   l[2];       /* 64-bit view; helpers copy operands in/out via this */
24 };
25 
/*
 * Element accessors for union CRYPTO_STATE.
 *
 * The helpers fill the state through the two 64-bit members l[0]/l[1].
 * When HOST_WORDS_BIGENDIAN is defined the index is remapped so that
 * element i is mirrored inside its own 64-bit half: byte i becomes
 * bytes[7 - i] for i < 8 and bytes[23 - i] for i >= 8, and the two
 * 32-bit words of each half are swapped.  Otherwise the index is used
 * unchanged.
 */
26 #ifdef HOST_WORDS_BIGENDIAN
27 #define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
28 #define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
29 #else
30 #define CR_ST_BYTE(state, i)   ((state).bytes[i])
31 #define CR_ST_WORD(state, i)   ((state).words[i])
32 #endif
33 
/*
 * Tail handling for helpers whose callers have not been converted to
 * full gvec and therefore only write the low 16 bytes of the vector
 * register: check that the operation size in @desc is exactly 16 and
 * hand the rest of the register (up to the maximum size in @desc) to
 * clear_tail().
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    int oprsz = simd_oprsz(desc);
    int maxsz = simd_maxsz(desc);

    assert(oprsz == 16);
    clear_tail(vd, oprsz, maxsz);
}
46 
47 static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
48                            uint64_t *rm, bool decrypt)
49 {
50     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
51     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
52     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
53     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
54     int i;
55 
56     /* xor state vector with round key */
57     rk.l[0] ^= st.l[0];
58     rk.l[1] ^= st.l[1];
59 
60     /* combine ShiftRows operation and sbox substitution */
61     for (i = 0; i < 16; i++) {
62         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
63     }
64 
65     rd[0] = st.l[0];
66     rd[1] = st.l[1];
67 }
68 
69 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
70 {
71     intptr_t i, opr_sz = simd_oprsz(desc);
72     bool decrypt = simd_data(desc);
73 
74     for (i = 0; i < opr_sz; i += 16) {
75         do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
76     }
77     clear_tail(vd, opr_sz, simd_maxsz(desc));
78 }
79 
80 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
81 {
82     static uint32_t const mc[][256] = { {
83         /* MixColumns lookup table */
84         0x00000000, 0x03010102, 0x06020204, 0x05030306,
85         0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
86         0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
87         0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
88         0x30101020, 0x33111122, 0x36121224, 0x35131326,
89         0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
90         0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
91         0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
92         0x60202040, 0x63212142, 0x66222244, 0x65232346,
93         0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
94         0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
95         0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
96         0x50303060, 0x53313162, 0x56323264, 0x55333366,
97         0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
98         0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
99         0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
100         0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
101         0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
102         0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
103         0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
104         0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
105         0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
106         0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
107         0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
108         0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
109         0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
110         0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
111         0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
112         0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
113         0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
114         0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
115         0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
116         0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
117         0x97848413, 0x94858511, 0x91868617, 0x92878715,
118         0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
119         0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
120         0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
121         0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
122         0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
123         0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
124         0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
125         0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
126         0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
127         0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
128         0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
129         0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
130         0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
131         0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
132         0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
133         0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
134         0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
135         0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
136         0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
137         0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
138         0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
139         0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
140         0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
141         0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
142         0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
143         0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
144         0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
145         0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
146         0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
147         0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
148     }, {
149         /* Inverse MixColumns lookup table */
150         0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
151         0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
152         0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
153         0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
154         0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
155         0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
156         0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
157         0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
158         0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
159         0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
160         0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
161         0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
162         0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
163         0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
164         0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
165         0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
166         0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
167         0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
168         0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
169         0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
170         0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
171         0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
172         0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
173         0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
174         0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
175         0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
176         0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
177         0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
178         0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
179         0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
180         0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
181         0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
182         0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
183         0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
184         0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
185         0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
186         0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
187         0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
188         0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
189         0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
190         0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
191         0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
192         0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
193         0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
194         0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
195         0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
196         0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
197         0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
198         0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
199         0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
200         0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
201         0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
202         0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
203         0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
204         0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
205         0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
206         0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
207         0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
208         0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
209         0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
210         0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
211         0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
212         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
213         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
214     } };
215 
216     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
217     int i;
218 
219     for (i = 0; i < 16; i += 4) {
220         CR_ST_WORD(st, i >> 2) =
221             mc[decrypt][CR_ST_BYTE(st, i)] ^
222             rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
223             rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
224             rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
225     }
226 
227     rd[0] = st.l[0];
228     rd[1] = st.l[1];
229 }
230 
231 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
232 {
233     intptr_t i, opr_sz = simd_oprsz(desc);
234     bool decrypt = simd_data(desc);
235 
236     for (i = 0; i < opr_sz; i += 16) {
237         do_crypto_aesmc(vd + i, vm + i, decrypt);
238     }
239     clear_tail(vd, opr_sz, simd_maxsz(desc));
240 }
241 
242 /*
243  * SHA-1 logical functions
244  */
245 
/*
 * Bitwise "choose": every result bit is taken from y where the matching
 * bit of x is set, and from z where it is clear.
 */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t from_y = x & y;
    uint32_t from_z = ~x & z;

    return from_y | from_z;
}
250 
/* Bitwise parity: XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
255 
/*
 * Bitwise majority: a result bit is set when at least two of the three
 * corresponding input bits are set.
 */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t xy = x & y;
    uint32_t xz = x & z;
    uint32_t yz = y & z;

    return xy | xz | yz;
}
260 
261 void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
262 {
263     uint64_t *d = vd, *n = vn, *m = vm;
264     uint64_t d0, d1;
265 
266     d0 = d[1] ^ d[0] ^ m[0];
267     d1 = n[0] ^ d[1] ^ m[1];
268     d[0] = d0;
269     d[1] = d1;
270 
271     clear_tail_16(vd, desc);
272 }
273 
274 static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
275                                     uint64_t *rm, uint32_t desc,
276                                     uint32_t (*fn)(union CRYPTO_STATE *d))
277 {
278     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
279     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
280     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
281     int i;
282 
283     for (i = 0; i < 4; i++) {
284         uint32_t t = fn(&d);
285 
286         t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
287              + CR_ST_WORD(m, i);
288 
289         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
290         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
291         CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
292         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
293         CR_ST_WORD(d, 0) = t;
294     }
295     rd[0] = d.l[0];
296     rd[1] = d.l[1];
297 
298     clear_tail_16(rd, desc);
299 }
300 
301 static uint32_t do_sha1c(union CRYPTO_STATE *d)
302 {
303     return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
304 }
305 
306 void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
307 {
308     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
309 }
310 
311 static uint32_t do_sha1p(union CRYPTO_STATE *d)
312 {
313     return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
314 }
315 
316 void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
317 {
318     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
319 }
320 
321 static uint32_t do_sha1m(union CRYPTO_STATE *d)
322 {
323     return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
324 }
325 
326 void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
327 {
328     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
329 }
330 
331 void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
332 {
333     uint64_t *rd = vd;
334     uint64_t *rm = vm;
335     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
336 
337     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
338     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
339 
340     rd[0] = m.l[0];
341     rd[1] = m.l[1];
342 
343     clear_tail_16(vd, desc);
344 }
345 
346 void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
347 {
348     uint64_t *rd = vd;
349     uint64_t *rm = vm;
350     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
351     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
352 
353     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
354     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
355     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
356     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
357 
358     rd[0] = d.l[0];
359     rd[1] = d.l[1];
360 
361     clear_tail_16(vd, desc);
362 }
363 
364 /*
365  * The SHA-256 logical functions, according to
366  * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
367  */
368 
/* SHA-256 logical function S0: XOR of x rotated right by 2, 13 and 22. */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
373 
/* SHA-256 logical function S1: XOR of x rotated right by 6, 11 and 25. */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
378 
/*
 * SHA-256 logical function s0: x rotated right by 7 and by 18, XORed
 * with x shifted right by 3.
 */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = ror32(x, 7);

    acc ^= ror32(x, 18);
    acc ^= x >> 3;
    return acc;
}
383 
/*
 * SHA-256 logical function s1: x rotated right by 17 and by 19, XORed
 * with x shifted right by 10.
 */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = ror32(x, 17);

    acc ^= ror32(x, 19);
    acc ^= x >> 10;
    return acc;
}
388 
389 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
390 {
391     uint64_t *rd = vd;
392     uint64_t *rn = vn;
393     uint64_t *rm = vm;
394     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
395     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
396     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
397     int i;
398 
399     for (i = 0; i < 4; i++) {
400         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
401                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
402                      + CR_ST_WORD(m, i);
403 
404         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
405         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
406         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
407         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
408 
409         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
410              + S0(CR_ST_WORD(d, 0));
411 
412         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
413         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
414         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
415         CR_ST_WORD(d, 0) = t;
416     }
417 
418     rd[0] = d.l[0];
419     rd[1] = d.l[1];
420 
421     clear_tail_16(vd, desc);
422 }
423 
424 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
425 {
426     uint64_t *rd = vd;
427     uint64_t *rn = vn;
428     uint64_t *rm = vm;
429     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
430     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
431     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
432     int i;
433 
434     for (i = 0; i < 4; i++) {
435         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
436                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
437                      + CR_ST_WORD(m, i);
438 
439         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
440         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
441         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
442         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
443     }
444 
445     rd[0] = d.l[0];
446     rd[1] = d.l[1];
447 
448     clear_tail_16(vd, desc);
449 }
450 
451 void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
452 {
453     uint64_t *rd = vd;
454     uint64_t *rm = vm;
455     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
456     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
457 
458     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
459     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
460     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
461     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
462 
463     rd[0] = d.l[0];
464     rd[1] = d.l[1];
465 
466     clear_tail_16(vd, desc);
467 }
468 
469 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
470 {
471     uint64_t *rd = vd;
472     uint64_t *rn = vn;
473     uint64_t *rm = vm;
474     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
475     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
476     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
477 
478     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
479     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
480     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
481     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
482 
483     rd[0] = d.l[0];
484     rd[1] = d.l[1];
485 
486     clear_tail_16(vd, desc);
487 }
488 
489 /*
490  * The SHA-512 logical functions (same as above but using 64-bit operands)
491  */
492 
/*
 * 64-bit bitwise "choose": every result bit is taken from y where the
 * matching bit of x is set, and from z where it is clear.
 */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t from_y = x & y;
    uint64_t from_z = ~x & z;

    return from_y | from_z;
}
497 
/*
 * 64-bit bitwise majority: a result bit is set when at least two of
 * the three corresponding input bits are set.
 */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t xy = x & y;
    uint64_t xz = x & z;
    uint64_t yz = y & z;

    return xy | xz | yz;
}
502 
/* SHA-512 logical function S0: XOR of x rotated right by 28, 34 and 39. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
507 
/* SHA-512 logical function S1: XOR of x rotated right by 14, 18 and 41. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
512 
/*
 * SHA-512 logical function s0: x rotated right by 1 and by 8, XORed
 * with x shifted right by 7.
 */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 1);

    acc ^= ror64(x, 8);
    acc ^= x >> 7;
    return acc;
}
517 
/*
 * SHA-512 logical function s1: x rotated right by 19 and by 61, XORed
 * with x shifted right by 6.
 */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 19);

    acc ^= ror64(x, 61);
    acc ^= x >> 6;
    return acc;
}
522 
523 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
524 {
525     uint64_t *rd = vd;
526     uint64_t *rn = vn;
527     uint64_t *rm = vm;
528     uint64_t d0 = rd[0];
529     uint64_t d1 = rd[1];
530 
531     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
532     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
533 
534     rd[0] = d0;
535     rd[1] = d1;
536 
537     clear_tail_16(vd, desc);
538 }
539 
540 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
541 {
542     uint64_t *rd = vd;
543     uint64_t *rn = vn;
544     uint64_t *rm = vm;
545     uint64_t d0 = rd[0];
546     uint64_t d1 = rd[1];
547 
548     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
549     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
550 
551     rd[0] = d0;
552     rd[1] = d1;
553 
554     clear_tail_16(vd, desc);
555 }
556 
557 void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
558 {
559     uint64_t *rd = vd;
560     uint64_t *rn = vn;
561     uint64_t d0 = rd[0];
562     uint64_t d1 = rd[1];
563 
564     d0 += s0_512(rd[1]);
565     d1 += s0_512(rn[0]);
566 
567     rd[0] = d0;
568     rd[1] = d1;
569 
570     clear_tail_16(vd, desc);
571 }
572 
573 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
574 {
575     uint64_t *rd = vd;
576     uint64_t *rn = vn;
577     uint64_t *rm = vm;
578 
579     rd[0] += s1_512(rn[0]) + rm[0];
580     rd[1] += s1_512(rn[1]) + rm[1];
581 
582     clear_tail_16(vd, desc);
583 }
584 
585 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
586 {
587     uint64_t *rd = vd;
588     uint64_t *rn = vn;
589     uint64_t *rm = vm;
590     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
591     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
592     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
593     uint32_t t;
594 
595     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
596     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
597 
598     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
599     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
600 
601     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
602     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
603 
604     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
605     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
606 
607     rd[0] = d.l[0];
608     rd[1] = d.l[1];
609 
610     clear_tail_16(vd, desc);
611 }
612 
613 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
614 {
615     uint64_t *rd = vd;
616     uint64_t *rn = vn;
617     uint64_t *rm = vm;
618     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
619     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
620     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
621     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
622 
623     CR_ST_WORD(d, 0) ^= t;
624     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
625     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
626     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
627                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
628 
629     rd[0] = d.l[0];
630     rd[1] = d.l[1];
631 
632     clear_tail_16(vd, desc);
633 }
634 
/*
 * Common body of the four SM3TT helpers.  @opcode selects the variant
 * (0 = SM3TT1A, 1 = SM3TT1B, 2 = SM3TT2A, 3 = SM3TT2B) and the data
 * field of @desc carries a 2-bit immediate that picks the word of @rm
 * to add in.  Operates on local copies of the operands and writes only
 * @rd.
 *
 * The function is force-inlined and every caller passes a literal
 * opcode.
 * NOTE(review): qemu_build_not_reached() presumably relies on that so
 * the final else branch is removed at compile time - confirm before
 * restructuring the branch chain.
 */
635 static inline void QEMU_ALWAYS_INLINE
636 crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
637              uint32_t desc, uint32_t opcode)
638 {
639     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
640     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
641     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
642     uint32_t imm2 = simd_data(desc);
643     uint32_t t;
644 
645     assert(imm2 < 4);
646 
    /* Pick the logical function of d[3], d[2], d[1] for this variant. */
647     if (opcode == 0 || opcode == 2) {
648         /* SM3TT1A, SM3TT2A */
649         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
650     } else if (opcode == 1) {
651         /* SM3TT1B */
652         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
653     } else if (opcode == 3) {
654         /* SM3TT2B */
655         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
656     } else {
657         qemu_build_not_reached();
658     }
659 
    /* d[0] is consumed here, before it is overwritten just below. */
660     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
661 
662     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
663 
664     if (opcode < 2) {
665         /* SM3TT1A, SM3TT1B */
666         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
667 
668         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
669     } else {
670         /* SM3TT2A, SM3TT2B */
671         t += CR_ST_WORD(n, 3);
672         t ^= rol32(t, 9) ^ rol32(t, 17);
673 
674         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
675     }
676 
    /* Finish the word shuffle: old d[3] moves down, t becomes d[3]. */
677     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
678     CR_ST_WORD(d, 3) = t;
679 
680     rd[0] = d.l[0];
681     rd[1] = d.l[1];
682 
683     clear_tail_16(rd, desc);
684 }
685 
/*
 * Define one out-of-line helper per SM3TT variant.  Each helper forwards
 * to crypto_sm3tt() with a literal opcode: 0 = sm3tt1a, 1 = sm3tt1b,
 * 2 = sm3tt2a, 3 = sm3tt2b.  The macro is local to this group and is
 * undefined again right after use.
 */
686 #define DO_SM3TT(NAME, OPCODE) \
687     void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
688     { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
689 
690 DO_SM3TT(crypto_sm3tt1a, 0)
691 DO_SM3TT(crypto_sm3tt1b, 1)
692 DO_SM3TT(crypto_sm3tt2a, 2)
693 DO_SM3TT(crypto_sm3tt2b, 3)
694 
695 #undef DO_SM3TT
696 
/*
 * Byte substitution table used by the SM4 helpers below; it is indexed
 * with one byte of the intermediate word and yields the replacement
 * byte.  The table has exactly 256 entries.
 */
static const uint8_t sm4_sbox[256] = {
    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
731 
732 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
733 {
734     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
735     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
736     uint32_t t, i;
737 
738     for (i = 0; i < 4; i++) {
739         t = CR_ST_WORD(d, (i + 1) % 4) ^
740             CR_ST_WORD(d, (i + 2) % 4) ^
741             CR_ST_WORD(d, (i + 3) % 4) ^
742             CR_ST_WORD(n, i);
743 
744         t = sm4_sbox[t & 0xff] |
745             sm4_sbox[(t >> 8) & 0xff] << 8 |
746             sm4_sbox[(t >> 16) & 0xff] << 16 |
747             sm4_sbox[(t >> 24) & 0xff] << 24;
748 
749         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
750                             rol32(t, 24);
751     }
752 
753     rd[0] = d.l[0];
754     rd[1] = d.l[1];
755 }
756 
757 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
758 {
759     intptr_t i, opr_sz = simd_oprsz(desc);
760 
761     for (i = 0; i < opr_sz; i += 16) {
762         do_crypto_sm4e(vd + i, vn + i, vm + i);
763     }
764     clear_tail(vd, opr_sz, simd_maxsz(desc));
765 }
766 
767 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
768 {
769     union CRYPTO_STATE d;
770     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
771     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
772     uint32_t t, i;
773 
774     d = n;
775     for (i = 0; i < 4; i++) {
776         t = CR_ST_WORD(d, (i + 1) % 4) ^
777             CR_ST_WORD(d, (i + 2) % 4) ^
778             CR_ST_WORD(d, (i + 3) % 4) ^
779             CR_ST_WORD(m, i);
780 
781         t = sm4_sbox[t & 0xff] |
782             sm4_sbox[(t >> 8) & 0xff] << 8 |
783             sm4_sbox[(t >> 16) & 0xff] << 16 |
784             sm4_sbox[(t >> 24) & 0xff] << 24;
785 
786         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
787     }
788 
789     rd[0] = d.l[0];
790     rd[1] = d.l[1];
791 }
792 
793 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
794 {
795     intptr_t i, opr_sz = simd_oprsz(desc);
796 
797     for (i = 0; i < opr_sz; i += 16) {
798         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
799     }
800     clear_tail(vd, opr_sz, simd_maxsz(desc));
801 }
802 
803 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
804 {
805     intptr_t i, opr_sz = simd_oprsz(desc);
806     uint64_t *d = vd, *n = vn, *m = vm;
807 
808     for (i = 0; i < opr_sz / 8; ++i) {
809         d[i] = n[i] ^ rol64(m[i], 1);
810     }
811     clear_tail(vd, opr_sz, simd_maxsz(desc));
812 }
813