xref: /qemu/target/arm/tcg/crypto_helper.c (revision 1738860d7e60dec5dbeba17f8b44d31aae3accac)
1 /*
2  * crypto_helper.c - emulate v8 Crypto Extensions instructions
3  *
4  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "cpu.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/aes.h"
18 #include "vec_internal.h"
19 
/*
 * One 128-bit vector value, viewable as 16 bytes, four 32-bit words or
 * two 64-bit halves.  The helpers in this file fill it in and read it
 * back through the 'l' member.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];
    uint32_t   words[4];
    uint64_t   l[2];
};

/*
 * CR_ST_BYTE(state, i) / CR_ST_WORD(state, i) yield an lvalue for byte or
 * word 'i' of a CRYPTO_STATE.  When HOST_WORDS_BIGENDIAN is defined the
 * index is mirrored within each 64-bit half, so that element 0 is always
 * the least significant byte/word of l[0].
 *
 * 'state' is parenthesized so that any expression of union type
 * (e.g. *ptr) can be passed, not just a plain identifier.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif
33 
34 static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
35                            uint64_t *rm, bool decrypt)
36 {
37     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
38     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
39     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
40     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
41     int i;
42 
43     /* xor state vector with round key */
44     rk.l[0] ^= st.l[0];
45     rk.l[1] ^= st.l[1];
46 
47     /* combine ShiftRows operation and sbox substitution */
48     for (i = 0; i < 16; i++) {
49         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
50     }
51 
52     rd[0] = st.l[0];
53     rd[1] = st.l[1];
54 }
55 
56 void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
57 {
58     intptr_t i, opr_sz = simd_oprsz(desc);
59     bool decrypt = simd_data(desc);
60 
61     for (i = 0; i < opr_sz; i += 16) {
62         do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
63     }
64     clear_tail(vd, opr_sz, simd_maxsz(desc));
65 }
66 
67 static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
68 {
69     static uint32_t const mc[][256] = { {
70         /* MixColumns lookup table */
71         0x00000000, 0x03010102, 0x06020204, 0x05030306,
72         0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
73         0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
74         0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
75         0x30101020, 0x33111122, 0x36121224, 0x35131326,
76         0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
77         0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
78         0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
79         0x60202040, 0x63212142, 0x66222244, 0x65232346,
80         0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
81         0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
82         0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
83         0x50303060, 0x53313162, 0x56323264, 0x55333366,
84         0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
85         0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
86         0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
87         0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
88         0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
89         0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
90         0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
91         0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
92         0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
93         0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
94         0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
95         0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
96         0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
97         0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
98         0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
99         0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
100         0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
101         0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
102         0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
103         0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
104         0x97848413, 0x94858511, 0x91868617, 0x92878715,
105         0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
106         0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
107         0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
108         0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
109         0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
110         0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
111         0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
112         0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
113         0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
114         0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
115         0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
116         0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
117         0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
118         0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
119         0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
120         0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
121         0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
122         0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
123         0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
124         0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
125         0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
126         0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
127         0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
128         0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
129         0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
130         0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
131         0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
132         0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
133         0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
134         0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
135     }, {
136         /* Inverse MixColumns lookup table */
137         0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
138         0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
139         0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
140         0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
141         0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
142         0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
143         0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
144         0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
145         0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
146         0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
147         0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
148         0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
149         0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
150         0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
151         0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
152         0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
153         0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
154         0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
155         0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
156         0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
157         0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
158         0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
159         0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
160         0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
161         0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
162         0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
163         0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
164         0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
165         0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
166         0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
167         0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
168         0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
169         0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
170         0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
171         0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
172         0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
173         0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
174         0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
175         0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
176         0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
177         0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
178         0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
179         0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
180         0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
181         0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
182         0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
183         0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
184         0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
185         0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
186         0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
187         0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
188         0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
189         0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
190         0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
191         0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
192         0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
193         0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
194         0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
195         0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
196         0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
197         0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
198         0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
199         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
200         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
201     } };
202 
203     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
204     int i;
205 
206     for (i = 0; i < 16; i += 4) {
207         CR_ST_WORD(st, i >> 2) =
208             mc[decrypt][CR_ST_BYTE(st, i)] ^
209             rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
210             rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
211             rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
212     }
213 
214     rd[0] = st.l[0];
215     rd[1] = st.l[1];
216 }
217 
218 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
219 {
220     intptr_t i, opr_sz = simd_oprsz(desc);
221     bool decrypt = simd_data(desc);
222 
223     for (i = 0; i < opr_sz; i += 16) {
224         do_crypto_aesmc(vd + i, vm + i, decrypt);
225     }
226     clear_tail(vd, opr_sz, simd_maxsz(desc));
227 }
228 
229 /*
230  * SHA-1 logical functions
231  */
232 
/*
 * Bitwise "choose": for every bit position, take the bit from y where x
 * is set and the bit from z where x is clear.
 */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t from_y = x & y;
    uint32_t from_z = ~x & z;

    return from_y | from_z;
}
237 
/* Bitwise parity: the XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
242 
/*
 * Bitwise majority: each result bit is set when at least two of the
 * corresponding bits of x, y and z are set.
 */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t xy = x & y;
    uint32_t xz = x & z;
    uint32_t yz = y & z;

    return xy | xz | yz;
}
247 
248 void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
249 {
250     uint64_t *rd = vd;
251     uint64_t *rn = vn;
252     uint64_t *rm = vm;
253     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
254     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
255     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
256 
257     if (op == 3) { /* sha1su0 */
258         d.l[0] ^= d.l[1] ^ m.l[0];
259         d.l[1] ^= n.l[0] ^ m.l[1];
260     } else {
261         int i;
262 
263         for (i = 0; i < 4; i++) {
264             uint32_t t;
265 
266             switch (op) {
267             case 0: /* sha1c */
268                 t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
269                 break;
270             case 1: /* sha1p */
271                 t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
272                 break;
273             case 2: /* sha1m */
274                 t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
275                 break;
276             default:
277                 g_assert_not_reached();
278             }
279             t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
280                  + CR_ST_WORD(m, i);
281 
282             CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
283             CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
284             CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
285             CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
286             CR_ST_WORD(d, 0) = t;
287         }
288     }
289     rd[0] = d.l[0];
290     rd[1] = d.l[1];
291 }
292 
293 void HELPER(crypto_sha1h)(void *vd, void *vm)
294 {
295     uint64_t *rd = vd;
296     uint64_t *rm = vm;
297     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
298 
299     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
300     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
301 
302     rd[0] = m.l[0];
303     rd[1] = m.l[1];
304 }
305 
306 void HELPER(crypto_sha1su1)(void *vd, void *vm)
307 {
308     uint64_t *rd = vd;
309     uint64_t *rm = vm;
310     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
311     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
312 
313     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
314     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
315     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
316     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
317 
318     rd[0] = d.l[0];
319     rd[1] = d.l[1];
320 }
321 
322 /*
323  * The SHA-256 logical functions, according to
324  * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
325  */
326 
/* SHA-256 "big sigma 0": XOR of x rotated right by 2, 13 and 22 bits. */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
331 
/* SHA-256 "big sigma 1": XOR of x rotated right by 6, 11 and 25 bits. */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
336 
/*
 * SHA-256 "small sigma 0": XOR of x rotated right by 7 and 18 bits and
 * x shifted right by 3 bits.
 */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = x >> 3;

    acc ^= ror32(x, 7);
    acc ^= ror32(x, 18);
    return acc;
}
341 
/*
 * SHA-256 "small sigma 1": XOR of x rotated right by 17 and 19 bits and
 * x shifted right by 10 bits.
 */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = x >> 10;

    acc ^= ror32(x, 17);
    acc ^= ror32(x, 19);
    return acc;
}
346 
347 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
348 {
349     uint64_t *rd = vd;
350     uint64_t *rn = vn;
351     uint64_t *rm = vm;
352     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
353     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
354     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
355     int i;
356 
357     for (i = 0; i < 4; i++) {
358         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
359                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
360                      + CR_ST_WORD(m, i);
361 
362         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
363         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
364         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
365         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
366 
367         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
368              + S0(CR_ST_WORD(d, 0));
369 
370         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
371         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
372         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
373         CR_ST_WORD(d, 0) = t;
374     }
375 
376     rd[0] = d.l[0];
377     rd[1] = d.l[1];
378 }
379 
380 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
381 {
382     uint64_t *rd = vd;
383     uint64_t *rn = vn;
384     uint64_t *rm = vm;
385     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
386     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
387     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
388     int i;
389 
390     for (i = 0; i < 4; i++) {
391         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
392                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
393                      + CR_ST_WORD(m, i);
394 
395         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
396         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
397         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
398         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
399     }
400 
401     rd[0] = d.l[0];
402     rd[1] = d.l[1];
403 }
404 
405 void HELPER(crypto_sha256su0)(void *vd, void *vm)
406 {
407     uint64_t *rd = vd;
408     uint64_t *rm = vm;
409     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
410     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
411 
412     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
413     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
414     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
415     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
416 
417     rd[0] = d.l[0];
418     rd[1] = d.l[1];
419 }
420 
421 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
422 {
423     uint64_t *rd = vd;
424     uint64_t *rn = vn;
425     uint64_t *rm = vm;
426     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
427     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
428     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
429 
430     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
431     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
432     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
433     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
434 
435     rd[0] = d.l[0];
436     rd[1] = d.l[1];
437 }
438 
439 /*
440  * The SHA-512 logical functions (same as above but using 64-bit operands)
441  */
442 
/*
 * 64-bit bitwise "choose": take the bit from y where x is set and the
 * bit from z where x is clear.
 */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t from_y = x & y;
    uint64_t from_z = ~x & z;

    return from_y | from_z;
}
447 
/*
 * 64-bit bitwise majority: each result bit is set when at least two of
 * the corresponding bits of x, y and z are set.
 */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t xy = x & y;
    uint64_t xz = x & z;
    uint64_t yz = y & z;

    return xy | xz | yz;
}
452 
/* SHA-512 "big sigma 0": XOR of x rotated right by 28, 34 and 39 bits. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
457 
/* SHA-512 "big sigma 1": XOR of x rotated right by 14, 18 and 41 bits. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
462 
/*
 * SHA-512 "small sigma 0": XOR of x rotated right by 1 and 8 bits and
 * x shifted right by 7 bits.
 */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = x >> 7;

    acc ^= ror64(x, 1);
    acc ^= ror64(x, 8);
    return acc;
}
467 
/*
 * SHA-512 "small sigma 1": XOR of x rotated right by 19 and 61 bits and
 * x shifted right by 6 bits.
 */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = x >> 6;

    acc ^= ror64(x, 19);
    acc ^= ror64(x, 61);
    return acc;
}
472 
473 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm)
474 {
475     uint64_t *rd = vd;
476     uint64_t *rn = vn;
477     uint64_t *rm = vm;
478     uint64_t d0 = rd[0];
479     uint64_t d1 = rd[1];
480 
481     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
482     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
483 
484     rd[0] = d0;
485     rd[1] = d1;
486 }
487 
488 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm)
489 {
490     uint64_t *rd = vd;
491     uint64_t *rn = vn;
492     uint64_t *rm = vm;
493     uint64_t d0 = rd[0];
494     uint64_t d1 = rd[1];
495 
496     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
497     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
498 
499     rd[0] = d0;
500     rd[1] = d1;
501 }
502 
503 void HELPER(crypto_sha512su0)(void *vd, void *vn)
504 {
505     uint64_t *rd = vd;
506     uint64_t *rn = vn;
507     uint64_t d0 = rd[0];
508     uint64_t d1 = rd[1];
509 
510     d0 += s0_512(rd[1]);
511     d1 += s0_512(rn[0]);
512 
513     rd[0] = d0;
514     rd[1] = d1;
515 }
516 
517 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm)
518 {
519     uint64_t *rd = vd;
520     uint64_t *rn = vn;
521     uint64_t *rm = vm;
522 
523     rd[0] += s1_512(rn[0]) + rm[0];
524     rd[1] += s1_512(rn[1]) + rm[1];
525 }
526 
527 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm)
528 {
529     uint64_t *rd = vd;
530     uint64_t *rn = vn;
531     uint64_t *rm = vm;
532     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
533     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
534     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
535     uint32_t t;
536 
537     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
538     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
539 
540     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
541     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
542 
543     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
544     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
545 
546     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
547     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
548 
549     rd[0] = d.l[0];
550     rd[1] = d.l[1];
551 }
552 
553 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm)
554 {
555     uint64_t *rd = vd;
556     uint64_t *rn = vn;
557     uint64_t *rm = vm;
558     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
559     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
560     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
561     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
562 
563     CR_ST_WORD(d, 0) ^= t;
564     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
565     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
566     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
567                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
568 
569     rd[0] = d.l[0];
570     rd[1] = d.l[1];
571 }
572 
573 void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
574                           uint32_t opcode)
575 {
576     uint64_t *rd = vd;
577     uint64_t *rn = vn;
578     uint64_t *rm = vm;
579     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
580     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
581     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
582     uint32_t t;
583 
584     assert(imm2 < 4);
585 
586     if (opcode == 0 || opcode == 2) {
587         /* SM3TT1A, SM3TT2A */
588         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
589     } else if (opcode == 1) {
590         /* SM3TT1B */
591         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
592     } else if (opcode == 3) {
593         /* SM3TT2B */
594         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
595     } else {
596         g_assert_not_reached();
597     }
598 
599     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
600 
601     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
602 
603     if (opcode < 2) {
604         /* SM3TT1A, SM3TT1B */
605         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
606 
607         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
608     } else {
609         /* SM3TT2A, SM3TT2B */
610         t += CR_ST_WORD(n, 3);
611         t ^= rol32(t, 9) ^ rol32(t, 17);
612 
613         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
614     }
615 
616     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
617     CR_ST_WORD(d, 3) = t;
618 
619     rd[0] = d.l[0];
620     rd[1] = d.l[1];
621 }
622 
/*
 * Byte substitution table used by the SM4 helpers in this file; it is
 * indexed by an 8-bit value and has one entry per possible byte.
 */
static const uint8_t sm4_sbox[256] = {
    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
657 
658 static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
659 {
660     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
661     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
662     uint32_t t, i;
663 
664     for (i = 0; i < 4; i++) {
665         t = CR_ST_WORD(d, (i + 1) % 4) ^
666             CR_ST_WORD(d, (i + 2) % 4) ^
667             CR_ST_WORD(d, (i + 3) % 4) ^
668             CR_ST_WORD(n, i);
669 
670         t = sm4_sbox[t & 0xff] |
671             sm4_sbox[(t >> 8) & 0xff] << 8 |
672             sm4_sbox[(t >> 16) & 0xff] << 16 |
673             sm4_sbox[(t >> 24) & 0xff] << 24;
674 
675         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
676                             rol32(t, 24);
677     }
678 
679     rd[0] = d.l[0];
680     rd[1] = d.l[1];
681 }
682 
683 void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
684 {
685     intptr_t i, opr_sz = simd_oprsz(desc);
686 
687     for (i = 0; i < opr_sz; i += 16) {
688         do_crypto_sm4e(vd + i, vn + i, vm + i);
689     }
690     clear_tail(vd, opr_sz, simd_maxsz(desc));
691 }
692 
693 static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
694 {
695     union CRYPTO_STATE d;
696     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
697     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
698     uint32_t t, i;
699 
700     d = n;
701     for (i = 0; i < 4; i++) {
702         t = CR_ST_WORD(d, (i + 1) % 4) ^
703             CR_ST_WORD(d, (i + 2) % 4) ^
704             CR_ST_WORD(d, (i + 3) % 4) ^
705             CR_ST_WORD(m, i);
706 
707         t = sm4_sbox[t & 0xff] |
708             sm4_sbox[(t >> 8) & 0xff] << 8 |
709             sm4_sbox[(t >> 16) & 0xff] << 16 |
710             sm4_sbox[(t >> 24) & 0xff] << 24;
711 
712         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
713     }
714 
715     rd[0] = d.l[0];
716     rd[1] = d.l[1];
717 }
718 
719 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
720 {
721     intptr_t i, opr_sz = simd_oprsz(desc);
722 
723     for (i = 0; i < opr_sz; i += 16) {
724         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
725     }
726     clear_tail(vd, opr_sz, simd_maxsz(desc));
727 }
728 
729 void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
730 {
731     intptr_t i, opr_sz = simd_oprsz(desc);
732     uint64_t *d = vd, *n = vn, *m = vm;
733 
734     for (i = 0; i < opr_sz / 8; ++i) {
735         d[i] = n[i] ^ rol64(m[i], 1);
736     }
737     clear_tail(vd, opr_sz, simd_maxsz(desc));
738 }
739