xref: /qemu/target/arm/tcg/crypto_helper.c (revision c29da5a7fe6a5d3e1719ce9a831076df2eff52d5)
19d935509SArd Biesheuvel /*
29d935509SArd Biesheuvel  * crypto_helper.c - emulate v8 Crypto Extensions instructions
39d935509SArd Biesheuvel  *
490b827d1SArd Biesheuvel  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
59d935509SArd Biesheuvel  *
69d935509SArd Biesheuvel  * This library is free software; you can redistribute it and/or
79d935509SArd Biesheuvel  * modify it under the terms of the GNU Lesser General Public
89d935509SArd Biesheuvel  * License as published by the Free Software Foundation; either
950f57e09SChetan Pant  * version 2.1 of the License, or (at your option) any later version.
109d935509SArd Biesheuvel  */
119d935509SArd Biesheuvel 
1274c21bd0SPeter Maydell #include "qemu/osdep.h"
139d935509SArd Biesheuvel 
149d935509SArd Biesheuvel #include "cpu.h"
152ef6175aSRichard Henderson #include "exec/helper-proto.h"
16a04b68e1SRichard Henderson #include "tcg/tcg-gvec-desc.h"
176f2945cdSDaniel P. Berrange #include "crypto/aes.h"
18*c29da5a7SWeiwei Li #include "crypto/sm4.h"
19a04b68e1SRichard Henderson #include "vec_internal.h"
209d935509SArd Biesheuvel 
/*
 * One 128-bit operand of the crypto helpers, accessible at three
 * granularities.  Helpers in this file fill and drain it through l[]
 * and address single elements with CR_ST_BYTE() / CR_ST_WORD().
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];   /* sixteen 8-bit elements */
    uint32_t   words[4];    /* four 32-bit elements */
    uint64_t   l[2];        /* two 64-bit elements */
};
269d935509SArd Biesheuvel 
/*
 * Element accessors for union CRYPTO_STATE.  The state is loaded as two
 * host-order 64-bit values, so on a big-endian host the byte/word index
 * has to be remapped within each 64-bit half; on a little-endian host
 * the index is used as is.
 */
#if !HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(i)])
#define CR_ST_WORD(state, i)   ((state).words[(i)])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#endif
34b449ca3cSArd Biesheuvel 
/*
 * Tail handling for helpers whose callers have not been converted to
 * full gvec and therefore only modify the low 16 bytes of the vector
 * register: insist that the operation size in @desc is exactly 16 and
 * pass the range from there up to simd_maxsz(desc) to clear_tail().
 */
static void clear_tail_16(void *vd, uint32_t desc)
{
    const int oprsz = simd_oprsz(desc);
    const int maxsz = simd_maxsz(desc);

    assert(oprsz == 16);
    clear_tail(vd, oprsz, maxsz);
}
47aaffebd6SRichard Henderson 
48a04b68e1SRichard Henderson static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
49a04b68e1SRichard Henderson                            uint64_t *rm, bool decrypt)
509d935509SArd Biesheuvel {
5159dcd29aSTom Musta     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
5259dcd29aSTom Musta     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
531a66ac61SRichard Henderson     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
54a04b68e1SRichard Henderson     union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
559d935509SArd Biesheuvel     int i;
569d935509SArd Biesheuvel 
579d935509SArd Biesheuvel     /* xor state vector with round key */
589d935509SArd Biesheuvel     rk.l[0] ^= st.l[0];
599d935509SArd Biesheuvel     rk.l[1] ^= st.l[1];
609d935509SArd Biesheuvel 
619d935509SArd Biesheuvel     /* combine ShiftRows operation and sbox substitution */
629d935509SArd Biesheuvel     for (i = 0; i < 16; i++) {
63b449ca3cSArd Biesheuvel         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
649d935509SArd Biesheuvel     }
659d935509SArd Biesheuvel 
661a66ac61SRichard Henderson     rd[0] = st.l[0];
671a66ac61SRichard Henderson     rd[1] = st.l[1];
689d935509SArd Biesheuvel }
699d935509SArd Biesheuvel 
70a04b68e1SRichard Henderson void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
71a04b68e1SRichard Henderson {
72a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
73a04b68e1SRichard Henderson     bool decrypt = simd_data(desc);
74a04b68e1SRichard Henderson 
75a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
76a04b68e1SRichard Henderson         do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
77a04b68e1SRichard Henderson     }
78a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
79a04b68e1SRichard Henderson }
80a04b68e1SRichard Henderson 
81a04b68e1SRichard Henderson static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
829d935509SArd Biesheuvel {
839d935509SArd Biesheuvel     static uint32_t const mc[][256] = { {
849d935509SArd Biesheuvel         /* MixColumns lookup table */
859d935509SArd Biesheuvel         0x00000000, 0x03010102, 0x06020204, 0x05030306,
869d935509SArd Biesheuvel         0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
879d935509SArd Biesheuvel         0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
889d935509SArd Biesheuvel         0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
899d935509SArd Biesheuvel         0x30101020, 0x33111122, 0x36121224, 0x35131326,
909d935509SArd Biesheuvel         0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
919d935509SArd Biesheuvel         0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
929d935509SArd Biesheuvel         0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
939d935509SArd Biesheuvel         0x60202040, 0x63212142, 0x66222244, 0x65232346,
949d935509SArd Biesheuvel         0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
959d935509SArd Biesheuvel         0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
969d935509SArd Biesheuvel         0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
979d935509SArd Biesheuvel         0x50303060, 0x53313162, 0x56323264, 0x55333366,
989d935509SArd Biesheuvel         0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
999d935509SArd Biesheuvel         0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
1009d935509SArd Biesheuvel         0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
1019d935509SArd Biesheuvel         0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
1029d935509SArd Biesheuvel         0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
1039d935509SArd Biesheuvel         0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
1049d935509SArd Biesheuvel         0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
1059d935509SArd Biesheuvel         0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
1069d935509SArd Biesheuvel         0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
1079d935509SArd Biesheuvel         0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
1089d935509SArd Biesheuvel         0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
1099d935509SArd Biesheuvel         0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
1109d935509SArd Biesheuvel         0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
1119d935509SArd Biesheuvel         0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
1129d935509SArd Biesheuvel         0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
1139d935509SArd Biesheuvel         0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
1149d935509SArd Biesheuvel         0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
1159d935509SArd Biesheuvel         0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
1169d935509SArd Biesheuvel         0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
1179d935509SArd Biesheuvel         0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
1189d935509SArd Biesheuvel         0x97848413, 0x94858511, 0x91868617, 0x92878715,
1199d935509SArd Biesheuvel         0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
1209d935509SArd Biesheuvel         0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
1219d935509SArd Biesheuvel         0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
1229d935509SArd Biesheuvel         0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
1239d935509SArd Biesheuvel         0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
1249d935509SArd Biesheuvel         0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
1259d935509SArd Biesheuvel         0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
1269d935509SArd Biesheuvel         0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
1279d935509SArd Biesheuvel         0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
1289d935509SArd Biesheuvel         0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
1299d935509SArd Biesheuvel         0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
1309d935509SArd Biesheuvel         0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
1319d935509SArd Biesheuvel         0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
1329d935509SArd Biesheuvel         0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
1339d935509SArd Biesheuvel         0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
1349d935509SArd Biesheuvel         0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
1359d935509SArd Biesheuvel         0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
1369d935509SArd Biesheuvel         0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
1379d935509SArd Biesheuvel         0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
1389d935509SArd Biesheuvel         0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
1399d935509SArd Biesheuvel         0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
1409d935509SArd Biesheuvel         0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
1419d935509SArd Biesheuvel         0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
1429d935509SArd Biesheuvel         0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
1439d935509SArd Biesheuvel         0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
1449d935509SArd Biesheuvel         0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
1459d935509SArd Biesheuvel         0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
1469d935509SArd Biesheuvel         0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
1479d935509SArd Biesheuvel         0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
1489d935509SArd Biesheuvel         0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
1499d935509SArd Biesheuvel     }, {
1509d935509SArd Biesheuvel         /* Inverse MixColumns lookup table */
1519d935509SArd Biesheuvel         0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
1529d935509SArd Biesheuvel         0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
1539d935509SArd Biesheuvel         0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
1549d935509SArd Biesheuvel         0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
1559d935509SArd Biesheuvel         0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
1569d935509SArd Biesheuvel         0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
1579d935509SArd Biesheuvel         0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
1589d935509SArd Biesheuvel         0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
1599d935509SArd Biesheuvel         0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
1609d935509SArd Biesheuvel         0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
1619d935509SArd Biesheuvel         0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
1629d935509SArd Biesheuvel         0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
1639d935509SArd Biesheuvel         0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
1649d935509SArd Biesheuvel         0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
1659d935509SArd Biesheuvel         0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
1669d935509SArd Biesheuvel         0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
1679d935509SArd Biesheuvel         0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
1689d935509SArd Biesheuvel         0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
1699d935509SArd Biesheuvel         0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
1709d935509SArd Biesheuvel         0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
1719d935509SArd Biesheuvel         0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
1729d935509SArd Biesheuvel         0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
1739d935509SArd Biesheuvel         0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
1749d935509SArd Biesheuvel         0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
1759d935509SArd Biesheuvel         0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
1769d935509SArd Biesheuvel         0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
1779d935509SArd Biesheuvel         0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
1789d935509SArd Biesheuvel         0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
1799d935509SArd Biesheuvel         0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
1809d935509SArd Biesheuvel         0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
1819d935509SArd Biesheuvel         0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
1829d935509SArd Biesheuvel         0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
1839d935509SArd Biesheuvel         0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
1849d935509SArd Biesheuvel         0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
1859d935509SArd Biesheuvel         0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
1869d935509SArd Biesheuvel         0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
1879d935509SArd Biesheuvel         0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
1889d935509SArd Biesheuvel         0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
1899d935509SArd Biesheuvel         0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
1909d935509SArd Biesheuvel         0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
1919d935509SArd Biesheuvel         0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
1929d935509SArd Biesheuvel         0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
1939d935509SArd Biesheuvel         0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
1949d935509SArd Biesheuvel         0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
1959d935509SArd Biesheuvel         0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
1969d935509SArd Biesheuvel         0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
1979d935509SArd Biesheuvel         0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
1989d935509SArd Biesheuvel         0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
1999d935509SArd Biesheuvel         0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
2009d935509SArd Biesheuvel         0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
2019d935509SArd Biesheuvel         0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
2029d935509SArd Biesheuvel         0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
2039d935509SArd Biesheuvel         0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
2049d935509SArd Biesheuvel         0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
2059d935509SArd Biesheuvel         0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
2069d935509SArd Biesheuvel         0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
2079d935509SArd Biesheuvel         0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
2089d935509SArd Biesheuvel         0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
2099d935509SArd Biesheuvel         0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
2109d935509SArd Biesheuvel         0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
2119d935509SArd Biesheuvel         0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
2129d935509SArd Biesheuvel         0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
2139d935509SArd Biesheuvel         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
2149d935509SArd Biesheuvel         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
2159d935509SArd Biesheuvel     } };
2161a66ac61SRichard Henderson 
2171a66ac61SRichard Henderson     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
2189d935509SArd Biesheuvel     int i;
2199d935509SArd Biesheuvel 
2209d935509SArd Biesheuvel     for (i = 0; i < 16; i += 4) {
221b449ca3cSArd Biesheuvel         CR_ST_WORD(st, i >> 2) =
222b449ca3cSArd Biesheuvel             mc[decrypt][CR_ST_BYTE(st, i)] ^
223b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
224b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
225b449ca3cSArd Biesheuvel             rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
2269d935509SArd Biesheuvel     }
2279d935509SArd Biesheuvel 
2281a66ac61SRichard Henderson     rd[0] = st.l[0];
2291a66ac61SRichard Henderson     rd[1] = st.l[1];
2309d935509SArd Biesheuvel }
231f1ecb913SArd Biesheuvel 
232a04b68e1SRichard Henderson void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
233a04b68e1SRichard Henderson {
234a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
235a04b68e1SRichard Henderson     bool decrypt = simd_data(desc);
236a04b68e1SRichard Henderson 
237a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
238a04b68e1SRichard Henderson         do_crypto_aesmc(vd + i, vm + i, decrypt);
239a04b68e1SRichard Henderson     }
240a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
241a04b68e1SRichard Henderson }
242a04b68e1SRichard Henderson 
/*
 * SHA-1 logical functions (cho and maj are also used by the SHA-256
 * helpers further down)
 */
246f1ecb913SArd Biesheuvel 
/* Bitwise choose: each result bit is taken from y where x is 1, else from z. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t from_y = x & y;
    const uint32_t from_z = ~x & z;

    return from_y | from_z;
}
251f1ecb913SArd Biesheuvel 
/* Bitwise parity: exclusive-or of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
256f1ecb913SArd Biesheuvel 
/* Bitwise majority: each result bit is 1 when at least two inputs are 1. */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t xy = x & y;
    const uint32_t xz = x & z;
    const uint32_t yz = y & z;

    return xy ^ xz ^ yz;
}
261f1ecb913SArd Biesheuvel 
262afc8b7d3SRichard Henderson void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
263f1ecb913SArd Biesheuvel {
264afc8b7d3SRichard Henderson     uint64_t *d = vd, *n = vn, *m = vm;
265afc8b7d3SRichard Henderson     uint64_t d0, d1;
266afc8b7d3SRichard Henderson 
267afc8b7d3SRichard Henderson     d0 = d[1] ^ d[0] ^ m[0];
268afc8b7d3SRichard Henderson     d1 = n[0] ^ d[1] ^ m[1];
269afc8b7d3SRichard Henderson     d[0] = d0;
270afc8b7d3SRichard Henderson     d[1] = d1;
271afc8b7d3SRichard Henderson 
272afc8b7d3SRichard Henderson     clear_tail_16(vd, desc);
273afc8b7d3SRichard Henderson }
274afc8b7d3SRichard Henderson 
275afc8b7d3SRichard Henderson static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
276afc8b7d3SRichard Henderson                                     uint64_t *rm, uint32_t desc,
277afc8b7d3SRichard Henderson                                     uint32_t (*fn)(union CRYPTO_STATE *d))
278afc8b7d3SRichard Henderson {
2791a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
2801a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
2811a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
282f1ecb913SArd Biesheuvel     int i;
283f1ecb913SArd Biesheuvel 
284f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
285afc8b7d3SRichard Henderson         uint32_t t = fn(&d);
286f1ecb913SArd Biesheuvel 
287b449ca3cSArd Biesheuvel         t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
288b449ca3cSArd Biesheuvel              + CR_ST_WORD(m, i);
289f1ecb913SArd Biesheuvel 
290b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
291b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
292b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
293b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
294b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = t;
295f1ecb913SArd Biesheuvel     }
2961a66ac61SRichard Henderson     rd[0] = d.l[0];
2971a66ac61SRichard Henderson     rd[1] = d.l[1];
298afc8b7d3SRichard Henderson 
299afc8b7d3SRichard Henderson     clear_tail_16(rd, desc);
300afc8b7d3SRichard Henderson }
301afc8b7d3SRichard Henderson 
302afc8b7d3SRichard Henderson static uint32_t do_sha1c(union CRYPTO_STATE *d)
303afc8b7d3SRichard Henderson {
304afc8b7d3SRichard Henderson     return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
305afc8b7d3SRichard Henderson }
306afc8b7d3SRichard Henderson 
307afc8b7d3SRichard Henderson void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
308afc8b7d3SRichard Henderson {
309afc8b7d3SRichard Henderson     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
310afc8b7d3SRichard Henderson }
311afc8b7d3SRichard Henderson 
312afc8b7d3SRichard Henderson static uint32_t do_sha1p(union CRYPTO_STATE *d)
313afc8b7d3SRichard Henderson {
314afc8b7d3SRichard Henderson     return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
315afc8b7d3SRichard Henderson }
316afc8b7d3SRichard Henderson 
317afc8b7d3SRichard Henderson void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
318afc8b7d3SRichard Henderson {
319afc8b7d3SRichard Henderson     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
320afc8b7d3SRichard Henderson }
321afc8b7d3SRichard Henderson 
322afc8b7d3SRichard Henderson static uint32_t do_sha1m(union CRYPTO_STATE *d)
323afc8b7d3SRichard Henderson {
324afc8b7d3SRichard Henderson     return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
325afc8b7d3SRichard Henderson }
326afc8b7d3SRichard Henderson 
327afc8b7d3SRichard Henderson void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
328afc8b7d3SRichard Henderson {
329afc8b7d3SRichard Henderson     crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
330f1ecb913SArd Biesheuvel }
331f1ecb913SArd Biesheuvel 
332effa992fSRichard Henderson void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
333f1ecb913SArd Biesheuvel {
3341a66ac61SRichard Henderson     uint64_t *rd = vd;
3351a66ac61SRichard Henderson     uint64_t *rm = vm;
3361a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
337f1ecb913SArd Biesheuvel 
338b449ca3cSArd Biesheuvel     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
339b449ca3cSArd Biesheuvel     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
340f1ecb913SArd Biesheuvel 
3411a66ac61SRichard Henderson     rd[0] = m.l[0];
3421a66ac61SRichard Henderson     rd[1] = m.l[1];
343effa992fSRichard Henderson 
344effa992fSRichard Henderson     clear_tail_16(vd, desc);
345f1ecb913SArd Biesheuvel }
346f1ecb913SArd Biesheuvel 
347effa992fSRichard Henderson void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
348f1ecb913SArd Biesheuvel {
3491a66ac61SRichard Henderson     uint64_t *rd = vd;
3501a66ac61SRichard Henderson     uint64_t *rm = vm;
3511a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3521a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
353f1ecb913SArd Biesheuvel 
354b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
355b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
356b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
357b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
358f1ecb913SArd Biesheuvel 
3591a66ac61SRichard Henderson     rd[0] = d.l[0];
3601a66ac61SRichard Henderson     rd[1] = d.l[1];
361effa992fSRichard Henderson 
362effa992fSRichard Henderson     clear_tail_16(vd, desc);
363f1ecb913SArd Biesheuvel }
364f1ecb913SArd Biesheuvel 
365f1ecb913SArd Biesheuvel /*
366f1ecb913SArd Biesheuvel  * The SHA-256 logical functions, according to
367f1ecb913SArd Biesheuvel  * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
368f1ecb913SArd Biesheuvel  */
369f1ecb913SArd Biesheuvel 
/* Exclusive-or of x rotated right by 2, 13 and 22 bits. */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
374f1ecb913SArd Biesheuvel 
/* Exclusive-or of x rotated right by 6, 11 and 25 bits. */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
379f1ecb913SArd Biesheuvel 
/* Exclusive-or of x rotated right by 7 and 18 bits and x shifted right by 3. */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = ror32(x, 7);

    acc ^= ror32(x, 18);
    acc ^= x >> 3;
    return acc;
}
384f1ecb913SArd Biesheuvel 
/* Exclusive-or of x rotated right by 17 and 19 bits and x shifted right by 10. */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = ror32(x, 17);

    acc ^= ror32(x, 19);
    acc ^= x >> 10;
    return acc;
}
389f1ecb913SArd Biesheuvel 
390effa992fSRichard Henderson void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
391f1ecb913SArd Biesheuvel {
3921a66ac61SRichard Henderson     uint64_t *rd = vd;
3931a66ac61SRichard Henderson     uint64_t *rn = vn;
3941a66ac61SRichard Henderson     uint64_t *rm = vm;
3951a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
3961a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
3971a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
398f1ecb913SArd Biesheuvel     int i;
399f1ecb913SArd Biesheuvel 
400f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
401b449ca3cSArd Biesheuvel         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
402b449ca3cSArd Biesheuvel                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
403b449ca3cSArd Biesheuvel                      + CR_ST_WORD(m, i);
404f1ecb913SArd Biesheuvel 
405b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
406b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
407b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
408b449ca3cSArd Biesheuvel         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
409f1ecb913SArd Biesheuvel 
410b449ca3cSArd Biesheuvel         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
411b449ca3cSArd Biesheuvel              + S0(CR_ST_WORD(d, 0));
412f1ecb913SArd Biesheuvel 
413b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
414b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
415b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
416b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = t;
417f1ecb913SArd Biesheuvel     }
418f1ecb913SArd Biesheuvel 
4191a66ac61SRichard Henderson     rd[0] = d.l[0];
4201a66ac61SRichard Henderson     rd[1] = d.l[1];
421effa992fSRichard Henderson 
422effa992fSRichard Henderson     clear_tail_16(vd, desc);
423f1ecb913SArd Biesheuvel }
424f1ecb913SArd Biesheuvel 
425effa992fSRichard Henderson void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
426f1ecb913SArd Biesheuvel {
4271a66ac61SRichard Henderson     uint64_t *rd = vd;
4281a66ac61SRichard Henderson     uint64_t *rn = vn;
4291a66ac61SRichard Henderson     uint64_t *rm = vm;
4301a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4311a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
4321a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
433f1ecb913SArd Biesheuvel     int i;
434f1ecb913SArd Biesheuvel 
435f1ecb913SArd Biesheuvel     for (i = 0; i < 4; i++) {
436b449ca3cSArd Biesheuvel         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
437b449ca3cSArd Biesheuvel                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
438b449ca3cSArd Biesheuvel                      + CR_ST_WORD(m, i);
439f1ecb913SArd Biesheuvel 
440b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
441b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
442b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
443b449ca3cSArd Biesheuvel         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
444f1ecb913SArd Biesheuvel     }
445f1ecb913SArd Biesheuvel 
4461a66ac61SRichard Henderson     rd[0] = d.l[0];
4471a66ac61SRichard Henderson     rd[1] = d.l[1];
448effa992fSRichard Henderson 
449effa992fSRichard Henderson     clear_tail_16(vd, desc);
450f1ecb913SArd Biesheuvel }
451f1ecb913SArd Biesheuvel 
452effa992fSRichard Henderson void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
453f1ecb913SArd Biesheuvel {
4541a66ac61SRichard Henderson     uint64_t *rd = vd;
4551a66ac61SRichard Henderson     uint64_t *rm = vm;
4561a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4571a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
458f1ecb913SArd Biesheuvel 
459b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
460b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
461b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
462b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
463f1ecb913SArd Biesheuvel 
4641a66ac61SRichard Henderson     rd[0] = d.l[0];
4651a66ac61SRichard Henderson     rd[1] = d.l[1];
466effa992fSRichard Henderson 
467effa992fSRichard Henderson     clear_tail_16(vd, desc);
468f1ecb913SArd Biesheuvel }
469f1ecb913SArd Biesheuvel 
470effa992fSRichard Henderson void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
471f1ecb913SArd Biesheuvel {
4721a66ac61SRichard Henderson     uint64_t *rd = vd;
4731a66ac61SRichard Henderson     uint64_t *rn = vn;
4741a66ac61SRichard Henderson     uint64_t *rm = vm;
4751a66ac61SRichard Henderson     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
4761a66ac61SRichard Henderson     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
4771a66ac61SRichard Henderson     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
478f1ecb913SArd Biesheuvel 
479b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
480b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
481b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
482b449ca3cSArd Biesheuvel     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
483f1ecb913SArd Biesheuvel 
4841a66ac61SRichard Henderson     rd[0] = d.l[0];
4851a66ac61SRichard Henderson     rd[1] = d.l[1];
486effa992fSRichard Henderson 
487effa992fSRichard Henderson     clear_tail_16(vd, desc);
488f1ecb913SArd Biesheuvel }
48990b827d1SArd Biesheuvel 
49090b827d1SArd Biesheuvel /*
49190b827d1SArd Biesheuvel  * The SHA-512 logical functions (same as above but using 64-bit operands)
49290b827d1SArd Biesheuvel  */
49390b827d1SArd Biesheuvel 
/* 64-bit bitwise choose: each result bit comes from y where x is 1, else from z. */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t from_y = x & y;
    const uint64_t from_z = ~x & z;

    return from_y | from_z;
}
49890b827d1SArd Biesheuvel 
/* 64-bit bitwise majority: each result bit is 1 when at least two inputs are 1. */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    const uint64_t xy = x & y;
    const uint64_t xz = x & z;
    const uint64_t yz = y & z;

    return xy ^ xz ^ yz;
}
50390b827d1SArd Biesheuvel 
/* Exclusive-or of x rotated right by 28, 34 and 39 bits. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
50890b827d1SArd Biesheuvel 
/* Exclusive-or of x rotated right by 14, 18 and 41 bits. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
51390b827d1SArd Biesheuvel 
/* Exclusive-or of x rotated right by 1 and 8 bits and x shifted right by 7. */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 1);

    acc ^= ror64(x, 8);
    acc ^= x >> 7;
    return acc;
}
51890b827d1SArd Biesheuvel 
/* Exclusive-or of x rotated right by 19 and 61 bits and x shifted right by 6. */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 19);

    acc ^= ror64(x, 61);
    acc ^= x >> 6;
    return acc;
}
52390b827d1SArd Biesheuvel 
524aaffebd6SRichard Henderson void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
52590b827d1SArd Biesheuvel {
52690b827d1SArd Biesheuvel     uint64_t *rd = vd;
52790b827d1SArd Biesheuvel     uint64_t *rn = vn;
52890b827d1SArd Biesheuvel     uint64_t *rm = vm;
52990b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
53090b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
53190b827d1SArd Biesheuvel 
53290b827d1SArd Biesheuvel     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
53390b827d1SArd Biesheuvel     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
53490b827d1SArd Biesheuvel 
53590b827d1SArd Biesheuvel     rd[0] = d0;
53690b827d1SArd Biesheuvel     rd[1] = d1;
537aaffebd6SRichard Henderson 
538aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
53990b827d1SArd Biesheuvel }
54090b827d1SArd Biesheuvel 
541aaffebd6SRichard Henderson void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
54290b827d1SArd Biesheuvel {
54390b827d1SArd Biesheuvel     uint64_t *rd = vd;
54490b827d1SArd Biesheuvel     uint64_t *rn = vn;
54590b827d1SArd Biesheuvel     uint64_t *rm = vm;
54690b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
54790b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
54890b827d1SArd Biesheuvel 
54990b827d1SArd Biesheuvel     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
55090b827d1SArd Biesheuvel     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
55190b827d1SArd Biesheuvel 
55290b827d1SArd Biesheuvel     rd[0] = d0;
55390b827d1SArd Biesheuvel     rd[1] = d1;
554aaffebd6SRichard Henderson 
555aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
55690b827d1SArd Biesheuvel }
55790b827d1SArd Biesheuvel 
558aaffebd6SRichard Henderson void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
55990b827d1SArd Biesheuvel {
56090b827d1SArd Biesheuvel     uint64_t *rd = vd;
56190b827d1SArd Biesheuvel     uint64_t *rn = vn;
56290b827d1SArd Biesheuvel     uint64_t d0 = rd[0];
56390b827d1SArd Biesheuvel     uint64_t d1 = rd[1];
56490b827d1SArd Biesheuvel 
56590b827d1SArd Biesheuvel     d0 += s0_512(rd[1]);
56690b827d1SArd Biesheuvel     d1 += s0_512(rn[0]);
56790b827d1SArd Biesheuvel 
56890b827d1SArd Biesheuvel     rd[0] = d0;
56990b827d1SArd Biesheuvel     rd[1] = d1;
570aaffebd6SRichard Henderson 
571aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
57290b827d1SArd Biesheuvel }
57390b827d1SArd Biesheuvel 
574aaffebd6SRichard Henderson void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
57590b827d1SArd Biesheuvel {
57690b827d1SArd Biesheuvel     uint64_t *rd = vd;
57790b827d1SArd Biesheuvel     uint64_t *rn = vn;
57890b827d1SArd Biesheuvel     uint64_t *rm = vm;
57990b827d1SArd Biesheuvel 
58090b827d1SArd Biesheuvel     rd[0] += s1_512(rn[0]) + rm[0];
58190b827d1SArd Biesheuvel     rd[1] += s1_512(rn[1]) + rm[1];
582aaffebd6SRichard Henderson 
583aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
58490b827d1SArd Biesheuvel }
58580d6f4c6SArd Biesheuvel 
586aaffebd6SRichard Henderson void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
58780d6f4c6SArd Biesheuvel {
58880d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
58980d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
59080d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
59180d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
59280d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
59380d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
59480d6f4c6SArd Biesheuvel     uint32_t t;
59580d6f4c6SArd Biesheuvel 
59680d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
59780d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
59880d6f4c6SArd Biesheuvel 
59980d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
60080d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
60180d6f4c6SArd Biesheuvel 
60280d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
60380d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
60480d6f4c6SArd Biesheuvel 
60580d6f4c6SArd Biesheuvel     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
60680d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
60780d6f4c6SArd Biesheuvel 
60880d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
60980d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
610aaffebd6SRichard Henderson 
611aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
61280d6f4c6SArd Biesheuvel }
61380d6f4c6SArd Biesheuvel 
614aaffebd6SRichard Henderson void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
61580d6f4c6SArd Biesheuvel {
61680d6f4c6SArd Biesheuvel     uint64_t *rd = vd;
61780d6f4c6SArd Biesheuvel     uint64_t *rn = vn;
61880d6f4c6SArd Biesheuvel     uint64_t *rm = vm;
61980d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
62080d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
62180d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
62280d6f4c6SArd Biesheuvel     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
62380d6f4c6SArd Biesheuvel 
62480d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) ^= t;
62580d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
62680d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
62780d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
62880d6f4c6SArd Biesheuvel                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
62980d6f4c6SArd Biesheuvel 
63080d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
63180d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
632aaffebd6SRichard Henderson 
633aaffebd6SRichard Henderson     clear_tail_16(vd, desc);
63480d6f4c6SArd Biesheuvel }
63580d6f4c6SArd Biesheuvel 
63643fa36c9SRichard Henderson static inline void QEMU_ALWAYS_INLINE
63743fa36c9SRichard Henderson crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
63843fa36c9SRichard Henderson              uint32_t desc, uint32_t opcode)
63980d6f4c6SArd Biesheuvel {
64080d6f4c6SArd Biesheuvel     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
64180d6f4c6SArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
64280d6f4c6SArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
64343fa36c9SRichard Henderson     uint32_t imm2 = simd_data(desc);
64480d6f4c6SArd Biesheuvel     uint32_t t;
64580d6f4c6SArd Biesheuvel 
64680d6f4c6SArd Biesheuvel     assert(imm2 < 4);
64780d6f4c6SArd Biesheuvel 
64880d6f4c6SArd Biesheuvel     if (opcode == 0 || opcode == 2) {
64980d6f4c6SArd Biesheuvel         /* SM3TT1A, SM3TT2A */
65080d6f4c6SArd Biesheuvel         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
65180d6f4c6SArd Biesheuvel     } else if (opcode == 1) {
65280d6f4c6SArd Biesheuvel         /* SM3TT1B */
65380d6f4c6SArd Biesheuvel         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
65480d6f4c6SArd Biesheuvel     } else if (opcode == 3) {
65580d6f4c6SArd Biesheuvel         /* SM3TT2B */
65680d6f4c6SArd Biesheuvel         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
65780d6f4c6SArd Biesheuvel     } else {
65843fa36c9SRichard Henderson         qemu_build_not_reached();
65980d6f4c6SArd Biesheuvel     }
66080d6f4c6SArd Biesheuvel 
66180d6f4c6SArd Biesheuvel     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
66280d6f4c6SArd Biesheuvel 
66380d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
66480d6f4c6SArd Biesheuvel 
66580d6f4c6SArd Biesheuvel     if (opcode < 2) {
66680d6f4c6SArd Biesheuvel         /* SM3TT1A, SM3TT1B */
66780d6f4c6SArd Biesheuvel         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
66880d6f4c6SArd Biesheuvel 
66980d6f4c6SArd Biesheuvel         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
67080d6f4c6SArd Biesheuvel     } else {
67180d6f4c6SArd Biesheuvel         /* SM3TT2A, SM3TT2B */
67280d6f4c6SArd Biesheuvel         t += CR_ST_WORD(n, 3);
67380d6f4c6SArd Biesheuvel         t ^= rol32(t, 9) ^ rol32(t, 17);
67480d6f4c6SArd Biesheuvel 
67580d6f4c6SArd Biesheuvel         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
67680d6f4c6SArd Biesheuvel     }
67780d6f4c6SArd Biesheuvel 
67880d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
67980d6f4c6SArd Biesheuvel     CR_ST_WORD(d, 3) = t;
68080d6f4c6SArd Biesheuvel 
68180d6f4c6SArd Biesheuvel     rd[0] = d.l[0];
68280d6f4c6SArd Biesheuvel     rd[1] = d.l[1];
68343fa36c9SRichard Henderson 
68443fa36c9SRichard Henderson     clear_tail_16(rd, desc);
68580d6f4c6SArd Biesheuvel }
686b6577bcdSArd Biesheuvel 
68743fa36c9SRichard Henderson #define DO_SM3TT(NAME, OPCODE) \
68843fa36c9SRichard Henderson     void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
68943fa36c9SRichard Henderson     { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
69043fa36c9SRichard Henderson 
69143fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1a, 0)
69243fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt1b, 1)
69343fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2a, 2)
69443fa36c9SRichard Henderson DO_SM3TT(crypto_sm3tt2b, 3)
69543fa36c9SRichard Henderson 
69643fa36c9SRichard Henderson #undef DO_SM3TT
69743fa36c9SRichard Henderson 
698a04b68e1SRichard Henderson static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
699b6577bcdSArd Biesheuvel {
700a04b68e1SRichard Henderson     union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
701a04b68e1SRichard Henderson     union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
702b6577bcdSArd Biesheuvel     uint32_t t, i;
703b6577bcdSArd Biesheuvel 
704b6577bcdSArd Biesheuvel     for (i = 0; i < 4; i++) {
705b6577bcdSArd Biesheuvel         t = CR_ST_WORD(d, (i + 1) % 4) ^
706b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 2) % 4) ^
707b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 3) % 4) ^
708b6577bcdSArd Biesheuvel             CR_ST_WORD(n, i);
709b6577bcdSArd Biesheuvel 
710b6577bcdSArd Biesheuvel         t = sm4_sbox[t & 0xff] |
711b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 8) & 0xff] << 8 |
712b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 16) & 0xff] << 16 |
713b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 24) & 0xff] << 24;
714b6577bcdSArd Biesheuvel 
715b6577bcdSArd Biesheuvel         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
716b6577bcdSArd Biesheuvel                             rol32(t, 24);
717b6577bcdSArd Biesheuvel     }
718b6577bcdSArd Biesheuvel 
719b6577bcdSArd Biesheuvel     rd[0] = d.l[0];
720b6577bcdSArd Biesheuvel     rd[1] = d.l[1];
721b6577bcdSArd Biesheuvel }
722b6577bcdSArd Biesheuvel 
723a04b68e1SRichard Henderson void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
724b6577bcdSArd Biesheuvel {
725a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
726a04b68e1SRichard Henderson 
727a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
728a04b68e1SRichard Henderson         do_crypto_sm4e(vd + i, vn + i, vm + i);
729a04b68e1SRichard Henderson     }
730a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
731a04b68e1SRichard Henderson }
732a04b68e1SRichard Henderson 
733a04b68e1SRichard Henderson static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
734a04b68e1SRichard Henderson {
735b6577bcdSArd Biesheuvel     union CRYPTO_STATE d;
736b6577bcdSArd Biesheuvel     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
737b6577bcdSArd Biesheuvel     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
738b6577bcdSArd Biesheuvel     uint32_t t, i;
739b6577bcdSArd Biesheuvel 
740b6577bcdSArd Biesheuvel     d = n;
741b6577bcdSArd Biesheuvel     for (i = 0; i < 4; i++) {
742b6577bcdSArd Biesheuvel         t = CR_ST_WORD(d, (i + 1) % 4) ^
743b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 2) % 4) ^
744b6577bcdSArd Biesheuvel             CR_ST_WORD(d, (i + 3) % 4) ^
745b6577bcdSArd Biesheuvel             CR_ST_WORD(m, i);
746b6577bcdSArd Biesheuvel 
747b6577bcdSArd Biesheuvel         t = sm4_sbox[t & 0xff] |
748b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 8) & 0xff] << 8 |
749b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 16) & 0xff] << 16 |
750b6577bcdSArd Biesheuvel             sm4_sbox[(t >> 24) & 0xff] << 24;
751b6577bcdSArd Biesheuvel 
752b6577bcdSArd Biesheuvel         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
753b6577bcdSArd Biesheuvel     }
754b6577bcdSArd Biesheuvel 
755b6577bcdSArd Biesheuvel     rd[0] = d.l[0];
756b6577bcdSArd Biesheuvel     rd[1] = d.l[1];
757b6577bcdSArd Biesheuvel }
758a04b68e1SRichard Henderson 
759a04b68e1SRichard Henderson void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
760a04b68e1SRichard Henderson {
761a04b68e1SRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
762a04b68e1SRichard Henderson 
763a04b68e1SRichard Henderson     for (i = 0; i < opr_sz; i += 16) {
764a04b68e1SRichard Henderson         do_crypto_sm4ekey(vd + i, vn + i, vm + i);
765a04b68e1SRichard Henderson     }
766a04b68e1SRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
767a04b68e1SRichard Henderson }
7681738860dSRichard Henderson 
7691738860dSRichard Henderson void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
7701738860dSRichard Henderson {
7711738860dSRichard Henderson     intptr_t i, opr_sz = simd_oprsz(desc);
7721738860dSRichard Henderson     uint64_t *d = vd, *n = vn, *m = vm;
7731738860dSRichard Henderson 
7741738860dSRichard Henderson     for (i = 0; i < opr_sz / 8; ++i) {
7751738860dSRichard Henderson         d[i] = n[i] ^ rol64(m[i], 1);
7761738860dSRichard Henderson     }
7771738860dSRichard Henderson     clear_tail(vd, opr_sz, simd_maxsz(desc));
7781738860dSRichard Henderson }
779